diff options
-rw-r--r-- | shell/ash.c | 23 | ||||
-rw-r--r-- | shell/ash_test/ash-quoting/unicode_8x_chars.right | 6 | ||||
-rwxr-xr-x | shell/ash_test/ash-quoting/unicode_8x_chars.tests | 28 | ||||
-rw-r--r-- | shell/hush_test/hush-quoting/unicode_8x_chars.right | 6 | ||||
-rwxr-xr-x | shell/hush_test/hush-quoting/unicode_8x_chars.tests | 28 |
5 files changed, 90 insertions, 1 deletion
diff --git a/shell/ash.c b/shell/ash.c index 6d46e37..e5fdd16 100644 --- a/shell/ash.c +++ b/shell/ash.c @@ -5913,6 +5913,7 @@ rmescapes(char *str, int flag) while (*p) { if ((unsigned char)*p == CTLQUOTEMARK) { // Note: both inquotes and protect_against_glob only affect whether +// CTLESC,<ch> gets converted to <ch> or to \<ch> inquotes = ~inquotes; p++; protect_against_glob = globbing; @@ -5925,7 +5926,27 @@ rmescapes(char *str, int flag) ash_msg_and_raise_error("CTLESC at EOL (shouldn't happen)"); #endif if (protect_against_glob) { - *q++ = '\\'; + /* + * We used to trust glob() and fnmatch() to eat + * superfluous escapes (\z where z has no + * special meaning anyway). But this causes + * bugs such as string of one greek letter rho + * (unicode-encoded as two bytes "cf,81") + * getting encoded as "cf,CTLESC,81" + * and here, converted to "cf,\,81" - + * which does not go well with some flavors + * of fnmatch() in unicode locales. + * + * Let's add "\" only on the chars which need it. + */ + if (*p == '*' + || *p == '?' + || *p == '[' + /* || *p == ']' maybe also this? 
*/ + || *p == '\\' + ) { + *q++ = '\\'; + } } } else if (*p == '\\' && !inquotes) { /* naked back slash */ diff --git a/shell/ash_test/ash-quoting/unicode_8x_chars.right b/shell/ash_test/ash-quoting/unicode_8x_chars.right new file mode 100644 index 0000000..7780b88 --- /dev/null +++ b/shell/ash_test/ash-quoting/unicode_8x_chars.right @@ -0,0 +1,6 @@ +ok +ok +ok +ok +ok +ok diff --git a/shell/ash_test/ash-quoting/unicode_8x_chars.tests b/shell/ash_test/ash-quoting/unicode_8x_chars.tests new file mode 100755 index 0000000..1258745 --- /dev/null +++ b/shell/ash_test/ash-quoting/unicode_8x_chars.tests @@ -0,0 +1,28 @@ +# Unicode: cf 80 +case π in +( "π" ) echo ok ;; +( * ) echo WRONG ;; +esac +# Unicode: cf 81 +case ρ in +( "ρ" ) echo ok ;; +( * ) echo WRONG ;; +esac +# Unicode: cf 82 +case ς in +( "ς" ) echo ok ;; +( * ) echo WRONG ;; +esac + +case "π" in +( π ) echo ok ;; +( * ) echo WRONG ;; +esac +case "ρ" in +( ρ ) echo ok ;; +( * ) echo WRONG ;; +esac +case "ς" in +( ς ) echo ok ;; +( * ) echo WRONG ;; +esac diff --git a/shell/hush_test/hush-quoting/unicode_8x_chars.right b/shell/hush_test/hush-quoting/unicode_8x_chars.right new file mode 100644 index 0000000..7780b88 --- /dev/null +++ b/shell/hush_test/hush-quoting/unicode_8x_chars.right @@ -0,0 +1,6 @@ +ok +ok +ok +ok +ok +ok diff --git a/shell/hush_test/hush-quoting/unicode_8x_chars.tests b/shell/hush_test/hush-quoting/unicode_8x_chars.tests new file mode 100755 index 0000000..1258745 --- /dev/null +++ b/shell/hush_test/hush-quoting/unicode_8x_chars.tests @@ -0,0 +1,28 @@ +# Unicode: cf 80 +case π in +( "π" ) echo ok ;; +( * ) echo WRONG ;; +esac +# Unicode: cf 81 +case ρ in +( "ρ" ) echo ok ;; +( * ) echo WRONG ;; +esac +# Unicode: cf 82 +case ς in +( "ς" ) echo ok ;; +( * ) echo WRONG ;; +esac + +case "π" in +( π ) echo ok ;; +( * ) echo WRONG ;; +esac +case "ρ" in +( ρ ) echo ok ;; +( * ) echo WRONG ;; +esac +case "ς" in +( ς ) echo ok ;; +( * ) echo WRONG ;; +esac |