diff options
author | Denys Vlasenko | 2017-07-05 19:10:21 +0200 |
---|---|---|
committer | Denys Vlasenko | 2017-07-05 19:10:21 +0200 |
commit | fda9fafe279d9394ad53313320a949c86f646734 (patch) | |
tree | 3552e44de84460a8d2526f5b9703e70dea1a6259 /shell | |
parent | 6798486141057f7989c0e59d5f645aba87a58f62 (diff) | |
download | busybox-fda9fafe279d9394ad53313320a949c86f646734.zip busybox-fda9fafe279d9394ad53313320a949c86f646734.tar.gz |
ash: fix matching of the Unicode Greek letter rho (UTF-8 bytes cf 81) and similar cases
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'shell')
-rw-r--r-- | shell/ash.c | 23 | ||||
-rw-r--r-- | shell/ash_test/ash-quoting/unicode_8x_chars.right | 6 | ||||
-rwxr-xr-x | shell/ash_test/ash-quoting/unicode_8x_chars.tests | 28 | ||||
-rw-r--r-- | shell/hush_test/hush-quoting/unicode_8x_chars.right | 6 | ||||
-rwxr-xr-x | shell/hush_test/hush-quoting/unicode_8x_chars.tests | 28 |
5 files changed, 90 insertions, 1 deletions
diff --git a/shell/ash.c b/shell/ash.c index 6d46e37..e5fdd16 100644 --- a/shell/ash.c +++ b/shell/ash.c @@ -5913,6 +5913,7 @@ rmescapes(char *str, int flag) while (*p) { if ((unsigned char)*p == CTLQUOTEMARK) { // Note: both inquotes and protect_against_glob only affect whether +// CTLESC,<ch> gets converted to <ch> or to \<ch> inquotes = ~inquotes; p++; protect_against_glob = globbing; @@ -5925,7 +5926,27 @@ rmescapes(char *str, int flag) ash_msg_and_raise_error("CTLESC at EOL (shouldn't happen)"); #endif if (protect_against_glob) { - *q++ = '\\'; + /* + * We used to trust glob() and fnmatch() to eat + * superfluous escapes (\z where z has no + * special meaning anyway). But this causes + * bugs such as string of one greek letter rho + * (unicode-encoded as two bytes 'cf,81") + * getting encoded as "cf,CTLESC,81" + * and here, converted to "cf,\,81" - + * which does not go well with some flavors + * of fnmatch() in unicode locales. + * + * Lets add "\" only on the chars which need it. + */ + if (*p == '*' + || *p == '?' + || *p == '[' + /* || *p == ']' maybe also this? 
*/ + || *p == '\\' + ) { + *q++ = '\\'; + } } } else if (*p == '\\' && !inquotes) { /* naked back slash */ diff --git a/shell/ash_test/ash-quoting/unicode_8x_chars.right b/shell/ash_test/ash-quoting/unicode_8x_chars.right new file mode 100644 index 0000000..7780b88 --- /dev/null +++ b/shell/ash_test/ash-quoting/unicode_8x_chars.right @@ -0,0 +1,6 @@ +ok +ok +ok +ok +ok +ok diff --git a/shell/ash_test/ash-quoting/unicode_8x_chars.tests b/shell/ash_test/ash-quoting/unicode_8x_chars.tests new file mode 100755 index 0000000..1258745 --- /dev/null +++ b/shell/ash_test/ash-quoting/unicode_8x_chars.tests @@ -0,0 +1,28 @@ +# Unicode: cf 80 +case π in +( "π" ) echo ok ;; +( * ) echo WRONG ;; +esac +# Unicode: cf 81 +case ρ in +( "ρ" ) echo ok ;; +( * ) echo WRONG ;; +esac +# Unicode: cf 82 +case ς in +( "ς" ) echo ok ;; +( * ) echo WRONG ;; +esac + +case "π" in +( π ) echo ok ;; +( * ) echo WRONG ;; +esac +case "ρ" in +( ρ ) echo ok ;; +( * ) echo WRONG ;; +esac +case "ς" in +( ς ) echo ok ;; +( * ) echo WRONG ;; +esac diff --git a/shell/hush_test/hush-quoting/unicode_8x_chars.right b/shell/hush_test/hush-quoting/unicode_8x_chars.right new file mode 100644 index 0000000..7780b88 --- /dev/null +++ b/shell/hush_test/hush-quoting/unicode_8x_chars.right @@ -0,0 +1,6 @@ +ok +ok +ok +ok +ok +ok diff --git a/shell/hush_test/hush-quoting/unicode_8x_chars.tests b/shell/hush_test/hush-quoting/unicode_8x_chars.tests new file mode 100755 index 0000000..1258745 --- /dev/null +++ b/shell/hush_test/hush-quoting/unicode_8x_chars.tests @@ -0,0 +1,28 @@ +# Unicode: cf 80 +case π in +( "π" ) echo ok ;; +( * ) echo WRONG ;; +esac +# Unicode: cf 81 +case ρ in +( "ρ" ) echo ok ;; +( * ) echo WRONG ;; +esac +# Unicode: cf 82 +case ς in +( "ς" ) echo ok ;; +( * ) echo WRONG ;; +esac + +case "π" in +( π ) echo ok ;; +( * ) echo WRONG ;; +esac +case "ρ" in +( ρ ) echo ok ;; +( * ) echo WRONG ;; +esac +case "ς" in +( ς ) echo ok ;; +( * ) echo WRONG ;; +esac |