summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Denys Vlasenko 2017-07-05 19:10:21 +0200
committer Denys Vlasenko 2017-07-05 19:10:21 +0200
commit fda9fafe279d9394ad53313320a949c86f646734 (patch)
tree 3552e44de84460a8d2526f5b9703e70dea1a6259
parent 6798486141057f7989c0e59d5f645aba87a58f62 (diff)
download busybox-fda9fafe279d9394ad53313320a949c86f646734.zip
busybox-fda9fafe279d9394ad53313320a949c86f646734.tar.gz
ash: fix matching of unicode greek letter rho (cf 81) and similar cases
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- shell/ash.c 23
-rw-r--r-- shell/ash_test/ash-quoting/unicode_8x_chars.right 6
-rwxr-xr-x shell/ash_test/ash-quoting/unicode_8x_chars.tests 28
-rw-r--r-- shell/hush_test/hush-quoting/unicode_8x_chars.right 6
-rwxr-xr-x shell/hush_test/hush-quoting/unicode_8x_chars.tests 28
5 files changed, 90 insertions, 1 deletions
diff --git a/shell/ash.c b/shell/ash.c
index 6d46e37..e5fdd16 100644
--- a/shell/ash.c
+++ b/shell/ash.c
@@ -5913,6 +5913,7 @@ rmescapes(char *str, int flag)
while (*p) {
if ((unsigned char)*p == CTLQUOTEMARK) {
// Note: both inquotes and protect_against_glob only affect whether
+// CTLESC,<ch> gets converted to <ch> or to \<ch>
inquotes = ~inquotes;
p++;
protect_against_glob = globbing;
@@ -5925,7 +5926,27 @@ rmescapes(char *str, int flag)
ash_msg_and_raise_error("CTLESC at EOL (shouldn't happen)");
#endif
if (protect_against_glob) {
- *q++ = '\\';
+ /*
+ * We used to trust glob() and fnmatch() to eat
+ * superfluous escapes (\z where z has no
+ * special meaning anyway). But this causes
+ * bugs such as string of one greek letter rho
+ * (unicode-encoded as two bytes "cf,81")
+ * getting encoded as "cf,CTLESC,81"
+ * and here, converted to "cf,\,81" -
+ * which does not go well with some flavors
+ * of fnmatch() in unicode locales.
+ *
+ * Let's add "\" only on the chars which need it.
+ */
+ if (*p == '*'
+ || *p == '?'
+ || *p == '['
+ /* || *p == ']' maybe also this? */
+ || *p == '\\'
+ ) {
+ *q++ = '\\';
+ }
}
} else if (*p == '\\' && !inquotes) {
/* naked back slash */
diff --git a/shell/ash_test/ash-quoting/unicode_8x_chars.right b/shell/ash_test/ash-quoting/unicode_8x_chars.right
new file mode 100644
index 0000000..7780b88
--- /dev/null
+++ b/shell/ash_test/ash-quoting/unicode_8x_chars.right
@@ -0,0 +1,6 @@
+ok
+ok
+ok
+ok
+ok
+ok
diff --git a/shell/ash_test/ash-quoting/unicode_8x_chars.tests b/shell/ash_test/ash-quoting/unicode_8x_chars.tests
new file mode 100755
index 0000000..1258745
--- /dev/null
+++ b/shell/ash_test/ash-quoting/unicode_8x_chars.tests
@@ -0,0 +1,28 @@
+# Unicode: cf 80
+case π in
+( "π" ) echo ok ;;
+( * ) echo WRONG ;;
+esac
+# Unicode: cf 81
+case ρ in
+( "ρ" ) echo ok ;;
+( * ) echo WRONG ;;
+esac
+# Unicode: cf 82
+case ς in
+( "ς" ) echo ok ;;
+( * ) echo WRONG ;;
+esac
+
+case "π" in
+( π ) echo ok ;;
+( * ) echo WRONG ;;
+esac
+case "ρ" in
+( ρ ) echo ok ;;
+( * ) echo WRONG ;;
+esac
+case "ς" in
+( ς ) echo ok ;;
+( * ) echo WRONG ;;
+esac
diff --git a/shell/hush_test/hush-quoting/unicode_8x_chars.right b/shell/hush_test/hush-quoting/unicode_8x_chars.right
new file mode 100644
index 0000000..7780b88
--- /dev/null
+++ b/shell/hush_test/hush-quoting/unicode_8x_chars.right
@@ -0,0 +1,6 @@
+ok
+ok
+ok
+ok
+ok
+ok
diff --git a/shell/hush_test/hush-quoting/unicode_8x_chars.tests b/shell/hush_test/hush-quoting/unicode_8x_chars.tests
new file mode 100755
index 0000000..1258745
--- /dev/null
+++ b/shell/hush_test/hush-quoting/unicode_8x_chars.tests
@@ -0,0 +1,28 @@
+# Unicode: cf 80
+case π in
+( "π" ) echo ok ;;
+( * ) echo WRONG ;;
+esac
+# Unicode: cf 81
+case ρ in
+( "ρ" ) echo ok ;;
+( * ) echo WRONG ;;
+esac
+# Unicode: cf 82
+case ς in
+( "ς" ) echo ok ;;
+( * ) echo WRONG ;;
+esac
+
+case "π" in
+( π ) echo ok ;;
+( * ) echo WRONG ;;
+esac
+case "ρ" in
+( ρ ) echo ok ;;
+( * ) echo WRONG ;;
+esac
+case "ς" in
+( ς ) echo ok ;;
+( * ) echo WRONG ;;
+esac