summary refs log tree commit diff
diff options
context:
space:
mode:
author Denys Vlasenko 2022-01-23 18:48:49 +0100
committer Denys Vlasenko 2022-01-23 18:48:49 +0100
commit e998c7c032458a05a7afcc13ce0dc980b99ecc6c (patch)
tree d3e7e3bf11d99237339d37b9a4eca95e67d62e5f
parent 33a9f34df5c53d3dd074a2168ff40d612a36667a (diff)
download busybox-e998c7c032458a05a7afcc13ce0dc980b99ecc6c.zip
busybox-e998c7c032458a05a7afcc13ce0dc980b99ecc6c.tar.gz
sed: fix handling of escaped delimiters in s/// search pattern, closes 14541

function                                             old     new   delta
copy_parsing_escapes                                  67      96     +29
parse_regex_delim                                    109     111      +2
get_address                                          213     215      +2
add_cmd                                             1176    1178      +2
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 4/0 up/down: 35/0)               Total: 35 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- editors/sed.c 19
-rwxr-xr-x testsuite/sed.tests 10
2 files changed, 21 insertions, 8 deletions
diff --git a/editors/sed.c b/editors/sed.c
index 48b0dbf..02a527b 100644
--- a/editors/sed.c
+++ b/editors/sed.c
@@ -246,7 +246,6 @@ static void cleanup_outname(void)
}
/* strcpy, replacing "\from" with 'to'. If to is NUL, replacing "\any" with 'any' */
-
static unsigned parse_escapes(char *dest, const char *string, int len, char from, char to)
{
char *d = dest;
@@ -276,7 +275,7 @@ static unsigned parse_escapes(char *dest, const char *string, int len, char from
return d - dest;
}
-static char *copy_parsing_escapes(const char *string, int len)
+static char *copy_parsing_escapes(const char *string, int len, char delim)
{
const char *s;
char *dest = xmalloc(len + 1);
@@ -287,10 +286,15 @@ static char *copy_parsing_escapes(const char *string, int len)
len = parse_escapes(dest, string, len, s[1], s[0]);
string = dest;
}
+ if (delim) {
+ /* we additionally unescape any instances of escaped delimiter.
+ * For example, in 's+9\++X+' the pattern is "9+", not "9\+".
+ */
+ len = parse_escapes(dest, string, len, delim, delim);
+ }
return dest;
}
-
/*
* index_of_next_unescaped_regexp_delim - walks left to right through a string
* beginning at a specified index and returns the index of the next regular
@@ -347,12 +351,11 @@ static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
/* save the match string */
idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr);
- *match = copy_parsing_escapes(cmdstr_ptr, idx);
-
+ *match = copy_parsing_escapes(cmdstr_ptr, idx, delimiter);
/* save the replacement string */
cmdstr_ptr += idx + 1;
idx = index_of_next_unescaped_regexp_delim(- (int)delimiter, cmdstr_ptr);
- *replace = copy_parsing_escapes(cmdstr_ptr, idx);
+ *replace = copy_parsing_escapes(cmdstr_ptr, idx, 0);
return ((cmdstr_ptr - cmdstr) + idx);
}
@@ -380,7 +383,7 @@ static int get_address(const char *my_str, int *linenum, regex_t ** regex)
delimiter = *++pos;
next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
if (next != 0) {
- temp = copy_parsing_escapes(pos, next);
+ temp = copy_parsing_escapes(pos, next, 0);
G.previous_regex_ptr = *regex = xzalloc(sizeof(regex_t));
xregcomp(*regex, temp, G.regex_type);
free(temp);
@@ -575,7 +578,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
cmdstr++;
}
len = strlen(cmdstr);
- sed_cmd->string = copy_parsing_escapes(cmdstr, len);
+ sed_cmd->string = copy_parsing_escapes(cmdstr, len, 0);
cmdstr += len;
/* "\anychar" -> "anychar" */
parse_escapes(sed_cmd->string, sed_cmd->string, -1, '\0', '\0');
diff --git a/testsuite/sed.tests b/testsuite/sed.tests
index e62b839..440996a 100755
--- a/testsuite/sed.tests
+++ b/testsuite/sed.tests
@@ -324,6 +324,16 @@ testing "sed zero chars match/replace logic must not falsely trigger here 2" \
"sed 's/ *$/_/g'" \
"qwerty_\n" "" "qwerty\n"
+# the pattern here is interpreted as "9+", not as "9\+"
+testing "sed special char as s/// delimiter, in pattern" \
+ "sed 's+9\++X+'" \
+ "X8=17\n" "" "9+8=17\n"
+
+# but in replacement string, "\&" remains "\&", not interpreted as "&"
+testing "sed special char as s/// delimiter, in replacement" \
+ "sed 's&9&X\&&'" \
+ "X&+8=17\n" "" "9+8=17\n"
+
testing "sed /\$_in_regex/ should not match newlines, only end-of-line" \
"sed ': testcont; /\\\\$/{ =; N; b testcont }'" \
"\