diff options
author | Denys Vlasenko | 2022-01-23 18:48:49 +0100 |
---|---|---|
committer | Denys Vlasenko | 2022-01-23 18:48:49 +0100 |
commit | e998c7c032458a05a7afcc13ce0dc980b99ecc6c (patch) | |
tree | d3e7e3bf11d99237339d37b9a4eca95e67d62e5f | |
parent | 33a9f34df5c53d3dd074a2168ff40d612a36667a (diff) | |
download | busybox-e998c7c032458a05a7afcc13ce0dc980b99ecc6c.zip busybox-e998c7c032458a05a7afcc13ce0dc980b99ecc6c.tar.gz |
sed: fix handling of escaped delimiters in s/// search pattern, closes 14541
function old new delta
copy_parsing_escapes 67 96 +29
parse_regex_delim 109 111 +2
get_address 213 215 +2
add_cmd 1176 1178 +2
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 4/0 up/down: 35/0) Total: 35 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | editors/sed.c | 19 |
-rwxr-xr-x | testsuite/sed.tests | 10 |
2 files changed, 21 insertions, 8 deletions
```diff
diff --git a/editors/sed.c b/editors/sed.c
index 48b0dbf..02a527b 100644
--- a/editors/sed.c
+++ b/editors/sed.c
@@ -246,7 +246,6 @@ static void cleanup_outname(void)
 }
 
 /* strcpy, replacing "\from" with 'to'. If to is NUL, replacing "\any" with 'any' */
-
 static unsigned parse_escapes(char *dest, const char *string, int len, char from, char to)
 {
 	char *d = dest;
@@ -276,7 +275,7 @@ static unsigned parse_escapes(char *dest, const char *string, int len, char from
 	return d - dest;
 }
 
-static char *copy_parsing_escapes(const char *string, int len)
+static char *copy_parsing_escapes(const char *string, int len, char delim)
 {
 	const char *s;
 	char *dest = xmalloc(len + 1);
@@ -287,10 +286,15 @@ static char *copy_parsing_escapes(const char *string, int len)
 		len = parse_escapes(dest, string, len, s[1], s[0]);
 		string = dest;
 	}
+	if (delim) {
+		/* we additionally unescape any instances of escaped delimiter.
+		 * For example, in 's+9\++X+' the pattern is "9+", not "9\+".
+		 */
+		len = parse_escapes(dest, string, len, delim, delim);
+	}
 	return dest;
 }
 
-
 /*
  * index_of_next_unescaped_regexp_delim - walks left to right through a string
  * beginning at a specified index and returns the index of the next regular
@@ -347,12 +351,11 @@ static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
 
 	/* save the match string */
 	idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr);
-	*match = copy_parsing_escapes(cmdstr_ptr, idx);
-
+	*match = copy_parsing_escapes(cmdstr_ptr, idx, delimiter);
 	/* save the replacement string */
 	cmdstr_ptr += idx + 1;
 	idx = index_of_next_unescaped_regexp_delim(- (int)delimiter, cmdstr_ptr);
-	*replace = copy_parsing_escapes(cmdstr_ptr, idx);
+	*replace = copy_parsing_escapes(cmdstr_ptr, idx, 0);
 
 	return ((cmdstr_ptr - cmdstr) + idx);
 }
@@ -380,7 +383,7 @@ static int get_address(const char *my_str, int *linenum, regex_t ** regex)
 			delimiter = *++pos;
 		next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
 		if (next != 0) {
-			temp = copy_parsing_escapes(pos, next);
+			temp = copy_parsing_escapes(pos, next, 0);
 			G.previous_regex_ptr = *regex = xzalloc(sizeof(regex_t));
 			xregcomp(*regex, temp, G.regex_type);
 			free(temp);
@@ -575,7 +578,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr)
 			cmdstr++;
 		}
 		len = strlen(cmdstr);
-		sed_cmd->string = copy_parsing_escapes(cmdstr, len);
+		sed_cmd->string = copy_parsing_escapes(cmdstr, len, 0);
 		cmdstr += len;
 		/* "\anychar" -> "anychar" */
 		parse_escapes(sed_cmd->string, sed_cmd->string, -1, '\0', '\0');
diff --git a/testsuite/sed.tests b/testsuite/sed.tests
index e62b839..440996a 100755
--- a/testsuite/sed.tests
+++ b/testsuite/sed.tests
@@ -324,6 +324,16 @@ testing "sed zero chars match/replace logic must not falsely trigger here 2" \
 	"sed 's/ *$/_/g'" \
 	"qwerty_\n" "" "qwerty\n"
 
+# the pattern here is interpreted as "9+", not as "9\+"
+testing "sed special char as s/// delimiter, in pattern" \
+	"sed 's+9\++X+'" \
+	"X8=17\n" "" "9+8=17\n"
+
+# but in replacement string, "\&" remains "\&", not interpreted as "&"
+testing "sed special char as s/// delimiter, in replacement" \
+	"sed 's&9&X\&&'" \
+	"X&+8=17\n" "" "9+8=17\n"
+
 testing "sed /\$_in_regex/ should not match newlines, only end-of-line" \
 	"sed ': testcont; /\\\\$/{ =; N; b testcont }'" \
 	"\
```