From 2ca39ffd447ca874fcea933194829717d5573247 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko
Date: Thu, 8 Jun 2023 10:42:39 +0200
Subject: awk: fix subst code to handle "start of word" pattern correctly (needs REG_STARTEND)

function                                             old     new   delta
awk_sub                                              637     714     +77

Signed-off-by: Denys Vlasenko
---
 editors/awk.c       | 49 ++++++++++++++++++++++++++++++++++++-------------
 testsuite/awk.tests | 28 +++++++++++++++-------------
 2 files changed, 51 insertions(+), 26 deletions(-)

diff --git a/editors/awk.c b/editors/awk.c
index df9b7fd..171f0a7 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2504,17 +2504,46 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in
 	regex_t sreg, *regex;
 	/* True only if called to implement gensub(): */
 	int subexp = (src != dest);
-
+#if defined(REG_STARTEND)
+	const char *src_string;
+	size_t src_strlen;
+	regexec_flags = REG_STARTEND;
+#else
+	regexec_flags = 0;
+#endif
 	resbuf = NULL;
 	residx = 0;
 	match_no = 0;
-	regexec_flags = 0;
 	regex = as_regex(rn, &sreg);
 	sp = getvar_s(src ? src : intvar[F0]);
+#if defined(REG_STARTEND)
+	src_string = sp;
+	src_strlen = strlen(src_string);
+#endif
 	replen = strlen(repl);
-	while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
-		int so = pmatch[0].rm_so;
-		int eo = pmatch[0].rm_eo;
+	for (;;) {
+		int so, eo;
+
+#if defined(REG_STARTEND)
+// REG_STARTEND: "This flag is a BSD extension, not present in POSIX"
+		size_t start_ofs = sp - src_string;
+		pmatch[0].rm_so = start_ofs;
+		pmatch[0].rm_eo = src_strlen;
+		if (regexec(regex, src_string, 10, pmatch, regexec_flags) != 0)
+			break;
+		eo = pmatch[0].rm_eo - start_ofs;
+		so = pmatch[0].rm_so - start_ofs;
+#else
+// BUG:
+// gsub(/\" (!!!), not "\\" as you would expect.
@@ -572,31 +570,35 @@ testing 'awk gensub backslashes \\\' \
 	'awk '$sq'BEGIN { s="\\\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
 	's=\\\\\\
 \\\\|\\\\
-' \
-	'' ''
+' '' ''
 
 testing 'awk gensub backslashes \\\\' \
 	'awk '$sq'BEGIN { s="\\\\\\\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
 	's=\\\\\\\\
 \\\\|\\\\
-' \
-	'' ''
+' '' ''
 
 testing 'awk gensub backslashes \&' \
 	'awk '$sq'BEGIN { s="\\&"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
 	's=\\&
 &|&
-' \
-	'' ''
+' '' ''
 
 testing 'awk gensub backslashes \0' \
 	'awk '$sq'BEGIN { s="\\0"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
 	's=\\0
 a|a
-' \
-	'' ''
+' '' ''
 
 testing 'awk gensub backslashes \\0' \
 	'awk '$sq'BEGIN { s="\\\\0"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
 	's=\\\\0
 \\0|\\0
-' \
+' '' ''
+
+# The "b" in "abc" should not match