summary | refs | log | tree | commit | diff
path: root/editors/awk.c
diff options
context:
space:
mode:
author: Denys Vlasenko, 2023-06-08 10:42:39 +0200
committer: Denys Vlasenko, 2023-06-08 10:42:39 +0200
commit: 2ca39ffd447ca874fcea933194829717d5573247 (patch)
tree: 6d9eb4ba80ad9feec70c3f4f25dd3f7629c5fe5a /editors/awk.c
parent: 113685fbcd4c3432ec9b640583d50ba8da2102e8 (diff)
download: busybox-2ca39ffd447ca874fcea933194829717d5573247.zip
download: busybox-2ca39ffd447ca874fcea933194829717d5573247.tar.gz
awk: fix subst code to handle "start of word" pattern correctly (needs REG_STARTEND)
function                                             old     new   delta
awk_sub                                              637     714     +77

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'editors/awk.c')
-rw-r--r-- editors/awk.c | 49
1 files changed, 36 insertions, 13 deletions
diff --git a/editors/awk.c b/editors/awk.c
index df9b7fd..171f0a7 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -2504,17 +2504,46 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in
regex_t sreg, *regex;
/* True only if called to implement gensub(): */
int subexp = (src != dest);
-
+#if defined(REG_STARTEND)
+ const char *src_string;
+ size_t src_strlen;
+ regexec_flags = REG_STARTEND;
+#else
+ regexec_flags = 0;
+#endif
resbuf = NULL;
residx = 0;
match_no = 0;
- regexec_flags = 0;
regex = as_regex(rn, &sreg);
sp = getvar_s(src ? src : intvar[F0]);
+#if defined(REG_STARTEND)
+ src_string = sp;
+ src_strlen = strlen(src_string);
+#endif
replen = strlen(repl);
- while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
- int so = pmatch[0].rm_so;
- int eo = pmatch[0].rm_eo;
+ for (;;) {
+ int so, eo;
+
+#if defined(REG_STARTEND)
+// REG_STARTEND: "This flag is a BSD extension, not present in POSIX"
+ size_t start_ofs = sp - src_string;
+ pmatch[0].rm_so = start_ofs;
+ pmatch[0].rm_eo = src_strlen;
+ if (regexec(regex, src_string, 10, pmatch, regexec_flags) != 0)
+ break;
+ eo = pmatch[0].rm_eo - start_ofs;
+ so = pmatch[0].rm_so - start_ofs;
+#else
+// BUG:
+// gsub(/\<b*/,"") on "abc" matches empty string at "a...",
+// advances sp one char (see "Empty match" comment later) to "bc"
+// ... and erroneously matches "b" even though it is NOT at the word start.
+ enum { start_ofs = 0 };
+ if (regexec(regex, sp, 10, pmatch, regexec_flags) != 0)
+ break;
+ so = pmatch[0].rm_so;
+ eo = pmatch[0].rm_eo;
+#endif
//bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
@@ -2543,7 +2572,7 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in
}
n = pmatch[j].rm_eo - pmatch[j].rm_so;
resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
- memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
+ memcpy(resbuf + residx, sp + pmatch[j].rm_so - start_ofs, n);
residx += n;
} else
resbuf[residx++] = c;
@@ -2557,12 +2586,6 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in
if (eo == so) {
/* Empty match (e.g. "b*" will match anywhere).
* Advance by one char. */
-//BUG (bug 1333):
-//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
-//... and will erroneously match "b" even though it is NOT at the word start.
-//we need REG_NOTBOW but it does not exist...
-//TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
-//it should be able to do it correctly.
/* Subtle: this is safe only because
* qrealloc allocated at least one extra byte */
resbuf[residx] = *sp;
@@ -2571,7 +2594,7 @@ static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest /*,in
sp++;
residx++;
}
- regexec_flags = REG_NOTBOL;
+ regexec_flags |= REG_NOTBOL;
}
resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);