From fab288cf0b31ff64a562cc496b20add822a6abbd Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 4 Apr 2010 01:17:30 +0200 Subject: awk: don't append bogus data after NUL in sub(); shrink also renamed variables to more sensible names function old new delta mk_re_node 56 49 -7 awk_sub 601 591 -10 Signed-off-by: Denys Vlasenko --- editors/awk.c | 118 +++++++++++++++++++++++++++++++++------------------------- 1 file changed, 68 insertions(+), 50 deletions(-) (limited to 'editors') diff --git a/editors/awk.c b/editors/awk.c index 30c6b88..3ba1a42 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -1134,15 +1134,13 @@ static node *new_node(uint32_t info) return n; } -static node *mk_re_node(const char *s, node *n, regex_t *re) +static void mk_re_node(const char *s, node *n, regex_t *re) { n->info = OC_REGEXP; n->l.re = re; n->r.ire = re + 1; xregcomp(re, s, REG_EXTENDED); xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE); - - return n; } static node *condition(void) @@ -1541,7 +1539,10 @@ static regex_t *as_regex(node *op, regex_t *preg) return preg; } -/* gradually increasing buffer */ +/* gradually increasing buffer. + * note that we reallocate even if n == old_size, + * and thus there is at least one extra allocated byte. + */ static char* qrealloc(char *b, int n, int *size) { if (!b || n >= *size) { @@ -1983,83 +1984,100 @@ static char *awk_printf(node *n) return b; } -/* common substitution routine - * replace (nm) substring of (src) that match (n) with (repl), store - * result into (dest), return number of substitutions. If nm=0, replace - * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable - * subexpression matching (\1-\9) +/* Common substitution routine. + * Replace (nm)'th substring of (src) that matches (rn) with (repl), + * store result into (dest), return number of substitutions. + * If nm = 0, replace all matches. + * If src or dst is NULL, use $0. + * If subexp != 0, enable subexpression matching (\1-\9). */ -static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int ex) +static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp) { - char *ds = NULL; - const char *s; + char *resbuf; const char *sp; - int c, i, j, di, rl, so, eo, nbs, n, dssize; + int match_no, residx, replen, resbufsize; + int regexec_flags; regmatch_t pmatch[10]; - regex_t sreg, *re; + regex_t sreg, *regex; + + resbuf = NULL; + residx = 0; + match_no = 0; + regexec_flags = 0; + regex = as_regex(rn, &sreg); + sp = getvar_s(src ? src : intvar[F0]); + replen = strlen(repl); + while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) { + int so = pmatch[0].rm_so; + int eo = pmatch[0].rm_eo; + + //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp); + resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize); + memcpy(resbuf + residx, sp, eo); + residx += eo; + if (++match_no >= nm) { + const char *s; + int nbs; - re = as_regex(rn, &sreg); - if (!src) - src = intvar[F0]; - if (!dest) - dest = intvar[F0]; - - i = di = 0; - sp = getvar_s(src); - rl = strlen(repl); - while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0 : REG_NOTBOL) == 0) { - so = pmatch[0].rm_so; - eo = pmatch[0].rm_eo; - - ds = qrealloc(ds, di + eo + rl, &dssize); - memcpy(ds + di, sp, eo); - di += eo; - if (++i >= nm) { /* replace */ - di -= (eo - so); + residx -= (eo - so); nbs = 0; for (s = repl; *s; s++) { - ds[di++] = c = *s; + char c = resbuf[residx++] = *s; if (c == '\\') { nbs++; continue; } - if (c == '&' || (ex && c >= '0' && c <= '9')) { - di -= ((nbs + 3) >> 1); + if (c == '&' || (subexp && c >= '0' && c <= '9')) { + int j; + residx -= ((nbs + 3) >> 1); j = 0; if (c != '&') { j = c - '0'; nbs++; } if (nbs % 2) { - ds[di++] = c; + resbuf[residx++] = c; } else { - n = pmatch[j].rm_eo - pmatch[j].rm_so; - ds = qrealloc(ds, di + rl + n, &dssize); - memcpy(ds + di, sp + pmatch[j].rm_so, n); - di += n; + int n = pmatch[j].rm_eo - pmatch[j].rm_so; + resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize); + memcpy(resbuf + residx, sp + pmatch[j].rm_so, n); + residx += n; } } nbs = 0; } } + regexec_flags = REG_NOTBOL; sp += eo; - if (i == nm) + if (match_no == nm) break; if (eo == so) { - ds[di] = *sp++; - if (!ds[di++]) - break; + /* Empty match (e.g. "b*" will match anywhere). + * Advance by one char. */ +//BUG (bug 1333): +//gsub(/\