diff options
author | Denys Vlasenko | 2020-12-02 19:07:31 +0100 |
---|---|---|
committer | Denys Vlasenko | 2020-12-02 19:07:31 +0100 |
commit | 665a65953076ea21be49250b8279ddb1f0f99f38 (patch) | |
tree | bfb46738da9fec6715843197b5987ad56d4fcf76 /editors/awk.c | |
parent | 50ead33c45919abffde35313daac4c2dfd8641ca (diff) | |
download | busybox-665a65953076ea21be49250b8279ddb1f0f99f38.zip busybox-665a65953076ea21be49250b8279ddb1f0f99f38.tar.gz |
awk: FS regex matches only non-empty separators (gawk compat)
function old new delta
awk_split 484 553 +69
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'editors/awk.c')
-rw-r--r-- | editors/awk.c | 33 |
1 files changed, 25 insertions, 8 deletions
diff --git a/editors/awk.c b/editors/awk.c
index d56d633..2c15f9e 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1763,6 +1763,29 @@ static void fsrealloc(int size)
 	nfields = size;
 }
 
+static int regexec1_nonempty(const regex_t *preg, const char *s, regmatch_t pmatch[])
+{
+	int r = regexec(preg, s, 1, pmatch, 0);
+	if (r == 0 && pmatch[0].rm_eo == 0) {
+		/* For example, happens when FS can match
+		 * an empty string (awk -F ' *'). Logically,
+		 * this should split into one-char fields.
+		 * However, gawk 5.0.1 searches for first
+		 * _non-empty_ separator string match:
+		 */
+		size_t ofs = 0;
+		do {
+			ofs++;
+			if (!s[ofs])
+				return REG_NOMATCH;
+			regexec(preg, s + ofs, 1, pmatch, 0);
+		} while (pmatch[0].rm_eo == 0);
+		pmatch[0].rm_so += ofs;
+		pmatch[0].rm_eo += ofs;
+	}
+	return r;
+}
+
 static int awk_split(const char *s, node *spl, char **slist)
 {
 	int n;
@@ -1788,17 +1811,11 @@ static int awk_split(const char *s, node *spl, char **slist)
 			regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
 
 			l = strcspn(s, c+2); /* len till next NUL or \n */
-			if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
+			if (regexec1_nonempty(icase ? spl->r.ire : spl->l.re, s, pmatch) == 0
 			 && pmatch[0].rm_so <= l
 			) {
+				/* if (pmatch[0].rm_eo == 0) ... - impossible */
 				l = pmatch[0].rm_so;
-				if (pmatch[0].rm_eo == 0) {
-					/* For example, happens when FS can match
-					 * an empthy string (awk -F ' *')
-					 */
-					l++;
-					pmatch[0].rm_eo++;
-				}
 				n++; /* we saw yet another delimiter */
 			} else {
 				pmatch[0].rm_eo = l;