diff options
author | Denys Vlasenko | 2021-07-14 14:25:07 +0200 |
---|---|---|
committer | Denys Vlasenko | 2021-07-14 16:32:19 +0200 |
commit | d62627487a44d9175b05d49846aeef83fed97019 (patch) | |
tree | cca6e3b0ba26dfbf6dc652ff0d9770572260cf03 | |
parent | e6f4145f2961bfd500214ef1fcf07543ffacb603 (diff) | |
download | busybox-d62627487a44d9175b05d49846aeef83fed97019.zip busybox-d62627487a44d9175b05d49846aeef83fed97019.tar.gz |
awk: tighten parsing - disallow extra semicolons
'; BEGIN {...}' and 'BEGIN {...} ;; {...}' are not accepted by gawk
```
function                                             old     new   delta
parse_program                                        332     353     +21
```
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | editors/awk.c | 40 |
1 file changed, 24 insertions, 16 deletions
```diff
diff --git a/editors/awk.c b/editors/awk.c
index 7a28235..2f8a18c 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1634,7 +1634,7 @@ static void chain_group(void)
 		debug_printf_parse("%s: ST_FOR\n", __func__);
 		next_token(TC_LPAREN);
 		n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
-		if (t_tclass & TC_RPAREN) { /* for-in */
+		if (t_tclass & TC_RPAREN) { /* for (I in ARRAY) */
 			if (!n2 || n2->info != TI_IN)
 				syntax_error(EMSG_UNEXP_TOKEN);
 			n = chain_node(OC_WALKINIT | VV);
@@ -1700,20 +1700,15 @@ static void parse_program(char *p)
 	for (;;) {
 		uint32_t tclass;
 
-		tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE |
-			TC_SEMICOL | TC_NEWLINE | TC_BEGIN | TC_END | TC_FUNCDECL);
-
+		tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
+			| TC_EOF | TC_NEWLINE /* but not TC_SEMICOL */);
+ got_tok:
 		if (tclass == TC_EOF) {
 			debug_printf_parse("%s: TC_EOF\n", __func__);
 			break;
 		}
-		if (tclass & (TC_SEMICOL | TC_NEWLINE)) {
-			debug_printf_parse("%s: TC_SEMICOL | TC_NEWLINE\n", __func__);
-//NB: gawk allows many newlines, but does not allow more than one semicolon:
-// BEGIN {...}<newline>;<newline>;
-//would complain "each rule must have a pattern or an action part".
-//Same message for
-// ; BEGIN {...}
+		if (tclass == TC_NEWLINE) {
+			debug_printf_parse("%s: TC_NEWLINE\n", __func__);
 			continue;
 		}
 		if (tclass == TC_BEGIN) {
@@ -1722,7 +1717,7 @@ static void parse_program(char *p)
 			/* ensure there is no newline between BEGIN and { */
 			next_token(TC_LBRACE);
 			chain_until_rbrace();
-			continue;
+			goto next_tok;
 		}
 		if (tclass == TC_END) {
 			debug_printf_parse("%s: TC_END\n", __func__);
@@ -1730,7 +1725,7 @@ static void parse_program(char *p)
 			/* ensure there is no newline between END and { */
 			next_token(TC_LBRACE);
 			chain_until_rbrace();
-			continue;
+			goto next_tok;
 		}
 		if (tclass == TC_FUNCDECL) {
 			func *f;
@@ -1765,7 +1760,7 @@ static void parse_program(char *p)
 				continue;
 			chain_until_rbrace();
 			hash_clear(ahash);
-			continue;
+			goto next_tok;
 		}
 		seq = &mainseq;
 		if (tclass & TS_OPSEQ) {
@@ -1784,12 +1779,25 @@ static void parse_program(char *p)
 				chain_node(OC_PRINT);
 			}
 			cn->r.n = mainseq.last;
-			continue;
+			goto next_tok;
 		}
 		/* tclass == TC_LBRACE */
 		debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
 		chain_until_rbrace();
-	}
+ next_tok:
+		/* Same as next_token() at the top of the loop, + TC_SEMICOL */
+		tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
+			| TC_EOF | TC_NEWLINE | TC_SEMICOL);
+		/* gawk allows many newlines, but does not allow more than one semicolon:
+		 * BEGIN {...}<newline>;<newline>;
+		 * would complain "each rule must have a pattern or an action part".
+		 * Same message for
+		 * ; BEGIN {...}
+		 */
+		if (tclass != TC_SEMICOL)
+			goto got_tok; /* use this token */
+		/* else: loop back - ate the semicolon, get and use _next_ token */
+	} /* for (;;) */
 }
```