From d62627487a44d9175b05d49846aeef83fed97019 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko
Date: Wed, 14 Jul 2021 14:25:07 +0200
Subject: awk: tighten parsing - disallow extra semicolons

'; BEGIN {...}' and 'BEGIN {...} ;; {...}' are not accepted by gawk

function                                             old     new   delta
parse_program                                        332     353     +21

Signed-off-by: Denys Vlasenko
---
 editors/awk.c | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

(limited to 'editors/awk.c')

diff --git a/editors/awk.c b/editors/awk.c
index 7a28235..2f8a18c 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1634,7 +1634,7 @@ static void chain_group(void)
 		debug_printf_parse("%s: ST_FOR\n", __func__);
 		next_token(TC_LPAREN);
 		n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
-		if (t_tclass & TC_RPAREN) { /* for-in */
+		if (t_tclass & TC_RPAREN) { /* for (I in ARRAY) */
 			if (!n2 || n2->info != TI_IN)
 				syntax_error(EMSG_UNEXP_TOKEN);
 			n = chain_node(OC_WALKINIT | VV);
@@ -1700,20 +1700,15 @@ static void parse_program(char *p)
 	for (;;) {
 		uint32_t tclass;
 
-		tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE |
-			TC_SEMICOL | TC_NEWLINE | TC_BEGIN | TC_END | TC_FUNCDECL);
-
+		tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
+			| TC_EOF | TC_NEWLINE /* but not TC_SEMICOL */);
+ got_tok:
 		if (tclass == TC_EOF) {
 			debug_printf_parse("%s: TC_EOF\n", __func__);
 			break;
 		}
-		if (tclass & (TC_SEMICOL | TC_NEWLINE)) {
-			debug_printf_parse("%s: TC_SEMICOL | TC_NEWLINE\n", __func__);
-//NB: gawk allows many newlines, but does not allow more than one semicolon:
-// BEGIN {...};;
-//would complain "each rule must have a pattern or an action part".
-//Same message for
-// ; BEGIN {...}
+		if (tclass == TC_NEWLINE) {
+			debug_printf_parse("%s: TC_NEWLINE\n", __func__);
 			continue;
 		}
 		if (tclass == TC_BEGIN) {
@@ -1722,7 +1717,7 @@ static void parse_program(char *p)
 			/* ensure there is no newline between BEGIN and { */
 			next_token(TC_LBRACE);
 			chain_until_rbrace();
-			continue;
+			goto next_tok;
 		}
 		if (tclass == TC_END) {
 			debug_printf_parse("%s: TC_END\n", __func__);
@@ -1730,7 +1725,7 @@ static void parse_program(char *p)
 			/* ensure there is no newline between END and { */
 			next_token(TC_LBRACE);
 			chain_until_rbrace();
-			continue;
+			goto next_tok;
 		}
 		if (tclass == TC_FUNCDECL) {
 			func *f;
@@ -1765,7 +1760,7 @@ static void parse_program(char *p)
 				continue;
 			chain_until_rbrace();
 			hash_clear(ahash);
-			continue;
+			goto next_tok;
 		}
 		seq = &mainseq;
 		if (tclass & TS_OPSEQ) {
@@ -1784,12 +1779,25 @@ static void parse_program(char *p)
 				chain_node(OC_PRINT);
 			}
 			cn->r.n = mainseq.last;
-			continue;
+			goto next_tok;
 		}
 		/* tclass == TC_LBRACE */
 		debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
 		chain_until_rbrace();
-	}
+ next_tok:
+		/* Same as next_token() at the top of the loop, + TC_SEMICOL */
+		tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
+			| TC_EOF | TC_NEWLINE | TC_SEMICOL);
+		/* gawk allows many newlines, but does not allow more than one semicolon:
+		 * BEGIN {...};;
+		 * would complain "each rule must have a pattern or an action part".
+		 * Same message for
+		 * ; BEGIN {...}
+		 */
+		if (tclass != TC_SEMICOL)
+			goto got_tok; /* use this token */
+		/* else: loop back - ate the semicolon, get and use _next_ token */
+	} /* for (;;) */
 }
-- 
cgit v1.1