diff options
author | Denys Vlasenko | 2021-07-03 01:16:48 +0200 |
---|---|---|
committer | Denys Vlasenko | 2021-07-03 01:16:48 +0200 |
commit | 2b65e73db3254a7228802886546152c72217017d (patch) | |
tree | a1a8d3cd8e5daee15a15e4fce42a328df50e0c35 | |
parent | 717200eb43c9420773c0f8b751aadabba3052027 (diff) | |
download | busybox-2b65e73db3254a7228802886546152c72217017d.zip busybox-2b65e73db3254a7228802886546152c72217017d.tar.gz |
awk: tighten rules in action parsing
Disallow a newline between BEGIN (or END) and its action:
BEGIN
{ action } - the opening { must be on the same line as BEGIN
Disallow a function body that is not enclosed in braces:
func f()
print "hello" - the body must be in {...}
function old new delta
chain_until_rbrace - 41 +41
parse_program 307 336 +29
chain_group 649 616 -33
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 1/1 up/down: 70/-33) Total: 37 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | editors/awk.c | 108 |
1 files changed, 66 insertions, 42 deletions
diff --git a/editors/awk.c b/editors/awk.c index a1a2afd..c684168 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -1549,29 +1549,35 @@ static node *chain_loop(node *nn) return n; } +static void chain_until_rbrace(void) +{ + uint32_t tc; + while ((tc = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) { + debug_printf_parse("%s: !TC_RBRACE\n", __func__); + if (tc == TC_NEWLINE) + continue; + rollback_token(); + chain_group(); + } + debug_printf_parse("%s: TC_RBRACE\n", __func__); +} + /* parse group and attach it to chain */ static void chain_group(void) { - uint32_t c; + uint32_t tc; node *n, *n2, *n3; do { - c = next_token(TS_GRPSEQ); - } while (c & TC_NEWLINE); + tc = next_token(TS_GRPSEQ); + } while (tc == TC_NEWLINE); - if (c & TC_LBRACE) { + if (tc == TC_LBRACE) { debug_printf_parse("%s: TC_LBRACE\n", __func__); - while ((c = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) { - debug_printf_parse("%s: !TC_RBRACE\n", __func__); - if (c & TC_NEWLINE) - continue; - rollback_token(); - chain_group(); - } - debug_printf_parse("%s: TC_RBRACE\n", __func__); + chain_until_rbrace(); return; } - if (c & (TS_OPSEQ | TS_OPTERM)) { + if (tc & (TS_OPSEQ | TS_OPTERM)) { debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__); rollback_token(); chain_expr(OC_EXEC | Vx); @@ -1675,37 +1681,48 @@ static void chain_group(void) static void parse_program(char *p) { - uint32_t tclass; - node *cn; - func *f; - var *v; - debug_printf_parse("%s()\n", __func__); g_pos = p; t_lineno = 1; - while ((tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE | - TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) { + for (;;) { + uint32_t tclass; - if (tclass & TS_OPTERM) { + tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE | + TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL); + + if (tclass == TC_EOF) { + debug_printf_parse("%s: TC_EOF\n", __func__); + break; + } + if (tclass & TS_OPTERM) { /* ; or <newline> */ debug_printf_parse("%s: TS_OPTERM\n", __func__); +//NB: gawk allows many newlines, 
but does not allow more than one semicolon: +// BEGIN {...}<newline>;<newline>; +//would complain "each rule must have a pattern or an action part". +//Same message for +// ; BEGIN {...} continue; } - - seq = &mainseq; - if (tclass & TC_BEGIN) { + if (tclass == TC_BEGIN) { debug_printf_parse("%s: TC_BEGIN\n", __func__); seq = &beginseq; -//TODO: ensure there is no newline between BEGIN and { -//next_token(TC_LBRACE); rollback_token(); - chain_group(); - } else if (tclass & TC_END) { + /* ensure there is no newline between BEGIN and { */ + next_token(TC_LBRACE); + chain_until_rbrace(); + continue; + } + if (tclass == TC_END) { debug_printf_parse("%s: TC_END\n", __func__); seq = &endseq; -//TODO: ensure there is no newline between END and { -//next_token(TC_LBRACE); rollback_token(); - chain_group(); - } else if (tclass & TC_FUNCDECL) { + /* ensure there is no newline between END and { */ + next_token(TC_LBRACE); + chain_until_rbrace(); + continue; + } + if (tclass == TC_FUNCDECL) { + func *f; + debug_printf_parse("%s: TC_FUNCDECL\n", __func__); next_token(TC_FUNCTION); f = newfunc(t_string); @@ -1716,6 +1733,7 @@ static void parse_program(char *p) //f->nargs = 0; - already is /* func arg list: comma sep list of args, and a close paren */ for (;;) { + var *v; if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) { if (f->nargs == 0) break; /* func() is ok */ @@ -1730,31 +1748,37 @@ static void parse_program(char *p) /* it was a comma, we ate it */ } seq = &f->body; -//TODO: ensure there is { after "func F(...)" - but newlines are allowed -//while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE) continue; rollback_token(); - chain_group(); + /* ensure there is { after "func F(...)" - but newlines are allowed */ + while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE) + continue; + chain_until_rbrace(); hash_clear(ahash); - } else if (tclass & TS_OPSEQ) { + continue; + } + seq = &mainseq; + if (tclass & TS_OPSEQ) { + node *cn; + debug_printf_parse("%s: TS_OPSEQ\n", 
__func__); rollback_token(); cn = chain_node(OC_TEST); cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_LBRACE); - if (t_tclass & TC_LBRACE) { + if (t_tclass == TC_LBRACE) { debug_printf_parse("%s: TC_LBRACE\n", __func__); rollback_token(); chain_group(); } else { + /* no action, assume default "{ print }" */ debug_printf_parse("%s: !TC_LBRACE\n", __func__); chain_node(OC_PRINT); } cn->r.n = mainseq.last; - } else /* if (tclass & TC_LBRACE) */ { - debug_printf_parse("%s: TC_LBRACE(?)\n", __func__); - rollback_token(); - chain_group(); + continue; } + /* tclass == TC_LBRACE */ + debug_printf_parse("%s: TC_LBRACE(?)\n", __func__); + chain_until_rbrace(); } - debug_printf_parse("%s: TC_EOF\n", __func__); } |