summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDenys Vlasenko2021-07-03 01:16:48 +0200
committerDenys Vlasenko2021-07-03 01:16:48 +0200
commit2b65e73db3254a7228802886546152c72217017d (patch)
treea1a8d3cd8e5daee15a15e4fce42a328df50e0c35
parent717200eb43c9420773c0f8b751aadabba3052027 (diff)
downloadbusybox-2b65e73db3254a7228802886546152c72217017d.zip
busybox-2b65e73db3254a7228802886546152c72217017d.tar.gz
awk: tighten rules in action parsing
Disallow: BEGIN { action } - must start on the same line Disallow: func f() print "hello" - must be in {...} function old new delta chain_until_rbrace - 41 +41 parse_program 307 336 +29 chain_group 649 616 -33 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 1/1 up/down: 70/-33) Total: 37 bytes Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r--editors/awk.c108
1 files changed, 66 insertions, 42 deletions
diff --git a/editors/awk.c b/editors/awk.c
index a1a2afd..c684168 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1549,29 +1549,35 @@ static node *chain_loop(node *nn)
return n;
}
+static void chain_until_rbrace(void)
+{
+ uint32_t tc;
+ while ((tc = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) {
+ debug_printf_parse("%s: !TC_RBRACE\n", __func__);
+ if (tc == TC_NEWLINE)
+ continue;
+ rollback_token();
+ chain_group();
+ }
+ debug_printf_parse("%s: TC_RBRACE\n", __func__);
+}
+
/* parse group and attach it to chain */
static void chain_group(void)
{
- uint32_t c;
+ uint32_t tc;
node *n, *n2, *n3;
do {
- c = next_token(TS_GRPSEQ);
- } while (c & TC_NEWLINE);
+ tc = next_token(TS_GRPSEQ);
+ } while (tc == TC_NEWLINE);
- if (c & TC_LBRACE) {
+ if (tc == TC_LBRACE) {
debug_printf_parse("%s: TC_LBRACE\n", __func__);
- while ((c = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) {
- debug_printf_parse("%s: !TC_RBRACE\n", __func__);
- if (c & TC_NEWLINE)
- continue;
- rollback_token();
- chain_group();
- }
- debug_printf_parse("%s: TC_RBRACE\n", __func__);
+ chain_until_rbrace();
return;
}
- if (c & (TS_OPSEQ | TS_OPTERM)) {
+ if (tc & (TS_OPSEQ | TS_OPTERM)) {
debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__);
rollback_token();
chain_expr(OC_EXEC | Vx);
@@ -1675,37 +1681,48 @@ static void chain_group(void)
static void parse_program(char *p)
{
- uint32_t tclass;
- node *cn;
- func *f;
- var *v;
-
debug_printf_parse("%s()\n", __func__);
g_pos = p;
t_lineno = 1;
- while ((tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE |
- TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
+ for (;;) {
+ uint32_t tclass;
- if (tclass & TS_OPTERM) {
+ tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE |
+ TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL);
+
+ if (tclass == TC_EOF) {
+ debug_printf_parse("%s: TC_EOF\n", __func__);
+ break;
+ }
+ if (tclass & TS_OPTERM) { /* ; or <newline> */
debug_printf_parse("%s: TS_OPTERM\n", __func__);
+//NB: gawk allows many newlines, but does not allow more than one semicolon:
+// BEGIN {...}<newline>;<newline>;
+//would complain "each rule must have a pattern or an action part".
+//Same message for
+// ; BEGIN {...}
continue;
}
-
- seq = &mainseq;
- if (tclass & TC_BEGIN) {
+ if (tclass == TC_BEGIN) {
debug_printf_parse("%s: TC_BEGIN\n", __func__);
seq = &beginseq;
-//TODO: ensure there is no newline between BEGIN and {
-//next_token(TC_LBRACE); rollback_token();
- chain_group();
- } else if (tclass & TC_END) {
+ /* ensure there is no newline between BEGIN and { */
+ next_token(TC_LBRACE);
+ chain_until_rbrace();
+ continue;
+ }
+ if (tclass == TC_END) {
debug_printf_parse("%s: TC_END\n", __func__);
seq = &endseq;
-//TODO: ensure there is no newline between END and {
-//next_token(TC_LBRACE); rollback_token();
- chain_group();
- } else if (tclass & TC_FUNCDECL) {
+ /* ensure there is no newline between END and { */
+ next_token(TC_LBRACE);
+ chain_until_rbrace();
+ continue;
+ }
+ if (tclass == TC_FUNCDECL) {
+ func *f;
+
debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
next_token(TC_FUNCTION);
f = newfunc(t_string);
@@ -1716,6 +1733,7 @@ static void parse_program(char *p)
//f->nargs = 0; - already is
/* func arg list: comma sep list of args, and a close paren */
for (;;) {
+ var *v;
if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) {
if (f->nargs == 0)
break; /* func() is ok */
@@ -1730,31 +1748,37 @@ static void parse_program(char *p)
/* it was a comma, we ate it */
}
seq = &f->body;
-//TODO: ensure there is { after "func F(...)" - but newlines are allowed
-//while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE) continue; rollback_token();
- chain_group();
+ /* ensure there is { after "func F(...)" - but newlines are allowed */
+ while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE)
+ continue;
+ chain_until_rbrace();
hash_clear(ahash);
- } else if (tclass & TS_OPSEQ) {
+ continue;
+ }
+ seq = &mainseq;
+ if (tclass & TS_OPSEQ) {
+ node *cn;
+
debug_printf_parse("%s: TS_OPSEQ\n", __func__);
rollback_token();
cn = chain_node(OC_TEST);
cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_LBRACE);
- if (t_tclass & TC_LBRACE) {
+ if (t_tclass == TC_LBRACE) {
debug_printf_parse("%s: TC_LBRACE\n", __func__);
rollback_token();
chain_group();
} else {
+ /* no action, assume default "{ print }" */
debug_printf_parse("%s: !TC_LBRACE\n", __func__);
chain_node(OC_PRINT);
}
cn->r.n = mainseq.last;
- } else /* if (tclass & TC_LBRACE) */ {
- debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
- rollback_token();
- chain_group();
+ continue;
}
+ /* tclass == TC_LBRACE */
+ debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
+ chain_until_rbrace();
}
- debug_printf_parse("%s: TC_EOF\n", __func__);
}