From 0256e00a9d077588bd3a39f5a1ef7e2eaa2911e4 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 30 May 2023 16:42:18 +0200 Subject: awk: fix precedence of = relative to == Discovered while adding code to disallow assignments to non-lvalues function old new delta parse_expr 936 991 +55 .rodata 105243 105247 +4 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 2/0 up/down: 59/0) Total: 59 bytes Signed-off-by: Denys Vlasenko --- editors/awk.c | 66 ++++++++++++++++++++++++++++++++++++----------------- testsuite/awk.tests | 5 ++++ 2 files changed, 50 insertions(+), 21 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index c49ad6e..0f062dc 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -337,7 +337,9 @@ static void debug_parse_print_tc(uint32_t n) #undef P #undef PRIMASK #undef PRIMASK2 -#define P(x) (x << 24) +/* Smaller 'x' means _higher_ operator precedence */ +#define PRECEDENCE(x) (x << 24) +#define P(x) PRECEDENCE(x) #define PRIMASK 0x7F000000 #define PRIMASK2 0x7E000000 @@ -360,7 +362,7 @@ enum { OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100, OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400, OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700, - OC_DONE = 0x2800, + OC_CONST = 0x2800, OC_DONE = 0x2900, ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200, ST_WHILE = 0x3300 @@ -440,9 +442,9 @@ static const uint32_t tokeninfo[] ALIGN4 = { #define TI_PREINC (OC_UNARY|xV|P(9)|'P') #define TI_PREDEC (OC_UNARY|xV|P(9)|'M') TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5), - OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-', - OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&', - OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&', + OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(38), OC_REPLACE|NV|P(38)|'+', OC_REPLACE|NV|P(38)|'-', + OC_REPLACE|NV|P(38)|'*', OC_REPLACE|NV|P(38)|'/', OC_REPLACE|NV|P(38)|'%', OC_REPLACE|NV|P(38)|'&', + OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(38)|'&', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, #define TI_LESS (OC_COMPARE|VV|P(39)|2) @@ -1301,7 +1303,7 @@ static uint32_t next_token(uint32_t expected) save_tclass = tc; save_info = t_info; tc = TC_BINOPX; - t_info = OC_CONCAT | SS | P(35); + t_info = OC_CONCAT | SS | PRECEDENCE(35); } t_tclass = tc; @@ -1361,9 +1363,8 @@ static node *parse_expr(uint32_t term_tc) { node sn; node *cn = &sn; - node *vn, *glptr; + node *glptr; uint32_t tc, expected_tc; - var *v; debug_printf_parse("%s() term_tc(%x):", __func__, term_tc); debug_parse_print_tc(term_tc); @@ -1374,11 +1375,12 @@ static node *parse_expr(uint32_t term_tc) expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc; while (!((tc = next_token(expected_tc)) & term_tc)) { + node *vn; if (glptr && (t_info == TI_LESS)) { /* input redirection (<) attached to glptr node */ debug_printf_parse("%s: input redir\n", __func__); - cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37)); + cn = glptr->l.n = new_node(OC_CONCAT | SS | PRECEDENCE(37)); cn->a.n = glptr; expected_tc = TS_OPERAND | TS_UOPPRE; glptr = NULL; @@ -1390,24 +1392,42 @@ static node *parse_expr(uint32_t term_tc) * previous operators with higher priority */ vn = cn; while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2)) - || ((t_info == vn->info) && t_info == TI_COLON) + || (t_info == vn->info && t_info == TI_COLON) ) { vn = vn->a.n; if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN); } if (t_info == TI_TERNARY) //TODO: why? - t_info += P(6); + t_info += PRECEDENCE(6); cn = vn->a.n->r.n = new_node(t_info); cn->a.n = vn->a.n; if (tc & TS_BINOP) { cn->l.n = vn; -//FIXME: this is the place to detect and reject assignments to non-lvalues. -//Currently we allow "assignments" to consts and temporaries, nonsense like this: -// awk 'BEGIN { "qwe" = 1 }' -// awk 'BEGIN { 7 *= 7 }' -// awk 'BEGIN { length("qwe") = 1 }' -// awk 'BEGIN { (1+1) += 3 }' + + /* Prevent: + * awk 'BEGIN { "qwe" = 1 }' + * awk 'BEGIN { 7 *= 7 }' + * awk 'BEGIN { length("qwe") = 1 }' + * awk 'BEGIN { (1+1) += 3 }' + */ + /* Assignment? (including *= and friends) */ + if (((t_info & OPCLSMASK) == OC_MOVE) + || ((t_info & OPCLSMASK) == OC_REPLACE) + ) { + debug_printf_parse("%s: MOVE/REPLACE vn->info:%08x\n", __func__, vn->info); + /* Left side is a (variable or array element) + * or function argument + * or $FIELD ? + */ + if ((vn->info & OPCLSMASK) != OC_VAR + && (vn->info & OPCLSMASK) != OC_FNARG + && (vn->info & OPCLSMASK) != OC_FIELD + ) { + syntax_error(EMSG_UNEXP_TOKEN); /* no. bad */ + } + } + expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP; if (t_info == TI_PGETLINE) { /* it's a pipe */ @@ -1443,6 +1463,8 @@ static node *parse_expr(uint32_t term_tc) /* one should be very careful with switch on tclass - * only simple tclasses should be used (TC_xyz, not TS_xyz) */ switch (tc) { + var *v; + case TC_VARIABLE: case TC_ARRAY: debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__); @@ -1463,14 +1485,14 @@ static node *parse_expr(uint32_t term_tc) case TC_NUMBER: case TC_STRING: debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__); - cn->info = OC_VAR; + cn->info = OC_CONST; v = cn->l.v = xzalloc(sizeof(var)); - if (tc & TC_NUMBER) + if (tc & TC_NUMBER) { setvar_i(v, t_double); - else { + } else { setvar_s(v, t_string); - expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */ } + expected_tc &= ~TC_UOPPOST; /* NUM++, "str"++ not allowed */ break; case TC_REGEXP: @@ -3124,6 +3146,8 @@ static var *evaluate(node *op, var *res) /* -- recursive node type -- */ + case XC( OC_CONST ): + debug_printf_eval("CONST "); case XC( OC_VAR ): debug_printf_eval("VAR\n"); L.v = op->l.v; diff --git a/testsuite/awk.tests b/testsuite/awk.tests index 8ab1c68..cdab93d 100755 --- a/testsuite/awk.tests +++ b/testsuite/awk.tests @@ -547,4 +547,9 @@ testing 'awk does not split on CR (char 13)' \ 'word1 word2 word3\r word2 word3\r\n' \ '' 'word1 word2 word3\r' +testing "awk = has higher precedence than == (despite what gawk manpage claims)" \ + "awk 'BEGIN { v=1; print 2==v; print 2==v=2; print v; print v=3==3; print v}'" \ + '0\n1\n2\n1\n3\n' \ + '' '' + exit $FAILCOUNT -- cgit v1.1