diff options
author | Denys Vlasenko | 2018-04-02 12:35:04 +0200 |
---|---|---|
committer | Denys Vlasenko | 2018-04-02 13:15:37 +0200 |
commit | 216913c290fd2b88b744c04c0a2ef21fd1410ba9 (patch) | |
tree | 906139aa9c6d05a091c55ab83d1bf547fe839f31 /shell/ash.c | |
parent | e84212f8346741a2d4a04b40639c44fe519cf5a7 (diff) | |
download | busybox-216913c290fd2b88b744c04c0a2ef21fd1410ba9.zip busybox-216913c290fd2b88b744c04c0a2ef21fd1410ba9.tar.gz |
ash: parser: Add syntax stack for recursive parsing
This closes 10821.
Upstream patch:
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 9 Mar 2018 00:14:02 +0800
parser: Add syntax stack for recursive parsing
Without a stack of syntaxes we cannot correctly these two cases
together:
"${a#'$$'}"
"${a#"${b-'$$'}"}"
A recursive parser also helps in some other corner cases such
as nested arithmetic expansion with paratheses.
This patch adds a syntax stack allocated from the stack using
alloca. As a side-effect this allows us to remove the naked
backslashes for patterns within double-quotes, which means that
EXP_QPAT also has to go.
This patch also fixes removes any backslashes that precede right
braces when they are present within a parameter expansion context,
and backslashes that precede double quotes within inner double
quotes inside a parameter expansion in a here-document context.
The idea of a recursive parser is based on a patch by Harald van
Dijk.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
var_bash3, var_bash4 and var_bash6 tests are updated
with the output given by bash-4.3.43
With this patch, the following tests now pass for ash:
dollar_repl_slash_bash2.tests
squote_in_varexp2.tests
squote_in_varexp.tests
var_bash4.tests
function old new delta
readtoken1 2615 2874 +259
synstack_push - 54 +54
evalvar 574 571 -3
rmescapes 330 310 -20
subevalvar 1279 1258 -21
argstr 1146 1107 -39
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 1/4 up/down: 313/-83) Total: 230 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'shell/ash.c')
-rw-r--r-- | shell/ash.c | 231 |
1 files changed, 134 insertions, 97 deletions
diff --git a/shell/ash.c b/shell/ash.c index cf1d062..97379cd 100644 --- a/shell/ash.c +++ b/shell/ash.c @@ -5888,10 +5888,9 @@ static int substr_atoi(const char *s) * performs globbing, and thus diverges from what we do). */ #define EXP_CASE 0x10 /* keeps quotes around for CASE pattern */ -#define EXP_QPAT 0x20 /* pattern in quoted parameter expansion */ -#define EXP_VARTILDE2 0x40 /* expand tildes after colons only */ -#define EXP_WORD 0x80 /* expand word in parameter expansion */ -#define EXP_QUOTED 0x100 /* expand word in double quotes */ +#define EXP_VARTILDE2 0x20 /* expand tildes after colons only */ +#define EXP_WORD 0x40 /* expand word in parameter expansion */ +#define EXP_QUOTED 0x80 /* expand word in double quotes */ /* * rmescape() flags */ @@ -5901,7 +5900,7 @@ static int substr_atoi(const char *s) #define RMESCAPE_HEAP 0x10 /* Malloc strings instead of stalloc */ /* Add CTLESC when necessary. */ -#define QUOTES_ESC (EXP_FULL | EXP_CASE | EXP_QPAT) +#define QUOTES_ESC (EXP_FULL | EXP_CASE) /* Do not skip NUL characters. */ #define QUOTES_KEEPNUL EXP_TILDE @@ -6090,7 +6089,6 @@ rmescapes(char *str, int flag, int *slash_position) IF_BASH_PATTERN_SUBST('/',) CTLESC, CTLQUOTEMARK, '\0' }; char *p, *q, *r; - unsigned inquotes; unsigned protect_against_glob; unsigned globbing; @@ -6121,18 +6119,21 @@ rmescapes(char *str, int flag, int *slash_position) } } - inquotes = 0; globbing = flag & RMESCAPE_GLOB; protect_against_glob = globbing; while (*p) { if ((unsigned char)*p == CTLQUOTEMARK) { -// Note: both inquotes and protect_against_glob only affect whether +// Note: protect_against_glob only affect whether // CTLESC,<ch> gets converted to <ch> or to \<ch> - inquotes = ~inquotes; p++; protect_against_glob = globbing; continue; } + if (*p == '\\') { + /* naked back slash */ + protect_against_glob = 0; + goto copy; + } if ((unsigned char)*p == CTLESC) { p++; #if DEBUG @@ -6168,10 +6169,6 @@ rmescapes(char *str, int flag, int *slash_position) *q++ = '\\'; } } - } else if (*p == '\\' && !inquotes) { - /* naked back slash */ - protect_against_glob = 0; - goto copy; } #if BASH_PATTERN_SUBST else if (slash_position && p == str + *slash_position) { @@ -6669,16 +6666,6 @@ argstr(char *p, int flags) case CTLESC: startloc++; length++; - - /* - * Quoted parameter expansion pattern: remove quote - * unless inside inner quotes or we have a literal - * backslash. - */ - if (((flags | inquotes) & (EXP_QPAT | EXP_QUOTED)) == - EXP_QPAT && *p != '\\') - break; - goto addquote; case CTLVAR: TRACE(("argstr: evalvar('%s')\n", p)); @@ -6869,15 +6856,24 @@ subevalvar(char *p, char *varname, int strloc, int subtype, } #endif argstr_flags = EXP_TILDE; - if (subtype != VSASSIGN && subtype != VSQUESTION) - argstr_flags |= (flag & (EXP_QUOTED | EXP_QPAT) ? EXP_QPAT : EXP_CASE); + if (subtype != VSASSIGN + && subtype != VSQUESTION +#if BASH_SUBSTR + && subtype != VSSUBSTR +#endif + ) { + /* EXP_CASE keeps CTLESC's */ + argstr_flags = EXP_TILDE | EXP_CASE; + } argstr(p, argstr_flags); + //bb_error_msg("str0:'%s'", (char *)stackblock() + strloc); #if BASH_PATTERN_SUBST slash_pos = -1; if (repl) { slash_pos = expdest - ((char *)stackblock() + strloc); STPUTC('/', expdest); - argstr(repl + 1, argstr_flags); + //bb_error_msg("repl+1:'%s'", repl + 1); + argstr(repl + 1, EXP_TILDE); /* EXP_TILDE: echo "${v/x/~}" expands ~ ! */ *repl = '/'; } #endif @@ -10669,6 +10665,34 @@ pgetc_eatbnl(void) return c; } +struct synstack { + smalluint syntax; + uint8_t innerdq :1; + uint8_t varpushed :1; + uint8_t dblquote :1; + int varnest; /* levels of variables expansion */ + int dqvarnest; /* levels of variables expansion within double quotes */ + int parenlevel; /* levels of parens in arithmetic */ + struct synstack *prev; + struct synstack *next; +}; + +static void +synstack_push(struct synstack **stack, struct synstack *next, int syntax) +{ + memset(next, 0, sizeof(*next)); + next->syntax = syntax; + next->next = *stack; + (*stack)->prev = next; + *stack = next; +} + +static ALWAYS_INLINE void +synstack_pop(struct synstack **stack) +{ + *stack = (*stack)->next; +} + /* * To handle the "." command, a stack of input files is used. Pushfile * adds a new entry to the stack and popfile restores the previous level. @@ -11928,19 +11952,13 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs) size_t len; struct nodelist *bqlist; smallint quotef; - smallint dblquote; smallint oldstyle; - IF_FEATURE_SH_MATH(smallint prevsyntax;) /* syntax before arithmetic */ smallint pssyntax; /* we are expanding a prompt string */ - int varnest; /* levels of variables expansion */ - IF_FEATURE_SH_MATH(int arinest;) /* levels of arithmetic expansion */ - IF_FEATURE_SH_MATH(int parenlevel;) /* levels of parens in arithmetic */ - int dqvarnest; /* levels of variables expansion within double quotes */ IF_BASH_DOLLAR_SQUOTE(smallint bash_dollar_squote = 0;) + /* syntax stack */ + struct synstack synbase = { .syntax = syntax }; + struct synstack *synstack = &synbase; - bqlist = NULL; - quotef = 0; - IF_FEATURE_SH_MATH(prevsyntax = 0;) #if ENABLE_ASH_EXPAND_PRMT pssyntax = (syntax == PSSYNTAX); if (pssyntax) @@ -11948,11 +11966,10 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs) #else pssyntax = 0; /* constant */ #endif - dblquote = (syntax == DQSYNTAX); - varnest = 0; - IF_FEATURE_SH_MATH(arinest = 0;) - IF_FEATURE_SH_MATH(parenlevel = 0;) - dqvarnest = 0; + if (syntax == DQSYNTAX) + synstack->dblquote = 1; + quotef = 0; + bqlist = NULL; STARTSTACKSTR(out); loop: @@ -11960,9 +11977,9 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs) CHECKEND(); /* set c to PEOF if at end of here document */ for (;;) { /* until end of line or end of word */ CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */ - switch (SIT(c, syntax)) { + switch (SIT(c, synstack->syntax)) { case CNL: /* '\n' */ - if (syntax == BASESYNTAX) + if (synstack->syntax == BASESYNTAX) goto endword; /* exit outer loop */ USTPUTC(c, out); nlprompt(); @@ -11982,13 +11999,13 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs) if (c & 0x100) { /* Unknown escape. Encode as '\z' */ c = (unsigned char)c; - if (eofmark == NULL || dblquote) + if (eofmark == NULL || synstack->dblquote) USTPUTC(CTLESC, out); USTPUTC('\\', out); } } #endif - if (eofmark == NULL || dblquote) + if (eofmark == NULL || synstack->dblquote) USTPUTC(CTLESC, out); USTPUTC(c, out); break; @@ -12008,20 +12025,13 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs) /* Backslash is retained if we are in "str" * and next char isn't dquote-special. */ - if (dblquote + if (synstack->dblquote && c != '\\' && c != '`' && c != '$' - && (c != '"' || eofmark != NULL) + && (c != '"' || (eofmark != NULL && !synstack->varnest)) + && (c != '}' || !synstack->varnest) ) { -//dash survives not doing USTPUTC(CTLESC), but merely by chance: -//Example: "\z" gets encoded as "\<CTLESC>z". -//rmescapes() then emits "\", "\z", protecting z from globbing. -//But it's wrong, should protect _both_ from globbing: -//everything in double quotes is not globbed. -//Unlike dash, we have a fix in rmescapes() which emits bare "z" -//for "<CTLESC>z" since "z" is not glob-special (else unicode may break), -//and glob would see "\z" and eat "\". Thus: USTPUTC(CTLESC, out); /* protect '\' from glob */ USTPUTC('\\', out); } @@ -12031,56 +12041,62 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs) } break; case CSQUOTE: - syntax = SQSYNTAX; + synstack->syntax = SQSYNTAX; quotemark: if (eofmark == NULL) { USTPUTC(CTLQUOTEMARK, out); } break; case CDQUOTE: - syntax = DQSYNTAX; - dblquote = 1; + synstack->syntax = DQSYNTAX; + synstack->dblquote = 1; + toggledq: + if (synstack->varnest) + synstack->innerdq ^= 1; goto quotemark; case CENDQUOTE: IF_BASH_DOLLAR_SQUOTE(bash_dollar_squote = 0;) - if (eofmark != NULL && varnest == 0) { + if (eofmark != NULL && synstack->varnest == 0) { USTPUTC(c, out); - } else { - if (dqvarnest == 0) { - syntax = BASESYNTAX; - dblquote = 0; - } - quotef = 1; - goto quotemark; + break; } - break; + + if (synstack->dqvarnest == 0) { + synstack->syntax = BASESYNTAX; + synstack->dblquote = 0; + } + + quotef = 1; + + if (c == '"') + goto toggledq; + + goto quotemark; case CVAR: /* '$' */ PARSESUB(); /* parse substitution */ break; case CENDVAR: /* '}' */ - if (varnest > 0) { - varnest--; - if (dqvarnest > 0) { - dqvarnest--; - } + if (!synstack->innerdq && synstack->varnest > 0) { + if (!--synstack->varnest && synstack->varpushed) + synstack_pop(&synstack); + else if (synstack->dqvarnest > 0) + synstack->dqvarnest--; c = CTLENDVAR; } USTPUTC(c, out); break; #if ENABLE_FEATURE_SH_MATH case CLP: /* '(' in arithmetic */ - parenlevel++; + synstack->parenlevel++; USTPUTC(c, out); break; case CRP: /* ')' in arithmetic */ - if (parenlevel > 0) { - parenlevel--; + if (synstack->parenlevel > 0) { + synstack->parenlevel--; } else { if (pgetc_eatbnl() == ')') { c = CTLENDARI; - if (--arinest == 0) { - syntax = prevsyntax; - } + synstack_pop(&synstack); } else { /* * unbalanced parens @@ -12106,7 +12122,7 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs) case CIGN: break; default: - if (varnest == 0) { + if (synstack->varnest == 0) { #if BASH_REDIR_OUTPUT if (c == '&') { //Can't call pgetc_eatbnl() here, this requires three-deep pungetc() @@ -12125,12 +12141,12 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs) endword: #if ENABLE_FEATURE_SH_MATH - if (syntax == ARISYNTAX) + if (synstack->syntax == ARISYNTAX) raise_error_syntax("missing '))'"); #endif - if (syntax != BASESYNTAX && eofmark == NULL) + if (synstack->syntax != BASESYNTAX && eofmark == NULL) raise_error_syntax("unterminated quoted string"); - if (varnest != 0) { + if (synstack->varnest != 0) { /* { */ raise_error_syntax("missing '}'"); } @@ -12312,7 +12328,7 @@ parsesub: { || (c != '(' && c != '{' && !is_name(c) && !is_special(c)) ) { #if BASH_DOLLAR_SQUOTE - if (syntax != DQSYNTAX && c == '\'') + if (synstack->syntax != DQSYNTAX && c == '\'') bash_dollar_squote = 1; else #endif @@ -12332,6 +12348,8 @@ parsesub: { } } else { /* $VAR, $<specialchar>, ${...}, or PEOA/PEOF */ + smalluint newsyn = synstack->syntax; + USTPUTC(CTLVAR, out); typeloc = out - (char *)stackblock(); STADJUST(1, out); @@ -12390,6 +12408,8 @@ parsesub: { static const char types[] ALIGN1 = "}-+?="; /* ${VAR...} but not $VAR or ${#VAR} */ /* c == first char after VAR */ + int cc = c; + switch (c) { case ':': c = pgetc_eatbnl(); @@ -12414,21 +12434,24 @@ parsesub: { break; } case '%': - case '#': { - int cc = c; + case '#': subtype = (c == '#' ? VSTRIMLEFT : VSTRIMRIGHT); c = pgetc_eatbnl(); - if (c != cc) - goto badsub; - subtype++; + if (c == cc) + subtype++; + else + pungetc(); + + newsyn = BASESYNTAX; break; - } #if BASH_PATTERN_SUBST case '/': /* ${v/[/]pattern/repl} */ //TODO: encode pattern and repl separately. -// Currently ${v/$var_with_slash/repl} is horribly broken +// Currently cases like: v=1;echo ${v/$((1/1))/ONE} +// are broken (should print "ONE") subtype = VSREPLACE; + newsyn = BASESYNTAX; c = pgetc_eatbnl(); if (c != '/') goto badsub; @@ -12440,11 +12463,24 @@ parsesub: { badsub: pungetc(); } + + if (newsyn == ARISYNTAX && subtype > VSNORMAL) + newsyn = DQSYNTAX; + + if (newsyn != synstack->syntax) { + synstack_push(&synstack, + synstack->prev ?: alloca(sizeof(*synstack)), + newsyn); + + synstack->varpushed = 1; + synstack->dblquote = newsyn != BASESYNTAX; + } + ((unsigned char *)stackblock())[typeloc] = subtype; if (subtype != VSNORMAL) { - varnest++; - if (dblquote) - dqvarnest++; + synstack->varnest++; + if (synstack->dblquote) + synstack->dqvarnest++; } STPUTC('=', out); } @@ -12501,7 +12537,7 @@ parsebackq: { case '\\': pc = pgetc(); /* or pgetc_eatbnl()? why (example)? */ if (pc != '\\' && pc != '`' && pc != '$' - && (!dblquote || pc != '"') + && (!synstack->dblquote || pc != '"') ) { STPUTC('\\', pout); } @@ -12576,10 +12612,11 @@ parsebackq: { * Parse an arithmetic expansion (indicate start of one and set state) */ parsearith: { - if (++arinest == 1) { - prevsyntax = syntax; - syntax = ARISYNTAX; - } + + synstack_push(&synstack, + synstack->prev ?: alloca(sizeof(*synstack)), + ARISYNTAX); + synstack->dblquote = 1; USTPUTC(CTLARI, out); goto parsearith_return; } |