summaryrefslogtreecommitdiff
path: root/shell/ash.c
diff options
context:
space:
mode:
authorDenys Vlasenko2018-04-02 12:35:04 +0200
committerDenys Vlasenko2018-04-02 13:15:37 +0200
commit216913c290fd2b88b744c04c0a2ef21fd1410ba9 (patch)
tree906139aa9c6d05a091c55ab83d1bf547fe839f31 /shell/ash.c
parente84212f8346741a2d4a04b40639c44fe519cf5a7 (diff)
downloadbusybox-216913c290fd2b88b744c04c0a2ef21fd1410ba9.zip
busybox-216913c290fd2b88b744c04c0a2ef21fd1410ba9.tar.gz
ash: parser: Add syntax stack for recursive parsing
This closes 10821. Upstream patch: From: Herbert Xu <herbert@gondor.apana.org.au> Date: Fri, 9 Mar 2018 00:14:02 +0800 parser: Add syntax stack for recursive parsing Without a stack of syntaxes we cannot correctly parse these two cases together: "${a#'$$'}" "${a#"${b-'$$'}"}" A recursive parser also helps in some other corner cases such as nested arithmetic expansion with parentheses. This patch adds a syntax stack allocated from the stack using alloca. As a side-effect this allows us to remove the naked backslashes for patterns within double-quotes, which means that EXP_QPAT also has to go. This patch also removes any backslashes that precede right braces when they are present within a parameter expansion context, and backslashes that precede double quotes within inner double quotes inside a parameter expansion in a here-document context. The idea of a recursive parser is based on a patch by Harald van Dijk. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> var_bash3, var_bash4 and var_bash6 tests are updated with the output given by bash-4.3.43 With this patch, the following tests now pass for ash: dollar_repl_slash_bash2.tests squote_in_varexp2.tests squote_in_varexp.tests var_bash4.tests function old new delta readtoken1 2615 2874 +259 synstack_push - 54 +54 evalvar 574 571 -3 rmescapes 330 310 -20 subevalvar 1279 1258 -21 argstr 1146 1107 -39 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 1/4 up/down: 313/-83) Total: 230 bytes Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'shell/ash.c')
-rw-r--r--shell/ash.c231
1 files changed, 134 insertions, 97 deletions
diff --git a/shell/ash.c b/shell/ash.c
index cf1d062..97379cd 100644
--- a/shell/ash.c
+++ b/shell/ash.c
@@ -5888,10 +5888,9 @@ static int substr_atoi(const char *s)
* performs globbing, and thus diverges from what we do).
*/
#define EXP_CASE 0x10 /* keeps quotes around for CASE pattern */
-#define EXP_QPAT 0x20 /* pattern in quoted parameter expansion */
-#define EXP_VARTILDE2 0x40 /* expand tildes after colons only */
-#define EXP_WORD 0x80 /* expand word in parameter expansion */
-#define EXP_QUOTED 0x100 /* expand word in double quotes */
+#define EXP_VARTILDE2 0x20 /* expand tildes after colons only */
+#define EXP_WORD 0x40 /* expand word in parameter expansion */
+#define EXP_QUOTED 0x80 /* expand word in double quotes */
/*
* rmescape() flags
*/
@@ -5901,7 +5900,7 @@ static int substr_atoi(const char *s)
#define RMESCAPE_HEAP 0x10 /* Malloc strings instead of stalloc */
/* Add CTLESC when necessary. */
-#define QUOTES_ESC (EXP_FULL | EXP_CASE | EXP_QPAT)
+#define QUOTES_ESC (EXP_FULL | EXP_CASE)
/* Do not skip NUL characters. */
#define QUOTES_KEEPNUL EXP_TILDE
@@ -6090,7 +6089,6 @@ rmescapes(char *str, int flag, int *slash_position)
IF_BASH_PATTERN_SUBST('/',) CTLESC, CTLQUOTEMARK, '\0' };
char *p, *q, *r;
- unsigned inquotes;
unsigned protect_against_glob;
unsigned globbing;
@@ -6121,18 +6119,21 @@ rmescapes(char *str, int flag, int *slash_position)
}
}
- inquotes = 0;
globbing = flag & RMESCAPE_GLOB;
protect_against_glob = globbing;
while (*p) {
if ((unsigned char)*p == CTLQUOTEMARK) {
-// Note: both inquotes and protect_against_glob only affect whether
+// Note: protect_against_glob only affects whether
// CTLESC,<ch> gets converted to <ch> or to \<ch>
- inquotes = ~inquotes;
p++;
protect_against_glob = globbing;
continue;
}
+ if (*p == '\\') {
+ /* naked back slash */
+ protect_against_glob = 0;
+ goto copy;
+ }
if ((unsigned char)*p == CTLESC) {
p++;
#if DEBUG
@@ -6168,10 +6169,6 @@ rmescapes(char *str, int flag, int *slash_position)
*q++ = '\\';
}
}
- } else if (*p == '\\' && !inquotes) {
- /* naked back slash */
- protect_against_glob = 0;
- goto copy;
}
#if BASH_PATTERN_SUBST
else if (slash_position && p == str + *slash_position) {
@@ -6669,16 +6666,6 @@ argstr(char *p, int flags)
case CTLESC:
startloc++;
length++;
-
- /*
- * Quoted parameter expansion pattern: remove quote
- * unless inside inner quotes or we have a literal
- * backslash.
- */
- if (((flags | inquotes) & (EXP_QPAT | EXP_QUOTED)) ==
- EXP_QPAT && *p != '\\')
- break;
-
goto addquote;
case CTLVAR:
TRACE(("argstr: evalvar('%s')\n", p));
@@ -6869,15 +6856,24 @@ subevalvar(char *p, char *varname, int strloc, int subtype,
}
#endif
argstr_flags = EXP_TILDE;
- if (subtype != VSASSIGN && subtype != VSQUESTION)
- argstr_flags |= (flag & (EXP_QUOTED | EXP_QPAT) ? EXP_QPAT : EXP_CASE);
+ if (subtype != VSASSIGN
+ && subtype != VSQUESTION
+#if BASH_SUBSTR
+ && subtype != VSSUBSTR
+#endif
+ ) {
+ /* EXP_CASE keeps CTLESC's */
+ argstr_flags = EXP_TILDE | EXP_CASE;
+ }
argstr(p, argstr_flags);
+ //bb_error_msg("str0:'%s'", (char *)stackblock() + strloc);
#if BASH_PATTERN_SUBST
slash_pos = -1;
if (repl) {
slash_pos = expdest - ((char *)stackblock() + strloc);
STPUTC('/', expdest);
- argstr(repl + 1, argstr_flags);
+ //bb_error_msg("repl+1:'%s'", repl + 1);
+ argstr(repl + 1, EXP_TILDE); /* EXP_TILDE: echo "${v/x/~}" expands ~ ! */
*repl = '/';
}
#endif
@@ -10669,6 +10665,34 @@ pgetc_eatbnl(void)
return c;
}
+struct synstack {
+ smalluint syntax;
+ uint8_t innerdq :1;
+ uint8_t varpushed :1;
+ uint8_t dblquote :1;
+ int varnest; /* levels of variables expansion */
+ int dqvarnest; /* levels of variables expansion within double quotes */
+ int parenlevel; /* levels of parens in arithmetic */
+ struct synstack *prev;
+ struct synstack *next;
+};
+
+static void
+synstack_push(struct synstack **stack, struct synstack *next, int syntax)
+{
+ memset(next, 0, sizeof(*next));
+ next->syntax = syntax;
+ next->next = *stack;
+ (*stack)->prev = next;
+ *stack = next;
+}
+
+static ALWAYS_INLINE void
+synstack_pop(struct synstack **stack)
+{
+ *stack = (*stack)->next;
+}
+
/*
* To handle the "." command, a stack of input files is used. Pushfile
* adds a new entry to the stack and popfile restores the previous level.
@@ -11928,19 +11952,13 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs)
size_t len;
struct nodelist *bqlist;
smallint quotef;
- smallint dblquote;
smallint oldstyle;
- IF_FEATURE_SH_MATH(smallint prevsyntax;) /* syntax before arithmetic */
smallint pssyntax; /* we are expanding a prompt string */
- int varnest; /* levels of variables expansion */
- IF_FEATURE_SH_MATH(int arinest;) /* levels of arithmetic expansion */
- IF_FEATURE_SH_MATH(int parenlevel;) /* levels of parens in arithmetic */
- int dqvarnest; /* levels of variables expansion within double quotes */
IF_BASH_DOLLAR_SQUOTE(smallint bash_dollar_squote = 0;)
+ /* syntax stack */
+ struct synstack synbase = { .syntax = syntax };
+ struct synstack *synstack = &synbase;
- bqlist = NULL;
- quotef = 0;
- IF_FEATURE_SH_MATH(prevsyntax = 0;)
#if ENABLE_ASH_EXPAND_PRMT
pssyntax = (syntax == PSSYNTAX);
if (pssyntax)
@@ -11948,11 +11966,10 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs)
#else
pssyntax = 0; /* constant */
#endif
- dblquote = (syntax == DQSYNTAX);
- varnest = 0;
- IF_FEATURE_SH_MATH(arinest = 0;)
- IF_FEATURE_SH_MATH(parenlevel = 0;)
- dqvarnest = 0;
+ if (syntax == DQSYNTAX)
+ synstack->dblquote = 1;
+ quotef = 0;
+ bqlist = NULL;
STARTSTACKSTR(out);
loop:
@@ -11960,9 +11977,9 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs)
CHECKEND(); /* set c to PEOF if at end of here document */
for (;;) { /* until end of line or end of word */
CHECKSTRSPACE(4, out); /* permit 4 calls to USTPUTC */
- switch (SIT(c, syntax)) {
+ switch (SIT(c, synstack->syntax)) {
case CNL: /* '\n' */
- if (syntax == BASESYNTAX)
+ if (synstack->syntax == BASESYNTAX)
goto endword; /* exit outer loop */
USTPUTC(c, out);
nlprompt();
@@ -11982,13 +11999,13 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs)
if (c & 0x100) {
/* Unknown escape. Encode as '\z' */
c = (unsigned char)c;
- if (eofmark == NULL || dblquote)
+ if (eofmark == NULL || synstack->dblquote)
USTPUTC(CTLESC, out);
USTPUTC('\\', out);
}
}
#endif
- if (eofmark == NULL || dblquote)
+ if (eofmark == NULL || synstack->dblquote)
USTPUTC(CTLESC, out);
USTPUTC(c, out);
break;
@@ -12008,20 +12025,13 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs)
/* Backslash is retained if we are in "str"
* and next char isn't dquote-special.
*/
- if (dblquote
+ if (synstack->dblquote
&& c != '\\'
&& c != '`'
&& c != '$'
- && (c != '"' || eofmark != NULL)
+ && (c != '"' || (eofmark != NULL && !synstack->varnest))
+ && (c != '}' || !synstack->varnest)
) {
-//dash survives not doing USTPUTC(CTLESC), but merely by chance:
-//Example: "\z" gets encoded as "\<CTLESC>z".
-//rmescapes() then emits "\", "\z", protecting z from globbing.
-//But it's wrong, should protect _both_ from globbing:
-//everything in double quotes is not globbed.
-//Unlike dash, we have a fix in rmescapes() which emits bare "z"
-//for "<CTLESC>z" since "z" is not glob-special (else unicode may break),
-//and glob would see "\z" and eat "\". Thus:
USTPUTC(CTLESC, out); /* protect '\' from glob */
USTPUTC('\\', out);
}
@@ -12031,56 +12041,62 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs)
}
break;
case CSQUOTE:
- syntax = SQSYNTAX;
+ synstack->syntax = SQSYNTAX;
quotemark:
if (eofmark == NULL) {
USTPUTC(CTLQUOTEMARK, out);
}
break;
case CDQUOTE:
- syntax = DQSYNTAX;
- dblquote = 1;
+ synstack->syntax = DQSYNTAX;
+ synstack->dblquote = 1;
+ toggledq:
+ if (synstack->varnest)
+ synstack->innerdq ^= 1;
goto quotemark;
case CENDQUOTE:
IF_BASH_DOLLAR_SQUOTE(bash_dollar_squote = 0;)
- if (eofmark != NULL && varnest == 0) {
+ if (eofmark != NULL && synstack->varnest == 0) {
USTPUTC(c, out);
- } else {
- if (dqvarnest == 0) {
- syntax = BASESYNTAX;
- dblquote = 0;
- }
- quotef = 1;
- goto quotemark;
+ break;
}
- break;
+
+ if (synstack->dqvarnest == 0) {
+ synstack->syntax = BASESYNTAX;
+ synstack->dblquote = 0;
+ }
+
+ quotef = 1;
+
+ if (c == '"')
+ goto toggledq;
+
+ goto quotemark;
case CVAR: /* '$' */
PARSESUB(); /* parse substitution */
break;
case CENDVAR: /* '}' */
- if (varnest > 0) {
- varnest--;
- if (dqvarnest > 0) {
- dqvarnest--;
- }
+ if (!synstack->innerdq && synstack->varnest > 0) {
+ if (!--synstack->varnest && synstack->varpushed)
+ synstack_pop(&synstack);
+ else if (synstack->dqvarnest > 0)
+ synstack->dqvarnest--;
c = CTLENDVAR;
}
USTPUTC(c, out);
break;
#if ENABLE_FEATURE_SH_MATH
case CLP: /* '(' in arithmetic */
- parenlevel++;
+ synstack->parenlevel++;
USTPUTC(c, out);
break;
case CRP: /* ')' in arithmetic */
- if (parenlevel > 0) {
- parenlevel--;
+ if (synstack->parenlevel > 0) {
+ synstack->parenlevel--;
} else {
if (pgetc_eatbnl() == ')') {
c = CTLENDARI;
- if (--arinest == 0) {
- syntax = prevsyntax;
- }
+ synstack_pop(&synstack);
} else {
/*
* unbalanced parens
@@ -12106,7 +12122,7 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs)
case CIGN:
break;
default:
- if (varnest == 0) {
+ if (synstack->varnest == 0) {
#if BASH_REDIR_OUTPUT
if (c == '&') {
//Can't call pgetc_eatbnl() here, this requires three-deep pungetc()
@@ -12125,12 +12141,12 @@ readtoken1(int c, int syntax, char *eofmark, int striptabs)
endword:
#if ENABLE_FEATURE_SH_MATH
- if (syntax == ARISYNTAX)
+ if (synstack->syntax == ARISYNTAX)
raise_error_syntax("missing '))'");
#endif
- if (syntax != BASESYNTAX && eofmark == NULL)
+ if (synstack->syntax != BASESYNTAX && eofmark == NULL)
raise_error_syntax("unterminated quoted string");
- if (varnest != 0) {
+ if (synstack->varnest != 0) {
/* { */
raise_error_syntax("missing '}'");
}
@@ -12312,7 +12328,7 @@ parsesub: {
|| (c != '(' && c != '{' && !is_name(c) && !is_special(c))
) {
#if BASH_DOLLAR_SQUOTE
- if (syntax != DQSYNTAX && c == '\'')
+ if (synstack->syntax != DQSYNTAX && c == '\'')
bash_dollar_squote = 1;
else
#endif
@@ -12332,6 +12348,8 @@ parsesub: {
}
} else {
/* $VAR, $<specialchar>, ${...}, or PEOA/PEOF */
+ smalluint newsyn = synstack->syntax;
+
USTPUTC(CTLVAR, out);
typeloc = out - (char *)stackblock();
STADJUST(1, out);
@@ -12390,6 +12408,8 @@ parsesub: {
static const char types[] ALIGN1 = "}-+?=";
/* ${VAR...} but not $VAR or ${#VAR} */
/* c == first char after VAR */
+ int cc = c;
+
switch (c) {
case ':':
c = pgetc_eatbnl();
@@ -12414,21 +12434,24 @@ parsesub: {
break;
}
case '%':
- case '#': {
- int cc = c;
+ case '#':
subtype = (c == '#' ? VSTRIMLEFT : VSTRIMRIGHT);
c = pgetc_eatbnl();
- if (c != cc)
- goto badsub;
- subtype++;
+ if (c == cc)
+ subtype++;
+ else
+ pungetc();
+
+ newsyn = BASESYNTAX;
break;
- }
#if BASH_PATTERN_SUBST
case '/':
/* ${v/[/]pattern/repl} */
//TODO: encode pattern and repl separately.
-// Currently ${v/$var_with_slash/repl} is horribly broken
+// Currently cases like: v=1;echo ${v/$((1/1))/ONE}
+// are broken (should print "ONE")
subtype = VSREPLACE;
+ newsyn = BASESYNTAX;
c = pgetc_eatbnl();
if (c != '/')
goto badsub;
@@ -12440,11 +12463,24 @@ parsesub: {
badsub:
pungetc();
}
+
+ if (newsyn == ARISYNTAX && subtype > VSNORMAL)
+ newsyn = DQSYNTAX;
+
+ if (newsyn != synstack->syntax) {
+ synstack_push(&synstack,
+ synstack->prev ?: alloca(sizeof(*synstack)),
+ newsyn);
+
+ synstack->varpushed = 1;
+ synstack->dblquote = newsyn != BASESYNTAX;
+ }
+
((unsigned char *)stackblock())[typeloc] = subtype;
if (subtype != VSNORMAL) {
- varnest++;
- if (dblquote)
- dqvarnest++;
+ synstack->varnest++;
+ if (synstack->dblquote)
+ synstack->dqvarnest++;
}
STPUTC('=', out);
}
@@ -12501,7 +12537,7 @@ parsebackq: {
case '\\':
pc = pgetc(); /* or pgetc_eatbnl()? why (example)? */
if (pc != '\\' && pc != '`' && pc != '$'
- && (!dblquote || pc != '"')
+ && (!synstack->dblquote || pc != '"')
) {
STPUTC('\\', pout);
}
@@ -12576,10 +12612,11 @@ parsebackq: {
* Parse an arithmetic expansion (indicate start of one and set state)
*/
parsearith: {
- if (++arinest == 1) {
- prevsyntax = syntax;
- syntax = ARISYNTAX;
- }
+
+ synstack_push(&synstack,
+ synstack->prev ?: alloca(sizeof(*synstack)),
+ ARISYNTAX);
+ synstack->dblquote = 1;
USTPUTC(CTLARI, out);
goto parsearith_return;
}