Путь: Toys/Pending, команды версии: Ver.4, Ver.9
Комментарии в файле sh.c:
Команд: 14
sh
cd
declare
exit
set
unset
eval
exec
export
jobs
local
shift
source
wait
Исходный текст в файле sh.c #define FOR_sh #include "toys.h" GLOBALS( union { struct { char *c; } sh; struct { char *a; } exec; }; // keep SECONDS here: used to work around compiler limitation in run_command() long long SECONDS; char *isexec, *wcpat; unsigned options, jobcnt, LINENO; int hfd, pid, bangpid, srclvl, recursion; // Callable function array struct sh_function { char *name; struct sh_pipeline { // pipeline segments: linked list of arg w/metadata struct sh_pipeline *next, *prev, *end; int count, here, type, lineno; struct sh_arg { char **v; int c; } arg[1]; } *pipeline; unsigned long refcount; } **functions; long funcslen; // runtime function call stack struct sh_fcall { struct sh_fcall *next, *prev; // This dlist in reverse order: TT.ff current function, TT.ff->prev globals struct sh_vars { long flags; char *str; } *vars; long varslen, varscap, shift, oldlineno; struct sh_function *func; // TODO wire this up struct sh_pipeline *pl; char *ifs, *omnom; struct sh_arg arg; struct arg_list *delete; // Runtime stack of nested if/else/fi and for/do/done contexts. 
struct sh_blockstack { struct sh_blockstack *next; struct sh_pipeline *start, *middle; struct sh_process *pp; // list of processes piping in to us int run, loop, *urd, pout, pipe; struct sh_arg farg; // for/select arg stack, case wildcard deck struct arg_list *fdelete; // farg's cleanup list char *fvar; // for/select's iteration variable name } *blk; } *ff; // TODO ctrl-Z suspend should stop script struct sh_process { struct sh_process *next, *prev; // | && || struct arg_list *delete; // expanded strings // undo redirects, a=b at start, child PID, exit status, has !, job # int *urd, envlen, pid, exit, flags, job, dash; long long when; // when job backgrounded/suspended struct sh_arg *raw, arg; } *pp; // currently running process // job list, command line for $*, scratch space for do_wildcard_files() struct sh_arg jobs, *wcdeck; ) // Prototype because $($($(blah))) nests, leading to run->parse->run loop int do_source(char *name, FILE *ff); // functions contain pipelines contain functions: prototype because loop static void free_pipeline(void *pipeline); // recalculate needs to get/set variables, but setvar_found calls recalculate static struct sh_vars *setvar(char *str); // ordered for greedy matching, so >&; becomes >& ; not > &; // making these const means I need to typecast the const away later to // avoid endless warnings. static const char *redirectors[] = {"<<<", "<<-", "<<", "<&", "<>", "<", ">>", ">&", ">|", ">", "&>>", "&>", 0}; // The order of these has to match the string in set_main() #define OPT_B 0x100 #define OPT_C 0x200 #define OPT_x 0x400 // only export $PWD and $OLDPWD on first cd #define OPT_cd 0x80000000 // struct sh_process->flags #define PFLAG_NOT 1 static void syntax_err(char *s) { struct sh_fcall *ff = TT.ff; // TODO: script@line only for script not interactive. for (ff = TT.ff; ff != TT.ff->prev; ff = ff->next) if (ff->omnom) break; error_msg("syntax error '%s'@%u: %s", ff->omnom ? 
: "-c", TT.LINENO, s); toys.exitval = 2; if (!(TT.options&FLAG_i)) xexit(); } void debug_show_fds() { int x = 0, fd = open("/proc/self/fd", O_RDONLY); DIR *X = fdopendir(fd); struct dirent *DE; char *s, *ss = 0, buf[4096], *sss = buf; if (!X) return; for (; (DE = readdir(X));) { if (atoi(DE->d_name) == fd) continue; s = xreadlink(ss = xmprintf("/proc/self/fd/%s", DE->d_name)); if (s && *s != '.') sss += sprintf(sss, ", %s=%s"+2*!x++, DE->d_name, s); free(s); free(ss); } *sss = 0; dprintf(2, "%d fd:%s\n", getpid(), buf); closedir(X); } static char **nospace(char **ss) { while (isspace(**ss)) ++*ss; return ss; } // append to array with null terminator and realloc as necessary static void arg_add(struct sh_arg *arg, char *data) { // expand with stride 32. Micro-optimization: don't realloc empty stack if (!(arg->c&31) && (arg->c || !arg->v)) arg->v = xrealloc(arg->v, sizeof(char *)*(arg->c+33)); arg->v[arg->c++] = data; arg->v[arg->c] = 0; } // add argument to an arg_list static void *push_arg(struct arg_list **list, void *arg) { struct arg_list *al; if (list) { al = xmalloc(sizeof(struct arg_list)); al->next = *list; al->arg = arg; *list = al; } return arg; } static void arg_add_del(struct sh_arg *arg, char *data,struct arg_list **delete) { arg_add(arg, push_arg(delete, data)); } // Assign one variable from malloced key=val string, returns var struct // TODO implement remaining types #define VAR_NOFREE (1<<10) #define VAR_WHITEOUT (1<<9) #define VAR_DICT (1<<8) #define VAR_ARRAY (1<<7) #define VAR_INT (1<<6) #define VAR_TOLOWER (1<<5) #define VAR_TOUPPER (1<<4) #define VAR_NAMEREF (1<<3) #define VAR_EXPORT (1<<2) #define VAR_READONLY (1<<1) #define VAR_MAGIC (1<<0) // return length of valid variable name static char *varend(char *s) { if (isdigit(*s)) return s; while (*s>' ' && (*s=='_' || !ispunct(*s))) s++; return s; } // TODO: this has to handle VAR_NAMEREF, but return dangling symlink // Also, unset -n, also "local ISLINK" to parent var. 
// Return sh_vars * or 0 if not found. // Sets *pff to function (only if found), only returns whiteouts if pff not NULL static struct sh_vars *findvar(char *name, struct sh_fcall **pff) { int len = varend(name)-name; struct sh_fcall *ff = TT.ff; // advance through locals to global context, ignoring whiteouts if (len) do { struct sh_vars *var = ff->vars+ff->varslen; if (var) while (var--!=ff->vars) { if (strncmp(var->str, name, len) || var->str[len]!='=') continue; if (pff) *pff = ff; else if (var->flags&VAR_WHITEOUT) return 0; return var; } } while ((ff = ff->next)!=TT.ff); return 0; } // get value of variable starting at s. static char *getvar(char *s) { struct sh_vars *var = findvar(s, 0); if (!var) return 0; if (var->flags & VAR_MAGIC) { char c = *var->str; if (c == 'S') sprintf(toybuf, "%lld", (millitime()-TT.SECONDS)/1000); else if (c == 'R') sprintf(toybuf, "%ld", random()&((1<<16)-1)); else if (c == 'L') sprintf(toybuf, "%u", TT.ff->pl->lineno); else if (c == 'G') sprintf(toybuf, "TODO: GROUPS"); else if (c == 'B') sprintf(toybuf, "%d", getpid()); else if (c == 'E') { struct timespec ts; clock_gettime(CLOCK_REALTIME, &ts); sprintf(toybuf, "%lld%c%06ld", (long long)ts.tv_sec, (s[5]=='R')*'.', ts.tv_nsec/1000); } return toybuf; } return varend(var->str)+1; } // Append variable to ff->vars, returning *struct. Does not check duplicates. 
static struct sh_vars *addvar(char *s, struct sh_fcall *ff) { if (ff->varslen == ff->varscap && !(ff->varslen&31)) { ff->varscap += 32; ff->vars = xrealloc(ff->vars, (ff->varscap)*sizeof(*ff->vars)); } if (!s) return ff->vars; ff->vars[ff->varslen].flags = 0; ff->vars[ff->varslen].str = s; return ff->vars+ff->varslen++; } // Recursively calculate string into dd, returns 0 if failed, ss = error point // Recursion resolves operators of lower priority level to a value // Loops through operators at same priority #define NO_ASSIGN 128 static int recalculate(long long *dd, char **ss, int lvl) { long long ee, ff; char *var = 0, *val, cc = **nospace(ss); int ii, noa = lvl&NO_ASSIGN; lvl &= NO_ASSIGN-1; // Unary prefixes can only occur at the start of a parse context if (cc=='!' || cc=='~') { ++*ss; if (!recalculate(dd, ss, noa|15)) return 0; *dd = (cc=='!') ? !*dd : ~*dd; } else if (cc=='+' || cc=='-') { // Is this actually preincrement/decrement? (Requires assignable var.) if (*++*ss==cc) { val = (*ss)++; nospace(ss); if (*ss==(var = varend(*ss))) { *ss = val; var = 0; } } if (!var) { if (!recalculate(dd, ss, noa|15)) return 0; if (cc=='-') *dd = -*dd; } } else if (cc=='(') { ++*ss; if (!recalculate(dd, ss, noa|1)) return 0; if (**ss!=')') return 0; else ++*ss; } else if (isdigit(cc)) { *dd = strtoll(*ss, ss, 0); if (**ss=='#') { if (!*++*ss || isspace(**ss) || ispunct(**ss)) return 0; *dd = strtoll(val = *ss, ss, *dd); if (val == *ss) return 0; } } else if ((var = varend(*ss))==*ss) { // At lvl 0 "" is ok, anything higher needs a non-empty equation if (lvl || (cc && cc!=')')) return 0; *dd = 0; return 1; } // If we got a variable, evaluate its contents to set *dd if (var) { // Recursively evaluate, catching x=y; y=x; echo $((x)) if (TT.recursion++ == 50+200*CFG_TOYBOX_FORK) { perror_msg("recursive occlusion"); --TT.recursion; return 0; } val = getvar(var = *ss) ? 
: ""; ii = recalculate(dd, &val, noa); TT.recursion--; if (!ii) return 0; if (*val) { perror_msg("bad math: %s @ %d", var, (int)(val-var)); return 0; } val = *ss = varend(var); // Operators that assign to a varible must be adjacent to one: // Handle preincrement/predecrement (only gets here if var set before else) if (cc=='+' || cc=='-') { if (cc=='+') ee = ++*dd; else ee = --*dd; } else cc = 0; // handle postinc/postdec if ((**nospace(ss)=='+' || **ss=='-') && (*ss)[1]==**ss) { ee = ((cc = **ss)=='+') ? 1+*dd : -1+*dd; *ss += 2; // Assignment operators: = *= /= %= += -= <<= >>= &= ^= |= } else if (lvl<=2 && (*ss)[ii = (-1 != stridx("*/%+-", **ss)) +2*!smemcmp(*ss, "<<", 2)+2*!smemcmp(*ss, ">>", 2)]=='=') { // TODO: assignments are lower priority BUT must go after variable, // come up with precedence checking tests? cc = **ss; *ss += ii+1; if (!recalculate(&ee, ss, noa|1)) return 0; // TODO lvl instead of 1? if (cc=='*') *dd *= ee; else if (cc=='+') *dd += ee; else if (cc=='-') *dd -= ee; else if (cc=='<') *dd <<= ee; else if (cc=='>') *dd >>= ee; else if (cc=='&') *dd &= ee; else if (cc=='^') *dd ^= ee; else if (cc=='|') *dd |= ee; else if (!cc) *dd = ee; else if (!ee) { perror_msg("%c0", cc); return 0; } else if (cc=='/') *dd /= ee; else if (cc=='%') *dd %= ee; ee = *dd; } if (cc && !noa) setvar(xmprintf("%.*s=%lld", (int)(val-var), var, ee)); } // x**y binds first if (lvl<=14) while (strstart(nospace(ss), "**")) { if (!recalculate(&ee, ss, noa|15)) return 0; if (ee<0) perror_msg("** < 0"); for (ff = *dd, *dd = 1; ee; ee--) *dd *= ff; } // w*x/y%z bind next if (lvl<=13) while ((cc = **nospace(ss)) && strchr("*/%", cc)) { ++*ss; if (!recalculate(&ee, ss, noa|14)) return 0; if (cc=='*') *dd *= ee; else if (!ee) { perror_msg("%c0", cc); return 0; } else if (cc=='%') *dd %= ee; else *dd /= ee; } // x+y-z if (lvl<=12) while ((cc = **nospace(ss)) && strchr("+-", cc)) { ++*ss; if (!recalculate(&ee, ss, noa|13)) return 0; if (cc=='+') *dd += ee; else *dd -= ee; } // x<<y 
>> if (lvl<=11) while ((cc = **nospace(ss)) && strchr("<>", cc) && cc==(*ss)[1]){ *ss += 2; if (!recalculate(&ee, ss, noa|12)) return 0; if (cc == '<') *dd <<= ee; else *dd >>= ee; } // x<y <= > >= if (lvl<=10) while ((cc = **nospace(ss)) && strchr("<>", cc)) { if ((ii = *++*ss=='=')) ++*ss; if (!recalculate(&ee, ss, noa|11)) return 0; if (cc=='<') *dd = ii ? (*dd<=ee) : (*dd<ee); else *dd = ii ? (*dd>=ee) : (*dd>ee); } if (lvl<=9) while ((cc = **nospace(ss)) && strchr("=!", cc) && (*ss)[1]=='='){ *ss += 2; if (!recalculate(&ee, ss, noa|10)) return 0; *dd = (cc=='!') ? *dd != ee : *dd == ee; } if (lvl<=8) while (**nospace(ss)=='&' && (*ss)[1]!='&') { ++*ss; if (!recalculate(&ee, ss, noa|9)) return 0; *dd &= ee; } if (lvl<=7) while (**nospace(ss)=='^') { ++*ss; if (!recalculate(&ee, ss, noa|8)) return 0; *dd ^= ee; } if (lvl<=6) while (**nospace(ss)=='|' && (*ss)[1]!='|') { ++*ss; if (!recalculate(&ee, ss, noa|7)) return 0; *dd |= ee; } if (lvl<=5) while (strstart(nospace(ss), "&&")) { if (!recalculate(&ee, ss, noa|6|NO_ASSIGN*!*dd)) return 0; *dd = *dd && ee; } if (lvl<=4) while (strstart(nospace(ss), "||")) { if (!recalculate(&ee, ss, noa|5|NO_ASSIGN*!!*dd)) return 0; *dd = *dd || ee; } // ? : slightly weird: recurses with lower priority instead of looping // because a ? b ? c : d ? e : f : g == a ? (b ? c : (d ? e : f) : g) if (lvl<=3) if (**nospace(ss)=='?') { ++*ss; if (**nospace(ss)==':' && *dd) ee = *dd; else if (!recalculate(&ee, ss, noa|1|NO_ASSIGN*!*dd) || **nospace(ss)!=':') return 0; ++*ss; if (!recalculate(&ff, ss, noa|1|NO_ASSIGN*!!*dd)) return 0; *dd = *dd ? 
ee : ff; } // lvl<=2 assignment would go here, but handled above because variable // , (slightly weird, replaces dd instead of modifying it via ee/ff) if (lvl<=1) while (**nospace(ss)==',') { ++*ss; if (!recalculate(dd, ss, noa|2)) return 0; } return 1; } // Return length of utf8 char @s fitting in len, writing value into *cc static int getutf8(char *s, int len, int *cc) { unsigned wc; if (len<0) wc = len = 0; else if (1>(len = utf8towc(&wc, s, len))) wc = *s, len = 1; if (cc) *cc = wc; return len; } // utf8 strchr: return wide char matched at wc from chrs, or 0 if not matched // if len, save length of next wc (whether or not it's in list) static int utf8chr(char *wc, char *chrs, int *len) { unsigned wc1, wc2; int ll; if (len) *len = 1; if (!*wc) return 0; if (0<(ll = utf8towc(&wc1, wc, 99))) { if (len) *len = ll; while (*chrs) { if(1>(ll = utf8towc(&wc2, chrs, 99))) chrs++; else { if (wc1 == wc2) return wc1; chrs += ll; } } } return 0; } // return length of match found at this point (try is null terminated array) static int anystart(char *s, char **try) { char *ss = s; while (*try) if (strstart(&s, *try++)) return s-ss; return 0; } // does this entire string match one of the strings in try[] static int anystr(char *s, char **try) { while (*try) if (!strcmp(s, *try++)) return 1; return 0; } // Update $IFS cache in function call stack after variable assignment static void cache_ifs(char *s, struct sh_fcall *ff) { if (!strncmp(s, "IFS=", 4)) do ff->ifs = s+4; while ((ff = ff->next) != TT.ff->prev); } // declare -aAilnrux // ft // TODO VAR_ARRAY VAR_DICT // Assign new name=value string for existing variable. 
s takes x=y or x+=y static struct sh_vars *setvar_found(char *s, int freeable, struct sh_vars *var) { char *ss, *sss, *sd, buf[24]; long ii, jj, kk, flags = var->flags&~VAR_WHITEOUT; long long ll; int cc, vlen = varend(s)-s; if (flags&VAR_READONLY) { error_msg("%.*s: read only", vlen, s); goto bad; } // If += has no old value (addvar placeholder or empty old var) yank the + if (s[vlen]=='+' && (var->str==s || !strchr(var->str, '=')[1])) { ss = xmprintf("%.*s%s", vlen, s, s+vlen+1); if (var->str==s) { if (!freeable++) var->flags |= VAR_NOFREE; } else if (freeable++) free(s); s = ss; } // Handle VAR_NAMEREF mismatch by replacing name if (strncmp(var->str, s, vlen)) { ss = s+vlen+(s[vlen]=='+')+1; ss = xmprintf("%.*s%s", (vlen = varend(var->str)-var->str)+1, var->str, ss); if (freeable++) free(s); s = ss; } // utf8 aware case conversion, two pass (measure, allocate, convert) because // unicode IS stupid enough for upper/lower case to be different utf8 byte // lengths, for example lowercase of U+023a (c8 ba) is U+2c65 (e2 b1 a5) if (flags&(VAR_TOUPPER|VAR_TOLOWER)) { for (jj = kk = 0, sss = 0; jj<2; jj++, sss = sd = xmalloc(vlen+kk+2)) { sd = jj ? stpncpy(sss, s, vlen+1) : (void *)&sss; for (ss = s+vlen+1; (ii = getutf8(ss, 4, &cc)); ss += ii) { kk += wctoutf8(sd, (flags&VAR_TOUPPER) ? towupper(cc) : towlower(cc)); if (jj) { sd += kk; kk = 0; } } } *sd = 0; if (freeable++) free(s); s = sss; } // integer variables treat += differently ss = s+vlen+(s[vlen]=='+')+1; if (flags&VAR_INT) { sd = ss; if (!recalculate(&ll, &sd, 0) || *sd) { perror_msg("bad math: %s @ %d", ss, (int)(sd-ss)); goto bad; } sprintf(buf, "%lld", ll); if (flags&VAR_MAGIC) { if (*s == 'S') { ll *= 1000; TT.SECONDS = (s[vlen]=='+') ? 
TT.SECONDS+ll : millitime()-ll; } else if (*s == 'R') srandom(ll); if (freeable) free(s); // magic can't be whiteout or nofree, and keeps old string return var; } else if (s[vlen]=='+' || strcmp(buf, ss)) { if (s[vlen]=='+') ll += atoll(strchr(var->str, '=')+1); ss = xmprintf("%.*s=%lld", vlen, s, ll); if (freeable++) free(s); s = ss; } } else if (s[vlen]=='+' && !(flags&VAR_MAGIC)) { ss = xmprintf("%s%s", var->str, ss); if (freeable++) free(s); s = ss; } // Replace old string with new one, adjusting nofree status if (flags&VAR_NOFREE) flags ^= VAR_NOFREE; else free(var->str); if (!freeable) flags |= VAR_NOFREE; var->str = s; var->flags = flags; return var; bad: if (freeable) free(s); return 0; } // Creates new variables (local or global) and handles += // returns 0 on error, else sh_vars of new entry. static struct sh_vars *setvar_long(char *s, int freeable, struct sh_fcall *ff) { struct sh_vars *vv = 0, *was; char *ss; if (!s) return 0; ss = varend(s); if (ss[*ss=='+']!='=') { error_msg("bad setvar %s\n", s); if (freeable) free(s); return 0; } // Add if necessary, set value, and remove again if we added but set failed if (!(was = vv = findvar(s, &ff))) (vv = addvar(s, ff))->flags = VAR_NOFREE; if (!setvar_found(s, freeable, vv)) { if (!was) memmove(vv, vv+1, sizeof(ff->vars)*(--ff->varslen-(vv-ff->vars))); return 0; } cache_ifs(vv->str, ff); return vv; } // Set variable via a malloced "name=value" (or "name+=value") string. 
// Returns sh_vars * or 0 for failure (readonly, etc) static struct sh_vars *setvar(char *str) { return setvar_long(str, 0, TT.ff->prev); } // returns whether variable found (whiteout doesn't count) static int unsetvar(char *name) { struct sh_fcall *ff; struct sh_vars *var = findvar(name, &ff); int len = varend(name)-name; if (!var || (var->flags&VAR_WHITEOUT)) return 0; if (var->flags&VAR_READONLY) error_msg("readonly %.*s", len, name); else { // turn local into whiteout if (ff != TT.ff->prev) { var->flags = VAR_WHITEOUT; if (!(var->flags&VAR_NOFREE)) (var->str = xrealloc(var->str, len+2))[len+1] = 0; // free from global context } else { if (!(var->flags&VAR_NOFREE)) free(var->str); memmove(var, var+1, sizeof(ff->vars)*(ff->varslen-(var-ff->vars))); } if (!strcmp(name, "IFS")) do ff->ifs = " \t\n"; while ((ff = ff->next) != TT.ff->prev); } return 1; } static struct sh_vars *setvarval(char *name, char *val) { return setvar(xmprintf("%s=%s", name, val)); } // TODO: keep variable arrays sorted for binary search // create array of variables visible in current function. 
static struct sh_vars **visible_vars(void) { struct sh_arg arg; struct sh_fcall *ff; struct sh_vars *vv; unsigned ii, jj, len; arg.c = 0; arg.v = 0; // Find non-duplicate entries: TODO, sort and binary search for (ff = TT.ff; ; ff = ff->next) { if (ff->vars) for (ii = ff->varslen; ii--;) { vv = ff->vars+ii; len = 1+(varend(vv->str)-vv->str); for (jj = 0; ;jj++) { if (jj == arg.c) arg_add(&arg, (void *)vv); else if (strncmp(arg.v[jj], vv->str, len)) continue; break; } } if (ff->next == TT.ff) break; } return (void *)arg.v; } // malloc declare -x "escaped string" static char *declarep(struct sh_vars *var) { char *types = "rxnuliaA", *esc = "$\"\\`", *in, flags[16], *out = flags, *ss; int len; for (len = 0; types[len]; len++) if (var->flags&(2<<len)) *out++ = types[len]; if (out==flags) *out++ = '-'; *out = 0; len = out-flags; for (in = var->str; *in; in++) len += !!strchr(esc, *in); len += in-var->str; ss = xmalloc(len+15); len = varend(var->str)-var->str; out = ss + sprintf(ss, "declare -%s %.*s", flags, len, var->str); if (var->flags != VAR_MAGIC) { out = stpcpy(out, "=\""); for (in = var->str+len+1; *in; *out++ = *in++) if (strchr(esc, *in)) *out++ = '\\'; *out++ = '"'; } *out = 0; return ss; } // Skip past valid prefix that could go before redirect static char *skip_redir_prefix(char *word) { char *s = word; if (*s == '{') { if (*(s = varend(s+1)) == '}' && s != word+1) s++; else s = word; } else while (isdigit(*s)) s++; return s; } // parse next word from command line. Returns end, or 0 if need continuation // caller eats leading spaces. 
early = skip one quote block (or return start) // quote is depth of existing quote stack in toybuf (usually 0) static char *parse_word(char *start, int early, int quote) { int ii, qq, qc = 0; char *end = start, *ss; // Handle redirections, <(), (( )) that only count at the start of word ss = skip_redir_prefix(end); // 123<<file- parses as 2 args: "123<<" "file-" if (strstart(&ss, "<(") || strstart(&ss, ">(")) { toybuf[quote++]=')'; end = ss; } else if ((ii = anystart(ss, (void *)redirectors))) return ss+ii; if (strstart(&end, "((")) toybuf[quote++] = 254; // Loop to find end of this word while (*end) { // If we're stopping early and already handled a symbol... if (early && end!=start && !quote) break; // barf if we're near overloading quote stack (nesting ridiculously deep) if (quote>4000) { syntax_err("bad quote depth"); return (void *)1; } // Are we in a quote context? if ((qq = quote ? toybuf[quote-1] : 0)) { ii = *end++; if ((qq==')' || qq>=254) && (ii=='(' || ii==')')) { // parentheses nest if (ii=='(') qc++; else if (qc) qc--; else if (qq>=254) { // (( can end with )) or retroactively become two (( if we hit one ) if (ii==')' && *end==')') quote--, end++; else if (qq==254) return start+1; else if (qq==255) toybuf[quote-1] = ')'; } else if (ii==')') quote--; } else if (ii==qq) quote--; // matching end quote else if (qq!='\'') end--, ii = 0; // single quote claims everything if (ii) continue; // fall through for other quote types // space and flow control chars only end word when not quoted in any way } else { if (isspace(*end)) break; ss = end + anystart(end, (char *[]){";;&", ";;", ";&", ";", "||", "|&", "|", "&&", "&", "(", ")", 0}); if (ss!=end) return (end==start) ? ss : end; } // start new quote context? (' not special within ") if (strchr("'\"`"+(qq=='"'), ii = *end++)) toybuf[quote++] = ii; // \? $() ${} $[] ?() *() +() @() !() else { if (ii=='\\') { // TODO why end[1] here? sh -c $'abc\\\ndef' Add test. 
if (!*end || (*end=='\n' && !end[1])) return early ? end : 0; } else if (ii=='$' && -1!=(qq = stridx("({[", *end))) { if (strstart(&end, "((")) { end--; toybuf[quote++] = 255; } else toybuf[quote++] = ")}]"[qq]; } else if (*end=='(' && strchr("?*+@!", ii)) toybuf[quote++] = ')'; else { end--; if (early && !quote) return end; } end++; } } return (quote && !early) ? 0 : end; } // Return next available high (>=10) file descriptor static int next_hfd() { int hfd; for (; TT.hfd<=99999; TT.hfd++) if (-1 == fcntl(TT.hfd, F_GETFL)) break; hfd = TT.hfd; if (TT.hfd>99999) { hfd = -1; if (!errno) errno = EMFILE; } return hfd; } // Perform a redirect, saving displaced filehandle to a high (>10) fd // rd is an int array: [0] = count, followed by from/to pairs to restore later. // If from >= 0 dup from->to after saving to. If from == -1 just save to. // if from == -2 schedule "to" to be closed by unredirect. static int save_redirect(int **rd, int from, int to) { int cnt, hfd, *rr; //dprintf(2, "%d redir %d to %d\n", getpid(), from, to); if (from == to) return 0; // save displaced to, copying to high (>=10) file descriptor to undo later // except if we're saving to environment variable instead (don't undo that) if (from>-2) { if ((hfd = next_hfd())==-1) return 1; if (hfd != dup2(to, hfd)) hfd = -1; else fcntl(hfd, F_SETFD, FD_CLOEXEC); // dup "to" if (from >= 0 && to != dup2(from, to)) { if (hfd >= 0) close(hfd); return 1; } } else { hfd = to; to = -1; } // Append undo information to redirect list so we can restore saved hfd later. if (!((cnt = *rd ? **rd : 0)&31)) *rd = xrealloc(*rd, (cnt+33)*2*sizeof(int)); *(rr = *rd) = ++cnt; rr[2*cnt-1] = hfd; rr[2*cnt] = to; return 0; } // restore displaced filehandles, closing high filehandles they were copied to static void unredirect(int *urd) { int *rr = urd+1, i; if (!urd) return; for (i = 0; i<*urd; i++, rr += 2) if (rr[0] != -1) { // No idea what to do about fd exhaustion here, so Steinbach's Guideline. 
dup2(rr[0], rr[1]); close(rr[0]); } free(urd); } // TODO: waitpid(WNOHANG) to clean up zombies and catch background& ending static void subshell_callback(char **argv) { // This depends on environ having been replaced by caller environ[1] = xmprintf("@%d,%d", getpid(), getppid()); environ[2] = xmprintf("$=%d", TT.pid); // TODO: test $$ in (nommu) } // TODO what happens when you background a function? // turn a parsed pipeline back into a string. static char *pl2str(struct sh_pipeline *pl, int one) { struct sh_pipeline *end = 0, *pp; int len QUIET, i; char *ss; // Find end of block (or one argument) if (one) end = pl->next; else for (end = pl, len = 0; end; end = end->next) if (end->type == 1) len++; else if (end->type == 3 && --len<0) break; // measure, then allocate for (ss = 0;; ss = xmalloc(len+1)) { for (pp = pl; pp != end; pp = pp->next) { if (pp->type == 'F') continue; // TODO fix this for (i = len = 0; i<=pp->arg->c; i++) len += snprintf(ss+len, ss ? INT_MAX : 0, " %s"+!i, pp->arg->v[i] ? : ";"+(pp->next==end)); } if (ss) return ss; } // TODO test output with case and function // TODO add HERE documents back in // TODO handle functions } static struct sh_blockstack *clear_block(struct sh_blockstack *blk) { memset(blk, 0, sizeof(*blk)); blk->start = TT.ff->pl; blk->run = 1; blk->pout = -1; return blk; } // when ending a block, free, cleanup redirects and pop stack. static struct sh_pipeline *pop_block(void) { struct sh_pipeline *pl = 0; struct sh_blockstack *blk = TT.ff->blk; // when ending a block, free, cleanup redirects and pop stack. 
if (blk->pout != -1) close(blk->pout); unredirect(blk->urd); llist_traverse(blk->fdelete, llist_free_arg); free(blk->farg.v); if (TT.ff->blk->next) { pl = blk->start->end; free(llist_pop(&TT.ff->blk)); } else clear_block(blk); return pl; } // Push a new empty block to the stack static void add_block(void) { struct sh_blockstack *blk = clear_block(xmalloc(sizeof(*blk))); blk->next = TT.ff->blk; TT.ff->blk = blk; } // Add entry to runtime function call stack static void call_function(void) { // dlist in reverse order: TT.ff = current function, TT.ff->prev = globals dlist_add_nomalloc((void *)&TT.ff, xzalloc(sizeof(struct sh_fcall))); TT.ff = TT.ff->prev; add_block(); // TODO caller needs to set pl, vars, func // default $* is to copy previous TT.ff->arg.v = TT.ff->next->arg.v; TT.ff->arg.c = TT.ff->next->arg.c; TT.ff->ifs = TT.ff->next->ifs; } static void free_function(struct sh_function *funky) { if (--funky->refcount) return; free(funky->name); llist_traverse(funky->pipeline, free_pipeline); free(funky); } // TODO: old function-vs-source definition is "has variables", but no ff->func? // returns 0 if source popped, nonzero if function popped static int end_function(int funconly) { struct sh_fcall *ff = TT.ff; int func = ff->next!=ff && ff->vars; if (!func && funconly) return 0; llist_traverse(ff->delete, llist_free_arg); ff->delete = 0; while (TT.ff->blk->next) pop_block(); pop_block(); // for a function, free variables and pop context if (!func) return 0; while (ff->varslen) if (!(ff->vars[--ff->varslen].flags&VAR_NOFREE)) free(ff->vars[ff->varslen].str); free(ff->vars); free(TT.ff->blk); if (ff->func) free_function(ff->func); free(dlist_pop(&TT.ff)); return 1; } // TODO check every caller of run_subshell for error, or syntax_error() here // from pipe() failure // TODO need CLOFORK? CLOEXEC doesn't help if we don't exec... 
// Pass environment and command string to child shell, return PID of child static int run_subshell(char *str, int len) { pid_t pid; //dprintf(2, "%d run_subshell %.*s\n", getpid(), len, str); debug_show_fds(); // The with-mmu path is significantly faster. if (CFG_TOYBOX_FORK) { if ((pid = fork())<0) perror_msg("fork"); else if (!pid) { call_function(); if (str) { do_source(0, fmemopen(str, len, "r")); _exit(toys.exitval); } } // On nommu vfork, exec /proc/self/exe, and pipe state data to ourselves. } else { int pipes[2]; unsigned i; char **oldenv = environ, *ss = str ? : pl2str(TT.ff->pl->next, 0); struct sh_vars **vv; // open pipe to child if (pipe(pipes) || 254 != dup2(pipes[0], 254)) return 1; close(pipes[0]); fcntl(pipes[1], F_SETFD, FD_CLOEXEC); // vfork child with clean environment environ = xzalloc(4*sizeof(char *)); *environ = getvar("PATH") ? : "PATH="; pid = xpopen_setup(0, 0, subshell_callback); // TODO what if pid -1? Handle process exhaustion. // free entries added to end of environment by callback (shared heap) free(environ[1]); free(environ[2]); free(environ); environ = oldenv; // marshall context to child close(254); dprintf(pipes[1], "%lld %u %u %u %u\n", TT.SECONDS, TT.options, TT.LINENO, TT.pid, TT.bangpid); for (i = 0, vv = visible_vars(); vv[i]; i++) dprintf(pipes[1], "%u %lu\n%.*s", (unsigned)strlen(vv[i]->str), vv[i]->flags, (int)strlen(vv[i]->str), vv[i]->str); free(vv); // send command dprintf(pipes[1], "0 0\n%.*s\n", len, ss); if (!str) free(ss); close(pipes[1]); } return pid; } // Call subshell with either stdin/stdout redirected, return other end of pipe static int pipe_subshell(char *s, int len, int out) { int pipes[2], *uu = 0, in = !out; // Grab subshell data if (pipe(pipes)) { perror_msg("%.*s", len, s); return -1; } // Perform input or output redirect and launch process (ignoring errors) save_redirect(&uu, pipes[in], in); close(pipes[in]); fcntl(pipes[!in], F_SETFD, FD_CLOEXEC); run_subshell(s, len); fcntl(pipes[!in], F_SETFD, 0); 
unredirect(uu); return pipes[out]; } // grab variable or special param (ala $$) up to len bytes. Return value. // set *used to length consumed. Does not handle $* and $@ char *getvar_special(char *str, int len, int *used, struct arg_list **delete) { char *s = 0, *ss, cc = *str; unsigned uu; *used = 1; if (cc == '-') { s = ss = xmalloc(8); if (TT.options&FLAG_i) *ss++ = 'i'; if (TT.options&OPT_B) *ss++ = 'B'; if (TT.options&FLAG_s) *ss++ = 's'; if (TT.options&FLAG_c) *ss++ = 'c'; *ss = 0; } else if (cc == '?') s = xmprintf("%d", toys.exitval); else if (cc == '$') s = xmprintf("%d", TT.pid); else if (cc == '#') s = xmprintf("%d", TT.ff->arg.c ? TT.ff->arg.c-1 : 0); else if (cc == '!') s = xmprintf("%d"+2*!TT.bangpid, TT.bangpid); else { delete = 0; for (*used = uu = 0; *used<len && isdigit(str[*used]); ++*used) uu = (10*uu)+str[*used]-'0'; if (*used) { if (uu) uu += TT.ff->shift; if (uu<TT.ff->arg.c) s = TT.ff->arg.v[uu]; } else if ((*used = varend(str)-str)) return getvar(str); } if (s) push_arg(delete, s); return s; } #define WILD_SHORT 1 // else longest match #define WILD_CASE 2 // case insensitive #define WILD_ANY 4 // advance through pattern instead of str // Returns length of str matched by pattern, or -1 if not all pattern consumed static int wildcard_matchlen(char *str, int len, char *pattern, int plen, struct sh_arg *deck, int flags) { struct sh_arg ant = {0}; // stack: of str offsets long ss, pp, dd, best = -1; int i, j, c, not; // Loop through wildcards in pattern. for (ss = pp = dd = 0; ;) { if ((flags&WILD_ANY) && best!=-1) break; // did we consume pattern? if (pp==plen) { if (ss>best) best = ss; if (ss==len || (flags&WILD_SHORT)) break; // attempt literal match? 
} else if (dd>=deck->c || pp!=(long)deck->v[dd]) { if (ss<len) { if (flags&WILD_CASE) { ss += getutf8(str+ss, len-ss, &c); c = towupper(c); pp += getutf8(pattern+pp, pp-plen, &i); i = towupper(i); } else c = str[ss++], i = pattern[pp++]; if (c==i) continue; } // Wildcard chars: |+@!*?()[] } else { c = pattern[pp++]; dd++; if (c=='?' || ((flags&WILD_ANY) && c=='*')) { ss += (i = getutf8(str+ss, len-ss, 0)); if (i) continue; } else if (c=='*') { // start with zero length match, don't record consecutive ** if (dd==1 || pp-2!=(long)deck->v[dd-1] || pattern[pp-2]!='*') { arg_add(&ant, (void *)ss); arg_add(&ant, 0); } continue; } else if (c == '[') { pp += (not = !!strchr("!^", pattern[pp])); ss += getutf8(str+ss, len-ss, &c); for (i = 0; pp<(long)deck->v[dd]; i = 0) { pp += getutf8(pattern+pp, plen-pp, &i); if (pattern[pp]=='-') { ++pp; pp += getutf8(pattern+pp, plen-pp, &j); if (not^(i<=c && j>=c)) break; } else if (not^(i==c)) break; } if (i) { pp = 1+(long)deck->v[dd++]; continue; } // ( preceded by +@!*? } else { // TODO ( ) | dd++; continue; } } // match failure if (flags&WILD_ANY) { ss = 0; if (plen==pp) break; continue; } // pop retry stack or return failure (TODO: seek to next | in paren) while (ant.c) { if ((c = pattern[(long)deck->v[--dd]])=='*') { if (len<(ss = (long)ant.v[ant.c-2]+(long)++ant.v[ant.c-1])) ant.c -= 2; else { pp = (long)deck->v[dd++]+1; break; } } else if (c == '(') dprintf(2, "TODO: ("); } if (!ant.c) break; } free (ant.v); return best; } static int wildcard_match(char *s, char *p, struct sh_arg *deck, int flags) { return wildcard_matchlen(s, strlen(s), p, strlen(p), deck, flags); } // TODO: test that * matches "" // skip to next slash in wildcard path, passing count active ranges. 
// start at pattern[off] and deck[*idx], return pattern pos and update *idx
//
// pattern: whole wildcard path
// off:     byte offset in pattern to start scanning from
// deck:    pattern offsets of active wildcard characters
// idx:     in/out, index of the next unconsumed deck entry
// count:   number of wildcard-bearing path components to pass over first
//
// Returns a pointer to the terminating NUL when the pattern runs out,
// otherwise the position saved in old: the most recent '/' seen, or the
// starting position if none was seen yet.
char *wildcard_path(char *pattern, int off, struct sh_arg *deck, int *idx,
  int count)
{
  char *p, *old;
  int i = 0, j = 0; // i = () nesting depth, j = wildcards in this component

  // Skip [] and nested () ranges within deck until / or NUL
  for (p = old = pattern+off;; p++) {
    if (!*p) return p;
    while (*p=='/') {
      old = p++;
      // Just finished a component that had wildcards, with none left to skip
      if (j && !count) return old;
      j = 0;
    }

    // Got wildcard? Return start of name if out of count, else skip [] ()
    if (*idx<deck->c && p-pattern == (long)deck->v[*idx]) {
      if (!j++ && !count--) return old;
      ++*idx;
      // For '[' the following deck entry is used as the end of the range
      if (*p=='[') p = pattern+(long)deck->v[(*idx)++];
      else if (*p=='(') while (*++p) if (p-pattern == (long)deck->v[*idx]) {
        ++*idx;
        if (*p == ')') {
          if (!i) break;
          i--;
        } else if (*p == '(') i++;
      }
    }
  }
}

// TODO ** means this directory as well as ones below it, shopt -s globstar

// Filesystem traversal callback
// pass on: filename, portion of deck, portion of pattern,
// input: pattern+offset, deck+offset. Need to update offsets.
//
// Reads the pattern and deck from TT.wcpat and TT.wcdeck. Returns 0 to drop
// the node, DIRTREE_RECURSE to look at its children, or DIRTREE_SAVE to keep
// it in the result tree.
int do_wildcard_files(struct dirtree *node)
{
  struct dirtree *nn;
  char *pattern, *patend;
  int lvl, ll = 0, ii = 0, rc;
  struct sh_arg ant;

  // Top level entry has no pattern in it
  if (!node->parent) return DIRTREE_RECURSE;

  // Find active pattern range
  // ii = number of ancestors that themselves have a parent
  for (nn = node->parent; nn; nn = nn->parent) if (nn->parent) ii++;
  pattern = wildcard_path(TT.wcpat, 0, TT.wcdeck, &ll, ii);
  while (*pattern=='/') pattern++;
  lvl = ll;
  patend = wildcard_path(TT.wcpat, pattern-TT.wcpat, TT.wcdeck, &ll, 1);

  // Don't include . entries unless explicitly asked for them
  if (*node->name=='.' && *pattern!='.') return 0;

  // Don't descend into non-directory (was called with DIRTREE_SYMFOLLOW)
  if (*patend && !S_ISDIR(node->st.st_mode) && *node->name) return 0;

  // match this filename from pattern to p in deck from lvl to ll
  // The deck entries are temporarily rebased so they are offsets from pattern
  // rather than from TT.wcpat, then restored after the match.
  ant.c = ll-lvl;
  ant.v = TT.wcdeck->v+lvl;
  for (ii = 0; ii<ant.c; ii++) TT.wcdeck->v[lvl+ii] -= pattern-TT.wcpat;
  rc = wildcard_matchlen(node->name, strlen(node->name), pattern,
    patend-pattern, &ant, 0);
  for (ii = 0; ii<ant.c; ii++) TT.wcdeck->v[lvl+ii] += pattern-TT.wcpat;

  // Return failure or save exact match.
  if (rc<0 || node->name[rc]) return 0;
  if (!*patend) return DIRTREE_SAVE;

  // Are there more wildcards to test children against?
  if (TT.wcdeck->c!=ll) return DIRTREE_RECURSE;

  // No more wildcards: check for child and return failure if it isn't there.
  pattern = xmprintf("%s%s", node->name, patend);
  rc = faccessat(dirtree_parentfd(node), pattern, F_OK, AT_SYMLINK_NOFOLLOW);
  free(pattern);
  if (rc) return 0;

  // Save child and self. (Child could be trailing / but only one saved.)
  // The rest of the pattern is attached as a hand-built child node.
  while (*patend=='/' && patend[1]) patend++;
  node->child = xzalloc(sizeof(struct dirtree)+1+strlen(patend));
  node->child->parent = node;
  strcpy(node->child->name, patend);

  return DIRTREE_SAVE;
}

// Record active wildcard chars in output string
// *new start of string, oo offset into string, deck is found wildcards,
//
// Call once per unquoted output character. Calling with new[oo] == 0 (for
// example collect_wildcards("", 0, deck)) finishes the deck: entries for
// still-pending '(' are removed and the bookkeeping slot at deck->v[0] is
// dropped.
static void collect_wildcards(char *new, long oo, struct sh_arg *deck)
{
  long bracket, *vv;
  char cc = new[oo];

  // Record unescaped/unquoted wildcard metadata for later processing

  // First use of an empty deck reserves slot 0 for bookkeeping
  if (!deck->c) arg_add(deck, 0);
  vv = (long *)deck->v;

  // vv[0] used for paren level (bottom 16 bits) + bracket start offset<<16

  // at end loop backwards through live wildcards to remove pending unmatched (
  if (!cc) {
    long ii = 0, jj = 65535&*vv, kk;

    for (kk = deck->c; jj;) {
      if (')' == (cc = new[vv[--kk]])) ii++;
      else if ('(' == cc) {
        if (ii) ii--;
        else {
          memmove(vv+kk, vv+kk+1, sizeof(long)*(deck->c-- -kk));
          jj--;
        }
      }
    }
    // Shift everything down over slot 0
    if (deck->c) memmove(vv, vv+1, sizeof(long)*deck->c--);

    return;
  }

  // Start +( range, or remove first char that isn't wildcard without (
  if (deck->c>1 && vv[deck->c-1] == oo-1 && strchr("+@!*?", new[oo-1])) {
    if (cc == '(') {
      vv[deck->c-1] = oo;
      return;
    } else if (!strchr("*?", new[oo-1])) deck->c--;
  }

  // fall through to add wildcard, popping parentheses stack as necessary
  if (strchr("|+@!*?", cc));
  else if (cc == ')' && (65535&*vv)) --*vv;

  // complete [range], discard wildcards within, add [, fall through to add ]
  else if (cc == ']' && (bracket = *vv>>16)) {

    // don't end range yet for [] or [^]
    if (bracket+1 == oo || (bracket+2 == oo && strchr("!^", new[oo-1])))
      return;
    while (deck->c>1 && vv[deck->c-1]>=bracket) deck->c--;
    *vv &= 65535;
    arg_add(deck, (void *)bracket);

  // Not a wildcard
  } else {
    // [ is speculative, don't add to deck yet, just record we saw it
    if (cc == '[' && !(*vv>>16)) *vv = (oo<<16)+(65535&*vv);
    return;
  }

  // add active wildcard location
  arg_add(deck, (void *)oo);
}

// wildcard expand data against filesystem, and add results to arg list
Note: this wildcard deck has extra argument at start (leftover from parsing) static void wildcard_add_files(struct sh_arg *arg, char *pattern, struct sh_arg *deck, struct arg_list **delete) { struct dirtree *dt; char *pp; int ll = 0; // fast path: when no wildcards, add pattern verbatim collect_wildcards("", 0, deck); if (!deck->c) return arg_add(arg, pattern); // Traverse starting with leading patternless path. pp = wildcard_path(TT.wcpat = pattern, 0, TT.wcdeck = deck, &ll, 0); pp = (pp==pattern) ? 0 : xstrndup(pattern, pp-pattern); dt = dirtree_flagread(pp, DIRTREE_STATLESS|DIRTREE_SYMFOLLOW, do_wildcard_files); free(pp); deck->c = 0; // If no match save pattern, else free tree saving each path found. if (!dt) return arg_add(arg, pattern); while (dt) { while (dt->child) dt = dt->child; arg_add(arg, push_arg(delete, dirtree_path(dt, 0))); do { pp = (void *)dt; if ((dt = dt->parent)) dt->child = dt->child->next; free(pp); } while (dt && !dt->child); } // TODO: test .*/../ } // Copy string until } including escaped } // if deck collect wildcards, and store terminator at deck->v[deck->c] char *slashcopy(char *s, char *c, struct sh_arg *deck) { char *ss; long ii, jj; for (ii = 0; !strchr(c, s[ii]); ii++) if (s[ii] == '\\') ii++; ss = xmalloc(ii+1); for (ii = jj = 0; !strchr(c, s[jj]); ii++) if ('\\'==(ss[ii] = s[jj++])) ss[ii] = s[jj++]; else if (deck) collect_wildcards(ss, ii, deck); ss[ii] = 0; if (deck) { arg_add(deck, 0); deck->v[--deck->c] = (void *)jj; collect_wildcards("", 0, deck); } return ss; } #define NO_QUOTE (1<<0) // quote removal #define NO_PATH (1<<1) // path expansion (wildcards) #define NO_SPLIT (1<<2) // word splitting #define NO_BRACE (1<<3) // {brace,expansion} #define NO_TILDE (1<<4) // ~username/path #define NO_NULL (1<<5) // Expand to "" instead of NULL #define SEMI_IFS (1<<6) // Use ' ' instead of IFS to combine $* // expand str appending to arg using above flag defines, add mallocs to delete // if ant not null, save wildcard deck there 
instead of expanding vs filesystem // returns 0 for success, 1 for error. // If measure stop at *measure and return input bytes consumed in *measure static int expand_arg_nobrace(struct sh_arg *arg, char *str, unsigned flags, struct arg_list **delete, struct sh_arg *ant, long *measure) { char cc, qq = flags&NO_QUOTE, sep[6], *new = str, *s, *ss = ss, *ifs, *slice; int ii = 0, oo = 0, xx, yy, dd, jj, kk, ll, mm; struct sh_arg deck = {0}; // Tilde expansion if (!(flags&NO_TILDE) && *str == '~') { struct passwd *pw = 0; ss = 0; while (str[ii] && str[ii]!=':' && str[ii]!='/') ii++; if (ii==1) { if (!(ss = getvar("HOME")) || !*ss) pw = bufgetpwuid(getuid()); } else { // TODO bufgetpwnam pw = getpwnam(s = xstrndup(str+1, ii-1)); free(s); } if (pw) { ss = pw->pw_dir; if (!ss || !*ss) ss = "/"; } if (ss) { oo = strlen(ss); s = xmprintf("%s%s", ss, str+ii); if (str != new) free(new); new = s; } } // parameter/variable expansion and dequoting if (!ant) ant = &deck; for (; (cc = str[ii++]); str!=new && (new[oo] = 0)) { struct sh_arg aa = {0}; int nosplit = 0; if (measure && cc==*measure) break; // skip literal chars if (!strchr("'\"\\$`"+2*(flags&NO_QUOTE), cc)) { if (str != new) new[oo] = cc; if (!(flags&NO_PATH) && !(qq&1)) collect_wildcards(new, oo, ant); oo++; continue; } // allocate snapshot if we just started modifying if (str == new) { new = xstrdup(new); new[oo] = 0; } ifs = slice = 0; // handle escapes and quoting if (cc == '"') qq++; else if (cc == '\'') { if (qq&1) new[oo++] = cc; else { qq += 2; while ((cc = str[ii++]) != '\'') new[oo++] = cc; } // both types of subshell work the same, so do $( here not in '$' below // TODO $((echo hello) | cat) ala $(( becomes $( ( retroactively } else if (cc == '`' || (cc == '$' && (str[ii]=='(' || str[ii]=='['))) { off_t pp = 0; s = str+ii-1; kk = parse_word(s, 1, 0)-s; if (str[ii] == '[' || *toybuf == 255) { // (( parsed together, not (( ) ) struct sh_arg aa = {0}; long long ll; // Expand $VARS in math string ss = 
str+ii+1+(str[ii]=='('); push_arg(delete, ss = xstrndup(ss, kk - (3+2*(str[ii]!='[')))); expand_arg_nobrace(&aa, ss, NO_PATH|NO_SPLIT, delete, 0, 0); s = ss = (aa.v && *aa.v) ? *aa.v : ""; free(aa.v); // Recursively calculate result if (!recalculate(&ll, &s, 0) || *s) { error_msg("bad math: %s @ %ld", ss, (long)(s-ss)+1); goto fail; } ii += kk-1; push_arg(delete, ifs = xmprintf("%lld", ll)); } else { // Run subshell and trim trailing newlines s += (jj = 1+(cc == '$')); ii += --kk; kk -= jj; // Special case echo $(<input) for (ss = s; isspace(*ss); ss++); if (*ss != '<') ss = 0; else { while (isspace(*++ss)); if (!(ll = parse_word(ss, 0, 0)-ss)) ss = 0; else { jj = ll+(ss-s); while (isspace(s[jj])) jj++; if (jj != kk) ss = 0; else { jj = xcreate_stdio(ss = xstrndup(ss, ll), O_RDONLY|WARN_ONLY, 0); free(ss); } } } // TODO what does \ in `` mean? What is echo `printf %s \$x` supposed to do? // This has to be async so pipe buffer doesn't fill up if (!ss) jj = pipe_subshell(s, kk, 0); // TODO $(true &&) syntax_err() if ((ifs = readfd(jj, 0, &pp))) for (kk = strlen(ifs); kk && ifs[kk-1]=='\n'; ifs[--kk] = 0); close(jj); } } else if (cc=='\\' || !str[ii]) { if (!(qq&1) || (str[ii] && strchr("\"\\$`", str[ii]))) new[oo++] = str[ii] ? str[ii++] : cc; // $VARIABLE expansions } else if (cc == '$' && str[ii]) { cc = *(ss = str+ii++); if (cc=='\'') { for (s = str+ii; *s != '\''; oo += wcrtomb(new+oo, unescape2(&s, 0),0)); ii = s-str+1; continue; } else if (cc=='"' && !(qq&1)) { qq++; continue; } else if (cc == '{') { // Skip escapes to find }, parse_word() guarantees ${} terminates for (cc = *++ss; str[ii] != '}'; ii++) if (str[ii]=='\\') ii++; ii++; if (cc == '}') ifs = (void *)1; else if (strchr("#!", cc)) ss++; if (!(jj = varend(ss)-ss)) while (isdigit(ss[jj])) jj++; if (!jj && strchr("#$_*", *ss)) jj++; // parameter or operator? 
Maybe not a prefix: ${#-} vs ${#-x} if (!jj && strchr("-?@", *ss)) if (ss[++jj]!='}' && ss[-1]!='{') ss--; slice = ss+jj; // start of :operation if (!jj) { // literal ${#} or ${!} wasn't a prefix if (strchr("#!", cc)) ifs = getvar_special(--ss, 1, &kk, delete); else ifs = (void *)1; // unrecognized char ala ${~} } else if (ss[-1]=='{'); // not prefix, fall through else if (cc == '#') { // TODO ${#x[@]} dd = !!strchr("@*", *ss); // For ${#@} or ${#*} do normal ${#} ifs = getvar_special(ss-dd, jj, &kk, delete) ? : ""; if (!dd) push_arg(delete, ifs = xmprintf("%zu", strlen(ifs))); // ${!@} ${!@Q} ${!x} ${!x@} ${!x@Q} ${!x#} ${!x[} ${!x[*]} } else if (cc == '!') { // TODO: ${var[@]} array // special case: normal varname followed by @} or *} = prefix list if (ss[jj] == '*' || (ss[jj] == '@' && !isalpha(ss[jj+1]))) { struct sh_vars **vv = visible_vars(); for (slice++, kk = 0; vv[kk]; kk++) { if (vv[kk]->flags&VAR_WHITEOUT) continue; if (!strncmp(s = vv[kk]->str, ss, jj)) arg_add(&aa, push_arg(delete, s = xstrndup(s, stridx(s, '=')))); } if (aa.c) push_arg(delete, aa.v); free(vv); // else dereference to get new varname, discarding if none, check err } else { // First expansion if (strchr("@*", *ss)) { // special case ${!*}/${!@} expand_arg_nobrace(&aa, "\"$*\"", NO_PATH|NO_SPLIT, delete, 0, 0); ifs = *aa.v; free(aa.v); memset(&aa, 0, sizeof(aa)); jj = 1; } else ifs = getvar_special(ss, jj, &jj, delete); slice = ss+jj; // Second expansion if (!jj) ifs = (void *)1; else if (ifs && *(ss = ifs)) { if (strchr("@*", cc)) { aa.c = TT.ff->arg.c-1; aa.v = TT.ff->arg.v+1; jj = 1; } else ifs = getvar_special(ifs, strlen(ifs), &jj, delete); if (ss && ss[jj]) { ifs = (void *)1; slice = ss+strlen(ss); } } } } // Substitution error? 
if (ifs == (void *)1) { barf: if (!(((unsigned long)ifs)>>1)) ifs = "bad substitution"; error_msg("%.*s: %s", (int)(slice-ss), ss, ifs); goto fail; } } else jj = 1; // Resolve unprefixed variables if (strchr("{$", ss[-1])) { if (strchr("@*", cc)) { aa.c = TT.ff->arg.c-1; aa.v = TT.ff->arg.v+1; } else { ifs = getvar_special(ss, jj, &jj, delete); if (!jj) { if (ss[-1] == '{') goto barf; new[oo++] = '$'; ii--; continue; } else if (ss[-1] != '{') ii += jj-1; } } } // combine before/ifs/after sections & split words on $IFS in ifs // keep oo bytes of str before (already parsed) // insert ifs (active for wildcards+splitting) // keep str+ii after (still to parse) // Fetch separator to glue string back together with *sep = 0; if (((qq&1) && cc=='*') || (flags&NO_SPLIT)) { unsigned wc; nosplit++; if (flags&SEMI_IFS) strcpy(sep, " "); // TODO what if separator is bigger? Need to grab 1 column of combining chars else if (0<(dd = utf8towc(&wc, TT.ff->ifs, 4))) sprintf(sep, "%.*s", dd, TT.ff->ifs); } // when aa proceed through entries until NULL, else process ifs once mm = yy = 0; do { // get next argument if (aa.c) ifs = aa.v[mm++] ? : ""; // Are we performing surgery on this argument? if (slice && *slice != '}') { dd = slice[xx = (*slice == ':')]; if (!ifs || (xx && !*ifs)) { if (strchr("-?=", dd)) { // - use default = assign default ? error push_arg(delete, ifs = slashcopy(slice+xx+1, "}", 0)); if (dd == '?' || (dd == '=' && !(setvar(s = xmprintf("%.*s=%s", (int)(slice-ss), ss, ifs))))) goto barf; // TODO ? 
exits past "source" boundary } } else if (dd == '-'); // NOP when ifs not empty // use alternate value else if (dd == '+') push_arg(delete, ifs = slashcopy(slice+xx+1, "}", 0)); else if (xx) { // ${x::} long long la = 0, lb = LLONG_MAX, lc = 1; ss = ++slice; if ((lc = recalculate(&la, &ss, 0)) && *ss == ':') { ss++; lc = recalculate(&lb, &ss, 0); } if (!lc || *ss != '}') { for (s = ss; *s != '}' && *s != ':'; s++); error_msg("bad %.*s @ %ld", (int)(s-slice), slice,(long)(ss-slice)); //TODO fix error message goto fail; } // This isn't quite what bash does, but close enough. if (!(lc = aa.c)) lc = strlen(ifs); else if (!la && !yy && strchr("@*", *slice)) { aa.v--; // ${*:0} shows $0 even though default is 1-indexed aa.c++; yy++; } if (la<0 && (la += lc)<0) continue; if (lb<0) lb = lc+lb-la; if (aa.c) { if (mm<la || mm>=la+lb) continue; } else if (la>=lc || lb<0) ifs = ""; else if (la+lb>=lc) ifs += la; else if (!*delete || ifs != (*delete)->arg) push_arg(delete, ifs = xmprintf("%.*s", (int)lb, ifs+la)); else { for (dd = 0; dd<lb ; dd++) if (!(ifs[dd] = ifs[dd+la])) break; ifs[dd] = 0; } } else if (strchr("#%^,", *slice)) { struct sh_arg wild = {0}; char buf[8]; s = slashcopy(slice+(xx = slice[1]==*slice)+1, "}", &wild); // ${x^pat} ${x^^pat} uppercase ${x,} ${x,,} lowercase (no pat = ?) if (strchr("^,", *slice)) { for (ss = ifs; *ss; ss += dd) { dd = getutf8(ss, 4, &jj); if (!*s || 0<wildcard_match(ss, s, &wild, WILD_ANY)) { ll = ((*slice=='^') ? towupper : towlower)(jj); // Of COURSE unicode case switch can change utf8 encoding length // Lower case U+0069 becomes u+0130 in turkish. 
// Greek U+0390 becomes 3 characters TODO test this if (ll != jj) { yy = ss-ifs; if (!*delete || (*delete)->arg!=ifs) push_arg(delete, ifs = xstrdup(ifs)); if (dd != (ll = wctoutf8(buf, ll))) { if (dd<ll) ifs = (*delete)->arg = xrealloc(ifs, strlen(ifs)+1+dd-ll); memmove(ifs+yy+dd-ll, ifs+yy+ll, strlen(ifs+yy+ll)+1); } memcpy(ss = ifs+yy, buf, dd = ll); } } if (!xx) break; } // ${x#y} remove shortest prefix ${x##y} remove longest prefix } else if (*slice=='#') { if (0<(dd = wildcard_match(ifs, s, &wild, WILD_SHORT*!xx))) ifs += dd; // ${x%y} ${x%%y} suffix } else if (*slice=='%') { for (ss = ifs+strlen(ifs), yy = -1; ss>=ifs; ss--) { if (0<(dd = wildcard_match(ss, s, &wild, WILD_SHORT*xx))&&!ss[dd]) { yy = ss-ifs; if (!xx) break; } } if (yy != -1) { if (delete && *delete && (*delete)->arg==ifs) ifs[yy] = 0; else push_arg(delete, ifs = xstrndup(ifs, yy)); } } free(s); free(wild.v); // ${x/pat/sub} substitute ${x//pat/sub} global ${x/#pat/sub} begin // ${x/%pat/sub} end ${x/pat} delete pat (x can be @ or *) } else if (*slice=='/') { struct sh_arg wild = {0}; s = slashcopy(ss = slice+(xx = !!strchr("/#%", slice[1]))+1, "/}", &wild); ss += (long)wild.v[wild.c]; ss = (*ss == '/') ? slashcopy(ss+1, "}", 0) : 0; jj = ss ? strlen(ss) : 0; ll = 0; for (ll = 0; ifs[ll];) { // TODO nocasematch option if (0<(dd = wildcard_match(ifs+ll, s, &wild, 0))) { char *bird = 0; if (slice[1]=='%' && ifs[ll+dd]) { ll++; continue; } if (delete && *delete && (*delete)->arg==ifs) { if (jj==dd) memcpy(ifs+ll, ss, jj); else if (jj<dd) sprintf(ifs+ll, "%s%s", ss, ifs+ll+dd); else bird = ifs; } else bird = (void *)1; if (bird) { ifs = xmprintf("%.*s%s%s", ll, ifs, ss ? 
: "", ifs+ll+dd); if (bird != (void *)1) { free(bird); (*delete)->arg = ifs; } else push_arg(delete, ifs); } if (slice[1]!='/') break; } else ll++; if (slice[1]=='#') break; } // ${x@QEPAa} Q=$'blah' E=blah without the $'' wrap, P=expand as $PS1 // A=declare that recreates var a=attribute flags // x can be @* // } else if (*slice=='@') { // TODO test x can be @ or * } else { // TODO test ${-abc} as error ifs = slice; goto barf; } // TODO: $((a=42)) can change var, affect lifetime // must replace ifs AND any previous output arg[] within pointer strlen() // also x=;echo $x${x:=4}$x } // Nothing left to do? if (!ifs) break; if (!*ifs && !qq) continue; // loop within current ifs checking region to split words do { // find end of (split) word if ((qq&1) || nosplit) ss = ifs+strlen(ifs); else for (ss = ifs; *ss; ss += kk) if (utf8chr(ss, TT.ff->ifs, &kk)) break; // when no prefix, not splitting, no suffix: use existing memory if (!oo && !*ss && !((mm==aa.c) ? str[ii] : nosplit)) { if (qq || ss!=ifs) { if (!(flags&NO_PATH)) for (jj = 0; ifs[jj]; jj++) collect_wildcards(ifs, jj, ant); wildcard_add_files(arg, ifs, &deck, delete); } continue; } // resize allocation and copy next chunk of IFS-free data jj = (mm == aa.c) && !*ss; new = xrealloc(new, oo + (ss-ifs) + ((nosplit&!jj) ? strlen(sep) : 0) + (jj ? strlen(str+ii) : 0) + 1); dd = sprintf(new + oo, "%.*s%s", (int)(ss-ifs), ifs, (nosplit&!jj) ? sep : ""); if (flags&NO_PATH) oo += dd; else while (dd--) collect_wildcards(new, oo++, ant); if (jj) break; // If splitting, keep quoted, non-blank, or non-whitespace separator if (!nosplit) { if (qq || *new || *ss) { push_arg(delete, new = xrealloc(new, strlen(new)+1)); wildcard_add_files(arg, new, &deck, delete); new = xstrdup(str+ii); } qq &= 1; oo = 0; } // Skip trailing seperator (combining whitespace) kk = 0; while ((jj = utf8chr(ss, TT.ff->ifs, &ll))) { if (!iswspace(jj) && kk++) break; ss += ll; } } while (*(ifs = ss)); } while (!(mm == aa.c)); } // TODO globbing * ? 
[] +() happens after variable resolution // TODO test word splitting completely eliminating argument when no non-$IFS data left // wordexp keeps pattern when no matches // TODO test NO_SPLIT cares about IFS, see also trailing \n // Record result. if (*new || qq) { if (str != new) push_arg(delete, new); wildcard_add_files(arg, new, &deck, delete); new = 0; } // return success after freeing arg = 0; fail: if (str != new) free(new); free(deck.v); if (ant!=&deck && ant->v) collect_wildcards("", 0, ant); if (measure) *measure = --ii; return !!arg; } struct sh_brace { struct sh_brace *next, *prev, *stack; int active, cnt, idx, commas[]; }; static int brace_end(struct sh_brace *bb) { return bb->commas[(bb->cnt<0 ? 0 : bb->cnt)+1]; } // expand braces (ala {a,b,c}) and call expand_arg_nobrace() each permutation static int expand_arg(struct sh_arg *arg, char *old, unsigned flags, struct arg_list **delete) { struct sh_brace *bb = 0, *blist = 0, *bstk, *bnext; int i, j, k, x; char *s, *ss; // collect brace spans if ((TT.options&OPT_B) && !(flags&NO_BRACE)) for (i = 0; ; i++) { // skip quoted/escaped text while ((s = parse_word(old+i, 1, 0)) != old+i) i += s-(old+i); // start a new span if (old[i] == '{') { dlist_add_nomalloc((void *)&blist, (void *)(bb = xzalloc(sizeof(struct sh_brace)+34*4))); bb->commas[0] = i; // end of string: abort unfinished spans and end loop } else if (!old[i]) { for (bb = blist; bb;) { if (!bb->active) { if (bb==blist) { dlist_pop(&blist); bb = blist; } else dlist_pop(&bb); } else bb = (bb->next==blist) ? 0 : bb->next; } break; // no active span? } else if (!bb) continue; // end current span else if (old[i] == '}') { bb->active = bb->commas[bb->cnt+1] = i; // Is this a .. span? j = 1+*bb->commas; if (!bb->cnt && i-j>=4) { // a..z span? Single digit numbers handled here too. TODO: utf8 if (old[j+1]=='.' 
&& old[j+2]=='.') { bb->commas[2] = old[j]; bb->commas[3] = old[j+3]; k = 0; if (old[j+4]=='}' || (sscanf(old+j+4, "..%u}%n", bb->commas+4, &k) && k)) bb->cnt = -1; } // 3..11 numeric span? if (!bb->cnt) { for (k=0, j = 1+*bb->commas; k<3; k++, j += x) if (!sscanf(old+j, "..%u%n"+2*!k, bb->commas+2+k, &x)) break; if (old[j]=='}') bb->cnt = -2; } // Increment goes in the right direction by at least 1 if (bb->cnt) { if (!bb->commas[4]) bb->commas[4] = 1; if ((bb->commas[3]-bb->commas[2]>0) != (bb->commas[4]>0)) bb->commas[4] *= -1; } } // discard commaless span that wasn't x..y if (!bb->cnt) free(dlist_pop((blist==bb) ? &blist : &bb)); // Set bb to last unfinished brace (if any) for (bb = blist ? blist->prev : 0; bb && bb->active; bb = (bb==blist) ? 0 : bb->prev); // add a comma to current span } else if (old[i] == ',') { if (bb->cnt && !(bb->cnt&31)) { dlist_lpop(&blist); dlist_add_nomalloc((void *)&blist, (void *)(bb = xrealloc(bb, sizeof(struct sh_brace)+(bb->cnt+34)*4))); } bb->commas[++bb->cnt] = i; } } // TODO NO_SPLIT with braces? (Collate with spaces?) // If none, pass on verbatim if (!blist) return expand_arg_nobrace(arg, old, flags, delete, 0, 0); // enclose entire range in top level brace. (bstk = xzalloc(sizeof(struct sh_brace)+8))->commas[1] = strlen(old)+1; bstk->commas[0] = -1; // loop through each combination for (;;) { // Brace expansion can't be longer than original string. Keep start to { s = ss = xmalloc(bstk->commas[1]); // Append output from active braces to string for (bb = blist; bb; bb = (bnext == blist) ? 0 : bnext) { // If this brace already tip of stack, pop it. (We'll re-add in a moment.) 
if (bstk == bb) bstk = bstk->stack; // if bb is within bstk, save prefix text from bstk's "," to bb's "{" if (brace_end(bstk)>bb->commas[0]) { i = bstk->commas[bstk->idx]+1; s = stpncpy(s, old+i, bb->commas[0]-i); } else bstk = bstk->stack; // bb past bstk so done with old bstk, pop it // push self onto stack as active bb->stack = bstk; bb->active = 1; bstk = bnext = bb; // Find next active range: skip inactive spans from earlier/later commas while ((bnext = (bnext->next==blist) ? 0 : bnext->next)) { // past end of this brace (always true for a..b ranges) if ((i = bnext->commas[0])>brace_end(bb)) break; // in this brace but not this section if (i<bb->commas[bb->idx] || i>bb->commas[bb->idx+1]) { bnext->active = 0; bnext->stack = 0; // in this section } else break; } // is next span past this range? if (!bnext || bb->cnt<0 || bnext->commas[0]>bb->commas[bb->idx+1]) { // output uninterrupted span if (bb->cnt<0) { k = bb->commas[2]+bb->commas[4]*bb->idx; s += sprintf(s, (bb->cnt==-1) ? "\\%c"+!ispunct(k) : "%d", k); } else { i = bb->commas[bstk->idx]+1; s = stpncpy(s, old+i, bb->commas[bb->idx+1]-i); } // While not sibling, output tail and pop while (!bnext || bnext->commas[0]>brace_end(bstk)) { if (!(bb = bstk->stack)) break; i = brace_end(bstk)+1; // start of span j = bb->commas[bb->idx+1]; // enclosing comma span (can't be a..b) while (bnext) { if (bnext->commas[0]<j) { j = bnext->commas[0];// sibling break; } else if (brace_end(bb)>bnext->commas[0]) bnext = (bnext->next == blist) ? 0 : bnext->next; else break; } s = stpncpy(s, old+i, j-i); // if next is sibling but parent _not_ a sibling, don't pop if (bnext && bnext->commas[0]<brace_end(bb)) break; bstk = bb; } } } // Save result, aborting on expand error if (expand_arg_nobrace(arg, push_arg(delete, ss), flags, delete, 0, 0)) { llist_traverse(blist, free); return 1; } // increment for (bb = blist->prev; bb; bb = (bb == blist) ? 
0 : bb->prev) { if (!bb->stack) continue; else if (bb->cnt<0) { if (abs(bb->commas[2]-bb->commas[3]) < abs(++bb->idx*bb->commas[4])) |