
// Путь: Toys/POSIX, команды версии: Ver.4 Ver.9 sed. Комментарии в файле sed.c:
// Исходный текст в файле sed.c
#define FOR_sed
#include "toys.h"
// Per-command global state, accessed throughout this file as TT.<field>.
GLOBALS(
// Argument of -i; do_sed_file() appends it to the file name to build the
// backup name when it is non-empty.
char *i;
// Pending -f (script file) and -e (script text) arguments; sed_main() pops
// them as it parses them.
struct arg_list *f, *e;
// processed pattern list
struct double_list *pattern;
// nextline: lookahead input line held back by sed_line() for EOF detection.
// remember: buffer saved/restored by the h/H/g/G/x commands (also borrowed
//   by parse_pattern() to hold the s/// regex text until flags are known).
// tarxform: output accumulated by emit() in --tarxform mode.
char *nextline, *remember, *tarxform;
// restart: command to resume at after n/N, stored +1 so a NULL "next" still
//   registers as set. lastregex: regex reused when a pattern is empty.
void *restart, *lastregex;
// nextlen: length of nextline (parse_pattern() also uses it to count open
//   '{'). rememberlen: length of remember. count: current input line number.
long nextlen, rememberlen, count;
// fdout: descriptor emit() writes to. noeol: set when the last emit() did not
//   end with the delimiter, so the next one writes it first.
int fdout, noeol;
// xx: wrap width for the 'l' command. tarxlen: bytes used in tarxform.
// xflags: default type bits set by "flags=" in --tarxform scripts.
unsigned xx, tarxlen, xflags;
// delim: line delimiter ('\n' unless -z). xftype: type character stripped
//   from the end of each --tarxform input line.
char delim, xftype;
)
// One node in the doubly linked list of parsed sed commands. Each node is a
// single allocation: the regexes and strings a command needs are stored after
// the struct and located through byte offsets from the start of the node
// (offset+(char *)node). Offsets are used instead of pointers because nodes
// get realloc()ed while multiline input is appended, which would otherwise
// require fixing up every pointer individually.
struct sedcmd {
  struct sedcmd *next;
  struct sedcmd *prev;

  // Start [0] and end [1] of the address range
  long lmatch[2];   // line number of match
  int rmatch[2];    // offset of regex struct for prefix matches (/abc/,/def/p)

  int arg1;         // offset of first argument
  int arg2;         // offset of second argument
  int w;            // offset of s//w (or w) file data
  unsigned not;     // nonzero when the address was negated with '!'
  unsigned hit;     // range currently active (reused as parser state)
  unsigned sflags;  // s///flag bits, see SFLAG macros below
  char c;           // action
};
// Bits stored in sedcmd.sflags for the s/// command. Bits 8 and up hold the
// "replace only match number N" count.
#define SFLAG_i     (1<<0)  // 'i' or 'I': compile regex with REG_ICASE
#define SFLAG_g     (1<<1)  // 'g': keep replacing after the first match
#define SFLAG_p     (1<<2)  // 'p': emit the line after a substitution
#define SFLAG_x     (1<<3)  // 'x': compile regex with REG_EXTENDED
#define SFLAG_slash (1<<4)  // --tarxform regex ends in '/': may not match whole line
#define SFLAG_R     (1<<5)  // --tarxform: skip this command for type 'r'
#define SFLAG_S     (1<<6)  // --tarxform: skip this command for type 's'
#define SFLAG_H     (1<<7)  // --tarxform: skip this command for type 'h'
// Output len bytes of line (which may contain NUL bytes), followed by the
// delimiter when eol is set. If the previous call left output without a
// trailing delimiter (TT.noeol), that delimiter is written first.
// In --tarxform mode the data is appended to the TT.tarxform buffer instead
// of being written to TT.fdout.
// line[len] is temporarily overwritten with the delimiter and then restored,
// so the buffer must be writable one byte past len.
// Returns 0 on success, 1 on write failure.
static int emit(char *line, long len, int eol)
{
  int saved = line[len], wrote = len;

  if (!FLAG(tarxform)) {
    if (TT.noeol && !writeall(TT.fdout, &TT.delim, 1)) return 1;
    if (eol) line[len++] = TT.delim;
    // Nothing to write: leave TT.noeol as it was
    if (!len) return 0;
    wrote = writeall(TT.fdout, line, len);
    // Put back the byte the delimiter replaced
    if (eol) line[len-1] = saved;
  } else {
    TT.tarxform = xrealloc(TT.tarxform, TT.tarxlen+len+TT.noeol+eol);
    if (TT.noeol) TT.tarxform[TT.tarxlen++] = TT.delim;
    memcpy(TT.tarxform+TT.tarxlen, line, len);
    TT.tarxlen += len;
    if (eol) TT.tarxform[TT.tarxlen++] = TT.delim;
  }

  TT.noeol = !eol;
  if (wrote == len) return 0;

  // Stay quiet about short writes to stdout
  if (TT.fdout != 1) perror_msg("short write");

  return 1;
}
// Grow the allocation at *old (currently oldlen bytes in use) and append
// abs(newlen) bytes from new plus a NUL terminator. A negative newlen
// requests a TT.delim separator between the old and new data.
// *old is updated to the reallocated block; returns a pointer to the byte
// just past the NUL terminator.
static char *extend_string(char **old, char *new, int oldlen, int newlen)
{
  int sep = 0;
  char *buf;

  if (newlen < 0) {
    sep = 1;
    newlen = -newlen;
  }

  buf = xrealloc(*old, oldlen+newlen+sep+1);
  *old = buf;
  if (sep) buf[oldlen++] = TT.delim;
  memcpy(buf+oldlen, new, newlen);
  buf[oldlen+newlen] = 0;

  return buf+oldlen+newlen+1;
}
// Return the regex stored offset bytes into command and remember it as the
// most recently used one. An offset of 0 means "empty regex", which reuses
// the remembered regex (exiting with an error if there isn't one yet).
static void *get_regex(void *command, int offset)
{
  if (offset) TT.lastregex = offset+(char *)command;
  else if (!TT.lastregex) error_exit("no previous regex");

  return TT.lastregex;
}
// Apply pattern to line from input file
static void sed_line(char **pline, long plen)
{
struct append {
struct append *next, *prev;
int file;
char *str;
} *append = 0;
char *line;
long len;
struct sedcmd *command;
int eol = 0, tea = 0;
if (FLAG(tarxform)) {
if (!pline) return;
line = *pline;
len = plen;
*pline = 0;
pline = 0;
} else {
line = TT.nextline;
len = TT.nextlen;
// Ignore EOF for all files before last unless -i or -s
if (!pline && !FLAG(i) && !FLAG(s)) return;
// Grab next line for deferred processing (EOF detection: we get a NULL
// pline at EOF to flush last line). Note that only end of _last_ input
// file matches $ (unless we're doing -i).
TT.nextline = 0;
TT.nextlen = 0;
if (pline) {
TT.nextline = *pline;
TT.nextlen = plen;
*pline = 0;
}
}
if (!line || !len) return;
if (line[len-1] == TT.delim) line[--len] = eol++;
if (FLAG(tarxform) && len) {
TT.xftype = line[--len];
line[len] = 0;
}
TT.count++;
// To prevent N as last command from restarting script, we added 1 to restart
// so we'd use it here even when NULL. Alas, compilers that think C has
// references instead of pointers assume ptr-1 can never be NULL (demonstrably
// untrue) and inappropriately dead code eliminate, so use LP64 math until
// we get a -fpointers-are-not-references compiler option.
command = (void *)(TT.restart ? ((unsigned long)TT.restart)-1
: (unsigned long)TT.pattern);
TT.restart = 0;
while (command) {
char *str, c = command->c;
// Have we got a line or regex matching range for this rule?
if (*command->lmatch || *command->rmatch) {
int miss = 0;
long lm;
// In a match that might end?
if (command->hit) {
if (!(lm = command->lmatch[1])) {
if (!command->rmatch[1]) command->hit = 0;
else {
void *rm = get_regex(command, command->rmatch[1]);
// regex match end includes matching line, so defer deactivation
if (line && !regexec0(rm, line, len, 0, 0, 0)) miss = 1;
}
} else if (lm > 0 && lm < TT.count) command->hit = 0;
else if (lm < -1 && TT.count == command->hit+(-lm-1)) command->hit = 0;
// Start a new match?
} else {
if (!(lm = *command->lmatch)) {
void *rm = get_regex(command, *command->rmatch);
if (line && !regexec0(rm, line, len, 0, 0, 0))
command->hit = TT.count;
} else if (lm == TT.count || (lm == -1 && !pline))
command->hit = TT.count;
if (!command->lmatch[1] && !command->rmatch[1]) miss = 1;
}
// Didn't match?
lm = !(command->not^!!command->hit);
// Deferred disable from regex end match
if (miss || command->lmatch[1] == TT.count) command->hit = 0;
if (lm) {
// Handle skipping curly bracket command group
if (c == '{') {
int curly = 1;
while (curly) {
command = command->next;
if (command->c == '{') curly++;
if (command->c == '}') curly--;
}
}
command = command->next;
continue;
}
}
// A deleted line can still update line match state for later commands
if (!line) {
command = command->next;
continue;
}
// Process command
if (c=='a' || c=='r') {
struct append *a = xzalloc(sizeof(struct append));
if (command->arg1) a->str = command->arg1+(char *)command;
a->file = c=='r';
dlist_add_nomalloc((void *)&append, (void *)a);
} else if (c=='b' || c=='t' || c=='T') {
int t = tea;
if (c != 'b') tea = 0;
if (c=='b' || t^(c=='T')) {
if (!command->arg1) break;
str = command->arg1+(char *)command;
for (command = (void *)TT.pattern; command; command = command->next)
if (command->c == ':' && !strcmp(command->arg1+(char *)command, str))
break;
if (!command) error_exit("no :%s", str);
}
} else if (c=='c') {
str = command->arg1+(char *)command;
if (!command->hit) emit(str, strlen(str), 1);
free(line);
line = 0;
continue;
} else if (c=='d') {
free(line);
line = 0;
continue;
} else if (c=='D') {
// Delete up to \n or end of buffer
str = line;
while ((str-line)<len) if (*(str++) == TT.delim) break;
len -= str - line;
memmove(line, str, len);
// if "delete" blanks line, disable further processing
// otherwise trim and restart script
if (!len) {
free(line);
line = 0;
} else {
line[len] = 0;
command = (void *)TT.pattern;
}
continue;
} else if (c=='g') {
free(line);
line = xmemdup(TT.remember, TT.rememberlen+1);
len = TT.rememberlen;
} else if (c=='G') {
line = xrealloc(line, len+TT.rememberlen+2);
line[len++] = TT.delim;
memcpy(line+len, TT.remember, TT.rememberlen);
line[len += TT.rememberlen] = 0;
} else if (c=='h') {
free(TT.remember);
TT.remember = xstrdup(line);
TT.rememberlen = len;
} else if (c=='H') {
TT.remember = xrealloc(TT.remember, TT.rememberlen+len+2);
TT.remember[TT.rememberlen++] = TT.delim;
memcpy(TT.remember+TT.rememberlen, line, len);
TT.remember[TT.rememberlen += len] = 0;
} else if (c=='i') {
str = command->arg1+(char *)command;
emit(str, strlen(str), 1);
} else if (c=='l') {
int i, x, off;
if (!TT.xx) {
terminal_size(&TT.xx, 0);
if (!TT.xx) TT.xx = 80;
if (TT.xx > sizeof(toybuf)-10) TT.xx = sizeof(toybuf)-10;
if (TT.xx > 4) TT.xx -= 4;
}
for (i = off = 0; i<len; i++) {
if (off >= TT.xx) {
toybuf[off++] = '\\';
emit(toybuf, off, 1);
off = 0;
}
x = stridx("\\\a\b\f\r\t\v\n", line[i]);
if (x != -1) {
toybuf[off++] = '\\';
toybuf[off++] = "\\abfrtvn"[x];
} else if (line[i] >= ' ') toybuf[off++] = line[i];
else off += sprintf(toybuf+off, "\\%03o", line[i]);
}
toybuf[off++] = '$';
emit(toybuf, off, 1);
} else if (c=='n') {
// The +1 forces restart processing even when next is null
TT.restart = (void *)(((unsigned long)command->next)+1);
break;
} else if (c=='N') {
// Can't just grab next line because we could have multiple N and
// we need to actually read ahead to get N;$p EOF detection right.
if (pline) {
// The +1 forces restart processing even when next is null
TT.restart = (void *)(((unsigned long)command->next)+1);
extend_string(&line, TT.nextline, len, -TT.nextlen);
free(TT.nextline);
TT.nextline = line;
TT.nextlen += len + 1;
line = 0;
}
// Pending append goes out right after N
goto done;
} else if (c=='p' || c=='P') {
char *l = (c=='P') ? strchr(line, TT.delim) : 0;
if (emit(line, l ? l-line : len, eol)) break;
} else if (c=='q' || c=='Q') {
if (pline) *pline = (void *)1;
free(TT.nextline);
if (!toys.exitval && command->arg1)
toys.exitval = atoi(command->arg1+(char *)command);
TT.nextline = 0;
TT.nextlen = 0;
if (c=='Q') line = 0;
break;
} else if (c=='s') {
char *rline = line, *new = command->arg2 + (char *)command, *l2 = 0;
regmatch_t *match = (void *)toybuf;
regex_t *reg = get_regex(command, command->arg1);
int mflags = 0, count = 0, l2used = 0, zmatch = 1, l2l = len, l2old = 0,
bonk = 0, mlen, off, newlen;
// Skip suppressed --tarxform types
if (TT.xftype && (command->sflags & (SFLAG_R<<stridx("rsh", TT.xftype))));
// Loop finding match in remaining line (up to remaining len)
else while (!regexec0(reg, rline, len-(rline-line), 10, match, mflags)) {
mlen = match[0].rm_eo-match[0].rm_so;
// xform matches ending in / aren't allowed to match entire line
if ((command->sflags & SFLAG_slash) && mlen==len) {
while (len && ++bonk && line[--len]=='/');
continue;
}
mflags = REG_NOTBOL;
// Zero length matches don't count immediately after a previous match
if (!mlen && !zmatch) {
if (rline-line == len) break;
l2[l2used++] = *rline++;
zmatch++;
continue;
} else zmatch = 0;
// If we're replacing only a specific match, skip if this isn't it
off = command->sflags>>8;
if (off && off != ++count) {
if (l2) memcpy(l2+l2used, rline, match[0].rm_eo);
l2used += match[0].rm_eo;
rline += match[0].rm_eo;
continue;
}
// The fact getline() can allocate unbounded amounts of memory is
// a bigger issue, but while we're here check for integer overflow
if (match[0].rm_eo > INT_MAX) perror_exit(0);
// newlen = strlen(new) but with \1 and & and printf escapes
for (off = newlen = 0; new[off]; off++) {
int cc = -1;
if (new[off] == '&') cc = 0;
else if (new[off] == '\\') cc = new[++off] - '0';
if (cc < 0 || cc > 9) {
newlen++;
continue;
}
newlen += match[cc].rm_eo-match[cc].rm_so;
}
// Copy changed data to new string
// Adjust allocation size of new string, copy data we know we'll keep
l2l += newlen-mlen;
if ((l2l|0xfff) > l2old) l2 = xrealloc(l2, l2old = (l2l|0xfff)+1);
if (match[0].rm_so) {
memcpy(l2+l2used, rline, match[0].rm_so);
l2used += match[0].rm_so;
}
// copy in new replacement text
for (off = mlen = 0; new[off]; off++) {
int cc = 0, ll;
if (new[off] == '\\') {
cc = new[++off] - '0';
if (cc<0 || cc>9) {
if (!(l2[l2used+mlen++] = unescape(new[off])))
l2[l2used+mlen-1] = new[off];
continue;
} else if (cc > reg->re_nsub) error_exit("no s//\\%d/", cc);
} else if (new[off] != '&') {
l2[l2used+mlen++] = new[off];
continue;
}
if (match[cc].rm_so != -1) {
ll = match[cc].rm_eo-match[cc].rm_so;
memcpy(l2+l2used+mlen, rline+match[cc].rm_so, ll);
mlen += ll;
}
}
l2used += newlen;
rline += match[0].rm_eo;
if (!(command->sflags & SFLAG_g)) break;
}
len += bonk;
// If we made any changes, finish off l2 and swap it for line
if (l2) {
// grab trailing unmatched data and null terminator, swap with original
mlen = len-(rline-line);
memcpy(l2+l2used, rline, mlen+1);
len = l2used + mlen;
free(line);
line = l2;
}
if (mflags) {
if (command->sflags & SFLAG_p) emit(line, len, eol);
tea = 1;
if (command->w) goto writenow;
}
} else if (c=='w') {
int fd, noeol;
char *name;
writenow:
if (FLAG(tarxform)) error_exit("tilt");
// Swap out emit() context
fd = TT.fdout;
noeol = TT.noeol;
// We save filehandle and newline status before filename
name = command->w + (char *)command;
memcpy(&TT.fdout, name, 4);
name += 4;
TT.noeol = *(name++);
// write, then save/restore context
if (emit(line, len, eol))
perror_exit("w '%s'", command->arg1+(char *)command);
*(--name) = TT.noeol;
TT.noeol = noeol;
TT.fdout = fd;
} else if (c=='x') {
long swap = TT.rememberlen;
str = TT.remember;
TT.remember = line;
line = str;
TT.rememberlen = len;
len = swap;
} else if (c=='y') {
char *from, *to = (char *)command;
int i, j;
from = to+command->arg1;
to += command->arg2;
for (i = 0; i < len; i++) {
j = stridx(from, line[i]);
if (j != -1) line[i] = to[j];
}
} else if (c=='=') {
sprintf(toybuf, "%ld", TT.count);
if (emit(toybuf, strlen(toybuf), 1)) break;
}
command = command->next;
}
done:
if (line && !FLAG(n)) emit(line, len, eol);
// TODO: should "sed -z ax" use \n instead of NUL?
if (dlist_terminate(append)) while (append) {
struct append *a = append->next;
if (append->file) {
int fd = open(append->str, O_RDONLY);
// Force newline if noeol pending
if (fd != -1) {
if (TT.noeol) xwrite(TT.fdout, &TT.delim, 1);
TT.noeol = 0;
xsendfile(fd, TT.fdout);
close(fd);
}
} else if (append->str) emit(append->str, strlen(append->str), 1);
else emit(line, 0, 0);
free(append);
append = a;
}
free(line);
if (TT.tarxlen) {
dprintf(TT.fdout, "%08x", --TT.tarxlen);
writeall(TT.fdout, TT.tarxform, TT.tarxlen);
TT.tarxlen = 0;
}
}
// Callback called on each input file
static void do_sed_file(int fd, char *name)
{
char *tmp, *s;
if (FLAG(i)) {
if (!fd) return error_msg("-i on stdin");
TT.fdout = copy_tempfile(fd, name, &tmp);
}
if (FLAG(i) || FLAG(s)) {
struct sedcmd *command;
TT.count = 0;
for (command = (void *)TT.pattern; command; command = command->next)
command->hit = 0;
}
do_lines(fd, TT.delim, sed_line);
if (FLAG(i)) {
if (TT.i && *TT.i) {
xrename(name, s = xmprintf("%s%s", name, TT.i));
free(s);
}
replace_tempfile(-1, TT.fdout, &tmp);
TT.fdout = 1;
}
if (FLAG(i) || FLAG(s)) {
TT.nextline = 0;
TT.nextlen = TT.noeol = 0;
}
}
// Copy chunk of string between two delimiters, converting printf escapes.
// Returns a newly allocated processed copy of the string (0 on error), and
// advances *pstr to the character after the closing delimiter.
// If delim (or *delim) is 0 the first character of the string is used as the
// delimiter (a leading backslash is skipped first) and, when delim is
// non-NULL, saved into *delim. Otherwise *delim is the closing delimiter and
// the text starts at *pstr.
// A delimiter inside a regex [bracket expression] doesn't end the string.
// Errors: missing or backslash/NUL delimiter, input ending before the closing
// delimiter, or a trailing backslash.
// NOTE(review): the error returns after xmalloc() don't free the buffer;
// the callers visible here exit on error.
static char *unescape_delimited_string(char **pstr, char *delim)
{
// mode is 0 outside brackets, ']' inside a bracket expression, or one of
// '.', '=', ':' inside a [. .], [= =], [: :] element.
char *to, *from, mode = 0, d;
// Grab leading delimiter (if necessary), allocate space for new string
from = *pstr;
if (!delim || !*delim) {
if (!(d = *(from++))) return 0;
if (d == '\\') d = *(from++);
if (!d || d == '\\') return 0;
if (delim) *delim = d;
} else d = *delim;
// delim is reused from here on as the start of the output buffer. The
// output is never longer than the input, so strlen(*pstr)+1 is enough.
to = delim = xmalloc(strlen(*pstr)+1);
while (mode || *from != d) {
if (!*from) return 0;
// delimiter in regex character range doesn't count
if (*from == '[') {
if (!mode) {
mode = ']';
// A '-' or ']' right after the opening '[' is copied along with it here
if (from[1]=='-' || from[1]==']') *(to++) = *(from++);
} else if (mode == ']' && strchr(".=:", from[1])) {
// Entering [. [= or [: inside a bracket expression
*(to++) = *(from++);
mode = *from;
}
} else if (*from == mode) {
if (mode == ']') mode = 0;
else {
// Closing . = or : of an element: copy it, back to plain bracket mode
*(to++) = *(from++);
mode = ']';
}
// Length 1 range (X-X with same X) is "undefined" and makes regcomp err,
// but the perl build does it, so we need to filter it out.
} else if (mode && *from == '-' && from[-1] == from[1]) {
from+=2;
continue;
} else if (*from == '\\') {
if (!from[1]) return 0;
// Check escaped end delimiter before printf style escapes.
if (from[1] == d) from++;
else if (from[1]=='\\') *(to++) = *(from++);
else {
char c = unescape(from[1]);
if (c) {
*(to++) = c;
from+=2;
continue;
// Unrecognized escape outside brackets: keep the backslash and the
// character after it
} else if (!mode) *(to++) = *(from++);
}
}
*(to++) = *(from++);
}
*to = 0;
*pstr = from+1;
return delim;
}
// Translate pattern strings into command structures. Each command structure
// is a single allocation (which requires some math and remalloc at times).
static void parse_pattern(char **pline, long len)
{
struct sedcmd *command = (void *)TT.pattern;
char *line, *reg, c, *errstart;
int i;
line = errstart = pline ? *pline : "";
if (len && line[len-1]=='\n') line[--len] = 0;
// Append this line to previous multiline command? (hit indicates type.)
// During parsing "hit" stores data about line continuations, but in
// sed_line() it means the match range attached to this command
// is active, so processing the continuation must zero it again.
if (command && command->prev->hit) {
// Remove half-finished entry from list so remalloc() doesn't confuse it
TT.pattern = TT.pattern->prev;
command = dlist_pop(&TT.pattern);
c = command->c;
reg = (char *)command;
reg += command->arg1 + strlen(reg + command->arg1);
// Resume parsing for 'a' or 's' command. (Only two that can do this.)
// TODO: using 256 to indicate 'a' means our s/// delimiter can't be
// a unicode character.
if (command->hit < 256) goto resume_s;
else goto resume_a;
}
// Loop through commands in this line.
command = 0;
for (;;) {
if (command) dlist_add_nomalloc(&TT.pattern, (void *)command);
// If there's no more data on this line, return.
for (;;) {
while (isspace(*line) || *line == ';') line++;
if (*line == '#') while (*line && *line != '\n') line++;
else break;
}
if (!*line) return;
if (FLAG(tarxform) && strstart(&line, "flags=")) {
TT.xflags = 7;
while (0<=(i = stridx("rRsShH", *line))) {
if (i&1) TT.xflags |= 1<<(i>>1);
else TT.xflags &= ~(1<<(i>>1));
line++;
}
continue;
}
// Start by writing data into toybuf.
errstart = line;
memset(toybuf, 0, sizeof(struct sedcmd));
command = (void *)toybuf;
reg = toybuf + sizeof(struct sedcmd);
// Parse address range (if any)
for (i = 0; i < 2; i++) {
if (*line == ',') line++;
else if (i) break;
if (i && *line == '+' && isdigit(line[1])) {
line++;
command->lmatch[i] = -2-strtol(line, &line, 0);
} else if (isdigit(*line)) command->lmatch[i] = strtol(line, &line, 0);
else if (*line == '$') {
command->lmatch[i] = -1;
line++;
} else if (*line == '/' || *line == '\\') {
char *s = line;
if (!(s = unescape_delimited_string(&line, 0))) goto error;
if (!*s) command->rmatch[i] = 0;
else {
xregcomp((void *)reg, s, REG_EXTENDED*!!FLAG(r));
command->rmatch[i] = reg-toybuf;
reg += sizeof(regex_t);
}
free(s);
} else break;
}
while (isspace(*line)) line++;
if (!*line) break;
if (*line == '!') {
command->not = 1;
line++;
}
while (isspace(*line)) line++;
if (!*line) break;
c = command->c = *(line++);
if (strchr("}:", c) && i) break;
if (strchr("aiqQr=", c) && i>1) break;
// Allocate memory and copy out of toybuf now that we know how big it is
command = xmemdup(toybuf, reg-toybuf);
reg = (reg-toybuf) + (char *)command;
// Parse arguments by command type
if (c == '{') TT.nextlen++;
else if (c == '}') {
if (!TT.nextlen--) break;
} else if (c == 's') {
char *end, delim = 0;
int flags;
// s/pattern/replacement/flags
// line continuations use arg1 (back at the start of the function),
// so let's fill out arg2 first (since the regex part can't be multiple
// lines) and swap them back later.
// get pattern (just record, we parse it later)
command->arg2 = reg - (char *)command;
if (!(TT.remember = unescape_delimited_string(&line, &delim)))
goto error;
reg += sizeof(regex_t);
command->arg1 = reg-(char *)command;
command->hit = delim;
resume_s:
// get replacement - don't replace escapes yet because \1 and \& need
// processing later, after we replace \\ with \ we can't tell \\1 from \1
end = line;
while (*end != command->hit) {
if (!*end) goto error;
if (*end++ == '\\') {
if (!*end || *end == '\n') {
end[-1] = '\n';
break;
}
end++;
}
}
reg = extend_string((void *)&command, line, reg-(char *)command,end-line);
line = end;
// line continuation? (note: '\n' can't be a valid delim).
if (*line == command->hit) command->hit = 0;
else {
if (!*line) continue;
reg--;
line++;
goto resume_s;
}
// swap arg1/arg2 so they're back in order arguments occur.
i = command->arg1;
command->arg1 = command->arg2;
command->arg2 = i;
command->sflags = TT.xflags*SFLAG_R;
// get flags
for (line++; *line; line++) {
long l;
if (isspace(*line) && *line != '\n') continue;
if (0 <= (l = stridx("igpx", *line))) command->sflags |= 1<<l;
else if (*line == 'I') command->sflags |= 1<<0;
else if (FLAG(tarxform) && 0 <= (l = stridx("RSH", *line)))
command->sflags |= SFLAG_R<<l;
// Given that the default is rsh all enabled... why do these exist?
else if (FLAG(tarxform) && 0 <= (l = stridx("rsh", *line)))
command->sflags &= ~(SFLAG_R<<l);
else if (!(command->sflags>>8) && 0<(l = strtol(line, &line, 10))) {
command->sflags |= l << 8;
line--;
} else break;
}
flags = (FLAG(r) || (command->sflags & SFLAG_x)) ? REG_EXTENDED : 0;
if (command->sflags & SFLAG_i) flags |= REG_ICASE;
// We deferred actually parsing the regex until we had the s///i flag
// allocating the space was done by extend_string() above
if (!*TT.remember) command->arg1 = 0;
else {
xregcomp((void *)(command->arg1+(char *)command), TT.remember, flags);
if (FLAG(tarxform) && TT.remember[strlen(TT.remember)-1]=='/')
command->sflags |= SFLAG_slash;
}
free(TT.remember);
TT.remember = 0;
if (*line == 'w') {
line++;
goto writenow;
}
} else if (c == 'w') {
int fd, delim;
char *cc;
// Since s/// uses arg1 and arg2, and w needs a persistent filehandle and
// eol status, and to retain the filename for error messages, we'd need
// to go up to arg5 just for this. Compromise: dynamically allocate the
// filehandle and eol status.
writenow:
while (isspace(*line)) line++;
if (!*line) goto error;
for (cc = line; *cc; cc++) if (*cc == '\\' && cc[1] == ';') break;
delim = *cc;
*cc = 0;
fd = xcreate(line, O_WRONLY|O_CREAT|O_TRUNC|O_APPEND, 0644);
*cc = delim;
command->w = reg - (char *)command;
command = xrealloc(command, command->w+(cc-line)+6);
reg = command->w + (char *)command;
memcpy(reg, &fd, 4);
reg += 4;
*(reg++) = 0;
memcpy(reg, line, delim);
reg += delim;
*(reg++) = 0;
line = cc;
if (delim) line += 2;
} else if (c == 'y') {
char *s, delim = 0;
int len;
if (!(s = unescape_delimited_string(&line, &delim))) goto error;
command->arg1 = reg-(char *)command;
len = strlen(s);
reg = extend_string((void *)&command, s, reg-(char *)command, len);
free(s);
command->arg2 = reg-(char *)command;
if (!(s = unescape_delimited_string(&line, &delim))) goto error;
if (len != strlen(s)) goto error;
reg = extend_string((void *)&command, s, reg-(char*)command, len);
free(s);
} else if (strchr("abcirtTqQw:", c)) {
int end;
// trim leading spaces
while (isspace(*line) && *line != '\n') line++;
// Resume logic differs from 's' case because we don't add a newline
// unless it's after something, so we add it on return instead.
resume_a:
command->hit = 0;
// btTqQ: end with space or semicolon, aicrw continue to newline.
if (!(end = strcspn(line, strchr(":btTqQ", c) ? "}; \t\r\n\v\f" : "\n"))){
// Argument's optional for btTqQ
if (strchr("btTqQ", c)) continue;
else if (!command->arg1) break;
}
// Error checking: qQ can only have digits after them
if (c=='q' || c=='Q') {
for (i = 0; i<end && isdigit(line[i]); i++);
if (i != end) {
line += i;
break;
}
}
// Extend allocation to include new string. We use offsets instead of
// pointers so realloc() moving stuff doesn't break things. Ok to write
// \n over NUL terminator because call to extend_string() adds it back.
if (!command->arg1) command->arg1 = reg - (char*)command;
else if (*(command->arg1+(char *)command)) *(reg++) = '\n';
else if (!pline) {
command->arg1 = 0;
continue;
}
reg = extend_string((void *)&command, line, reg - (char *)command, end);
// Recopy data to remove escape sequences and handle line continuation.
if (strchr("aci", c)) {
reg -= end+1;
for (i = end; i; i--) {
if ((*reg++ = *line++)=='\\') {
// escape at end of line: resume if -e escaped literal newline,
// else request callback and resume with next line
if (!--i) {
*--reg = 0;
if (*line) {
line++;
goto resume_a;
}
command->hit = 256;
break;
}
if (!(reg[-1] = unescape(*line))) reg[-1] = *line;
line++;
}
}
*reg = 0;
} else line += end;
// Commands that take no arguments
} else if (!strchr("{dDgGhHlnNpPx=", c)) break;
}
error:
error_exit("bad pattern '%s'@%ld (%c)", errstart, line-errstart+1L, *line);
}
// Does the pointer "find" point into the string "range"? The position of the
// terminating NUL counts as inside. Returns nonzero if so, 0 otherwise.
static int instr(char *find, char *range)
{
  if (find < range) return 0;

  return find <= range+strlen(range);
}
// Command entry point: parse the script from -e/-f (or the first argument),
// then run it over each input file.
void sed_main(void)
{
  char **args = toys.optargs, **arg;

  // --tarxform always uses NUL delimited records
  if (FLAG(tarxform)) toys.optflags |= FLAG_z;
  if (!FLAG(z)) TT.delim = '\n';

  // Lie to autoconf when it asks stupid questions, so configure regexes
  // that look for "GNU sed version %f" greater than some old buggy number
  // don't fail us for not matching their narrow expectations.
  if (FLAG(version)) {
    xprintf("This is not GNU sed version 9.0\n");

    return;
  }

  // Handling our own --version means we handle our own --help too.
  if (FLAG(help)) help_exit(0);

  // Parse pattern into commands.

  // If no -e or -f, first argument is the pattern.
  if (!TT.e && !TT.f) {
    if (!*toys.optargs) error_exit("no pattern");
    TT.e = xzalloc(sizeof(struct arg_list));
    TT.e->arg = *(args++);
  }

  // -e and -f care about order, so use argv[] to recreate original order
  arg = toys.argv+1;
  while (*arg) {
    if (TT.e && instr(TT.e->arg, *arg)) {
      parse_pattern(&TT.e->arg, strlen(TT.e->arg));
      free(llist_pop(&TT.e));
    }
    if (TT.f && instr(TT.f->arg, *arg)) {
      do_lines(xopenro(TT.f->arg), TT.delim, parse_pattern);
      free(llist_pop(&TT.f));
    }
    arg++;
  }

  // Final call with no input finishes any command awaiting a continuation
  parse_pattern(0, 0);
  dlist_terminate(TT.pattern);
  // parse_pattern() leaves the count of unclosed '{' in TT.nextlen
  if (TT.nextlen) error_exit("no }");

  TT.fdout = 1;
  TT.remember = xstrdup("");

  // Inflict pattern upon input files. Long version because !O_CLOEXEC
  loopfiles_rw(args, O_RDONLY|WARN_ONLY, 0, do_sed_file);

  // Provide EOF flush at end of cumulative input for non-i mode.
  if (!FLAG(i) && !FLAG(s)) {
    toys.optflags |= FLAG_s;
    sed_line(0, 0);
  }

  // todo: need to close fd when done for TOYBOX_FREE?
}
![]() |