sbase

suckless unix tools
git clone git://git.suckless.org/sbase
Log | Files | Refs | README | LICENSE

sed.c (41896B)


      1 /* FIXME: summary
      2  * decide whether we enforce valid UTF-8, right now it's enforced in certain
      3  *     parts of the script, but not the input...
      4  * nul bytes cause explosions due to use of libc string functions. thoughts?
      5  * lack of newline at end of file, currently we add one. what should we do?
      6  * allow "\\t" for "\t" etc. in regex? in replacement text?
      7  * POSIX says don't flush on N when out of input, but GNU and busybox do.
      8  */
      9 
     10 #include <ctype.h>
     11 #include <errno.h>
     12 #include <regex.h>
     13 #include <stdlib.h>
     14 #include <string.h>
     15 
     16 #include "utf.h"
     17 #include "util.h"
     18 
     19 /* Types */
     20 
     21 /* used as queue for writes and stack for {,:,b,t */
     22 typedef struct {
     23 	void **data;
     24 	size_t size;
     25 	size_t cap;
     26 } Vec;
     27 
     28 /* used for arbitrary growth, str is a C string
     29  * FIXME: does it make sense to keep track of length? or just rely on libc
     30  *        string functions? If we want to support nul bytes everything changes
     31  */
     32 typedef struct {
     33 	char  *str;
     34 	size_t cap;
     35 } String;
     36 
     37 typedef struct Cmd Cmd;
     38 typedef struct {
     39 	void  (*fn)(Cmd *);
     40 	char *(*getarg)(Cmd *, char *);
     41 	void  (*freearg)(Cmd *);
     42 	unsigned char naddr;
     43 } Fninfo;
     44 
     45 typedef struct {
     46 	union {
     47 		size_t   lineno;
     48 		regex_t *re;
     49 	} u;
     50 	enum {
     51 		IGNORE, /* empty address, ignore        */
     52 		EVERY , /* every line                   */
     53 		LINE  , /* line number                  */
     54 		LAST  , /* last line ($)                */
     55 		REGEX , /* use included regex           */
     56 		LASTRE, /* use most recently used regex */
     57 	} type;
     58 } Addr;
     59 
     60 /* DISCUSS: naddr is not strictly necessary, but very helpful
     61  * naddr == 0 iff beg.type == EVERY  && end.type == IGNORE
     62  * naddr == 1 iff beg.type != IGNORE && end.type == IGNORE
     63  * naddr == 2 iff beg.type != IGNORE && end.type != IGNORE
     64  */
     65 typedef struct {
     66 	Addr          beg;
     67 	Addr          end;
     68 	unsigned char naddr;
     69 } Range;
     70 
     71 typedef struct {
     72 	regex_t      *re; /* if NULL use last regex */
     73 	String        repl;
     74 	FILE         *file;
     75 	size_t        occurrence; /* 0 for all (g flag) */
     76 	Rune          delim;
     77 	unsigned int  p:1;
     78 } Sarg;
     79 
     80 typedef struct {
     81 	Rune *set1;
     82 	Rune *set2;
     83 } Yarg;
     84 
     85 typedef struct {
     86 	String str; /* a,c,i text. r file path */
     87 	void  (*print)(char *, FILE *); /* check_puts for a, write_file for r, unused for c,i */
     88 } ACIRarg;
     89 
     90 struct Cmd {
     91 	Range   range;
     92 	Fninfo *fninfo;
     93 	union {
     94 		Cmd      *jump;   /* used for   b,t when running  */
     95 		char     *label;  /* used for :,b,t when building */
     96 		ptrdiff_t offset; /* used for { (pointers break during realloc) */
     97 		FILE     *file;   /* used for w */
     98 
     99 		/* FIXME: Should the following be in the union? or pointers and malloc? */
    100 		Sarg      s;
    101 		Yarg      y;
    102 		ACIRarg   acir;
    103 	} u; /* I find your lack of anonymous unions disturbing */
    104 	unsigned int in_match:1;
    105 	unsigned int negate  :1;
    106 };
    107 
    108 /* Files for w command (and s' w flag) */
    109 typedef struct {
    110 	char *path;
    111 	FILE *file;
    112 } Wfile;
    113 
    114 /*
    115  * Function Declarations
    116  */
    117 
    118 /* Dynamically allocated arrays and strings */
    119 static void resize(void **ptr, size_t *nmemb, size_t size, size_t new_nmemb, void **next);
    120 static void *pop(Vec *v);
    121 static void push(Vec *v, void *p);
    122 static void stracat(String *dst, char *src);
    123 static void strnacat(String *dst, char *src, size_t n);
    124 static void stracpy(String *dst, char *src);
    125 
    126 /* Cleanup and errors */
    127 static void usage(void);
    128 
    129 /* Parsing functions and related utilities */
    130 static void compile(char *s, int isfile);
    131 static int read_line(FILE *f, String *s);
    132 static char *make_range(Range *range, char *s);
    133 static char *make_addr(Addr *addr, char *s);
    134 static char *find_delim(char *s, Rune delim, int do_brackets);
    135 static char *chompr(char *s, Rune rune);
    136 static char *chomp(char *s);
    137 static Rune *strtorunes(char *s, size_t nrunes);
    138 static long stol(char *s, char **endp);
    139 static size_t escapes(char *beg, char *end, Rune delim, int n_newline);
    140 static size_t echarntorune(Rune *r, char *s, size_t n);
    141 static void insert_labels(void);
    142 
    143 /* Get and Free arg and related utilities */
    144 static char *get_aci_arg(Cmd *c, char *s);
    145 static void aci_append(Cmd *c, char *s);
    146 static void free_acir_arg(Cmd *c);
    147 static char *get_bt_arg(Cmd *c, char *s);
    148 static char *get_r_arg(Cmd *c, char *s);
    149 static char *get_s_arg(Cmd *c, char *s);
    150 static void free_s_arg(Cmd *c);
    151 static char *get_w_arg(Cmd *c, char *s);
    152 static char *get_y_arg(Cmd *c, char *s);
    153 static void free_y_arg(Cmd *c);
    154 static char *get_colon_arg(Cmd *c, char *s);
    155 static char *get_lbrace_arg(Cmd *c, char *s);
    156 static char *get_rbrace_arg(Cmd *c, char *s);
    157 static char *semicolon_arg(char *s);
    158 
    159 /* Running */
    160 static void run(void);
    161 static int in_range(Cmd *c);
    162 static int match_addr(Addr *a);
    163 static int next_file(void);
    164 static int is_eof(FILE *f);
    165 static void do_writes(void);
    166 static void write_file(char *path, FILE *out);
    167 static void check_puts(char *s, FILE *f);
    168 static void update_ranges(Cmd *beg, Cmd *end);
    169 
    170 /* Sed functions */
    171 static void cmd_y(Cmd *c);
    172 static void cmd_x(Cmd *c);
    173 static void cmd_w(Cmd *c);
    174 static void cmd_t(Cmd *c);
    175 static void cmd_s(Cmd *c);
    176 static void cmd_r(Cmd *c);
    177 static void cmd_q(Cmd *c);
    178 static void cmd_P(Cmd *c);
    179 static void cmd_p(Cmd *c);
    180 static void cmd_N(Cmd *c);
    181 static void cmd_n(Cmd *c);
    182 static void cmd_l(Cmd *c);
    183 static void cmd_i(Cmd *c);
    184 static void cmd_H(Cmd *c);
    185 static void cmd_h(Cmd *c);
    186 static void cmd_G(Cmd *c);
    187 static void cmd_g(Cmd *c);
    188 static void cmd_D(Cmd *c);
    189 static void cmd_d(Cmd *c);
    190 static void cmd_c(Cmd *c);
    191 static void cmd_b(Cmd *c);
    192 static void cmd_a(Cmd *c);
    193 static void cmd_colon(Cmd *c);
    194 static void cmd_equal(Cmd *c);
    195 static void cmd_lbrace(Cmd *c);
    196 static void cmd_rbrace(Cmd *c);
    197 static void cmd_last(Cmd *c);
    198 
    199 /* Actions */
    200 static void new_line(void);
    201 static void app_line(void);
    202 static void new_next(void);
    203 static void old_next(void);
    204 
    205 /*
    206  * Globals
    207  */
    208 static Vec braces, labels, branches; /* holds ptrdiff_t. addrs of {, :, bt */
    209 static Vec writes; /* holds cmd*. writes scheduled by a and r commands */
    210 static Vec wfiles; /* holds Wfile*. files for w and s///w commands */
    211 
    212 static Cmd   *prog, *pc; /* Program, program counter */
    213 static size_t pcap;
    214 static size_t lineno;
    215 
    216 static regex_t *lastre; /* last used regex for empty regex search */
    217 static char   **files;  /* list of file names from argv */
    218 static FILE    *file;   /* current file we are reading */
    219 static int      ret;    /* exit status */
    220 
    221 static String patt, hold, genbuf;
    222 
    223 static struct {
    224 	unsigned int n       :1; /* -n (no print) */
    225 	unsigned int E       :1; /* -E (extended re) */
    226 	unsigned int s       :1; /* s/// replacement happened */
    227 	unsigned int aci_cont:1; /* a,c,i text continuation */
    228 	unsigned int s_cont  :1; /* s/// replacement text continuation */
    229 	unsigned int halt    :1; /* halt execution */
    230 } gflags;
    231 
    232 /* FIXME: move character inside Fninfo and only use 26*sizeof(Fninfo) instead of 127*sizeof(Fninfo) bytes */
    233 static Fninfo fns[] = {
    234 	['a'] = { cmd_a     , get_aci_arg   , free_acir_arg , 1 }, /* schedule write of text for later                                                      */
    235 	['b'] = { cmd_b     , get_bt_arg    , NULL          , 2 }, /* branch to label char *label when building, Cmd *jump when running                     */
    236 	['c'] = { cmd_c     , get_aci_arg   , free_acir_arg , 2 }, /* delete pattern space. at 0 or 1 addr or end of 2 addr, write text                     */
    237 	['d'] = { cmd_d     , NULL          , NULL          , 2 }, /* delete pattern space                                                                  */
    238 	['D'] = { cmd_D     , NULL          , NULL          , 2 }, /* delete to first newline and start new cycle without reading (if no newline, d)        */
    239 	['g'] = { cmd_g     , NULL          , NULL          , 2 }, /* replace pattern space with hold space                                                 */
    240 	['G'] = { cmd_G     , NULL          , NULL          , 2 }, /* append newline and hold space to pattern space                                        */
    241 	['h'] = { cmd_h     , NULL          , NULL          , 2 }, /* replace hold space with pattern space                                                 */
    242 	['H'] = { cmd_H     , NULL          , NULL          , 2 }, /* append newline and pattern space to hold space                                        */
    243 	['i'] = { cmd_i     , get_aci_arg   , free_acir_arg , 1 }, /* write text                                                                            */
    244 	['l'] = { cmd_l     , NULL          , NULL          , 2 }, /* write pattern space in 'visually unambiguous form'                                    */
    245 	['n'] = { cmd_n     , NULL          , NULL          , 2 }, /* write pattern space (unless -n) read to replace pattern space (if no input, quit)     */
    246 	['N'] = { cmd_N     , NULL          , NULL          , 2 }, /* append to pattern space separated by newline, line number changes (if no input, quit) */
    247 	['p'] = { cmd_p     , NULL          , NULL          , 2 }, /* write pattern space                                                                   */
    248 	['P'] = { cmd_P     , NULL          , NULL          , 2 }, /* write pattern space up to first newline                                               */
    249 	['q'] = { cmd_q     , NULL          , NULL          , 1 }, /* quit                                                                                  */
    250 	['r'] = { cmd_r     , get_r_arg     , free_acir_arg , 1 }, /* write contents of file (unable to open/read treated as empty file)                    */
    251 	['s'] = { cmd_s     , get_s_arg     , free_s_arg    , 2 }, /* find/replace/all that crazy s stuff                                                   */
    252 	['t'] = { cmd_t     , get_bt_arg    , NULL          , 2 }, /* if s/// succeeded (since input or last t) branch to label (branch to end if no label) */
    253 	['w'] = { cmd_w     , get_w_arg     , NULL          , 2 }, /* append pattern space to file                                                          */
    254 	['x'] = { cmd_x     , NULL          , NULL          , 2 }, /* exchange pattern and hold spaces                                                      */
    255 	['y'] = { cmd_y     , get_y_arg     , free_y_arg    , 2 }, /* replace runes in set1 with runes in set2                                              */
    256 	[':'] = { cmd_colon , get_colon_arg , NULL          , 0 }, /* defines label for later b and t commands                                              */
    257 	['='] = { cmd_equal , NULL          , NULL          , 1 }, /* printf("%d\n", line_number);                                                          */
    258 	['{'] = { cmd_lbrace, get_lbrace_arg, NULL          , 2 }, /* if we match, run commands, otherwise jump to close                                    */
    259 	['}'] = { cmd_rbrace, get_rbrace_arg, NULL          , 0 }, /* noop, hold onto open for ease of building scripts                                     */
    260 
    261 	[0x7f] = { NULL, NULL, NULL, 0 }, /* index is checked with isascii(3p). fill out rest of array */
    262 };
    263 
    264 /*
    265  * Function Definitions
    266  */
    267 
    268 /* given memory pointed to by *ptr that currently holds *nmemb members of size
    269  * size, realloc to hold new_nmemb members, return new_nmemb in *memb and one
    270  * past old end in *next. if realloc fails...explode
    271  */
    272 static void
    273 resize(void **ptr, size_t *nmemb, size_t size, size_t new_nmemb, void **next)
    274 {
    275 	void *n, *tmp;
    276 
    277 	if (new_nmemb) {
    278 		tmp = ereallocarray(*ptr, new_nmemb, size);
    279 	} else { /* turns out realloc(*ptr, 0) != free(*ptr) */
    280 		free(*ptr);
    281 		tmp = NULL;
    282 	}
    283 	n = (char *)tmp + *nmemb * size;
    284 	*nmemb = new_nmemb;
    285 	*ptr   = tmp;
    286 	if (next)
    287 		*next = n;
    288 }
    289 
    290 static void *
    291 pop(Vec *v)
    292 {
    293 	if (!v->size)
    294 		return NULL;
    295 	return v->data[--v->size];
    296 }
    297 
    298 static void
    299 push(Vec *v, void *p)
    300 {
    301 	if (v->size == v->cap)
    302 		resize((void **)&v->data, &v->cap, sizeof(*v->data), v->cap * 2 + 1, NULL);
    303 	v->data[v->size++] = p;
    304 }
    305 
    306 static void
    307 stracat(String *dst, char *src)
    308 {
    309 	int new = !dst->cap;
    310 	size_t len;
    311 
    312 	len = (new ? 0 : strlen(dst->str)) + strlen(src) + 1;
    313 	if (dst->cap < len)
    314 		resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL);
    315 	if (new)
    316 		*dst->str = '\0';
    317 	strcat(dst->str, src);
    318 }
    319 
    320 static void
    321 strnacat(String *dst, char *src, size_t n)
    322 {
    323 	int new = !dst->cap;
    324 	size_t len;
    325 
    326 	len = strlen(src);
    327 	len = (new ? 0 : strlen(dst->str)) + MIN(n, len) + 1;
    328 	if (dst->cap < len)
    329 		resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL);
    330 	if (new)
    331 		*dst->str = '\0';
    332 	strlcat(dst->str, src, len);
    333 }
    334 
    335 static void
    336 stracpy(String *dst, char *src)
    337 {
    338 	size_t len;
    339 
    340 	len = strlen(src) + 1;
    341 	if (dst->cap < len)
    342 		resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL);
    343 	strcpy(dst->str, src);
    344 }
    345 
    346 static void
    347 leprintf(char *s)
    348 {
    349 	if (errno)
    350 		eprintf("%zu: %s: %s\n", lineno, s, strerror(errno));
    351 	else
    352 		eprintf("%zu: %s\n", lineno, s);
    353 }
    354 
    355 /* FIXME: write usage message */
    356 static void
    357 usage(void)
    358 {
    359 	eprintf("usage: sed [-nrE] script [file ...]\n"
    360 	        "       sed [-nrE] -e script [-e script] ... [-f scriptfile] ... [file ...]\n"
    361 	        "       sed [-nrE] [-e script] ... -f scriptfile [-f scriptfile] ... [file ...]\n");
    362 }
    363 
    364 /* Differences from POSIX
    365  * we allows semicolons and trailing blanks inside {}
    366  * we allow spaces after ! (and in between !s)
    367  * we allow extended regular expressions (-E)
    368  */
    369 static void
    370 compile(char *s, int isfile)
    371 {
    372 	FILE *f;
    373 
    374 	if (isfile) {
    375 		f = fopen(s, "r");
    376 		if (!f)
    377 			eprintf("fopen %s:", s);
    378 	} else {
    379 		if (!*s) /* empty string script */
    380 			return;
    381 		f = fmemopen(s, strlen(s), "r");
    382 		if (!f)
    383 			eprintf("fmemopen:");
    384 	}
    385 
    386 	/* NOTE: get arg functions can't use genbuf */
    387 	while (read_line(f, &genbuf) != EOF) {
    388 		s = genbuf.str;
    389 
    390 		/* if the first two characters of the script are "#n" default output shall be suppressed */
    391 		if (++lineno == 1 && *s == '#' && s[1] == 'n') {
    392 			gflags.n = 1;
    393 			continue;
    394 		}
    395 
    396 		if (gflags.aci_cont) {
    397 			aci_append(pc - 1, s);
    398 			continue;
    399 		}
    400 		if (gflags.s_cont)
    401 			s = (pc - 1)->fninfo->getarg(pc - 1, s);
    402 
    403 		while (*s) {
    404 			s = chompr(s, ';');
    405 			if (!*s || *s == '#')
    406 				break;
    407 
    408 			if ((size_t)(pc - prog) == pcap)
    409 				resize((void **)&prog, &pcap, sizeof(*prog), pcap * 2 + 1, (void **)&pc);
    410 
    411 			pc->range.beg.type = pc->range.end.type = IGNORE;
    412 			pc->fninfo = NULL;
    413 			pc->in_match = 0;
    414 
    415 			s = make_range(&pc->range, s);
    416 			s = chomp(s);
    417 			pc->negate = *s == '!';
    418 			s = chompr(s, '!');
    419 
    420 			if (!isascii(*s) || !(pc->fninfo = &fns[(unsigned)*s])->fn)
    421 				leprintf("bad sed function");
    422 			if (pc->range.naddr > pc->fninfo->naddr)
    423 				leprintf("wrong number of addresses");
    424 			s++;
    425 
    426 			if (pc->fninfo->getarg)
    427 				s = pc->fninfo->getarg(pc, s);
    428 
    429 			pc++;
    430 		}
    431 	}
    432 
    433 	fshut(f, s);
    434 }
    435 
    436 /* FIXME: if we decide to honor lack of trailing newline, set/clear a global
    437  * flag when reading a line
    438  */
    439 static int
    440 read_line(FILE *f, String *s)
    441 {
    442 	ssize_t len;
    443 
    444 	if (!f)
    445 		return EOF;
    446 
    447 	if ((len = getline(&s->str, &s->cap, f)) < 0) {
    448 		if (ferror(f))
    449 			eprintf("getline:");
    450 		return EOF;
    451 	}
    452 	if (s->str[--len] == '\n')
    453 		s->str[len] = '\0';
    454 	return 0;
    455 }
    456 
    457 /* read first range from s, return pointer to one past end of range */
    458 static char *
    459 make_range(Range *range, char *s)
    460 {
    461 	s = make_addr(&range->beg, s);
    462 
    463 	if (*s == ',')
    464 		s = make_addr(&range->end, s + 1);
    465 	else
    466 		range->end.type = IGNORE;
    467 
    468 	if      (range->beg.type == EVERY  && range->end.type == IGNORE) range->naddr = 0;
    469 	else if (range->beg.type != IGNORE && range->end.type == IGNORE) range->naddr = 1;
    470 	else if (range->beg.type != IGNORE && range->end.type != IGNORE) range->naddr = 2;
    471 	else leprintf("this is impossible...");
    472 
    473 	return s;
    474 }
    475 
    476 /* read first addr from s, return pointer to one past end of addr */
    477 static char *
    478 make_addr(Addr *addr, char *s)
    479 {
    480 	Rune r;
    481 	char *p = s + strlen(s);
    482 	size_t rlen = echarntorune(&r, s, p - s);
    483 
    484 	if (r == '$') {
    485 		addr->type = LAST;
    486 		s += rlen;
    487 	} else if (isdigitrune(r)) {
    488 		addr->type = LINE;
    489 		addr->u.lineno = stol(s, &s);
    490 	} else if (r == '/' || r == '\\') {
    491 		Rune delim;
    492 		if (r == '\\') {
    493 			s += rlen;
    494 			rlen = echarntorune(&r, s, p - s);
    495 		}
    496 		if (r == '\\')
    497 			leprintf("bad delimiter '\\'");
    498 		delim = r;
    499 		s += rlen;
    500 		rlen = echarntorune(&r, s, p - s);
    501 		if (r == delim) {
    502 			addr->type = LASTRE;
    503 			s += rlen;
    504 		} else {
    505 			addr->type = REGEX;
    506 			p = find_delim(s, delim, 1);
    507 			if (!*p)
    508 				leprintf("unclosed regex");
    509 			p -= escapes(s, p, delim, 0);
    510 			*p++ = '\0';
    511 			addr->u.re = emalloc(sizeof(*addr->u.re));
    512 			eregcomp(addr->u.re, s, gflags.E ? REG_EXTENDED : 0);
    513 			s = p;
    514 		}
    515 	} else {
    516 		addr->type = EVERY;
    517 	}
    518 
    519 	return s;
    520 }
    521 
    522 /* return pointer to first delim in s that is not escaped
    523  * and if do_brackets is set, not in [] (note possible [::], [..], [==], inside [])
    524  * return pointer to trailing nul byte if no delim found
    525  *
    526  * any escaped character that is not special is just itself (POSIX undefined)
    527  * FIXME: pull out into some util thing, will be useful for ed as well
    528  */
    529 static char *
    530 find_delim(char *s, Rune delim, int do_brackets)
    531 {
    532 	enum {
    533 		OUTSIDE         , /* not in brackets */
    534 		BRACKETS_OPENING, /* last char was first [ or last two were first [^ */
    535 		BRACKETS_INSIDE , /* inside [] */
    536 		INSIDE_OPENING  , /* inside [] and last char was [ */
    537 		CLASS_INSIDE    , /* inside class [::], or colating element [..] or [==], inside [] */
    538 		CLASS_CLOSING   , /* inside class [::], or colating element [..] or [==], and last character was the respective : . or = */
    539 	} state = OUTSIDE;
    540 
    541 	Rune r, c = 0; /* no c won't be used uninitialized, shutup -Wall */
    542 	size_t rlen;
    543 	int escape = 0;
    544 	char *end = s + strlen(s);
    545 
    546 	for (; *s; s += rlen) {
    547 		rlen = echarntorune(&r, s, end - s);
    548 
    549 		if      (state == BRACKETS_OPENING       &&  r == '^'  ) {                            continue; }
    550 		else if (state == BRACKETS_OPENING       &&  r == ']'  ) { state  = BRACKETS_INSIDE ; continue; }
    551 		else if (state == BRACKETS_OPENING                     ) { state  = BRACKETS_INSIDE ;           }
    552 
    553 		if      (state == CLASS_CLOSING          &&  r == ']'  ) { state  = BRACKETS_INSIDE ;           }
    554 		else if (state == CLASS_CLOSING                        ) { state  = CLASS_INSIDE    ;           }
    555 		else if (state == CLASS_INSIDE           &&  r ==  c   ) { state  = CLASS_CLOSING   ;           }
    556 		else if (state == INSIDE_OPENING         && (r == ':'  ||
    557 		                                             r == '.'  ||
    558 		                                             r == '=') ) { state  = CLASS_INSIDE    ; c = r;    }
    559 		else if (state == INSIDE_OPENING         &&  r == ']'  ) { state  = OUTSIDE         ;           }
    560 		else if (state == INSIDE_OPENING                       ) { state  = BRACKETS_INSIDE ;           }
    561 		else if (state == BRACKETS_INSIDE        &&  r == '['  ) { state  = INSIDE_OPENING  ;           }
    562 		else if (state == BRACKETS_INSIDE        &&  r == ']'  ) { state  = OUTSIDE         ;           }
    563 		else if (state == OUTSIDE                &&  escape    ) { escape = 0               ;           }
    564 		else if (state == OUTSIDE                &&  r == '\\' ) { escape = 1               ;           }
    565 		else if (state == OUTSIDE                &&  r == delim) return s;
    566 		else if (state == OUTSIDE && do_brackets &&  r == '['  ) { state  = BRACKETS_OPENING;           }
    567 	}
    568 	return s;
    569 }
    570 
    571 static char *
    572 chomp(char *s)
    573 {
    574 	return chompr(s, 0);
    575 }
    576 
    577 /* eat all leading whitespace and occurrences of rune */
    578 static char *
    579 chompr(char *s, Rune rune)
    580 {
    581 	Rune   r;
    582 	size_t rlen;
    583 	char  *end = s + strlen(s);
    584 
    585 	while (*s && (rlen = echarntorune(&r, s, end - s)) && (isspacerune(r) || r == rune))
    586 		s += rlen;
    587 	return s;
    588 }
    589 
    590 /* convert first nrunes Runes from UTF-8 string s in allocated Rune*
    591  * NOTE: sequence must be valid UTF-8, check first */
    592 static Rune *
    593 strtorunes(char *s, size_t nrunes)
    594 {
    595 	Rune *rs, *rp;
    596 
    597 	rp = rs = ereallocarray(NULL, nrunes + 1, sizeof(*rs));
    598 
    599 	while (nrunes--)
    600 		s += chartorune(rp++, s);
    601 
    602 	*rp = '\0';
    603 	return rs;
    604 }
    605 
    606 static long
    607 stol(char *s, char **endp)
    608 {
    609 	long n;
    610 	errno = 0;
    611 	n = strtol(s, endp, 10);
    612 
    613 	if (errno)
    614 		leprintf("strtol:");
    615 	if (*endp == s)
    616 		leprintf("strtol: invalid number");
    617 
    618 	return n;
    619 }
    620 
    621 /* from beg to end replace "\\d" with "d" and "\\n" with "\n" (where d is delim)
    622  * if delim is 'n' and n_newline is 0 then "\\n" is replaced with "n" (normal)
    623  * if delim is 'n' and n_newline is 1 then "\\n" is replaced with "\n" (y command)
    624  * if delim is 0 all escaped characters represent themselves (aci text)
    625  * memmove rest of string (beyond end) into place
    626  * return the number of converted escapes (backslashes removed)
    627  * FIXME: this has had too many corner cases slapped on and is ugly. rewrite better
    628  */
    629 static size_t
    630 escapes(char *beg, char *end, Rune delim, int n_newline)
    631 {
    632 	size_t num = 0;
    633 	char *src = beg, *dst = beg;
    634 
    635 	while (src < end) {
    636 		/* handle escaped backslash specially so we don't think the second
    637 		 * backslash is escaping something */
    638 		if (*src == '\\' && src[1] == '\\') {
    639 			*dst++ = *src++;
    640 			if (delim)
    641 				*dst++ = *src++;
    642 			else
    643 				src++;
    644 		} else if (*src == '\\' && !delim) {
    645 			src++;
    646 		} else if (*src == '\\' && src[1]) {
    647 			Rune r;
    648 			size_t rlen;
    649 			num++;
    650 			src++;
    651 			rlen = echarntorune(&r, src, end - src);
    652 
    653 			if (r == 'n' && delim == 'n') {
    654 				*src = n_newline ? '\n' : 'n'; /* src so we can still memmove() */
    655 			} else if (r == 'n') {
    656 				*src = '\n';
    657 			} else if (r != delim) {
    658 				*dst++ = '\\';
    659 				num--;
    660 			}
    661 
    662 			memmove(dst, src, rlen);
    663 			dst += rlen;
    664 			src += rlen;
    665 		} else {
    666 			*dst++ = *src++;
    667 		}
    668 	}
    669 	memmove(dst, src, strlen(src) + 1);
    670 	return num;
    671 }
    672 
    673 static size_t
    674 echarntorune(Rune *r, char *s, size_t n)
    675 {
    676 	size_t rlen = charntorune(r, s, n);
    677 	if (!rlen || *r == Runeerror)
    678 		leprintf("invalid UTF-8");
    679 	return rlen;
    680 }
    681 
    682 static void
    683 insert_labels(void)
    684 {
    685 	size_t i;
    686 	Cmd *from, *to;
    687 
    688 	while (branches.size) {
    689 		from = prog + (ptrdiff_t)pop(&branches);
    690 
    691 		if (!from->u.label) {/* no label branch to end of script */
    692 			from->u.jump = pc - 1;
    693 		} else {
    694 			for (i = 0; i < labels.size; i++) {
    695 				to = prog + (ptrdiff_t)labels.data[i];
    696 				if (!strcmp(from->u.label, to->u.label)) {
    697 					from->u.jump = to;
    698 					break;
    699 				}
    700 			}
    701 			if (i == labels.size)
    702 				leprintf("bad label");
    703 		}
    704 	}
    705 }
    706 
    707 /*
    708  * Getargs / Freeargs
    709  * Read argument from s, return pointer to one past last character of argument
    710  */
    711 
    712 /* POSIX compliant
    713  * i\
    714  * foobar
    715  *
    716  * also allow the following non POSIX compliant
    717  * i        # empty line
    718  * ifoobar
    719  * ifoobar\
    720  * baz
    721  *
    722  * FIXME: GNU and busybox discard leading spaces
    723  * i  foobar
    724  * i foobar
    725  * ifoobar
    726  * are equivalent in GNU and busybox. We don't. Should we?
    727  */
    728 static char *
    729 get_aci_arg(Cmd *c, char *s)
    730 {
    731 	c->u.acir.print = check_puts;
    732 	c->u.acir.str = (String){ NULL, 0 };
    733 
    734 	gflags.aci_cont = !!*s; /* no continue flag if empty string */
    735 
    736 	/* neither empty string nor POSIX compliant */
    737 	if (*s && !(*s == '\\' && !s[1]))
    738 		aci_append(c, s);
    739 
    740 	return s + strlen(s);
    741 }
    742 
    743 static void
    744 aci_append(Cmd *c, char *s)
    745 {
    746 	char *end = s + strlen(s), *p = end;
    747 
    748 	gflags.aci_cont = 0;
    749 	while (--p >= s && *p == '\\')
    750 		gflags.aci_cont = !gflags.aci_cont;
    751 
    752 	if (gflags.aci_cont)
    753 		*--end = '\n';
    754 
    755 	escapes(s, end, 0, 0);
    756 	stracat(&c->u.acir.str, s);
    757 }
    758 
    759 static void
    760 free_acir_arg(Cmd *c)
    761 {
    762 	free(c->u.acir.str.str);
    763 }
    764 
    765 /* POSIX dictates that label is rest of line, including semicolons, trailing
    766  * whitespace, closing braces, etc. and can be limited to 8 bytes
    767  *
    768  * I allow a semicolon or closing brace to terminate a label name, it's not
    769  * POSIX compliant, but it's useful and every sed version I've tried to date
    770  * does the same.
    771  *
    772  * FIXME: POSIX dictates that leading whitespace is ignored but trailing
    773  * whitespace is not. This is annoying and we should probably get rid of it.
    774  */
    775 static char *
    776 get_bt_arg(Cmd *c, char *s)
    777 {
    778 	char *p = semicolon_arg(s = chomp(s));
    779 
    780 	if (p != s) {
    781 		c->u.label = estrndup(s, p - s);
    782 	} else {
    783 		c->u.label = NULL;
    784 	}
    785 
    786 	push(&branches, (void *)(c - prog));
    787 
    788 	return p;
    789 }
    790 
    791 /* POSIX dictates file name is rest of line including semicolons, trailing
    792  * whitespace, closing braces, etc. and file name must be preceded by a space
    793  *
    794  * I allow a semicolon or closing brace to terminate a file name and don't
    795  * enforce leading space.
    796  *
    797  * FIXME: decide whether trailing whitespace should be included and fix
    798  * accordingly
    799  */
    800 static char *
    801 get_r_arg(Cmd *c, char *s)
    802 {
    803 	char *p = semicolon_arg(s = chomp(s));
    804 
    805 	if (p == s)
    806 		leprintf("no file name");
    807 
    808 	c->u.acir.str.str = estrndup(s, p - s);
    809 	c->u.acir.print = write_file;
    810 
    811 	return p;
    812 }
    813 
    814 /* we allow "\\n" in replacement text to mean "\n" (undefined in POSIX)
    815  *
    816  * FIXME: allow other escapes in regex and replacement? if so change escapes()
    817  */
    818 static char *
    819 get_s_arg(Cmd *c, char *s)
    820 {
    821 	Rune delim, r;
    822 	Cmd buf;
    823 	char *p;
    824 	int esc, lastre;
    825 
    826 	/* s/Find/Replace/Flags */
    827 
    828 	/* Find */
    829 	if (!gflags.s_cont) { /* NOT continuing from literal newline in replacement text */
    830 		lastre = 0;
    831 		c->u.s.repl = (String){ NULL, 0 };
    832 		c->u.s.occurrence = 1;
    833 		c->u.s.file = NULL;
    834 		c->u.s.p = 0;
    835 
    836 		if (!*s || *s == '\\')
    837 			leprintf("bad delimiter");
    838 
    839 		p = s + strlen(s);
    840 		s += echarntorune(&delim, s, p - s);
    841 		c->u.s.delim = delim;
    842 
    843 		echarntorune(&r, s, p - s);
    844 		if (r == delim) /* empty regex */
    845 			lastre = 1;
    846 
    847 		p = find_delim(s, delim, 1);
    848 		if (!*p)
    849 			leprintf("missing second delimiter");
    850 		p -= escapes(s, p, delim, 0);
    851 		*p = '\0';
    852 
    853 		if (lastre) {
    854 			c->u.s.re = NULL;
    855 		} else {
    856 			c->u.s.re = emalloc(sizeof(*c->u.s.re));
    857 			/* FIXME: different eregcomp that calls fatal */
    858 			eregcomp(c->u.s.re, s, gflags.E ? REG_EXTENDED : 0);
    859 		}
    860 		s = p + runelen(delim);
    861 	}
    862 
    863 	/* Replace */
    864 	delim = c->u.s.delim;
    865 
    866 	p = find_delim(s, delim, 0);
    867 	p -= escapes(s, p, delim, 0);
    868 	if (!*p) { /* no third delimiter */
    869 		/* FIXME: same backslash counting as aci_append() */
    870 		if (p[-1] != '\\')
    871 			leprintf("missing third delimiter or <backslash><newline>");
    872 		p[-1] = '\n';
    873 		gflags.s_cont = 1;
    874 	} else {
    875 		gflags.s_cont = 0;
    876 	}
    877 
    878 	/* check for bad references in replacement text */
    879 	*p = '\0';
    880 	for (esc = 0, p = s; *p; p++) {
    881 		if (esc) {
    882 			esc = 0;
    883 			if (isdigit(*p) && c->u.s.re && (size_t)(*p - '0') > c->u.s.re->re_nsub)
    884 				leprintf("back reference number greater than number of groups");
    885 		} else if (*p == '\\') {
    886 			esc = 1;
    887 		}
    888 	}
    889 	stracat(&c->u.s.repl, s);
    890 
    891 	if (gflags.s_cont)
    892 		return p;
    893 
    894 	s = p + runelen(delim);
    895 
    896 	/* Flags */
    897 	p = semicolon_arg(s = chomp(s));
    898 
    899 	/* FIXME: currently for simplicity take last of g or occurrence flags and
    900 	 *        ignore multiple p flags. need to fix that */
    901 	for (; s < p; s++) {
    902 		if (isdigit(*s)) {
    903 			c->u.s.occurrence = stol(s, &s);
    904 			s--; /* for loop will advance pointer */
    905 		} else {
    906 			switch (*s) {
    907 			case 'g': c->u.s.occurrence = 0; break;
    908 			case 'p': c->u.s.p = 1;          break;
    909 			case 'w':
    910 				/* must be last flag, take everything up to newline/semicolon
    911 				 * s == p after this */
    912 				s = get_w_arg(&buf, chomp(s+1));
    913 				c->u.s.file = buf.u.file;
    914 				break;
    915 			}
    916 		}
    917 	}
    918 	return p;
    919 }
    920 
    921 static void
    922 free_s_arg(Cmd *c)
    923 {
    924 	if (c->u.s.re)
    925 		regfree(c->u.s.re);
    926 	free(c->u.s.re);
    927 	free(c->u.s.repl.str);
    928 }
    929 
    930 /* see get_r_arg notes */
    931 static char *
    932 get_w_arg(Cmd *c, char *s)
    933 {
    934 	char *p = semicolon_arg(s = chomp(s));
    935 	Wfile *w, **wp;
    936 
    937 	if (p == s)
    938 		leprintf("no file name");
    939 
    940 	for (wp = (Wfile **)wfiles.data; (size_t)(wp - (Wfile **)wfiles.data) < wfiles.size; wp++) {
    941 		if (strlen((*wp)->path) == (size_t)(p - s) && !strncmp(s, (*wp)->path, p - s)) {
    942 			c->u.file = (*wp)->file;
    943 			return p;
    944 		}
    945 	}
    946 
    947 	w = emalloc(sizeof(*w));
    948 	w->path = estrndup(s, p - s);
    949 
    950 	if (!(w->file = fopen(w->path, "w")))
    951 		leprintf("fopen failed");
    952 
    953 	c->u.file = w->file;
    954 
    955 	push(&wfiles, w);
    956 	return p;
    957 }
    958 
    959 static char *
    960 get_y_arg(Cmd *c, char *s)
    961 {
    962 	Rune delim;
    963 	char *p = s + strlen(s);
    964 	size_t rlen = echarntorune(&delim, s, p - s);
    965 	size_t nrunes1, nrunes2;
    966 
    967 	c->u.y.set1 = c->u.y.set2 = NULL;
    968 
    969 	s += rlen;
    970 	p = find_delim(s, delim, 0);
    971 	p -= escapes(s, p, delim, 1);
    972 	nrunes1 = utfnlen(s, p - s);
    973 	c->u.y.set1 = strtorunes(s, nrunes1);
    974 
    975 	s = p + rlen;
    976 	p = find_delim(s, delim, 0);
    977 	p -= escapes(s, p, delim, 1);
    978 	nrunes2 = utfnlen(s, p - s);
    979 
    980 	if (nrunes1 != nrunes2)
    981 		leprintf("different set lengths");
    982 
    983 	c->u.y.set2 = strtorunes(s, utfnlen(s, p - s));
    984 
    985 	return p + rlen;
    986 }
    987 
    988 static void
    989 free_y_arg(Cmd *c)
    990 {
    991 	free(c->u.y.set1);
    992 	free(c->u.y.set2);
    993 }
    994 
    995 /* see get_bt_arg notes */
    996 static char *
    997 get_colon_arg(Cmd *c, char *s)
    998 {
    999 	char *p = semicolon_arg(s = chomp(s));
   1000 
   1001 	if (p == s)
   1002 		leprintf("no label name");
   1003 
   1004 	c->u.label = estrndup(s, p - s);
   1005 	push(&labels, (void *)(c - prog));
   1006 	return p;
   1007 }
   1008 
   1009 static char *
   1010 get_lbrace_arg(Cmd *c, char *s)
   1011 {
   1012 	push(&braces, (void *)(c - prog));
   1013 	return s;
   1014 }
   1015 
   1016 static char *
   1017 get_rbrace_arg(Cmd *c, char *s)
   1018 {
   1019 	Cmd *lbrace;
   1020 
   1021 	if (!braces.size)
   1022 		leprintf("extra }");
   1023 
   1024 	lbrace = prog + (ptrdiff_t)pop(&braces);
   1025 	lbrace->u.offset = c - prog;
   1026 	return s;
   1027 }
   1028 
   1029 /* s points to beginning of an argument that may be semicolon terminated
   1030  * return pointer to semicolon or nul byte after string
   1031  * or closing brace as to not force ; before }
   1032  * FIXME: decide whether or not to eat trailing whitespace for arguments that
   1033  *        we allow semicolon/brace termination that POSIX doesn't
   1034  *        b, r, t, w, :
   1035  *        POSIX says trailing whitespace is part of label name, file name, etc.
   1036  *        we should probably eat it
   1037  */
   1038 static char *
   1039 semicolon_arg(char *s)
   1040 {
   1041 	char *p = strpbrk(s, ";}");
   1042 	if (!p)
   1043 		p = s + strlen(s);
   1044 	return p;
   1045 }
   1046 
   1047 static void
   1048 run(void)
   1049 {
   1050 	lineno = 0;
   1051 	if (braces.size)
   1052 		leprintf("extra {");
   1053 
   1054 	/* genbuf has already been initialized, patt will be in new_line
   1055 	 * (or we'll halt) */
   1056 	stracpy(&hold, "");
   1057 
   1058 	insert_labels();
   1059 	next_file();
   1060 	new_line();
   1061 
   1062 	for (pc = prog; !gflags.halt; pc++)
   1063 		pc->fninfo->fn(pc);
   1064 }
   1065 
   1066 /* return true if we are in range for c, set c->in_match appropriately */
   1067 static int
   1068 in_range(Cmd *c)
   1069 {
   1070 	if (match_addr(&c->range.beg)) {
   1071 		if (c->range.naddr == 2) {
   1072 			if (c->range.end.type == LINE && c->range.end.u.lineno <= lineno)
   1073 				c->in_match = 0;
   1074 			else
   1075 				c->in_match = 1;
   1076 		}
   1077 		return !c->negate;
   1078 	}
   1079 	if (c->in_match && match_addr(&c->range.end)) {
   1080 		c->in_match = 0;
   1081 		return !c->negate;
   1082 	}
   1083 	return c->in_match ^ c->negate;
   1084 }
   1085 
   1086 /* return true if addr matches current line */
   1087 static int
   1088 match_addr(Addr *a)
   1089 {
   1090 	switch (a->type) {
   1091 	default:
   1092 	case IGNORE: return 0;
   1093 	case EVERY: return 1;
   1094 	case LINE: return lineno == a->u.lineno;
   1095 	case LAST:
   1096 		while (is_eof(file) && !next_file())
   1097 			;
   1098 		return !file;
   1099 	case REGEX:
   1100 		lastre = a->u.re;
   1101 		return !regexec(a->u.re, patt.str, 0, NULL, 0);
   1102 	case LASTRE:
   1103 		if (!lastre)
   1104 			leprintf("no previous regex");
   1105 		return !regexec(lastre, patt.str, 0, NULL, 0);
   1106 	}
   1107 }
   1108 
   1109 /* move to next input file
   1110  * stdin if first call and no files
   1111  * return 0 for success and 1 for no more files
   1112  */
   1113 static int
   1114 next_file(void)
   1115 {
   1116 	static unsigned char first = 1;
   1117 
   1118 	if (file == stdin)
   1119 		clearerr(file);
   1120 	else if (file)
   1121 		fshut(file, "<file>");
   1122 	/* given no files, default to stdin */
   1123 	file = first && !*files ? stdin : NULL;
   1124 	first = 0;
   1125 
   1126 	while (!file && *files) {
   1127 		if (!strcmp(*files, "-")) {
   1128 			file = stdin;
   1129 		} else if (!(file = fopen(*files, "r"))) {
   1130 			/* warn this file didn't open, but move on to next */
   1131 			weprintf("fopen %s:", *files);
   1132 			ret = 1;
   1133 		}
   1134 		files++;
   1135 	}
   1136 
   1137 	return !file;
   1138 }
   1139 
   1140 /* test if stream is at EOF */
   1141 static int
   1142 is_eof(FILE *f)
   1143 {
   1144 	int c;
   1145 
   1146 	if (!f || feof(f))
   1147 		return 1;
   1148 
   1149 	c = fgetc(f);
   1150 	if (c == EOF && ferror(f))
   1151 		eprintf("fgetc:");
   1152 	if (c != EOF && ungetc(c, f) == EOF)
   1153 		eprintf("ungetc EOF\n");
   1154 
   1155 	return c == EOF;
   1156 }
   1157 
   1158 /* perform writes that were scheduled
   1159  * for aci this is check_puts(string, stdout)
   1160  * for r this is write_file(path, stdout)
   1161  */
   1162 static void
   1163 do_writes(void)
   1164 {
   1165 	Cmd *c;
   1166 	size_t i;
   1167 
   1168 	for (i = 0; i < writes.size; i++) {
   1169 		c = writes.data[i];
   1170 		c->u.acir.print(c->u.acir.str.str, stdout);
   1171 	}
   1172 	writes.size = 0;
   1173 }
   1174 
   1175 /* used for r's u.acir.print()
   1176  * FIXME: something like util's concat() would be better
   1177  */
   1178 static void
   1179 write_file(char *path, FILE *out)
   1180 {
   1181 	FILE *in = fopen(path, "r");
   1182 	if (!in) /* no file is treated as empty file */
   1183 		return;
   1184 
   1185 	while (read_line(in, &genbuf) != EOF)
   1186 		check_puts(genbuf.str, out);
   1187 
   1188 	fshut(in, path);
   1189 }
   1190 
   1191 static void
   1192 check_puts(char *s, FILE *f)
   1193 {
   1194 	if (s && fputs(s, f) == EOF)
   1195 		eprintf("fputs:");
   1196 	if (fputs("\n", f) == EOF)
   1197 		eprintf("fputs:");
   1198 }
   1199 
   1200 /* iterate from beg to end updating ranges so we don't miss any commands
   1201  * e.g. sed -n '1d;1,3p' should still print lines 2 and 3
   1202  */
   1203 static void
   1204 update_ranges(Cmd *beg, Cmd *end)
   1205 {
   1206 	while (beg < end)
   1207 		in_range(beg++);
   1208 }
   1209 
   1210 /*
   1211  * Sed functions
   1212  */
   1213 static void
   1214 cmd_a(Cmd *c)
   1215 {
   1216 	if (in_range(c))
   1217 		push(&writes, c);
   1218 }
   1219 
   1220 static void
   1221 cmd_b(Cmd *c)
   1222 {
   1223 	if (!in_range(c))
   1224 		return;
   1225 
   1226 	/* if we jump backwards update to end, otherwise update to destination */
   1227 	update_ranges(c + 1, c->u.jump > c ? c->u.jump : prog + pcap);
   1228 	pc = c->u.jump;
   1229 }
   1230 
   1231 static void
   1232 cmd_c(Cmd *c)
   1233 {
   1234 	if (!in_range(c))
   1235 		return;
   1236 
   1237 	/* write the text on the last line of the match */
   1238 	if (!c->in_match)
   1239 		check_puts(c->u.acir.str.str, stdout);
   1240 	/* otherwise start the next cycle without printing pattern space
   1241 	 * effectively deleting the text */
   1242 	new_next();
   1243 }
   1244 
   1245 static void
   1246 cmd_d(Cmd *c)
   1247 {
   1248 	if (!in_range(c))
   1249 		return;
   1250 
   1251 	new_next();
   1252 }
   1253 
   1254 static void
   1255 cmd_D(Cmd *c)
   1256 {
   1257 	char *p;
   1258 
   1259 	if (!in_range(c))
   1260 		return;
   1261 
   1262 	if ((p = strchr(patt.str, '\n'))) {
   1263 		p++;
   1264 		memmove(patt.str, p, strlen(p) + 1);
   1265 		old_next();
   1266 	} else {
   1267 		new_next();
   1268 	}
   1269 }
   1270 
   1271 static void
   1272 cmd_g(Cmd *c)
   1273 {
   1274 	if (in_range(c))
   1275 		stracpy(&patt, hold.str);
   1276 }
   1277 
   1278 static void
   1279 cmd_G(Cmd *c)
   1280 {
   1281 	if (!in_range(c))
   1282 		return;
   1283 
   1284 	stracat(&patt, "\n");
   1285 	stracat(&patt, hold.str);
   1286 }
   1287 
   1288 static void
   1289 cmd_h(Cmd *c)
   1290 {
   1291 	if (in_range(c))
   1292 		stracpy(&hold, patt.str);
   1293 }
   1294 
   1295 static void
   1296 cmd_H(Cmd *c)
   1297 {
   1298 	if (!in_range(c))
   1299 		return;
   1300 
   1301 	stracat(&hold, "\n");
   1302 	stracat(&hold, patt.str);
   1303 }
   1304 
   1305 static void
   1306 cmd_i(Cmd *c)
   1307 {
   1308 	if (in_range(c))
   1309 		check_puts(c->u.acir.str.str, stdout);
   1310 }
   1311 
   1312 /* I think it makes sense to print invalid UTF-8 sequences in octal to satisfy
   1313  * the "visually unambiguous form" sed(1p)
   1314  */
   1315 static void
   1316 cmd_l(Cmd *c)
   1317 {
   1318 	Rune   r;
   1319 	char  *p, *end;
   1320 	size_t rlen;
   1321 
   1322 	char *escapes[] = { /* FIXME: 7 entries and search instead of 127 */
   1323 		['\\'] = "\\\\", ['\a'] = "\\a", ['\b'] = "\\b",
   1324 		['\f'] = "\\f" , ['\r'] = "\\r", ['\t'] = "\\t",
   1325 		['\v'] = "\\v" , [0x7f] = NULL, /* fill out the table */
   1326 	};
   1327 
   1328 	if (!in_range(c))
   1329 		return;
   1330 
   1331 	/* FIXME: line wrapping. sed(1p) says "length at which folding occurs is
   1332 	 * unspecified, but should be appropraite for the output device"
   1333 	 * just wrap at 80 Runes?
   1334 	 */
   1335 	for (p = patt.str, end = p + strlen(p); p < end; p += rlen) {
   1336 		if (isascii(*p) && escapes[(unsigned int)*p]) {
   1337 			fputs(escapes[(unsigned int)*p], stdout);
   1338 			rlen = 1;
   1339 		} else if (!(rlen = charntorune(&r, p, end - p))) {
   1340 			/* ran out of chars, print the bytes of the short sequence */
   1341 			for (; p < end; p++)
   1342 				printf("\\%03hho", (unsigned char)*p);
   1343 			break;
   1344 		} else if (r == Runeerror) {
   1345 			for (; rlen; rlen--, p++)
   1346 				printf("\\%03hho", (unsigned char)*p);
   1347 		} else {
   1348 			while (fwrite(p, rlen, 1, stdout) < 1 && errno == EINTR)
   1349 				;
   1350 			if (ferror(stdout))
   1351 				eprintf("fwrite:");
   1352 		}
   1353 	}
   1354 	check_puts("$", stdout);
   1355 }
   1356 
   1357 static void
   1358 cmd_n(Cmd *c)
   1359 {
   1360 	if (!in_range(c))
   1361 		return;
   1362 
   1363 	if (!gflags.n)
   1364 		check_puts(patt.str, stdout);
   1365 	do_writes();
   1366 	new_line();
   1367 }
   1368 
   1369 static void
   1370 cmd_N(Cmd *c)
   1371 {
   1372 	if (!in_range(c))
   1373 		return;
   1374 	do_writes();
   1375 	app_line();
   1376 }
   1377 
   1378 static void
   1379 cmd_p(Cmd *c)
   1380 {
   1381 	if (in_range(c))
   1382 		check_puts(patt.str, stdout);
   1383 }
   1384 
   1385 static void
   1386 cmd_P(Cmd *c)
   1387 {
   1388 	char *p;
   1389 
   1390 	if (!in_range(c))
   1391 		return;
   1392 
   1393 	if ((p = strchr(patt.str, '\n')))
   1394 		*p = '\0';
   1395 
   1396 	check_puts(patt.str, stdout);
   1397 
   1398 	if (p)
   1399 		*p = '\n';
   1400 }
   1401 
   1402 static void
   1403 cmd_q(Cmd *c)
   1404 {
   1405 	if (!in_range(c))
   1406 		return;
   1407 
   1408 	if (!gflags.n)
   1409 		check_puts(patt.str, stdout);
   1410 	do_writes();
   1411 	gflags.halt = 1;
   1412 }
   1413 
   1414 static void
   1415 cmd_r(Cmd *c)
   1416 {
   1417 	if (in_range(c))
   1418 		push(&writes, c);
   1419 }
   1420 
   1421 static void
   1422 cmd_s(Cmd *c)
   1423 {
   1424 	String tmp;
   1425 	Rune r;
   1426 	size_t plen, rlen, len;
   1427 	char *p, *s, *end;
   1428 	unsigned int matches = 0, last_empty = 1, qflag = 0, cflags = 0;
   1429 	regex_t *re;
   1430 	regmatch_t *rm, *pmatch = NULL;
   1431 
   1432 	if (!in_range(c))
   1433 		return;
   1434 
   1435 	if (!c->u.s.re && !lastre)
   1436 		leprintf("no previous regex");
   1437 
   1438 	re = c->u.s.re ? c->u.s.re : lastre;
   1439 	lastre = re;
   1440 
   1441 	plen = re->re_nsub + 1;
   1442 	pmatch = ereallocarray(NULL, plen, sizeof(regmatch_t));
   1443 
   1444 	*genbuf.str = '\0';
   1445 	s = patt.str;
   1446 
   1447 	while (!qflag && !regexec(re, s, plen, pmatch, cflags)) {
   1448 		cflags = REG_NOTBOL; /* match against beginning of line first time, but not again */
   1449 		if (!*s) /* match against empty string first time, but not again */
   1450 			qflag = 1;
   1451 
   1452 		/* don't substitute if last match was not empty but this one is.
   1453 		 * s_a*_._g
   1454 		 * foobar -> .f.o.o.b.r.
   1455 		 */
   1456 		if ((last_empty || pmatch[0].rm_eo) &&
   1457 		    (++matches == c->u.s.occurrence || !c->u.s.occurrence)) {
   1458 			/* copy over everything before the match */
   1459 			strnacat(&genbuf, s, pmatch[0].rm_so);
   1460 
   1461 			/* copy over replacement text, taking into account &, backreferences, and \ escapes */
   1462 			for (p = c->u.s.repl.str, len = strcspn(p, "\\&"); *p; len = strcspn(++p, "\\&")) {
   1463 				strnacat(&genbuf, p, len);
   1464 				p += len;
   1465 				switch (*p) {
   1466 				default: leprintf("this shouldn't be possible");
   1467 				case '\0':
   1468 					/* we're at the end, back up one so the ++p will put us on
   1469 					 * the null byte to break out of the loop */
   1470 					--p;
   1471 					break;
   1472 				case '&':
   1473 					strnacat(&genbuf, s + pmatch[0].rm_so, pmatch[0].rm_eo - pmatch[0].rm_so);
   1474 					break;
   1475 				case '\\':
   1476 					if (isdigit(*++p)) { /* backreference */
   1477 						/* only need to check here if using lastre, otherwise we checked when building */
   1478 						if (!c->u.s.re && (size_t)(*p - '0') > re->re_nsub)
   1479 							leprintf("back reference number greater than number of groups");
   1480 						rm = &pmatch[*p - '0'];
   1481 						strnacat(&genbuf, s + rm->rm_so, rm->rm_eo - rm->rm_so);
   1482 					} else { /* character after backslash taken literally (well one byte, but it works) */
   1483 						strnacat(&genbuf, p, 1);
   1484 					}
   1485 					break;
   1486 				}
   1487 			}
   1488 		} else {
   1489 			/* not replacing, copy over everything up to and including the match */
   1490 			strnacat(&genbuf, s, pmatch[0].rm_eo);
   1491 		}
   1492 
   1493 		if (!pmatch[0].rm_eo) { /* empty match, advance one rune and add it to output */
   1494 			end = s + strlen(s);
   1495 			rlen = charntorune(&r, s, end - s);
   1496 
   1497 			if (!rlen) { /* ran out of bytes, copy short sequence */
   1498 				stracat(&genbuf, s);
   1499 				s = end;
   1500 			} else { /* copy whether or not it's a good rune */
   1501 				strnacat(&genbuf, s, rlen);
   1502 				s += rlen;
   1503 			}
   1504 		}
   1505 		last_empty = !pmatch[0].rm_eo;
   1506 		s += pmatch[0].rm_eo;
   1507 	}
   1508 	free(pmatch);
   1509 
   1510 	if (!(matches && matches >= c->u.s.occurrence)) /* no replacement */
   1511 		return;
   1512 
   1513 	gflags.s = 1;
   1514 
   1515 	stracat(&genbuf, s);
   1516 
   1517 	tmp    = patt;
   1518 	patt   = genbuf;
   1519 	genbuf = tmp;
   1520 
   1521 	if (c->u.s.p)
   1522 		check_puts(patt.str, stdout);
   1523 	if (c->u.s.file)
   1524 		check_puts(patt.str, c->u.s.file);
   1525 }
   1526 
   1527 static void
   1528 cmd_t(Cmd *c)
   1529 {
   1530 	if (!in_range(c) || !gflags.s)
   1531 		return;
   1532 
   1533 	/* if we jump backwards update to end, otherwise update to destination */
   1534 	update_ranges(c + 1, c->u.jump > c ? c->u.jump : prog + pcap);
   1535 	pc = c->u.jump;
   1536 	gflags.s = 0;
   1537 }
   1538 
   1539 static void
   1540 cmd_w(Cmd *c)
   1541 {
   1542 	if (in_range(c))
   1543 		check_puts(patt.str, c->u.file);
   1544 }
   1545 
   1546 static void
   1547 cmd_x(Cmd *c)
   1548 {
   1549 	String tmp;
   1550 
   1551 	if (!in_range(c))
   1552 		return;
   1553 
   1554 	tmp  = patt;
   1555 	patt = hold;
   1556 	hold = tmp;
   1557 }
   1558 
   1559 static void
   1560 cmd_y(Cmd *c)
   1561 {
   1562 	String tmp;
   1563 	Rune r, *rp;
   1564 	size_t n, rlen;
   1565 	char *s, *end, buf[UTFmax];
   1566 
   1567 	if (!in_range(c))
   1568 		return;
   1569 
   1570 	*genbuf.str = '\0';
   1571 	for (s = patt.str, end = s + strlen(s); *s; s += rlen) {
   1572 		if (!(rlen = charntorune(&r, s, end - s))) { /* ran out of chars, copy rest */
   1573 			stracat(&genbuf, s);
   1574 			break;
   1575 		} else if (r == Runeerror) { /* bad UTF-8 sequence, copy bytes */
   1576 			strnacat(&genbuf, s, rlen);
   1577 		} else {
   1578 			for (rp = c->u.y.set1; *rp; rp++)
   1579 				if (*rp == r)
   1580 					break;
   1581 			if (*rp) { /* found r in set1, replace with Rune from set2 */
   1582 				n = runetochar(buf, c->u.y.set2 + (rp - c->u.y.set1));
   1583 				strnacat(&genbuf, buf, n);
   1584 			} else {
   1585 				strnacat(&genbuf, s, rlen);
   1586 			}
   1587 		}
   1588 	}
   1589 	tmp    = patt;
   1590 	patt   = genbuf;
   1591 	genbuf = tmp;
   1592 }
   1593 
   1594 static void
   1595 cmd_colon(Cmd *c)
   1596 {
   1597 }
   1598 
   1599 static void
   1600 cmd_equal(Cmd *c)
   1601 {
   1602 	if (in_range(c))
   1603 		printf("%zu\n", lineno);
   1604 }
   1605 
   1606 static void
   1607 cmd_lbrace(Cmd *c)
   1608 {
   1609 	Cmd *jump;
   1610 
   1611 	if (in_range(c))
   1612 		return;
   1613 
   1614 	/* update ranges on all commands we skip */
   1615 	jump = prog + c->u.offset;
   1616 	update_ranges(c + 1, jump);
   1617 	pc = jump;
   1618 }
   1619 
   1620 static void
   1621 cmd_rbrace(Cmd *c)
   1622 {
   1623 }
   1624 
   1625 /* not actually a sed function, but acts like one, put in last spot of script */
   1626 static void
   1627 cmd_last(Cmd *c)
   1628 {
   1629 	if (!gflags.n)
   1630 		check_puts(patt.str, stdout);
   1631 	do_writes();
   1632 	new_next();
   1633 }
   1634 
   1635 /*
   1636  * Actions
   1637  */
   1638 
   1639 /* read new line, continue current cycle */
   1640 static void
   1641 new_line(void)
   1642 {
   1643 	while (read_line(file, &patt) == EOF) {
   1644 		if (next_file()) {
   1645 			gflags.halt = 1;
   1646 			return;
   1647 		}
   1648 	}
   1649 	gflags.s = 0;
   1650 	lineno++;
   1651 }
   1652 
   1653 /* append new line, continue current cycle
   1654  * FIXME: used for N, POSIX specifies do not print pattern space when out of
   1655  *        input, but GNU does so busybox does as well. Currently we don't.
   1656  *        Should we?
   1657  */
   1658 static void
   1659 app_line(void)
   1660 {
   1661 	while (read_line(file, &genbuf) == EOF) {
   1662 		if (next_file()) {
   1663 			gflags.halt = 1;
   1664 			return;
   1665 		}
   1666 	}
   1667 
   1668 	stracat(&patt, "\n");
   1669 	stracat(&patt, genbuf.str);
   1670 	gflags.s = 0;
   1671 	lineno++;
   1672 }
   1673 
   1674 /* read new line, start new cycle */
   1675 static void
   1676 new_next(void)
   1677 {
   1678 	*patt.str = '\0';
   1679 	update_ranges(pc + 1, prog + pcap);
   1680 	new_line();
   1681 	pc = prog - 1;
   1682 }
   1683 
   1684 /* keep old pattern space, start new cycle */
   1685 static void
   1686 old_next(void)
   1687 {
   1688 	update_ranges(pc + 1, prog + pcap);
   1689 	pc = prog - 1;
   1690 }
   1691 
   1692 int
   1693 main(int argc, char *argv[])
   1694 {
   1695 	char *arg;
   1696 	int script = 0;
   1697 
   1698 	ARGBEGIN {
   1699 	case 'n':
   1700 		gflags.n = 1;
   1701 		break;
   1702 	case 'r':
   1703 	case 'E':
   1704 		gflags.E = 1;
   1705 		break;
   1706 	case 'e':
   1707 		arg = EARGF(usage());
   1708 		compile(arg, 0);
   1709 		script = 1;
   1710 		break;
   1711 	case 'f':
   1712 		arg = EARGF(usage());
   1713 		compile(arg, 1);
   1714 		script = 1;
   1715 		break;
   1716 	default : usage();
   1717 	} ARGEND
   1718 
   1719 	/* no script to run */
   1720 	if (!script && !argc)
   1721 		usage();
   1722 
   1723 	/* no script yet, next argument is script */
   1724 	if (!script)
   1725 		compile(*argv++, 0);
   1726 
   1727 	/* shrink/grow memory to fit and add our last instruction */
   1728 	resize((void **)&prog, &pcap, sizeof(*prog), pc - prog + 1, NULL);
   1729 	pc = prog + pcap - 1;
   1730 	pc->fninfo = &(Fninfo){ cmd_last, NULL, NULL, 0 };
   1731 
   1732 	files = argv;
   1733 	run();
   1734 
   1735 	ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
   1736 
   1737 	return ret;
   1738 }