9base

revived minimalist port of Plan 9 userland to Unix
git clone git://git.suckless.org/9base
Log | Files | Refs | README | LICENSE

sed.c (27240B)


      1 /*
      2  * sed -- stream  editor
      3  *
      4  *
      5  */
      6 #include <u.h>
      7 #include <libc.h>
      8 #include <bio.h>
      9 #include <regexp.h>
     10 
     11 enum {
     12 	DEPTH		= 20,		/* max nesting depth of {} */
     13 	MAXCMDS		= 512,		/* max sed commands */
     14 	ADDSIZE		= 10000,	/* size of add & read buffer */
     15 	MAXADDS		= 20,		/* max pending adds and reads */
     16 	LBSIZE		= 8192,		/* input line size */
     17 	LABSIZE		= 50,		/* max label name size */
     18 	MAXSUB		= 10,		/* max number of sub reg exp */
     19 	MAXFILES	= 120		/* max output files */
     20 };
     21 	/* An address is a line #, a R.E., "$", a reference to the last
     22 	 * R.E., or nothing.
     23 	 */
     24 typedef struct	{
     25 	enum {
     26 		A_NONE,
     27 		A_DOL,
     28 		A_LINE,
     29 		A_RE,
     30 		A_LAST
     31 	}type;
     32 	union {
     33 		long line;		/* Line # */
     34 		Reprog *rp;		/* Compiled R.E. */
     35 	} u;
     36 } Addr;
     37 
     38 typedef struct	SEDCOM {
     39 	Addr	ad1;			/* optional start address */
     40 	Addr	ad2;			/* optional end address */
     41 	union {
     42 		Reprog	*re1;		/* compiled R.E. */
     43 		Rune	*text;		/* added text or file name */
     44 		struct	SEDCOM	*lb1;	/* destination command of branch */
     45 	} u;
     46 	Rune	*rhs;			/* Right-hand side of substitution */
     47 	Biobuf*	fcode;			/* File ID for read and write */
     48 	char	command;		/* command code -see below */
     49 	char	gfl;			/* 'Global' flag for substitutions */
     50 	char	pfl;			/* 'print' flag for substitutions */
     51 	char	active;			/* 1 => data between start and end */
     52 	char	negfl;			/* negation flag */
     53 } SedCom;
     54 
     55 	/* Command Codes for field SedCom.command */
     56 #define ACOM	01
     57 #define BCOM	020
     58 #define CCOM	02
     59 #define	CDCOM	025
     60 #define	CNCOM	022
     61 #define COCOM	017
     62 #define	CPCOM	023
     63 #define DCOM	03
     64 #define ECOM	015
     65 #define EQCOM	013
     66 #define FCOM	016
     67 #define GCOM	027
     68 #define CGCOM	030
     69 #define HCOM	031
     70 #define CHCOM	032
     71 #define ICOM	04
     72 #define LCOM	05
     73 #define NCOM	012
     74 #define PCOM	010
     75 #define QCOM	011
     76 #define RCOM	06
     77 #define SCOM	07
     78 #define TCOM	021
     79 #define WCOM	014
     80 #define	CWCOM	024
     81 #define	YCOM	026
     82 #define XCOM	033
     83 
     84 	
     85 typedef struct label {			/* Label symbol table */
     86 	Rune	asc[9];			/* Label name */
     87 	SedCom	*chain;
     88 	SedCom	*address;		/* Command associated with label */
     89 } Label;
     90 
     91 typedef	struct	FILE_CACHE {		/* Data file control block */
     92 	struct FILE_CACHE *next;	/* Forward Link */
     93 	char 		  *name;	/* Name of file */
     94 } FileCache;
     95 
     96 SedCom pspace[MAXCMDS];			/* Command storage */
     97 SedCom *pend = pspace+MAXCMDS;		/* End of command storage */
     98 SedCom *rep = pspace;			/* Current fill point */
     99 
    100 Reprog	*lastre = 0;			/* Last regular expression */
    101 Resub	subexp[MAXSUB];			/* sub-patterns of pattern match*/
    102 
    103 Rune	addspace[ADDSIZE];		/* Buffer for a, c, & i commands */
    104 Rune	*addend = addspace+ADDSIZE;
    105 
    106 SedCom	*abuf[MAXADDS];			/* Queue of pending adds & reads */
    107 SedCom	**aptr = abuf;
    108 
    109 struct {				/* Sed program input control block */
    110 	enum PTYPE 			/* Either on command line or in file */
    111 		{ P_ARG,
    112 		  P_FILE
    113 		} type;
    114 	union PCTL {			/* Pointer to data */
    115 		Biobuf	*bp;
    116 		char	*curr;
    117 	} pctl;
    118 } prog;
    119 
    120 Rune	genbuf[LBSIZE];			/* Miscellaneous buffer */
    121 
    122 FileCache	*fhead = 0;		/* Head of File Cache Chain */
    123 FileCache	*ftail = 0;		/* Tail of File Cache Chain */
    124 
    125 Rune	*loc1;				/* Start of pattern match */
    126 Rune	*loc2;				/* End of pattern match */
    127 Rune	seof;				/* Pattern delimiter char */
    128 
    129 Rune	linebuf[LBSIZE+1];		/* Input data buffer */
    130 Rune	*lbend = linebuf+LBSIZE;	/* End of buffer */
    131 Rune	*spend = linebuf;			/* End of input data */
    132 Rune	*cp;				/* Current scan point in linebuf */
    133 
    134 Rune	holdsp[LBSIZE+1];		/* Hold buffer */
    135 Rune	*hend = holdsp+LBSIZE;		/* End of hold buffer */
    136 Rune	*hspend = holdsp;		/* End of hold data */
    137 
    138 int	nflag;				/* Command line flags */
    139 int	gflag;
    140 int	lflag;
    141 
    142 int	dolflag;			/* Set when at true EOF */
    143 int	sflag;				/* Set when substitution done */
    144 int	jflag;				/* Set when jump required */
    145 int	delflag;			/* Delete current line when set */
    146 
    147 long	lnum = 0;			/* Input line count */
    148 
    149 char	fname[MAXFILES][40];		/* File name cache */
    150 Biobuf	*fcode[MAXFILES];		/* File ID cache */
    151 int	nfiles = 0;			/* Cache fill point */
    152 
    153 Biobuf	fout;				/* Output stream */
    154 Biobuf	bstdin;				/* Default input */
    155 Biobuf*	f = 0;				/* Input data */
    156 
    157 Label	ltab[LABSIZE];			/* Label name symbol table */
    158 Label	*labend = ltab+LABSIZE;		/* End of label table */
    159 Label	*lab = ltab+1;			/* Current Fill point */
    160 
    161 int	depth = 0;			/* {} stack pointer */
    162 
    163 Rune	bad;				/* Dummy err ptr reference */
    164 Rune	*badp = &bad;
    165 
    166 
    167 char	CGMES[]	 = 	"Command garbled: %S";
    168 char	TMMES[]	 = 	"Too much text: %S";
    169 char	LTL[]	 = 	"Label too long: %S";
    170 char	AD0MES[] =	"No addresses allowed: %S";
    171 char	AD1MES[] =	"Only one address allowed: %S";
    172 
    173 void	address(Addr *);
    174 void	arout(void);
    175 int	cmp(char *, char *);
    176 int	rcmp(Rune *, Rune *);
    177 void	command(SedCom *);
    178 Reprog	*compile(void);
    179 Rune	*compsub(Rune *, Rune *);
    180 void	dechain(void);
    181 void	dosub(Rune *);
    182 int	ecmp(Rune *, Rune *, int);
    183 void	enroll(char *);
    184 void	errexit(void);
    185 int	executable(SedCom *);
    186 void	execute(void);
    187 void	fcomp(void);
    188 long	getrune(void);
    189 Rune	*gline(Rune *);
    190 int	match(Reprog *, Rune *);
    191 void	newfile(enum PTYPE, char *);
    192 int 	opendata(void);
    193 Biobuf	*open_file(char *);
    194 Rune	*place(Rune *, Rune *, Rune *);
    195 void	quit(char *, char *);
    196 int	rline(Rune *, Rune *);
    197 Label	*search(Label *);
    198 int	substitute(SedCom *);
    199 char	*text(char *);
    200 Rune	*stext(Rune *, Rune *);
    201 int	ycomp(SedCom *);
    202 char *	trans(int c);
    203 void	putline(Biobuf *bp, Rune *buf, int n);
    204 
    205 void
    206 main(int argc, char **argv)
    207 {
    208 	int	compfl;
    209 
    210 	lnum = 0;
    211 	Binit(&fout, 1, OWRITE);
    212 	fcode[nfiles++] = &fout;
    213 	compfl = 0;
    214 
    215 	if(argc == 1)
    216 		exits(0);
    217 	ARGBEGIN{
    218 		case 'n':
    219 			nflag++;
    220 			continue;
    221 		case 'f':
    222 			if(argc <= 1)
    223 				quit("no pattern-file", 0);
    224 			newfile(P_FILE, ARGF());
    225 			fcomp();
    226 			compfl = 1;
    227 			continue;
    228 		case 'e':
    229 			if (argc <= 1)
    230 				quit("missing pattern", 0);
    231 			newfile(P_ARG, ARGF());
    232 			fcomp();
    233 			compfl = 1;
    234 			continue;
    235 		case 'g':
    236 			gflag++;
    237 			continue;
    238 		case 'l':
    239 			lflag++;
    240 			continue;
    241 		default:
    242 			fprint(2, "sed: Unknown flag: %c\n", ARGC());
    243 			continue;
    244 	} ARGEND
    245 
    246 	if(compfl == 0) {
    247 		if (--argc < 0)
    248 			quit("missing pattern", 0);
    249 		newfile(P_ARG, *argv++);
    250 		fcomp();
    251 	}
    252 
    253 	if(depth)
    254 		quit("Too many {'s", 0);
    255 
    256 	ltab[0].address = rep;
    257 
    258 	dechain();
    259 
    260 	if(argc <= 0)
    261 		enroll(0);		/* Add stdin to cache */
    262 	else while(--argc >= 0) {
    263 		enroll(*argv++);
    264 	}
    265 	execute();
    266 	exits(0);
    267 }
    268 void
    269 fcomp(void)
    270 {
    271 	Rune	*tp;
    272 	SedCom	*pt, *pt1;
    273 	int	i;
    274 	Label	*lpt;
    275 
    276 	static Rune	*p = addspace;
    277 	static SedCom	**cmpend[DEPTH];	/* stack of {} operations */
    278 
    279 	while (rline(linebuf, lbend) >= 0) {
    280 		cp = linebuf;
    281 comploop:
    282 		while(*cp == ' ' || *cp == '\t')
    283 			cp++;
    284 		if(*cp == '\0' || *cp == '#')
    285 			continue;
    286 		if(*cp == ';') {
    287 			cp++;
    288 			goto comploop;
    289 		}
    290 
    291 		address(&rep->ad1);
    292 		if (rep->ad1.type != A_NONE) {
    293 			if (rep->ad1.type == A_LAST) {
    294 				if (!lastre)
    295 					quit("First RE may not be null", 0);
    296 				rep->ad1.type = A_RE;
    297 				rep->ad1.u.rp = lastre;
    298 			}
    299 			if(*cp == ',' || *cp == ';') {
    300 				cp++;
    301 				address(&rep->ad2);
    302 				if (rep->ad2.type == A_LAST) {
    303 					rep->ad1.type = A_RE;
    304 					rep->ad2.u.rp = lastre;
    305 				}
    306 			} else
    307 				rep->ad2.type = A_NONE;
    308 		}
    309 		while(*cp == ' ' || *cp == '\t')
    310 			cp++;
    311 
    312 swit:
    313 		switch(*cp++) {
    314 
    315 			default:
    316 				quit("Unrecognized command: %S", (char *)linebuf);
    317 
    318 			case '!':
    319 				rep->negfl = 1;
    320 				goto swit;
    321 
    322 			case '{':
    323 				rep->command = BCOM;
    324 				rep->negfl = !(rep->negfl);
    325 				cmpend[depth++] = &rep->u.lb1;
    326 				if(++rep >= pend)
    327 					quit("Too many commands: %S", (char *) linebuf);
    328 				if(*cp == '\0')	continue;
    329 				goto comploop;
    330 
    331 			case '}':
    332 				if(rep->ad1.type != A_NONE)
    333 					quit(AD0MES, (char *) linebuf);
    334 				if(--depth < 0)
    335 					quit("Too many }'s", 0);
    336 				*cmpend[depth] = rep;
    337 				if(*cp == 0)	continue;
    338 				goto comploop;
    339 
    340 			case '=':
    341 				rep->command = EQCOM;
    342 				if(rep->ad2.type != A_NONE)
    343 					quit(AD1MES, (char *) linebuf);
    344 				break;
    345 
    346 			case ':':
    347 				if(rep->ad1.type != A_NONE)
    348 					quit(AD0MES, (char *) linebuf);
    349 
    350 				while(*cp == ' ')
    351 					cp++;
    352 				tp = lab->asc;
    353 				while (*cp && *cp != ';' && *cp != ' ' && *cp != '\t' && *cp != '#') {
    354 					*tp++ = *cp++;
    355 					if(tp >= &(lab->asc[8]))
    356 						quit(LTL, (char *) linebuf);
    357 				}
    358 				*tp = '\0';
    359 
    360 				if(lpt = search(lab)) {
    361 					if(lpt->address)
    362 						quit("Duplicate labels: %S", (char *) linebuf);
    363 				} else {
    364 					lab->chain = 0;
    365 					lpt = lab;
    366 					if(++lab >= labend)
    367 						quit("Too many labels: %S", (char *) linebuf);
    368 				}
    369 				lpt->address = rep;
    370 				if (*cp == '#')
    371 					continue;
    372 				rep--;			/* reuse this slot */
    373 				break;
    374 
    375 			case 'a':
    376 				rep->command = ACOM;
    377 				if(rep->ad2.type != A_NONE)
    378 					quit(AD1MES, (char *) linebuf);
    379 				if(*cp == '\\')	cp++;
    380 				if(*cp++ != '\n')
    381 					quit(CGMES, (char *) linebuf);
    382 				rep->u.text = p;
    383 				p = stext(p, addend);
    384 				break;
    385 			case 'c':
    386 				rep->command = CCOM;
    387 				if(*cp == '\\')	cp++;
    388 				if(*cp++ != '\n')
    389 					quit(CGMES, (char *) linebuf);
    390 				rep->u.text = p;
    391 				p = stext(p, addend);
    392 				break;
    393 			case 'i':
    394 				rep->command = ICOM;
    395 				if(rep->ad2.type != A_NONE)
    396 					quit(AD1MES, (char *) linebuf);
    397 				if(*cp == '\\')	cp++;
    398 				if(*cp++ != '\n')
    399 					quit(CGMES, (char *) linebuf);
    400 				rep->u.text = p;
    401 				p = stext(p, addend);
    402 				break;
    403 
    404 			case 'g':
    405 				rep->command = GCOM;
    406 				break;
    407 
    408 			case 'G':
    409 				rep->command = CGCOM;
    410 				break;
    411 
    412 			case 'h':
    413 				rep->command = HCOM;
    414 				break;
    415 
    416 			case 'H':
    417 				rep->command = CHCOM;
    418 				break;
    419 
    420 			case 't':
    421 				rep->command = TCOM;
    422 				goto jtcommon;
    423 
    424 			case 'b':
    425 				rep->command = BCOM;
    426 jtcommon:
    427 				while(*cp == ' ')cp++;
    428 				if(*cp == '\0') {
    429 					if(pt = ltab[0].chain) {
    430 						while(pt1 = pt->u.lb1)
    431 							pt = pt1;
    432 						pt->u.lb1 = rep;
    433 					} else
    434 						ltab[0].chain = rep;
    435 					break;
    436 				}
    437 				tp = lab->asc;
    438 				while((*tp++ = *cp++))
    439 					if(tp >= &(lab->asc[8]))
    440 						quit(LTL, (char *) linebuf);
    441 				cp--;
    442 				tp[-1] = '\0';
    443 
    444 				if(lpt = search(lab)) {
    445 					if(lpt->address) {
    446 						rep->u.lb1 = lpt->address;
    447 					} else {
    448 						pt = lpt->chain;
    449 						while(pt1 = pt->u.lb1)
    450 							pt = pt1;
    451 						pt->u.lb1 = rep;
    452 					}
    453 				} else {
    454 					lab->chain = rep;
    455 					lab->address = 0;
    456 					if(++lab >= labend)
    457 						quit("Too many labels: %S",
    458 							(char *) linebuf);
    459 				}
    460 				break;
    461 
    462 			case 'n':
    463 				rep->command = NCOM;
    464 				break;
    465 
    466 			case 'N':
    467 				rep->command = CNCOM;
    468 				break;
    469 
    470 			case 'p':
    471 				rep->command = PCOM;
    472 				break;
    473 
    474 			case 'P':
    475 				rep->command = CPCOM;
    476 				break;
    477 
    478 			case 'r':
    479 				rep->command = RCOM;
    480 				if(rep->ad2.type != A_NONE)
    481 					quit(AD1MES, (char *) linebuf);
    482 				if(*cp++ != ' ')
    483 					quit(CGMES, (char *) linebuf);
    484 				rep->u.text = p;
    485 				p = stext(p, addend);
    486 				break;
    487 
    488 			case 'd':
    489 				rep->command = DCOM;
    490 				break;
    491 
    492 			case 'D':
    493 				rep->command = CDCOM;
    494 				rep->u.lb1 = pspace;
    495 				break;
    496 
    497 			case 'q':
    498 				rep->command = QCOM;
    499 				if(rep->ad2.type != A_NONE)
    500 					quit(AD1MES, (char *) linebuf);
    501 				break;
    502 
    503 			case 'l':
    504 				rep->command = LCOM;
    505 				break;
    506 
    507 			case 's':
    508 				rep->command = SCOM;
    509 				seof = *cp++;
    510 				if ((rep->u.re1 = compile()) == 0) {
    511 					if(!lastre)
    512 						quit("First RE may not be null.", 0);
    513 					rep->u.re1 = lastre;
    514 				}
    515 				rep->rhs = p;
    516 				if((p = compsub(p, addend)) == 0)
    517 					quit(CGMES, (char *) linebuf);
    518 				if(*cp == 'g') {
    519 					cp++;
    520 					rep->gfl++;
    521 				} else if(gflag)
    522 					rep->gfl++;
    523 
    524 				if(*cp == 'p') {
    525 					cp++;
    526 					rep->pfl = 1;
    527 				}
    528 
    529 				if(*cp == 'P') {
    530 					cp++;
    531 					rep->pfl = 2;
    532 				}
    533 
    534 				if(*cp == 'w') {
    535 					cp++;
    536 					if(*cp++ !=  ' ')
    537 						quit(CGMES, (char *) linebuf);
    538 					text(fname[nfiles]);
    539 					for(i = nfiles - 1; i >= 0; i--)
    540 						if(cmp(fname[nfiles],fname[i]) == 0) {
    541 							rep->fcode = fcode[i];
    542 							goto done;
    543 						}
    544 					if(nfiles >= MAXFILES)
    545 						quit("Too many files in w commands 1", 0);
    546 					rep->fcode = open_file(fname[nfiles]);
    547 				}
    548 				break;
    549 
    550 			case 'w':
    551 				rep->command = WCOM;
    552 				if(*cp++ != ' ')
    553 					quit(CGMES, (char *) linebuf);
    554 				text(fname[nfiles]);
    555 				for(i = nfiles - 1; i >= 0; i--)
    556 					if(cmp(fname[nfiles], fname[i]) == 0) {
    557 						rep->fcode = fcode[i];
    558 						goto done;
    559 					}
    560 				if(nfiles >= MAXFILES){
    561 					fprint(2, "sed: Too many files in w commands 2 \n");
    562 					fprint(2, "nfiles = %d; MAXF = %d\n", nfiles, MAXFILES);
    563 					errexit();
    564 				}
    565 				rep->fcode = open_file(fname[nfiles]);
    566 				break;
    567 
    568 			case 'x':
    569 				rep->command = XCOM;
    570 				break;
    571 
    572 			case 'y':
    573 				rep->command = YCOM;
    574 				seof = *cp++;
    575 				if (ycomp(rep) == 0)
    576 					quit(CGMES, (char *) linebuf);
    577 				break;
    578 
    579 		}
    580 done:
    581 		if(++rep >= pend)
    582 			quit("Too many commands, last: %S", (char *) linebuf);
    583 
    584 		if(*cp++ != '\0') {
    585 			if(cp[-1] == ';')
    586 				goto comploop;
    587 			quit(CGMES, (char *) linebuf);
    588 		}
    589 
    590 	}
    591 }
    592 
    593 Biobuf *
    594 open_file(char *name)
    595 {
    596 	Biobuf *bp;
    597 	int fd;
    598 
    599 	if ((bp = malloc(sizeof(Biobuf))) == 0)
    600 		quit("Out of memory", 0);
    601 	if ((fd = open(name, OWRITE)) < 0 &&
    602 		(fd = create(name, OWRITE, 0666)) < 0)
    603 			quit("Cannot create %s", name);
    604 	Binit(bp, fd, OWRITE);
    605 	Bseek(bp, 0, 2);
    606 	fcode[nfiles++] = bp;
    607 	return bp;
    608 }
    609 
    610 Rune	*
    611 compsub(Rune *rhs, Rune *end)
    612 {
    613 	Rune	r;
    614 
    615 	while ((r = *cp++) != '\0') {
    616 		if(r == '\\') {
    617 			if (rhs < end)
    618 				*rhs++ = 0xFFFF;
    619 			else
    620 				return 0;
    621 			r = *cp++;
    622 			if(r == 'n')
    623 				r = '\n';
    624 		} else {
    625 			if(r == seof) {
    626 				if (rhs < end)
    627 					*rhs++ = '\0';
    628 				else
    629 					return 0;
    630 				return rhs;
    631 			}
    632 		}
    633 		if (rhs < end)
    634 			*rhs++ = r;
    635 		else	
    636 			return 0;
    637 
    638 	}
    639 	return 0;
    640 }
    641 
    642 Reprog *
    643 compile(void)
    644 {
    645 	Rune c;
    646 	char *ep;
    647 	char expbuf[512];
    648 
    649 	if((c = *cp++) == seof)		/* '//' */
    650 		return 0;
    651 	ep = expbuf;
    652 	do {
    653 		if (c == 0 || c == '\n')
    654 			quit(TMMES, (char *) linebuf);
    655 		if (c == '\\') {
    656 			if (ep >= expbuf+sizeof(expbuf))
    657 				quit(TMMES, (char *) linebuf);
    658 			ep += runetochar(ep, &c);
    659 			if ((c = *cp++) == 'n')
    660 				c = '\n';
    661 		}
    662 		if (ep >= expbuf+sizeof(expbuf))
    663 			quit(TMMES, (char *) linebuf);
    664 		ep += runetochar(ep, &c);
    665 	} while ((c = *cp++) != seof);
    666 	*ep = 0;
    667 	return lastre = regcomp(expbuf);
    668 }
    669 
    670 void
    671 regerror(char *s)
    672 {
    673 	USED(s);
    674 	quit(CGMES, (char *) linebuf);
    675 }
    676 
    677 void
    678 newfile(enum PTYPE type, char *name)
    679 {
    680 	if (type == P_ARG)
    681 		prog.pctl.curr = name;
    682 	else if ((prog.pctl.bp = Bopen(name, OREAD)) == 0)
    683 		quit("Cannot open pattern-file: %s\n", name);
    684 	prog.type = type;
    685 }
    686 
    687 int
    688 rline(Rune *buf, Rune *end)
    689 {
    690 	long c;
    691 	Rune r;
    692 
    693 	while ((c = getrune()) >= 0) {
    694 		r = c;
    695 		if (r == '\\') {
    696 			if (buf <= end)
    697 				*buf++ = r;
    698 			if ((c = getrune()) < 0)
    699 				break;
    700 			r = c;
    701 		} else if (r == '\n') {
    702 			*buf = '\0';
    703 			return(1);
    704 		}
    705 		if (buf <= end)
    706 			*buf++ = r;
    707 	}
    708 	*buf = '\0';
    709 	return(-1);
    710 }
    711 
    712 long
    713 getrune(void)
    714 {
    715 	char *p;
    716 	long c;
    717 	Rune r;
    718 
    719 	if (prog.type == P_ARG) {
    720 		if ((p = prog.pctl.curr) != 0) {
    721 			if (*p) {
    722 				prog.pctl.curr += chartorune(&r, p);
    723 				c = r;
    724 			} else {
    725 				c = '\n';	/* fake an end-of-line */
    726 				prog.pctl.curr = 0;
    727 			}
    728 		} else 
    729 			c = -1;
    730 	} else if ((c = Bgetrune(prog.pctl.bp)) < 0)
    731 			Bterm(prog.pctl.bp);
    732 	return c;
    733 }
    734 
    735 void
    736 address(Addr *ap)
    737 {
    738 	int c;
    739 	long	lno;
    740 
    741 	if((c = *cp++) == '$')
    742 		ap->type = A_DOL;
    743 	else if(c == '/') {
    744 		seof = c;
    745 		if (ap->u.rp = compile())
    746 			ap->type = A_RE;
    747 		else
    748 			ap->type = A_LAST;
    749 	}
    750 	else if (c >= '0' && c <= '9') {
    751 		lno = c-'0';
    752 		while ((c = *cp) >= '0' && c <= '9')
    753 			lno = lno*10 + *cp++-'0';
    754 		if(!lno)
    755 			quit("line number 0 is illegal",0);
    756 		ap->type = A_LINE;
    757 		ap->u.line = lno;
    758 	}
    759 	else {
    760 		cp--;
    761 		ap->type = A_NONE;
    762 	}
    763 }
    764 
    765 int
    766 cmp(char *a, char *b)		/* compare characters */
    767 {
    768 	while(*a == *b++)
    769 		if (*a == '\0')
    770 			return(0);
    771 		else a++;
    772 	return(1);
    773 }
    774 
    775 int
    776 rcmp(Rune *a, Rune *b)		/* compare runes */
    777 {
    778 	while(*a == *b++)
    779 		if (*a == '\0')
    780 			return(0);
    781 		else a++;
    782 	return(1);
    783 }
    784 
    785 char *
    786 text(char *p)		/* extract character string */
    787 {
    788 	Rune	r;
    789 
    790 	while(*cp == '\t' || *cp == ' ')
    791 			cp++;
    792 	while (*cp) {
    793 		if ((r = *cp++) == '\\')
    794 			if ((r = *cp++) == 0)
    795 				break;;
    796 		if (r == '\n')
    797 			while (*cp == '\t' || *cp == ' ')
    798 					cp++;
    799 		p += runetochar(p, &r);
    800 	}
    801 	*p++ = '\0';
    802 	return p;
    803 }
    804 
    805 Rune *
    806 stext(Rune *p, Rune *end)		/* extract rune string */
    807 {
    808 	while(*cp == '\t' || *cp == ' ')
    809 		cp++;
    810 	while (*cp) {
    811 		if (*cp == '\\')
    812 			if (*++cp == 0)
    813 				break;
    814 		if (p >= end-1)
    815 			quit(TMMES, (char *) linebuf);
    816 		if ((*p++ = *cp++) == '\n')
    817 			while(*cp == '\t' || *cp == ' ')
    818 					cp++;
    819 	}
    820 	*p++ = 0;
    821 	return p;
    822 }
    823 
    824 
    825 Label *
    826 search (Label *ptr)
    827 {
    828 	Label	*rp;
    829 
    830 	for (rp = ltab; rp < ptr; rp++)
    831 		if(rcmp(rp->asc, ptr->asc) == 0)
    832 			return(rp);
    833 	return(0);
    834 }
    835 
    836 void
    837 dechain(void)
    838 {
    839 	Label	*lptr;
    840 	SedCom	*rptr, *trptr;
    841 
    842 	for(lptr = ltab; lptr < lab; lptr++) {
    843 
    844 		if(lptr->address == 0)
    845 			quit("Undefined label: %S", (char *) lptr->asc);
    846 
    847 		if(lptr->chain) {
    848 			rptr = lptr->chain;
    849 			while(trptr = rptr->u.lb1) {
    850 				rptr->u.lb1 = lptr->address;
    851 				rptr = trptr;
    852 			}
    853 			rptr->u.lb1 = lptr->address;
    854 		}
    855 	}
    856 }
    857 
    858 int
    859 ycomp(SedCom *r)
    860 {
    861 	int 	i;
    862 	Rune	*rp;
    863 	Rune	c, *tsp, highc;
    864 	Rune	*sp;
    865 
    866 	highc = 0;
    867 	for(tsp = cp; *tsp != seof; tsp++) {
    868 		if(*tsp == '\\')
    869 			tsp++;
    870 		if(*tsp == '\n' || *tsp == '\0')
    871 			return(0);
    872 		if (*tsp > highc) highc = *tsp;
    873 	}
    874 	tsp++;
    875 	if ((rp = r->u.text = (Rune *) malloc(sizeof(Rune)*(highc+2))) == 0)
    876 		quit("Out of memory", 0);
    877 	*rp++ = highc;				/* save upper bound */
    878 	for (i = 0; i <= highc; i++)
    879 		rp[i] = i;
    880 	sp = cp;
    881 	while((c = *sp++) != seof) {
    882 		if(c == '\\' && *sp == 'n') {
    883 			sp++;
    884 			c = '\n';
    885 		}
    886 		if((rp[c] = *tsp++) == '\\' && *tsp == 'n') {
    887 			rp[c] = '\n';
    888 			tsp++;
    889 		}
    890 		if(rp[c] == seof || rp[c] == '\0') {
    891 			free(r->u.re1);
    892 			r->u.re1 = 0;
    893 			return(0);
    894 		}
    895 	}
    896 	if(*tsp != seof) {
    897 		free(r->u.re1);
    898 		r->u.re1 = 0;
    899 		return(0);
    900 	}
    901 	cp = tsp+1;
    902 	return(1);
    903 }
    904 
    905 void
    906 execute(void)
    907 {
    908 	SedCom	*ipc;
    909 
    910 	while (spend = gline(linebuf)){
    911 		for(ipc = pspace; ipc->command; ) {
    912 			if (!executable(ipc)) {
    913 				ipc++;
    914 				continue;
    915 			}
    916 			command(ipc);
    917 
    918 			if(delflag)
    919 				break;
    920 			if(jflag) {
    921 				jflag = 0;
    922 				if((ipc = ipc->u.lb1) == 0)
    923 					break;
    924 			} else
    925 				ipc++;
    926 
    927 		}
    928 		if(!nflag && !delflag)
    929 			putline(&fout, linebuf, spend-linebuf);
    930 		if(aptr > abuf) {
    931 			arout();
    932 		}
    933 		delflag = 0;
    934 	}
    935 }
    936 	/* determine if a statement should be applied to an input line */
    937 int
    938 executable(SedCom *ipc)
    939 {
    940 	if (ipc->active) {	/* Addr1 satisfied - accept until Addr2 */
    941 		if (ipc->active == 1)		/* Second line */
    942 			ipc->active = 2;
    943 		switch(ipc->ad2.type) {
    944 			case A_NONE:	/* No second addr; use first */
    945 				ipc->active = 0;
    946 				break;
    947 			case A_DOL:	/* Accept everything */
    948 				return !ipc->negfl;
    949 			case A_LINE:	/* Line at end of range? */
    950 				if (lnum <= ipc->ad2.u.line) {
    951 					if (ipc->ad2.u.line == lnum)
    952 						ipc->active = 0;
    953 					return !ipc->negfl;
    954 				}
    955 				ipc->active = 0;	/* out of range */
    956 				return ipc->negfl;
    957 			case A_RE:	/* Check for matching R.E. */
    958 				if (match(ipc->ad2.u.rp, linebuf))
    959 					ipc->active = 0;
    960 				return !ipc->negfl;
    961 			default:		/* internal error */
    962 				quit("Internal error", 0);
    963 		}
    964 	}
    965 	switch (ipc->ad1.type) {	/* Check first address */
    966 		case A_NONE:			/* Everything matches */
    967 			return !ipc->negfl;
    968 		case A_DOL:			/* Only last line */
    969 			if (dolflag)
    970 				return !ipc->negfl;
    971 			break;
    972 		case A_LINE:			/* Check line number */
    973 			if (ipc->ad1.u.line == lnum) {
    974 				ipc->active = 1;	/* In range */
    975 				return !ipc->negfl;
    976 			}
    977 			break;
    978 		case A_RE:			/* Check R.E. */
    979 			if (match(ipc->ad1.u.rp, linebuf)) {
    980 				ipc->active = 1;	/* In range */
    981 				return !ipc->negfl;
    982 			}
    983 			break;
    984 		default:
    985 			quit("Internal error", 0);
    986 	}
    987 	return ipc->negfl;
    988 }
    989 
    990 int
    991 match(Reprog *pattern, Rune *buf)
    992 {
    993 	if (!pattern)
    994 		return 0;
    995 	subexp[0].s.rsp = buf; 
    996 	subexp[0].e.rep = 0;
    997 	if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) {
    998 		loc1 = subexp[0].s.rsp;
    999 		loc2 = subexp[0].e.rep;
   1000 		return 1;
   1001 	}
   1002 	loc1 = loc2 = 0;
   1003 	return 0;
   1004 }
   1005 
   1006 int
   1007 substitute(SedCom *ipc)
   1008 {
   1009 	int len;
   1010 
   1011 	if(!match(ipc->u.re1, linebuf))
   1012 		return 0;
   1013 
   1014 	/*
   1015 	 * we have at least one match.  some patterns, e.g. '$' or '^', can
   1016 	 * produce zero-length matches, so during a global substitute we
   1017 	 * must bump to the character after a zero-length match to keep from looping.
   1018 	 */
   1019 	sflag = 1;
   1020 	if(ipc->gfl == 0)		/* single substitution */
   1021 		dosub(ipc->rhs);
   1022 	else
   1023 	do{				/* global substitution */
   1024 		len = loc2-loc1;	/* length of match */
   1025 		dosub(ipc->rhs);	/* dosub moves loc2 */
   1026 		if(*loc2 == 0)		/* end of string */
   1027 			break;
   1028 		if(len == 0)		/* zero-length R.E. match */
   1029 			loc2++;		/* bump over zero-length match */
   1030 		if(*loc2 == 0)		/* end of string */
   1031 			break;
   1032 	} while(match(ipc->u.re1, loc2));
   1033 	return 1;
   1034 }
   1035 
   1036 void
   1037 dosub(Rune *rhsbuf)
   1038 {
   1039 	Rune *lp, *sp;
   1040 	Rune *rp;
   1041 	int c, n;
   1042 
   1043 	lp = linebuf;
   1044 	sp = genbuf;
   1045 	rp = rhsbuf;
   1046 	while (lp < loc1)
   1047 		*sp++ = *lp++;
   1048 	while(c = *rp++) {
   1049 		if (c == '&') {
   1050 			sp = place(sp, loc1, loc2);
   1051 			continue;
   1052 		}
   1053 		if (c == 0xFFFF && (c = *rp++) >= '1' && c < MAXSUB+'0') {
   1054 			n = c-'0';
   1055 			if (subexp[n].s.rsp && subexp[n].e.rep) {
   1056 				sp = place(sp, subexp[n].s.rsp, subexp[n].e.rep);
   1057 				continue;
   1058 			}
   1059 			else {
   1060 				fprint(2, "sed: Invalid back reference \\%d\n",n);
   1061 				errexit();
   1062 			}
   1063 		}
   1064 		*sp++ = c;
   1065 		if (sp >= &genbuf[LBSIZE])
   1066 			fprint(2, "sed: Output line too long.\n");
   1067 	}
   1068 	lp = loc2;
   1069 	loc2 = sp - genbuf + linebuf;
   1070 	while (*sp++ = *lp++)
   1071 		if (sp >= &genbuf[LBSIZE])
   1072 			fprint(2, "sed: Output line too long.\n");
   1073 	lp = linebuf;
   1074 	sp = genbuf;
   1075 	while (*lp++ = *sp++)
   1076 		;
   1077 	spend = lp-1;
   1078 }
   1079 
   1080 Rune *
   1081 place(Rune *sp, Rune *l1, Rune *l2)
   1082 {
   1083 	while (l1 < l2) {
   1084 		*sp++ = *l1++;
   1085 		if (sp >= &genbuf[LBSIZE])
   1086 			fprint(2, "sed: Output line too long.\n");
   1087 	}
   1088 	return(sp);
   1089 }
   1090 
   1091 char *
   1092 trans(int c)
   1093 {
   1094 	static char buf[] = "\\x0000";
   1095 	static char hex[] = "0123456789abcdef";
   1096 
   1097 	switch(c) {
   1098 		case '\b':
   1099 			return "\\b";
   1100 		case '\n':
   1101 			return "\\n";
   1102 		case '\r':
   1103 			return "\\r";
   1104 		case '\t':
   1105 			return "\\t";
   1106 		case '\\':
   1107 			return "\\\\";
   1108 	}
   1109 	buf[2] = hex[(c>>12)&0xF];
   1110 	buf[3] = hex[(c>>8)&0xF];
   1111 	buf[4] = hex[(c>>4)&0xF];
   1112 	buf[5] = hex[c&0xF];
   1113 	return buf;
   1114 }
   1115 
   1116 void
   1117 command(SedCom *ipc)
   1118 {
   1119 	int	i, c;
   1120 	Rune	*p1, *p2;
   1121 	char	*ucp;
   1122 	Rune	*rp;
   1123 	Rune	*execp;
   1124 
   1125 	switch(ipc->command) {
   1126 
   1127 		case ACOM:
   1128 			*aptr++ = ipc;
   1129 			if(aptr >= abuf+MAXADDS) {
   1130 				quit("sed: Too many appends after line %ld\n",
   1131 					(char *) lnum);
   1132 			}
   1133 			*aptr = 0;
   1134 			break;
   1135 		case CCOM:
   1136 			delflag = 1;
   1137 			if(ipc->active == 1) {
   1138 				for(rp = ipc->u.text; *rp; rp++)
   1139 					Bputrune(&fout, *rp);
   1140 				Bputc(&fout, '\n');
   1141 			}
   1142 			break;
   1143 		case DCOM:
   1144 			delflag++;
   1145 			break;
   1146 		case CDCOM:
   1147 			p1 = p2 = linebuf;
   1148 			while(*p1 != '\n') {
   1149 				if(*p1++ == 0) {
   1150 					delflag++;
   1151 					return;
   1152 				}
   1153 			}
   1154 			p1++;
   1155 			while(*p2++ = *p1++)
   1156 				;
   1157 			spend = p2-1;
   1158 			jflag++;
   1159 			break;
   1160 		case EQCOM:
   1161 			Bprint(&fout, "%ld\n", lnum);
   1162 			break;
   1163 		case GCOM:
   1164 			p1 = linebuf;
   1165 			p2 = holdsp;
   1166 			while(*p1++ = *p2++)
   1167 				;
   1168 			spend = p1-1;
   1169 			break;
   1170 		case CGCOM:
   1171 			*spend++ = '\n';
   1172 			p1 = spend;
   1173 			p2 = holdsp;
   1174 			while(*p1++ = *p2++)
   1175 				if(p1 >= lbend)
   1176 					break;
   1177 			spend = p1-1;
   1178 			break;
   1179 		case HCOM:
   1180 			p1 = holdsp;
   1181 			p2 = linebuf;
   1182 			while(*p1++ = *p2++);
   1183 			hspend = p1-1;
   1184 			break;
   1185 		case CHCOM:
   1186 			*hspend++ = '\n';
   1187 			p1 = hspend;
   1188 			p2 = linebuf;
   1189 			while(*p1++ = *p2++)
   1190 				if(p1 >= hend)
   1191 					break;
   1192 			hspend = p1-1;
   1193 			break;
   1194 		case ICOM:
   1195 			for(rp = ipc->u.text; *rp; rp++)
   1196 				Bputrune(&fout, *rp);
   1197 			Bputc(&fout, '\n');
   1198 			break;
   1199 		case BCOM:
   1200 			jflag = 1;
   1201 			break;
   1202 		case LCOM:
   1203 			c = 0;
   1204 			for (i = 0, rp = linebuf; *rp; rp++) {
   1205 				c = *rp;
   1206 				if(c >= 0x20 && c < 0x7F && c != '\\') {
   1207 					Bputc(&fout, c);
   1208 					if(i++ > 71) {
   1209 						Bprint(&fout, "\\\n");
   1210 						i = 0;
   1211 					}
   1212 				} else {
   1213 					for (ucp = trans(*rp); *ucp; ucp++){
   1214 						c = *ucp;
   1215 						Bputc(&fout, c);
   1216 						if(i++ > 71) {
   1217 							Bprint(&fout, "\\\n");
   1218 							i = 0;
   1219 						}
   1220 					}
   1221 				}
   1222 			}
   1223 			if(c == ' ')
   1224 				Bprint(&fout, "\\n");
   1225 			Bputc(&fout, '\n');
   1226 			break;
   1227 		case NCOM:
   1228 			if(!nflag)
   1229 				putline(&fout, linebuf, spend-linebuf);
   1230 
   1231 			if(aptr > abuf)
   1232 				arout();
   1233 			if((execp = gline(linebuf)) == 0) {
   1234 				delflag = 1;
   1235 				break;
   1236 			}
   1237 			spend = execp;
   1238 			break;
   1239 		case CNCOM:
   1240 			if(aptr > abuf)
   1241 				arout();
   1242 			*spend++ = '\n';
   1243 			if((execp = gline(spend)) == 0) {
   1244 				delflag = 1;
   1245 				break;
   1246 			}
   1247 			spend = execp;
   1248 			break;
   1249 		case PCOM:
   1250 			putline(&fout, linebuf, spend-linebuf);
   1251 			break;
   1252 		case CPCOM:
   1253 	cpcom:
   1254 			for(rp = linebuf; *rp && *rp != '\n'; rp++)
   1255 				Bputc(&fout, *rp);
   1256 			Bputc(&fout, '\n');
   1257 			break;
   1258 		case QCOM:
   1259 			if(!nflag)
   1260 				putline(&fout, linebuf, spend-linebuf);
   1261 			if(aptr > abuf)
   1262 				arout();
   1263 			exits(0);
   1264 		case RCOM:
   1265 			*aptr++ = ipc;
   1266 			if(aptr >= &abuf[MAXADDS])
   1267 				quit("sed: Too many reads after line %ld\n",
   1268 					(char *) lnum);
   1269 			*aptr = 0;
   1270 			break;
   1271 		case SCOM:
   1272 			i = substitute(ipc);
   1273 			if(i && ipc->pfl)
   1274 				if(ipc->pfl == 1)
   1275 					putline(&fout, linebuf, spend-linebuf);
   1276 				else
   1277 					goto cpcom;
   1278 			if(i && ipc->fcode)
   1279 				goto wcom;
   1280 			break;
   1281 
   1282 		case TCOM:
   1283 			if(sflag == 0)	break;
   1284 			sflag = 0;
   1285 			jflag = 1;
   1286 			break;
   1287 
   1288 		wcom:
   1289 		case WCOM:
   1290 			putline(ipc->fcode,linebuf, spend-linebuf);
   1291 			break;
   1292 		case XCOM:
   1293 			p1 = linebuf;
   1294 			p2 = genbuf;
   1295 			while(*p2++ = *p1++);
   1296 			p1 = holdsp;
   1297 			p2 = linebuf;
   1298 			while(*p2++ = *p1++);
   1299 			spend = p2 - 1;
   1300 			p1 = genbuf;
   1301 			p2 = holdsp;
   1302 			while(*p2++ = *p1++);
   1303 			hspend = p2 - 1;
   1304 			break;
   1305 		case YCOM:
   1306 			p1 = linebuf;
   1307 			p2 = ipc->u.text;
   1308 			for (i = *p2++;	*p1; p1++){
   1309 				if (*p1 <= i) *p1 = p2[*p1];
   1310 			}
   1311 			break;
   1312 	}
   1313 
   1314 }
   1315 
   1316 void
   1317 putline(Biobuf *bp, Rune *buf, int n)
   1318 {
   1319 	while (n--)
   1320 		Bputrune(bp, *buf++);
   1321 	Bputc(bp, '\n');
   1322 	if(lflag)
   1323 		Bflush(bp);
   1324 }
   1325 
   1326 int
   1327 ecmp(Rune *a, Rune *b, int count)
   1328 {
   1329 	while(count--)
   1330 		if(*a++ != *b++)	return(0);
   1331 	return(1);
   1332 }
   1333 
   1334 void
   1335 arout(void)
   1336 {
   1337 	Rune	*p1;
   1338 	Biobuf	*fi;
   1339 	int	c;
   1340 	char	*s;
   1341 	char	buf[128];
   1342 
   1343 	for (aptr = abuf; *aptr; aptr++) {
   1344 		if((*aptr)->command == ACOM) {
   1345 			for(p1 = (*aptr)->u.text; *p1; p1++ )
   1346 				Bputrune(&fout, *p1);
   1347 			Bputc(&fout, '\n');
   1348 		} else {
   1349 			for(s = buf, p1= (*aptr)->u.text; *p1; p1++)
   1350 					s += runetochar(s, p1);
   1351 			*s = '\0';
   1352 			if((fi = Bopen(buf, OREAD)) == 0)
   1353 				continue;
   1354 			while((c = Bgetc(fi)) >= 0)
   1355 				Bputc(&fout, c);
   1356 			Bterm(fi);
   1357 		}
   1358 	}
   1359 	aptr = abuf;
   1360 	*aptr = 0;
   1361 }
   1362 
   1363 void
   1364 errexit(void)
   1365 {
   1366 	exits("error");
   1367 }
   1368 
   1369 void
   1370 quit (char *msg, char *arg)
   1371 {
   1372 	fprint(2, "sed: ");
   1373 	fprint(2, msg, arg);
   1374 	fprint(2, "\n");
   1375 	errexit();
   1376 }
   1377 
   1378 Rune *
   1379 gline(Rune *addr)
   1380 {
   1381 	long	c;
   1382 	Rune *p;
   1383 
   1384 	static long peekc = 0;
   1385 
   1386 	if (f == 0 && opendata() < 0)
   1387 		return 0;
   1388 	sflag = 0;
   1389 	lnum++;
   1390 /*	Bflush(&fout);********* dumped 4/30/92 - bobf****/
   1391 	do {
   1392 		p = addr;
   1393 		for (c = (peekc ? peekc : Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
   1394 			if (c == '\n') {
   1395 				if ((peekc = Bgetrune(f)) < 0) {
   1396 					if (fhead == 0)
   1397 						dolflag = 1;
   1398 				}
   1399 				*p = '\0';
   1400 				return p;
   1401 			}
   1402 			if (c && p < lbend)
   1403 				*p++ = c;
   1404 		}
   1405 		/* return partial final line, adding implicit newline */
   1406 		if(p != addr) {
   1407 			*p = '\0';
   1408 			peekc = -1;
   1409 			if (fhead == 0)
   1410 				dolflag = 1;
   1411 			return p;
   1412 		}
   1413 		peekc = 0;
   1414 		Bterm(f);
   1415 	} while (opendata() > 0);	/* Switch to next stream */
   1416 	f = 0;
   1417 	return 0;
   1418 }
   1419 
   1420 	/* Data file input section - the intent is to transparently
   1421 	 *	catenate all data input streams.
   1422 	 */
   1423 void
   1424 enroll(char *filename)		/* Add a file to the input file cache */
   1425 {
   1426 	FileCache *fp;
   1427 
   1428 	if ((fp = (FileCache *) malloc(sizeof (FileCache))) == 0)
   1429 		quit("Out of memory", 0);
   1430 	if (ftail == 0)
   1431 		fhead = fp;
   1432 	else
   1433 		ftail->next = fp;
   1434 	ftail = fp;
   1435 	fp->next = 0;
   1436 	fp->name = filename;	/* 0 => stdin */
   1437 }
   1438 
   1439 int
   1440 opendata(void)
   1441 {
   1442 	if (fhead == 0)
   1443 		return -1;
   1444 	if (fhead->name) {
   1445 		if ((f = Bopen(fhead->name, OREAD)) == 0)
   1446 			quit("Can't open %s", fhead->name);
   1447 	} else {
   1448 		Binit(&bstdin, 0, OREAD);
   1449 		f = &bstdin;
   1450 	}
   1451 	fhead = fhead->next;
   1452 	return 1;
   1453 }