sed.c (27240B)
1 /* 2 * sed -- stream editor 3 * 4 * 5 */ 6 #include <u.h> 7 #include <libc.h> 8 #include <bio.h> 9 #include <regexp.h> 10 11 enum { 12 DEPTH = 20, /* max nesting depth of {} */ 13 MAXCMDS = 512, /* max sed commands */ 14 ADDSIZE = 10000, /* size of add & read buffer */ 15 MAXADDS = 20, /* max pending adds and reads */ 16 LBSIZE = 8192, /* input line size */ 17 LABSIZE = 50, /* max label name size */ 18 MAXSUB = 10, /* max number of sub reg exp */ 19 MAXFILES = 120 /* max output files */ 20 }; 21 /* An address is a line #, a R.E., "$", a reference to the last 22 * R.E., or nothing. 23 */ 24 typedef struct { 25 enum { 26 A_NONE, 27 A_DOL, 28 A_LINE, 29 A_RE, 30 A_LAST 31 }type; 32 union { 33 long line; /* Line # */ 34 Reprog *rp; /* Compiled R.E. */ 35 } u; 36 } Addr; 37 38 typedef struct SEDCOM { 39 Addr ad1; /* optional start address */ 40 Addr ad2; /* optional end address */ 41 union { 42 Reprog *re1; /* compiled R.E. */ 43 Rune *text; /* added text or file name */ 44 struct SEDCOM *lb1; /* destination command of branch */ 45 } u; 46 Rune *rhs; /* Right-hand side of substitution */ 47 Biobuf* fcode; /* File ID for read and write */ 48 char command; /* command code -see below */ 49 char gfl; /* 'Global' flag for substitutions */ 50 char pfl; /* 'print' flag for substitutions */ 51 char active; /* 1 => data between start and end */ 52 char negfl; /* negation flag */ 53 } SedCom; 54 55 /* Command Codes for field SedCom.command */ 56 #define ACOM 01 57 #define BCOM 020 58 #define CCOM 02 59 #define CDCOM 025 60 #define CNCOM 022 61 #define COCOM 017 62 #define CPCOM 023 63 #define DCOM 03 64 #define ECOM 015 65 #define EQCOM 013 66 #define FCOM 016 67 #define GCOM 027 68 #define CGCOM 030 69 #define HCOM 031 70 #define CHCOM 032 71 #define ICOM 04 72 #define LCOM 05 73 #define NCOM 012 74 #define PCOM 010 75 #define QCOM 011 76 #define RCOM 06 77 #define SCOM 07 78 #define TCOM 021 79 #define WCOM 014 80 #define CWCOM 024 81 #define YCOM 026 82 #define XCOM 033 83 84 85 typedef struct label { /* Label symbol table */ 86 Rune asc[9]; /* Label name */ 87 SedCom *chain; 88 SedCom *address; /* Command associated with label */ 89 } Label; 90 91 typedef struct FILE_CACHE { /* Data file control block */ 92 struct FILE_CACHE *next; /* Forward Link */ 93 char *name; /* Name of file */ 94 } FileCache; 95 96 SedCom pspace[MAXCMDS]; /* Command storage */ 97 SedCom *pend = pspace+MAXCMDS; /* End of command storage */ 98 SedCom *rep = pspace; /* Current fill point */ 99 100 Reprog *lastre = 0; /* Last regular expression */ 101 Resub subexp[MAXSUB]; /* sub-patterns of pattern match*/ 102 103 Rune addspace[ADDSIZE]; /* Buffer for a, c, & i commands */ 104 Rune *addend = addspace+ADDSIZE; 105 106 SedCom *abuf[MAXADDS]; /* Queue of pending adds & reads */ 107 SedCom **aptr = abuf; 108 109 struct { /* Sed program input control block */ 110 enum PTYPE /* Either on command line or in file */ 111 { P_ARG, 112 P_FILE 113 } type; 114 union PCTL { /* Pointer to data */ 115 Biobuf *bp; 116 char *curr; 117 } pctl; 118 } prog; 119 120 Rune genbuf[LBSIZE]; /* Miscellaneous buffer */ 121 122 FileCache *fhead = 0; /* Head of File Cache Chain */ 123 FileCache *ftail = 0; /* Tail of File Cache Chain */ 124 125 Rune *loc1; /* Start of pattern match */ 126 Rune *loc2; /* End of pattern match */ 127 Rune seof; /* Pattern delimiter char */ 128 129 Rune linebuf[LBSIZE+1]; /* Input data buffer */ 130 Rune *lbend = linebuf+LBSIZE; /* End of buffer */ 131 Rune *spend = linebuf; /* End of input data */ 132 Rune *cp; /* Current scan point in linebuf */ 133 134 Rune holdsp[LBSIZE+1]; /* Hold buffer */ 135 Rune *hend = holdsp+LBSIZE; /* End of hold buffer */ 136 Rune *hspend = holdsp; /* End of hold data */ 137 138 int nflag; /* Command line flags */ 139 int gflag; 140 int lflag; 141 142 int dolflag; /* Set when at true EOF */ 143 int sflag; /* Set when substitution done */ 144 int jflag; /* Set when jump required */ 145 int delflag; /* Delete current line when set */ 146 147 long lnum = 0; /* Input line count */ 148 149 char fname[MAXFILES][40]; /* File name cache */ 150 Biobuf *fcode[MAXFILES]; /* File ID cache */ 151 int nfiles = 0; /* Cache fill point */ 152 153 Biobuf fout; /* Output stream */ 154 Biobuf bstdin; /* Default input */ 155 Biobuf* f = 0; /* Input data */ 156 157 Label ltab[LABSIZE]; /* Label name symbol table */ 158 Label *labend = ltab+LABSIZE; /* End of label table */ 159 Label *lab = ltab+1; /* Current Fill point */ 160 161 int depth = 0; /* {} stack pointer */ 162 163 Rune bad; /* Dummy err ptr reference */ 164 Rune *badp = &bad; 165 166 167 char CGMES[] = "Command garbled: %S"; 168 char TMMES[] = "Too much text: %S"; 169 char LTL[] = "Label too long: %S"; 170 char AD0MES[] = "No addresses allowed: %S"; 171 char AD1MES[] = "Only one address allowed: %S"; 172 173 void address(Addr *); 174 void arout(void); 175 int cmp(char *, char *); 176 int rcmp(Rune *, Rune *); 177 void command(SedCom *); 178 Reprog *compile(void); 179 Rune *compsub(Rune *, Rune *); 180 void dechain(void); 181 void dosub(Rune *); 182 int ecmp(Rune *, Rune *, int); 183 void enroll(char *); 184 void errexit(void); 185 int executable(SedCom *); 186 void execute(void); 187 void fcomp(void); 188 long getrune(void); 189 Rune *gline(Rune *); 190 int match(Reprog *, Rune *); 191 void newfile(enum PTYPE, char *); 192 int opendata(void); 193 Biobuf *open_file(char *); 194 Rune *place(Rune *, Rune *, Rune *); 195 void quit(char *, char *); 196 int rline(Rune *, Rune *); 197 Label *search(Label *); 198 int substitute(SedCom *); 199 char *text(char *); 200 Rune *stext(Rune *, Rune *); 201 int ycomp(SedCom *); 202 char * trans(int c); 203 void putline(Biobuf *bp, Rune *buf, int n); 204 205 void 206 main(int argc, char **argv) 207 { 208 int compfl; 209 210 lnum = 0; 211 Binit(&fout, 1, OWRITE); 212 fcode[nfiles++] = &fout; 213 compfl = 0; 214 215 if(argc == 1) 216 exits(0); 217 ARGBEGIN{ 218 case 'n': 219 nflag++; 220 continue; 221 case 'f': 222 if(argc <= 1) 223 quit("no pattern-file", 0); 224 newfile(P_FILE, ARGF()); 225 fcomp(); 226 compfl = 1; 227 continue; 228 case 'e': 229 if (argc <= 1) 230 quit("missing pattern", 0); 231 newfile(P_ARG, ARGF()); 232 fcomp(); 233 compfl = 1; 234 continue; 235 case 'g': 236 gflag++; 237 continue; 238 case 'l': 239 lflag++; 240 continue; 241 default: 242 fprint(2, "sed: Unknown flag: %c\n", ARGC()); 243 continue; 244 } ARGEND 245 246 if(compfl == 0) { 247 if (--argc < 0) 248 quit("missing pattern", 0); 249 newfile(P_ARG, *argv++); 250 fcomp(); 251 } 252 253 if(depth) 254 quit("Too many {'s", 0); 255 256 ltab[0].address = rep; 257 258 dechain(); 259 260 if(argc <= 0) 261 enroll(0); /* Add stdin to cache */ 262 else while(--argc >= 0) { 263 enroll(*argv++); 264 } 265 execute(); 266 exits(0); 267 } 268 void 269 fcomp(void) 270 { 271 Rune *tp; 272 SedCom *pt, *pt1; 273 int i; 274 Label *lpt; 275 276 static Rune *p = addspace; 277 static SedCom **cmpend[DEPTH]; /* stack of {} operations */ 278 279 while (rline(linebuf, lbend) >= 0) { 280 cp = linebuf; 281 comploop: 282 while(*cp == ' ' || *cp == '\t') 283 cp++; 284 if(*cp == '\0' || *cp == '#') 285 continue; 286 if(*cp == ';') { 287 cp++; 288 goto comploop; 289 } 290 291 address(&rep->ad1); 292 if (rep->ad1.type != A_NONE) { 293 if (rep->ad1.type == A_LAST) { 294 if (!lastre) 295 quit("First RE may not be null", 0); 296 rep->ad1.type = A_RE; 297 rep->ad1.u.rp = lastre; 298 } 299 if(*cp == ',' || *cp == ';') { 300 cp++; 301 address(&rep->ad2); 302 if (rep->ad2.type == A_LAST) { 303 rep->ad1.type = A_RE; 304 rep->ad2.u.rp = lastre; 305 } 306 } else 307 rep->ad2.type = A_NONE; 308 } 309 while(*cp == ' ' || *cp == '\t') 310 cp++; 311 312 swit: 313 switch(*cp++) { 314 315 default: 316 quit("Unrecognized command: %S", (char *)linebuf); 317 318 case '!': 319 rep->negfl = 1; 320 goto swit; 321 322 case '{': 323 rep->command = BCOM; 324 rep->negfl = !(rep->negfl); 325 cmpend[depth++] = &rep->u.lb1; 326 if(++rep >= pend) 327 quit("Too many commands: %S", (char *) linebuf); 328 if(*cp == '\0') continue; 329 goto comploop; 330 331 case '}': 332 if(rep->ad1.type != A_NONE) 333 quit(AD0MES, (char *) linebuf); 334 if(--depth < 0) 335 quit("Too many }'s", 0); 336 *cmpend[depth] = rep; 337 if(*cp == 0) continue; 338 goto comploop; 339 340 case '=': 341 rep->command = EQCOM; 342 if(rep->ad2.type != A_NONE) 343 quit(AD1MES, (char *) linebuf); 344 break; 345 346 case ':': 347 if(rep->ad1.type != A_NONE) 348 quit(AD0MES, (char *) linebuf); 349 350 while(*cp == ' ') 351 cp++; 352 tp = lab->asc; 353 while (*cp && *cp != ';' && *cp != ' ' && *cp != '\t' && *cp != '#') { 354 *tp++ = *cp++; 355 if(tp >= &(lab->asc[8])) 356 quit(LTL, (char *) linebuf); 357 } 358 *tp = '\0'; 359 360 if(lpt = search(lab)) { 361 if(lpt->address) 362 quit("Duplicate labels: %S", (char *) linebuf); 363 } else { 364 lab->chain = 0; 365 lpt = lab; 366 if(++lab >= labend) 367 quit("Too many labels: %S", (char *) linebuf); 368 } 369 lpt->address = rep; 370 if (*cp == '#') 371 continue; 372 rep--; /* reuse this slot */ 373 break; 374 375 case 'a': 376 rep->command = ACOM; 377 if(rep->ad2.type != A_NONE) 378 quit(AD1MES, (char *) linebuf); 379 if(*cp == '\\') cp++; 380 if(*cp++ != '\n') 381 quit(CGMES, (char *) linebuf); 382 rep->u.text = p; 383 p = stext(p, addend); 384 break; 385 case 'c': 386 rep->command = CCOM; 387 if(*cp == '\\') cp++; 388 if(*cp++ != '\n') 389 quit(CGMES, (char *) linebuf); 390 rep->u.text = p; 391 p = stext(p, addend); 392 break; 393 case 'i': 394 rep->command = ICOM; 395 if(rep->ad2.type != A_NONE) 396 quit(AD1MES, (char *) linebuf); 397 if(*cp == '\\') cp++; 398 if(*cp++ != '\n') 399 quit(CGMES, (char *) linebuf); 400 rep->u.text = p; 401 p = stext(p, addend); 402 break; 403 404 case 'g': 405 rep->command = GCOM; 406 break; 407 408 case 'G': 409 rep->command = CGCOM; 410 break; 411 412 case 'h': 413 rep->command = HCOM; 414 break; 415 416 case 'H': 417 rep->command = CHCOM; 418 break; 419 420 case 't': 421 rep->command = TCOM; 422 goto jtcommon; 423 424 case 'b': 425 rep->command = BCOM; 426 jtcommon: 427 while(*cp == ' ')cp++; 428 if(*cp == '\0') { 429 if(pt = ltab[0].chain) { 430 while(pt1 = pt->u.lb1) 431 pt = pt1; 432 pt->u.lb1 = rep; 433 } else 434 ltab[0].chain = rep; 435 break; 436 } 437 tp = lab->asc; 438 while((*tp++ = *cp++)) 439 if(tp >= &(lab->asc[8])) 440 quit(LTL, (char *) linebuf); 441 cp--; 442 tp[-1] = '\0'; 443 444 if(lpt = search(lab)) { 445 if(lpt->address) { 446 rep->u.lb1 = lpt->address; 447 } else { 448 pt = lpt->chain; 449 while(pt1 = pt->u.lb1) 450 pt = pt1; 451 pt->u.lb1 = rep; 452 } 453 } else { 454 lab->chain = rep; 455 lab->address = 0; 456 if(++lab >= labend) 457 quit("Too many labels: %S", 458 (char *) linebuf); 459 } 460 break; 461 462 case 'n': 463 rep->command = NCOM; 464 break; 465 466 case 'N': 467 rep->command = CNCOM; 468 break; 469 470 case 'p': 471 rep->command = PCOM; 472 break; 473 474 case 'P': 475 rep->command = CPCOM; 476 break; 477 478 case 'r': 479 rep->command = RCOM; 480 if(rep->ad2.type != A_NONE) 481 quit(AD1MES, (char *) linebuf); 482 if(*cp++ != ' ') 483 quit(CGMES, (char *) linebuf); 484 rep->u.text = p; 485 p = stext(p, addend); 486 break; 487 488 case 'd': 489 rep->command = DCOM; 490 break; 491 492 case 'D': 493 rep->command = CDCOM; 494 rep->u.lb1 = pspace; 495 break; 496 497 case 'q': 498 rep->command = QCOM; 499 if(rep->ad2.type != A_NONE) 500 quit(AD1MES, (char *) linebuf); 501 break; 502 503 case 'l': 504 rep->command = LCOM; 505 break; 506 507 case 's': 508 rep->command = SCOM; 509 seof = *cp++; 510 if ((rep->u.re1 = compile()) == 0) { 511 if(!lastre) 512 quit("First RE may not be null.", 0); 513 rep->u.re1 = lastre; 514 } 515 rep->rhs = p; 516 if((p = compsub(p, addend)) == 0) 517 quit(CGMES, (char *) linebuf); 518 if(*cp == 'g') { 519 cp++; 520 rep->gfl++; 521 } else if(gflag) 522 rep->gfl++; 523 524 if(*cp == 'p') { 525 cp++; 526 rep->pfl = 1; 527 } 528 529 if(*cp == 'P') { 530 cp++; 531 rep->pfl = 2; 532 } 533 534 if(*cp == 'w') { 535 cp++; 536 if(*cp++ != ' ') 537 quit(CGMES, (char *) linebuf); 538 text(fname[nfiles]); 539 for(i = nfiles - 1; i >= 0; i--) 540 if(cmp(fname[nfiles],fname[i]) == 0) { 541 rep->fcode = fcode[i]; 542 goto done; 543 } 544 if(nfiles >= MAXFILES) 545 quit("Too many files in w commands 1", 0); 546 rep->fcode = open_file(fname[nfiles]); 547 } 548 break; 549 550 case 'w': 551 rep->command = WCOM; 552 if(*cp++ != ' ') 553 quit(CGMES, (char *) linebuf); 554 text(fname[nfiles]); 555 for(i = nfiles - 1; i >= 0; i--) 556 if(cmp(fname[nfiles], fname[i]) == 0) { 557 rep->fcode = fcode[i]; 558 goto done; 559 } 560 if(nfiles >= MAXFILES){ 561 fprint(2, "sed: Too many files in w commands 2 \n"); 562 fprint(2, "nfiles = %d; MAXF = %d\n", nfiles, MAXFILES); 563 errexit(); 564 } 565 rep->fcode = open_file(fname[nfiles]); 566 break; 567 568 case 'x': 569 rep->command = XCOM; 570 break; 571 572 case 'y': 573 rep->command = YCOM; 574 seof = *cp++; 575 if (ycomp(rep) == 0) 576 quit(CGMES, (char *) linebuf); 577 break; 578 579 } 580 done: 581 if(++rep >= pend) 582 quit("Too many commands, last: %S", (char *) linebuf); 583 584 if(*cp++ != '\0') { 585 if(cp[-1] == ';') 586 goto comploop; 587 quit(CGMES, (char *) linebuf); 588 } 589 590 } 591 } 592 593 Biobuf * 594 open_file(char *name) 595 { 596 Biobuf *bp; 597 int fd; 598 599 if ((bp = malloc(sizeof(Biobuf))) == 0) 600 quit("Out of memory", 0); 601 if ((fd = open(name, OWRITE)) < 0 && 602 (fd = create(name, OWRITE, 0666)) < 0) 603 quit("Cannot create %s", name); 604 Binit(bp, fd, OWRITE); 605 Bseek(bp, 0, 2); 606 fcode[nfiles++] = bp; 607 return bp; 608 } 609 610 Rune * 611 compsub(Rune *rhs, Rune *end) 612 { 613 Rune r; 614 615 while ((r = *cp++) != '\0') { 616 if(r == '\\') { 617 if (rhs < end) 618 *rhs++ = 0xFFFF; 619 else 620 return 0; 621 r = *cp++; 622 if(r == 'n') 623 r = '\n'; 624 } else { 625 if(r == seof) { 626 if (rhs < end) 627 *rhs++ = '\0'; 628 else 629 return 0; 630 return rhs; 631 } 632 } 633 if (rhs < end) 634 *rhs++ = r; 635 else 636 return 0; 637 638 } 639 return 0; 640 } 641 642 Reprog * 643 compile(void) 644 { 645 Rune c; 646 char *ep; 647 char expbuf[512]; 648 649 if((c = *cp++) == seof) /* '//' */ 650 return 0; 651 ep = expbuf; 652 do { 653 if (c == 0 || c == '\n') 654 quit(TMMES, (char *) linebuf); 655 if (c == '\\') { 656 if (ep >= expbuf+sizeof(expbuf)) 657 quit(TMMES, (char *) linebuf); 658 ep += runetochar(ep, &c); 659 if ((c = *cp++) == 'n') 660 c = '\n'; 661 } 662 if (ep >= expbuf+sizeof(expbuf)) 663 quit(TMMES, (char *) linebuf); 664 ep += runetochar(ep, &c); 665 } while ((c = *cp++) != seof); 666 *ep = 0; 667 return lastre = regcomp(expbuf); 668 } 669 670 void 671 regerror(char *s) 672 { 673 USED(s); 674 quit(CGMES, (char *) linebuf); 675 } 676 677 void 678 newfile(enum PTYPE type, char *name) 679 { 680 if (type == P_ARG) 681 prog.pctl.curr = name; 682 else if ((prog.pctl.bp = Bopen(name, OREAD)) == 0) 683 quit("Cannot open pattern-file: %s\n", name); 684 prog.type = type; 685 } 686 687 int 688 rline(Rune *buf, Rune *end) 689 { 690 long c; 691 Rune r; 692 693 while ((c = getrune()) >= 0) { 694 r = c; 695 if (r == '\\') { 696 if (buf <= end) 697 *buf++ = r; 698 if ((c = getrune()) < 0) 699 break; 700 r = c; 701 } else if (r == '\n') { 702 *buf = '\0'; 703 return(1); 704 } 705 if (buf <= end) 706 *buf++ = r; 707 } 708 *buf = '\0'; 709 return(-1); 710 } 711 712 long 713 getrune(void) 714 { 715 char *p; 716 long c; 717 Rune r; 718 719 if (prog.type == P_ARG) { 720 if ((p = prog.pctl.curr) != 0) { 721 if (*p) { 722 prog.pctl.curr += chartorune(&r, p); 723 c = r; 724 } else { 725 c = '\n'; /* fake an end-of-line */ 726 prog.pctl.curr = 0; 727 } 728 } else 729 c = -1; 730 } else if ((c = Bgetrune(prog.pctl.bp)) < 0) 731 Bterm(prog.pctl.bp); 732 return c; 733 } 734 735 void 736 address(Addr *ap) 737 { 738 int c; 739 long lno; 740 741 if((c = *cp++) == '$') 742 ap->type = A_DOL; 743 else if(c == '/') { 744 seof = c; 745 if (ap->u.rp = compile()) 746 ap->type = A_RE; 747 else 748 ap->type = A_LAST; 749 } 750 else if (c >= '0' && c <= '9') { 751 lno = c-'0'; 752 while ((c = *cp) >= '0' && c <= '9') 753 lno = lno*10 + *cp++-'0'; 754 if(!lno) 755 quit("line number 0 is illegal",0); 756 ap->type = A_LINE; 757 ap->u.line = lno; 758 } 759 else { 760 cp--; 761 ap->type = A_NONE; 762 } 763 } 764 765 int 766 cmp(char *a, char *b) /* compare characters */ 767 { 768 while(*a == *b++) 769 if (*a == '\0') 770 return(0); 771 else a++; 772 return(1); 773 } 774 775 int 776 rcmp(Rune *a, Rune *b) /* compare runes */ 777 { 778 while(*a == *b++) 779 if (*a == '\0') 780 return(0); 781 else a++; 782 return(1); 783 } 784 785 char * 786 text(char *p) /* extract character string */ 787 { 788 Rune r; 789 790 while(*cp == '\t' || *cp == ' ') 791 cp++; 792 while (*cp) { 793 if ((r = *cp++) == '\\') 794 if ((r = *cp++) == 0) 795 break;; 796 if (r == '\n') 797 while (*cp == '\t' || *cp == ' ') 798 cp++; 799 p += runetochar(p, &r); 800 } 801 *p++ = '\0'; 802 return p; 803 } 804 805 Rune * 806 stext(Rune *p, Rune *end) /* extract rune string */ 807 { 808 while(*cp == '\t' || *cp == ' ') 809 cp++; 810 while (*cp) { 811 if (*cp == '\\') 812 if (*++cp == 0) 813 break; 814 if (p >= end-1) 815 quit(TMMES, (char *) linebuf); 816 if ((*p++ = *cp++) == '\n') 817 while(*cp == '\t' || *cp == ' ') 818 cp++; 819 } 820 *p++ = 0; 821 return p; 822 } 823 824 825 Label * 826 search (Label *ptr) 827 { 828 Label *rp; 829 830 for (rp = ltab; rp < ptr; rp++) 831 if(rcmp(rp->asc, ptr->asc) == 0) 832 return(rp); 833 return(0); 834 } 835 836 void 837 dechain(void) 838 { 839 Label *lptr; 840 SedCom *rptr, *trptr; 841 842 for(lptr = ltab; lptr < lab; lptr++) { 843 844 if(lptr->address == 0) 845 quit("Undefined label: %S", (char *) lptr->asc); 846 847 if(lptr->chain) { 848 rptr = lptr->chain; 849 while(trptr = rptr->u.lb1) { 850 rptr->u.lb1 = lptr->address; 851 rptr = trptr; 852 } 853 rptr->u.lb1 = lptr->address; 854 } 855 } 856 } 857 858 int 859 ycomp(SedCom *r) 860 { 861 int i; 862 Rune *rp; 863 Rune c, *tsp, highc; 864 Rune *sp; 865 866 highc = 0; 867 for(tsp = cp; *tsp != seof; tsp++) { 868 if(*tsp == '\\') 869 tsp++; 870 if(*tsp == '\n' || *tsp == '\0') 871 return(0); 872 if (*tsp > highc) highc = *tsp; 873 } 874 tsp++; 875 if ((rp = r->u.text = (Rune *) malloc(sizeof(Rune)*(highc+2))) == 0) 876 quit("Out of memory", 0); 877 *rp++ = highc; /* save upper bound */ 878 for (i = 0; i <= highc; i++) 879 rp[i] = i; 880 sp = cp; 881 while((c = *sp++) != seof) { 882 if(c == '\\' && *sp == 'n') { 883 sp++; 884 c = '\n'; 885 } 886 if((rp[c] = *tsp++) == '\\' && *tsp == 'n') { 887 rp[c] = '\n'; 888 tsp++; 889 } 890 if(rp[c] == seof || rp[c] == '\0') { 891 free(r->u.re1); 892 r->u.re1 = 0; 893 return(0); 894 } 895 } 896 if(*tsp != seof) { 897 free(r->u.re1); 898 r->u.re1 = 0; 899 return(0); 900 } 901 cp = tsp+1; 902 return(1); 903 } 904 905 void 906 execute(void) 907 { 908 SedCom *ipc; 909 910 while (spend = gline(linebuf)){ 911 for(ipc = pspace; ipc->command; ) { 912 if (!executable(ipc)) { 913 ipc++; 914 continue; 915 } 916 command(ipc); 917 918 if(delflag) 919 break; 920 if(jflag) { 921 jflag = 0; 922 if((ipc = ipc->u.lb1) == 0) 923 break; 924 } else 925 ipc++; 926 927 } 928 if(!nflag && !delflag) 929 putline(&fout, linebuf, spend-linebuf); 930 if(aptr > abuf) { 931 arout(); 932 } 933 delflag = 0; 934 } 935 } 936 /* determine if a statement should be applied to an input line */ 937 int 938 executable(SedCom *ipc) 939 { 940 if (ipc->active) { /* Addr1 satisfied - accept until Addr2 */ 941 if (ipc->active == 1) /* Second line */ 942 ipc->active = 2; 943 switch(ipc->ad2.type) { 944 case A_NONE: /* No second addr; use first */ 945 ipc->active = 0; 946 break; 947 case A_DOL: /* Accept everything */ 948 return !ipc->negfl; 949 case A_LINE: /* Line at end of range? */ 950 if (lnum <= ipc->ad2.u.line) { 951 if (ipc->ad2.u.line == lnum) 952 ipc->active = 0; 953 return !ipc->negfl; 954 } 955 ipc->active = 0; /* out of range */ 956 return ipc->negfl; 957 case A_RE: /* Check for matching R.E. */ 958 if (match(ipc->ad2.u.rp, linebuf)) 959 ipc->active = 0; 960 return !ipc->negfl; 961 default: /* internal error */ 962 quit("Internal error", 0); 963 } 964 } 965 switch (ipc->ad1.type) { /* Check first address */ 966 case A_NONE: /* Everything matches */ 967 return !ipc->negfl; 968 case A_DOL: /* Only last line */ 969 if (dolflag) 970 return !ipc->negfl; 971 break; 972 case A_LINE: /* Check line number */ 973 if (ipc->ad1.u.line == lnum) { 974 ipc->active = 1; /* In range */ 975 return !ipc->negfl; 976 } 977 break; 978 case A_RE: /* Check R.E. */ 979 if (match(ipc->ad1.u.rp, linebuf)) { 980 ipc->active = 1; /* In range */ 981 return !ipc->negfl; 982 } 983 break; 984 default: 985 quit("Internal error", 0); 986 } 987 return ipc->negfl; 988 } 989 990 int 991 match(Reprog *pattern, Rune *buf) 992 { 993 if (!pattern) 994 return 0; 995 subexp[0].s.rsp = buf; 996 subexp[0].e.rep = 0; 997 if (rregexec(pattern, linebuf, subexp, MAXSUB) > 0) { 998 loc1 = subexp[0].s.rsp; 999 loc2 = subexp[0].e.rep; 1000 return 1; 1001 } 1002 loc1 = loc2 = 0; 1003 return 0; 1004 } 1005 1006 int 1007 substitute(SedCom *ipc) 1008 { 1009 int len; 1010 1011 if(!match(ipc->u.re1, linebuf)) 1012 return 0; 1013 1014 /* 1015 * we have at least one match. some patterns, e.g. '$' or '^', can 1016 * produce zero-length matches, so during a global substitute we 1017 * must bump to the character after a zero-length match to keep from looping. 1018 */ 1019 sflag = 1; 1020 if(ipc->gfl == 0) /* single substitution */ 1021 dosub(ipc->rhs); 1022 else 1023 do{ /* global substitution */ 1024 len = loc2-loc1; /* length of match */ 1025 dosub(ipc->rhs); /* dosub moves loc2 */ 1026 if(*loc2 == 0) /* end of string */ 1027 break; 1028 if(len == 0) /* zero-length R.E. match */ 1029 loc2++; /* bump over zero-length match */ 1030 if(*loc2 == 0) /* end of string */ 1031 break; 1032 } while(match(ipc->u.re1, loc2)); 1033 return 1; 1034 } 1035 1036 void 1037 dosub(Rune *rhsbuf) 1038 { 1039 Rune *lp, *sp; 1040 Rune *rp; 1041 int c, n; 1042 1043 lp = linebuf; 1044 sp = genbuf; 1045 rp = rhsbuf; 1046 while (lp < loc1) 1047 *sp++ = *lp++; 1048 while(c = *rp++) { 1049 if (c == '&') { 1050 sp = place(sp, loc1, loc2); 1051 continue; 1052 } 1053 if (c == 0xFFFF && (c = *rp++) >= '1' && c < MAXSUB+'0') { 1054 n = c-'0'; 1055 if (subexp[n].s.rsp && subexp[n].e.rep) { 1056 sp = place(sp, subexp[n].s.rsp, subexp[n].e.rep); 1057 continue; 1058 } 1059 else { 1060 fprint(2, "sed: Invalid back reference \\%d\n",n); 1061 errexit(); 1062 } 1063 } 1064 *sp++ = c; 1065 if (sp >= &genbuf[LBSIZE]) 1066 fprint(2, "sed: Output line too long.\n"); 1067 } 1068 lp = loc2; 1069 loc2 = sp - genbuf + linebuf; 1070 while (*sp++ = *lp++) 1071 if (sp >= &genbuf[LBSIZE]) 1072 fprint(2, "sed: Output line too long.\n"); 1073 lp = linebuf; 1074 sp = genbuf; 1075 while (*lp++ = *sp++) 1076 ; 1077 spend = lp-1; 1078 } 1079 1080 Rune * 1081 place(Rune *sp, Rune *l1, Rune *l2) 1082 { 1083 while (l1 < l2) { 1084 *sp++ = *l1++; 1085 if (sp >= &genbuf[LBSIZE]) 1086 fprint(2, "sed: Output line too long.\n"); 1087 } 1088 return(sp); 1089 } 1090 1091 char * 1092 trans(int c) 1093 { 1094 static char buf[] = "\\x0000"; 1095 static char hex[] = "0123456789abcdef"; 1096 1097 switch(c) { 1098 case '\b': 1099 return "\\b"; 1100 case '\n': 1101 return "\\n"; 1102 case '\r': 1103 return "\\r"; 1104 case '\t': 1105 return "\\t"; 1106 case '\\': 1107 return "\\\\"; 1108 } 1109 buf[2] = hex[(c>>12)&0xF]; 1110 buf[3] = hex[(c>>8)&0xF]; 1111 buf[4] = hex[(c>>4)&0xF]; 1112 buf[5] = hex[c&0xF]; 1113 return buf; 1114 } 1115 1116 void 1117 command(SedCom *ipc) 1118 { 1119 int i, c; 1120 Rune *p1, *p2; 1121 char *ucp; 1122 Rune *rp; 1123 Rune *execp; 1124 1125 switch(ipc->command) { 1126 1127 case ACOM: 1128 *aptr++ = ipc; 1129 if(aptr >= abuf+MAXADDS) { 1130 quit("sed: Too many appends after line %ld\n", 1131 (char *) lnum); 1132 } 1133 *aptr = 0; 1134 break; 1135 case CCOM: 1136 delflag = 1; 1137 if(ipc->active == 1) { 1138 for(rp = ipc->u.text; *rp; rp++) 1139 Bputrune(&fout, *rp); 1140 Bputc(&fout, '\n'); 1141 } 1142 break; 1143 case DCOM: 1144 delflag++; 1145 break; 1146 case CDCOM: 1147 p1 = p2 = linebuf; 1148 while(*p1 != '\n') { 1149 if(*p1++ == 0) { 1150 delflag++; 1151 return; 1152 } 1153 } 1154 p1++; 1155 while(*p2++ = *p1++) 1156 ; 1157 spend = p2-1; 1158 jflag++; 1159 break; 1160 case EQCOM: 1161 Bprint(&fout, "%ld\n", lnum); 1162 break; 1163 case GCOM: 1164 p1 = linebuf; 1165 p2 = holdsp; 1166 while(*p1++ = *p2++) 1167 ; 1168 spend = p1-1; 1169 break; 1170 case CGCOM: 1171 *spend++ = '\n'; 1172 p1 = spend; 1173 p2 = holdsp; 1174 while(*p1++ = *p2++) 1175 if(p1 >= lbend) 1176 break; 1177 spend = p1-1; 1178 break; 1179 case HCOM: 1180 p1 = holdsp; 1181 p2 = linebuf; 1182 while(*p1++ = *p2++); 1183 hspend = p1-1; 1184 break; 1185 case CHCOM: 1186 *hspend++ = '\n'; 1187 p1 = hspend; 1188 p2 = linebuf; 1189 while(*p1++ = *p2++) 1190 if(p1 >= hend) 1191 break; 1192 hspend = p1-1; 1193 break; 1194 case ICOM: 1195 for(rp = ipc->u.text; *rp; rp++) 1196 Bputrune(&fout, *rp); 1197 Bputc(&fout, '\n'); 1198 break; 1199 case BCOM: 1200 jflag = 1; 1201 break; 1202 case LCOM: 1203 c = 0; 1204 for (i = 0, rp = linebuf; *rp; rp++) { 1205 c = *rp; 1206 if(c >= 0x20 && c < 0x7F && c != '\\') { 1207 Bputc(&fout, c); 1208 if(i++ > 71) { 1209 Bprint(&fout, "\\\n"); 1210 i = 0; 1211 } 1212 } else { 1213 for (ucp = trans(*rp); *ucp; ucp++){ 1214 c = *ucp; 1215 Bputc(&fout, c); 1216 if(i++ > 71) { 1217 Bprint(&fout, "\\\n"); 1218 i = 0; 1219 } 1220 } 1221 } 1222 } 1223 if(c == ' ') 1224 Bprint(&fout, "\\n"); 1225 Bputc(&fout, '\n'); 1226 break; 1227 case NCOM: 1228 if(!nflag) 1229 putline(&fout, linebuf, spend-linebuf); 1230 1231 if(aptr > abuf) 1232 arout(); 1233 if((execp = gline(linebuf)) == 0) { 1234 delflag = 1; 1235 break; 1236 } 1237 spend = execp; 1238 break; 1239 case CNCOM: 1240 if(aptr > abuf) 1241 arout(); 1242 *spend++ = '\n'; 1243 if((execp = gline(spend)) == 0) { 1244 delflag = 1; 1245 break; 1246 } 1247 spend = execp; 1248 break; 1249 case PCOM: 1250 putline(&fout, linebuf, spend-linebuf); 1251 break; 1252 case CPCOM: 1253 cpcom: 1254 for(rp = linebuf; *rp && *rp != '\n'; rp++) 1255 Bputc(&fout, *rp); 1256 Bputc(&fout, '\n'); 1257 break; 1258 case QCOM: 1259 if(!nflag) 1260 putline(&fout, linebuf, spend-linebuf); 1261 if(aptr > abuf) 1262 arout(); 1263 exits(0); 1264 case RCOM: 1265 *aptr++ = ipc; 1266 if(aptr >= &abuf[MAXADDS]) 1267 quit("sed: Too many reads after line %ld\n", 1268 (char *) lnum); 1269 *aptr = 0; 1270 break; 1271 case SCOM: 1272 i = substitute(ipc); 1273 if(i && ipc->pfl) 1274 if(ipc->pfl == 1) 1275 putline(&fout, linebuf, spend-linebuf); 1276 else 1277 goto cpcom; 1278 if(i && ipc->fcode) 1279 goto wcom; 1280 break; 1281 1282 case TCOM: 1283 if(sflag == 0) break; 1284 sflag = 0; 1285 jflag = 1; 1286 break; 1287 1288 wcom: 1289 case WCOM: 1290 putline(ipc->fcode,linebuf, spend-linebuf); 1291 break; 1292 case XCOM: 1293 p1 = linebuf; 1294 p2 = genbuf; 1295 while(*p2++ = *p1++); 1296 p1 = holdsp; 1297 p2 = linebuf; 1298 while(*p2++ = *p1++); 1299 spend = p2 - 1; 1300 p1 = genbuf; 1301 p2 = holdsp; 1302 while(*p2++ = *p1++); 1303 hspend = p2 - 1; 1304 break; 1305 case YCOM: 1306 p1 = linebuf; 1307 p2 = ipc->u.text; 1308 for (i = *p2++; *p1; p1++){ 1309 if (*p1 <= i) *p1 = p2[*p1]; 1310 } 1311 break; 1312 } 1313 1314 } 1315 1316 void 1317 putline(Biobuf *bp, Rune *buf, int n) 1318 { 1319 while (n--) 1320 Bputrune(bp, *buf++); 1321 Bputc(bp, '\n'); 1322 if(lflag) 1323 Bflush(bp); 1324 } 1325 1326 int 1327 ecmp(Rune *a, Rune *b, int count) 1328 { 1329 while(count--) 1330 if(*a++ != *b++) return(0); 1331 return(1); 1332 } 1333 1334 void 1335 arout(void) 1336 { 1337 Rune *p1; 1338 Biobuf *fi; 1339 int c; 1340 char *s; 1341 char buf[128]; 1342 1343 for (aptr = abuf; *aptr; aptr++) { 1344 if((*aptr)->command == ACOM) { 1345 for(p1 = (*aptr)->u.text; *p1; p1++ ) 1346 Bputrune(&fout, *p1); 1347 Bputc(&fout, '\n'); 1348 } else { 1349 for(s = buf, p1= (*aptr)->u.text; *p1; p1++) 1350 s += runetochar(s, p1); 1351 *s = '\0'; 1352 if((fi = Bopen(buf, OREAD)) == 0) 1353 continue; 1354 while((c = Bgetc(fi)) >= 0) 1355 Bputc(&fout, c); 1356 Bterm(fi); 1357 } 1358 } 1359 aptr = abuf; 1360 *aptr = 0; 1361 } 1362 1363 void 1364 errexit(void) 1365 { 1366 exits("error"); 1367 } 1368 1369 void 1370 quit (char *msg, char *arg) 1371 { 1372 fprint(2, "sed: "); 1373 fprint(2, msg, arg); 1374 fprint(2, "\n"); 1375 errexit(); 1376 } 1377 1378 Rune * 1379 gline(Rune *addr) 1380 { 1381 long c; 1382 Rune *p; 1383 1384 static long peekc = 0; 1385 1386 if (f == 0 && opendata() < 0) 1387 return 0; 1388 sflag = 0; 1389 lnum++; 1390 /* Bflush(&fout);********* dumped 4/30/92 - bobf****/ 1391 do { 1392 p = addr; 1393 for (c = (peekc ? peekc : Bgetrune(f)); c >= 0; c = Bgetrune(f)) { 1394 if (c == '\n') { 1395 if ((peekc = Bgetrune(f)) < 0) { 1396 if (fhead == 0) 1397 dolflag = 1; 1398 } 1399 *p = '\0'; 1400 return p; 1401 } 1402 if (c && p < lbend) 1403 *p++ = c; 1404 } 1405 /* return partial final line, adding implicit newline */ 1406 if(p != addr) { 1407 *p = '\0'; 1408 peekc = -1; 1409 if (fhead == 0) 1410 dolflag = 1; 1411 return p; 1412 } 1413 peekc = 0; 1414 Bterm(f); 1415 } while (opendata() > 0); /* Switch to next stream */ 1416 f = 0; 1417 return 0; 1418 } 1419 1420 /* Data file input section - the intent is to transparently 1421 * catenate all data input streams. 1422 */ 1423 void 1424 enroll(char *filename) /* Add a file to the input file cache */ 1425 { 1426 FileCache *fp; 1427 1428 if ((fp = (FileCache *) malloc(sizeof (FileCache))) == 0) 1429 quit("Out of memory", 0); 1430 if (ftail == 0) 1431 fhead = fp; 1432 else 1433 ftail->next = fp; 1434 ftail = fp; 1435 fp->next = 0; 1436 fp->name = filename; /* 0 => stdin */ 1437 } 1438 1439 int 1440 opendata(void) 1441 { 1442 if (fhead == 0) 1443 return -1; 1444 if (fhead->name) { 1445 if ((f = Bopen(fhead->name, OREAD)) == 0) 1446 quit("Can't open %s", fhead->name); 1447 } else { 1448 Binit(&bstdin, 0, OREAD); 1449 f = &bstdin; 1450 } 1451 fhead = fhead->next; 1452 return 1; 1453 }