yacc.c (58914B)
1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 #include <ctype.h> 5 6 #define Bungetrune Bungetc /* ok for now. */ 7 8 /* 9 * all these are 32 bit 10 */ 11 #define TBITSET ((32+NTERMS)/32) /* BOTCH?? +31 */ 12 #define BIT(a,i) ((a)[(i)>>5] & (1<<((i)&037))) 13 #define SETBIT(a,i) ((a)[(i)>>5] |= (1<<((i)&037))) 14 #define NWORDS(n) (((n)+32)/32) 15 16 char *PARSER = "#9/yacc/yaccpar"; 17 char *PARSERS = "#9/yacc/yaccpars"; 18 19 #define TEMPNAME "y.tmp.XXXXXX" 20 #define ACTNAME "y.acts.XXXXXX" 21 #define OFILE "tab.c" 22 #define FILEU "output" 23 #define FILED "tab.h" 24 #define FILEDEBUG "debug" 25 26 enum 27 { 28 /* 29 * the following are adjustable 30 * according to memory size 31 */ 32 ACTSIZE = 40000, 33 MEMSIZE = 40000, 34 NSTATES = 2000, 35 NTERMS = 511, 36 NPROD = 1600, 37 NNONTERM = 600, 38 TEMPSIZE = 2000, 39 CNAMSZ = 10000, 40 LSETSIZE = 2400, 41 WSETSIZE = 350, 42 43 NAMESIZE = 50, 44 NTYPES = 63, 45 ISIZE = 400, 46 47 PRIVATE = 0xE000, /* unicode private use */ 48 49 /* relationships which must hold: 50 TBITSET ints must hold NTERMS+1 bits... 51 WSETSIZE >= NNONTERM 52 LSETSIZE >= NNONTERM 53 TEMPSIZE >= NTERMS + NNONTERM + 1 54 TEMPSIZE >= NSTATES 55 */ 56 57 NTBASE = 010000, 58 ERRCODE = 8190, 59 ACCEPTCODE = 8191, 60 61 NOASC = 0, /* no assoc. */ 62 LASC = 1, /* left assoc. */ 63 RASC = 2, /* right assoc. */ 64 BASC = 3, /* binary assoc. */ 65 66 /* flags for state generation */ 67 68 DONE = 0, 69 MUSTDO = 1, 70 MUSTLOOKAHEAD = 2, 71 72 /* flags for a rule having an action, and being reduced */ 73 74 ACTFLAG = 04, 75 REDFLAG = 010, 76 77 /* output parser flags */ 78 YYFLAG1 = -1000, 79 80 /* parse tokens */ 81 IDENTIFIER = PRIVATE, 82 MARK, 83 TERM, 84 LEFT, 85 RIGHT, 86 BINARY, 87 PREC, 88 LCURLY, 89 IDENTCOLON, 90 NUMBER, 91 START, 92 TYPEDEF, 93 TYPENAME, 94 UNION, 95 96 ENDFILE = 0, 97 98 EMPTY = 1, 99 WHOKNOWS = 0, 100 OK = 1, 101 NOMORE = -1000 102 }; 103 104 /* macros for getting associativity and precedence levels */ 105 106 #define ASSOC(i) ((i)&03) 107 #define PLEVEL(i) (((i)>>4)&077) 108 #define TYPE(i) (((i)>>10)&077) 109 110 /* macros for setting associativity and precedence levels */ 111 112 #define SETASC(i,j) i |= j 113 #define SETPLEV(i,j) i |= (j<<4) 114 #define SETTYPE(i,j) i |= (j<<10) 115 116 /* looping macros */ 117 118 #define TLOOP(i) for(i=1; i<=ntokens; i++) 119 #define NTLOOP(i) for(i=0; i<=nnonter; i++) 120 #define PLOOP(s,i) for(i=s; i<nprod; i++) 121 #define SLOOP(i) for(i=0; i<nstate; i++) 122 #define WSBUMP(x) x++ 123 #define WSLOOP(s,j) for(j=s; j<cwp; j++) 124 #define ITMLOOP(i,p,q) for(q=pstate[i+1], p=pstate[i]; p<q; p++) 125 #define SETLOOP(i) for(i=0; i<tbitset; i++) 126 127 /* command to clobber tempfiles after use */ 128 129 #define ZAPFILE(x) if(x) remove(x) 130 131 /* I/O descriptors */ 132 133 Biobuf* faction; /* file for saving actions */ 134 Biobuf* fdefine; /* file for #defines */ 135 Biobuf* fdebug; /* y.debug for strings for debugging */ 136 Biobuf* ftable; /* y.tab.c file */ 137 Biobuf* ftemp; /* tempfile to pass 2 */ 138 Biobuf* finput; /* input file */ 139 Biobuf* foutput; /* y.output file */ 140 141 /* communication variables between various I/O routines */ 142 143 char* infile; /* input file name */ 144 int numbval; /* value of an input number */ 145 char tokname[NAMESIZE+4]; /* input token name, slop for runes and 0 */ 146 147 /* structure declarations */ 148 149 typedef 150 struct 151 { 152 int lset[TBITSET]; 153 } Lkset; 154 155 typedef 156 struct 157 { 158 int* pitem; 159 Lkset* look; 160 } Item; 161 162 typedef 163 struct 164 { 165 char* name; 166 int value; 167 } Symb; 168 169 typedef 170 struct 171 { 172 int* pitem; 173 int flag; 174 Lkset ws; 175 } Wset; 176 177 /* storage of names */ 178 179 char cnames[CNAMSZ]; /* place where token and nonterminal names are stored */ 180 int cnamsz = CNAMSZ; /* size of cnames */ 181 char* cnamp = cnames; /* place where next name is to be put in */ 182 int ndefout = 4; /* number of defined symbols output */ 183 char* tempname; 184 char* actname; 185 char ttempname[] = TEMPNAME; 186 char tactname[] = ACTNAME; 187 char* parser; 188 char* yydebug; 189 int yyarg; 190 int yyline = 1; 191 192 /* storage of types */ 193 int ntypes; /* number of types defined */ 194 char* typeset[NTYPES]; /* pointers to type tags */ 195 196 /* token information */ 197 198 int ntokens = 0 ; /* number of tokens */ 199 Symb tokset[NTERMS]; 200 int toklev[NTERMS]; /* vector with the precedence of the terminals */ 201 202 /* nonterminal information */ 203 204 int nnonter = -1; /* the number of nonterminals */ 205 Symb nontrst[NNONTERM]; 206 int start; /* start symbol */ 207 208 /* assigned token type values */ 209 int extval = 0; 210 211 char* ytabc = OFILE; /* name of y.tab.c */ 212 213 /* grammar rule information */ 214 215 int mem0[MEMSIZE] ; /* production storage */ 216 int* mem = mem0; 217 int nprod = 1; /* number of productions */ 218 int* prdptr[NPROD]; /* pointers to descriptions of productions */ 219 int levprd[NPROD]; /* precedence levels for the productions */ 220 int rlines[NPROD]; /* line number for this rule */ 221 222 /* state information */ 223 224 int nstate = 0; /* number of states */ 225 Item* pstate[NSTATES+2]; /* pointers to the descriptions of the states */ 226 int tystate[NSTATES]; /* contains type information about the states */ 227 int defact[NSTATES]; /* the default actions of states */ 228 int tstates[NTERMS]; /* states generated by terminal gotos */ 229 int ntstates[NNONTERM]; /* states generated by nonterminal gotos */ 230 int mstates[NSTATES]; /* chain of overflows of term/nonterm generation lists */ 231 int lastred; /* the number of the last reduction of a state */ 232 233 /* lookahead set information */ 234 235 Lkset lkst[LSETSIZE]; 236 int nolook; /* flag to turn off lookahead computations */ 237 int tbitset; /* size of lookahead sets */ 238 int nlset = 0; /* next lookahead set index */ 239 int nolook = 0; /* flag to suppress lookahead computations */ 240 Lkset clset; /* temporary storage for lookahead computations */ 241 242 /* working set information */ 243 244 Wset wsets[WSETSIZE]; 245 Wset* cwp; 246 247 /* storage for action table */ 248 249 int amem[ACTSIZE]; /* action table storage */ 250 int* memp = amem; /* next free action table position */ 251 int indgo[NSTATES]; /* index to the stored goto table */ 252 253 /* temporary vector, indexable by states, terms, or ntokens */ 254 255 int temp1[TEMPSIZE]; /* temporary storage, indexed by terms + ntokens or states */ 256 int lineno = 1; /* current input line number */ 257 int fatfl = 1; /* if on, error is fatal */ 258 int nerrors = 0; /* number of errors */ 259 260 /* statistics collection variables */ 261 262 int zzgoent; 263 int zzgobest; 264 int zzacent; 265 int zzexcp; 266 int zzclose; 267 int zzrrconf; 268 int zzsrconf; 269 270 int* ggreed = lkst[0].lset; 271 int* pgo = wsets[0].ws.lset; 272 int* yypgo = &nontrst[0].value; 273 274 int maxspr = 0; /* maximum spread of any entry */ 275 int maxoff = 0; /* maximum offset into a array */ 276 int* pmem = mem0; 277 int* maxa; 278 int nxdb = 0; 279 int adb = 0; 280 281 282 /* storage for information about the nonterminals */ 283 284 int** pres[NNONTERM+2]; /* vector of pointers to productions yielding each nonterminal */ 285 Lkset* pfirst[NNONTERM+2]; /* vector of pointers to first sets for each nonterminal */ 286 int pempty[NNONTERM+1]; /* vector of nonterminals nontrivially deriving e */ 287 288 /* random stuff picked out from between functions */ 289 290 int indebug = 0; 291 Wset* zzcwp = wsets; 292 int zzgoent = 0; 293 int zzgobest = 0; 294 int zzacent = 0; 295 int zzexcp = 0; 296 int zzclose = 0; 297 int zzsrconf = 0; 298 int* zzmemsz = mem0; 299 int zzrrconf = 0; 300 int pidebug = 0; /* debugging flag for putitem */ 301 int gsdebug = 0; 302 int cldebug = 0; /* debugging flag for closure */ 303 int pkdebug = 0; 304 int g2debug = 0; 305 306 struct 307 { 308 char* name; 309 long value; 310 } resrv[] = 311 { 312 "binary", BINARY, 313 "left", LEFT, 314 "nonassoc", BINARY, 315 "prec", PREC, 316 "right", RIGHT, 317 "start", START, 318 "term", TERM, 319 "token", TERM, 320 "type", TYPEDEF, 321 "union", UNION, 322 0, 323 }; 324 325 /* define functions */ 326 327 void main(int, char**); 328 void others(void); 329 char* chcopy(char*, char*); 330 char* writem(int*); 331 char* symnam(int); 332 void summary(void); 333 void error(char*, ...); 334 void aryfil(int*, int, int); 335 int setunion(int*, int*); 336 void prlook(Lkset*); 337 void cpres(void); 338 void cpfir(void); 339 int state(int); 340 void putitem(int*, Lkset*); 341 void cempty(void); 342 void stagen(void); 343 void closure(int); 344 Lkset* flset(Lkset*); 345 void cleantmp(void); 346 void intr(void); 347 void setup(int, char**); 348 void finact(void); 349 int defin(int, char*); 350 void defout(int); 351 char* cstash(char*); 352 long gettok(void); 353 int fdtype(int); 354 int chfind(int, char*); 355 void cpyunion(void); 356 void cpycode(void); 357 int skipcom(void); 358 void cpyact(int); 359 void openup(char*, int, int, int, char*); 360 void output(void); 361 int apack(int*, int); 362 void go2out(void); 363 void go2gen(int); 364 void precftn(int, int, int); 365 void wract(int); 366 void wrstate(int); 367 void warray(char*, int*, int); 368 void hideprod(void); 369 void callopt(void); 370 void gin(int); 371 void stin(int); 372 int nxti(void); 373 void osummary(void); 374 void aoutput(void); 375 void arout(char*, int*, int); 376 int gtnm(void); 377 378 void 379 main(int argc, char *argv[]) 380 { 381 PARSER = unsharp(PARSER); 382 PARSERS = unsharp(PARSERS); 383 parser = PARSER; 384 385 setup(argc, argv); /* initialize and read productions */ 386 tbitset = NWORDS(ntokens); 387 cpres(); /* make table of which productions yield a given nonterminal */ 388 cempty(); /* make a table of which nonterminals can match the empty string */ 389 cpfir(); /* make a table of firsts of nonterminals */ 390 stagen(); /* generate the states */ 391 output(); /* write the states and the tables */ 392 go2out(); 393 hideprod(); 394 summary(); 395 callopt(); 396 others(); 397 exits(0); 398 } 399 400 /* 401 * put out other arrays, copy the parsers 402 */ 403 void 404 others(void) 405 { 406 int c, i, j; 407 408 finput = Bopen(parser, OREAD); 409 if(finput == 0) 410 error("cannot open parser %s: %r", parser); 411 warray("yyr1", levprd, nprod); 412 aryfil(temp1, nprod, 0); 413 PLOOP(1, i) 414 temp1[i] = prdptr[i+1]-prdptr[i]-2; 415 warray("yyr2", temp1, nprod); 416 417 aryfil(temp1, nstate, -1000); 418 TLOOP(i) 419 for(j=tstates[i]; j!=0; j=mstates[j]) 420 temp1[j] = i; 421 NTLOOP(i) 422 for(j=ntstates[i]; j!=0; j=mstates[j]) 423 temp1[j] = -i; 424 warray("yychk", temp1, nstate); 425 warray("yydef", defact, nstate); 426 427 /* put out token translation tables */ 428 /* table 1 has 0-256 */ 429 aryfil(temp1, 256, 0); 430 c = 0; 431 TLOOP(i) { 432 j = tokset[i].value; 433 if(j >= 0 && j < 256) { 434 if(temp1[j]) { 435 print("yacc bug -- cant have 2 different Ts with same value\n"); 436 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name); 437 nerrors++; 438 } 439 temp1[j] = i; 440 if(j > c) 441 c = j; 442 } 443 } 444 warray("yytok1", temp1, c+1); 445 446 /* table 2 has PRIVATE-PRIVATE+256 */ 447 aryfil(temp1, 256, 0); 448 c = 0; 449 TLOOP(i) { 450 j = tokset[i].value - PRIVATE; 451 if(j >= 0 && j < 256) { 452 if(temp1[j]) { 453 print("yacc bug -- cant have 2 different Ts with same value\n"); 454 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name); 455 nerrors++; 456 } 457 temp1[j] = i; 458 if(j > c) 459 c = j; 460 } 461 } 462 warray("yytok2", temp1, c+1); 463 464 /* table 3 has everything else */ 465 Bprint(ftable, "static\tconst\tlong yytok3[] =\n{\n"); 466 c = 0; 467 TLOOP(i) { 468 j = tokset[i].value; 469 if(j >= 0 && j < 256) 470 continue; 471 if(j >= PRIVATE && j < 256+PRIVATE) 472 continue; 473 474 Bprint(ftable, "%4d,%4d,", j, i); 475 c++; 476 if(c%5 == 0) 477 Bprint(ftable, "\n"); 478 } 479 Bprint(ftable, "%4d\n};\n", 0); 480 481 /* copy parser text */ 482 while((c=Bgetrune(finput)) != Beof) { 483 if(c == '$') { 484 if((c = Bgetrune(finput)) != 'A') 485 Bputrune(ftable, '$'); 486 else { /* copy actions */ 487 faction = Bopen(actname, OREAD); 488 if(faction == 0) 489 error("cannot reopen action tempfile"); 490 while((c=Bgetrune(faction)) != Beof) 491 Bputrune(ftable, c); 492 Bterm(faction); 493 ZAPFILE(actname); 494 c = Bgetrune(finput); 495 } 496 } 497 Bputrune(ftable, c); 498 } 499 Bterm(ftable); 500 } 501 502 /* 503 * copies string q into p, returning next free char ptr 504 */ 505 char* 506 chcopy(char* p, char* q) 507 { 508 int c; 509 510 while(c = *q) { 511 if(c == '"') 512 *p++ = '\\'; 513 *p++ = c; 514 q++; 515 } 516 *p = 0; 517 return p; 518 } 519 520 /* 521 * creates output string for item pointed to by pp 522 */ 523 char* 524 writem(int *pp) 525 { 526 int i,*p; 527 static char sarr[ISIZE]; 528 char* q; 529 530 for(p=pp; *p>0; p++) 531 ; 532 p = prdptr[-*p]; 533 q = chcopy(sarr, nontrst[*p-NTBASE].name); 534 q = chcopy(q, ": "); 535 for(;;) { 536 *q = ' '; 537 p++; 538 if(p == pp) 539 *q = '.'; 540 q++; 541 *q = '\0'; 542 i = *p; 543 if(i <= 0) 544 break; 545 q = chcopy(q, symnam(i)); 546 if(q > &sarr[ISIZE-30]) 547 error("item too big"); 548 } 549 550 /* an item calling for a reduction */ 551 i = *pp; 552 if(i < 0 ) { 553 q = chcopy(q, " ("); 554 sprint(q, "%d)", -i); 555 } 556 return sarr; 557 } 558 559 /* 560 * return a pointer to the name of symbol i 561 */ 562 char* 563 symnam(int i) 564 { 565 char* cp; 566 567 cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name; 568 if(*cp == ' ') 569 cp++; 570 return cp; 571 } 572 573 /* 574 * output the summary on y.output 575 */ 576 void 577 summary(void) 578 { 579 580 if(foutput != 0) { 581 Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n", 582 ntokens, NTERMS, nnonter, NNONTERM); 583 Bprint(foutput, "%d/%d grammar rules, %d/%d states\n", 584 nprod, NPROD, nstate, NSTATES); 585 Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n", 586 zzsrconf, zzrrconf); 587 Bprint(foutput, "%d/%d working sets used\n", 588 (int)(zzcwp-wsets), WSETSIZE); 589 Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n", 590 (int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE); 591 Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE); 592 Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate); 593 Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp); 594 Bprint(foutput, "%d goto entries\n", zzgoent); 595 Bprint(foutput, "%d entries saved by goto default\n", zzgobest); 596 } 597 if(zzsrconf != 0 || zzrrconf != 0) { 598 print("\nconflicts: "); 599 if(zzsrconf) 600 print("%d shift/reduce", zzsrconf); 601 if(zzsrconf && zzrrconf) 602 print(", "); 603 if(zzrrconf) 604 print("%d reduce/reduce", zzrrconf); 605 print("\n"); 606 } 607 if(ftemp != 0) { 608 Bterm(ftemp); 609 ftemp = 0; 610 } 611 if(fdefine != 0) { 612 Bterm(fdefine); 613 fdefine = 0; 614 } 615 } 616 617 /* 618 * write out error comment -- NEEDS WORK 619 */ 620 void 621 error(char *s, ...) 622 { 623 va_list arg; 624 625 nerrors++; 626 fprint(2, "\n fatal error:"); 627 va_start(arg, s); 628 vfprint(2, s, arg); 629 va_end(arg); 630 fprint(2, ", %s:%d\n", infile, lineno); 631 if(!fatfl) 632 return; 633 summary(); 634 cleantmp(); 635 exits("error"); 636 } 637 638 /* 639 * set elements 0 through n-1 to c 640 */ 641 void 642 aryfil(int *v, int n, int c) 643 { 644 int i; 645 646 for(i=0; i<n; i++) 647 v[i] = c; 648 } 649 650 /* 651 * set a to the union of a and b 652 * return 1 if b is not a subset of a, 0 otherwise 653 */ 654 int 655 setunion(int *a, int *b) 656 { 657 int i, x, sub; 658 659 sub = 0; 660 SETLOOP(i) { 661 x = *a; 662 *a |= *b; 663 if(*a != x) 664 sub = 1; 665 a++; 666 b++; 667 } 668 return sub; 669 } 670 671 void 672 prlook(Lkset* p) 673 { 674 int j, *pp; 675 676 pp = p->lset; 677 if(pp == 0) 678 Bprint(foutput, "\tNULL"); 679 else { 680 Bprint(foutput, " { "); 681 TLOOP(j) 682 if(BIT(pp,j)) 683 Bprint(foutput, "%s ", symnam(j)); 684 Bprint(foutput, "}"); 685 } 686 } 687 688 /* 689 * compute an array with the beginnings of productions yielding given nonterminals 690 * The array pres points to these lists 691 * the array pyield has the lists: the total size is only NPROD+1 692 */ 693 void 694 cpres(void) 695 { 696 int c, j, i, **pmem; 697 static int *pyield[NPROD]; 698 699 pmem = pyield; 700 NTLOOP(i) { 701 c = i+NTBASE; 702 pres[i] = pmem; 703 fatfl = 0; /* make undefined symbols nonfatal */ 704 PLOOP(0, j) 705 if(*prdptr[j] == c) 706 *pmem++ = prdptr[j]+1; 707 if(pres[i] == pmem) 708 error("nonterminal %s not defined!", nontrst[i].name); 709 } 710 pres[i] = pmem; 711 fatfl = 1; 712 if(nerrors) { 713 summary(); 714 cleantmp(); 715 exits("error"); 716 } 717 if(pmem != &pyield[nprod]) 718 error("internal Yacc error: pyield %d", pmem-&pyield[nprod]); 719 } 720 721 /* 722 * compute an array with the first of nonterminals 723 */ 724 void 725 cpfir(void) 726 { 727 int *p, **s, i, **t, ch, changes; 728 729 zzcwp = &wsets[nnonter]; 730 NTLOOP(i) { 731 aryfil(wsets[i].ws.lset, tbitset, 0); 732 t = pres[i+1]; 733 /* initially fill the sets */ 734 for(s=pres[i]; s<t; ++s) 735 for(p = *s; (ch = *p) > 0; ++p) { 736 if(ch < NTBASE) { 737 SETBIT(wsets[i].ws.lset, ch); 738 break; 739 } 740 if(!pempty[ch-NTBASE]) 741 break; 742 } 743 } 744 745 /* now, reflect transitivity */ 746 changes = 1; 747 while(changes) { 748 changes = 0; 749 NTLOOP(i) { 750 t = pres[i+1]; 751 for(s = pres[i]; s < t; ++s) 752 for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) { 753 changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset); 754 if(!pempty[ch]) 755 break; 756 } 757 } 758 } 759 760 NTLOOP(i) 761 pfirst[i] = flset(&wsets[i].ws); 762 if(!indebug) 763 return; 764 if(foutput != 0) 765 NTLOOP(i) { 766 Bprint(foutput, "\n%s: ", nontrst[i].name); 767 prlook(pfirst[i]); 768 Bprint(foutput, " %d\n", pempty[i]); 769 } 770 } 771 772 /* 773 * sorts last state,and sees if it equals earlier ones. returns state number 774 */ 775 int 776 state(int c) 777 { 778 Item *p1, *p2, *k, *l, *q1, *q2; 779 int size1, size2, i; 780 781 p1 = pstate[nstate]; 782 p2 = pstate[nstate+1]; 783 if(p1 == p2) 784 return 0; /* null state */ 785 /* sort the items */ 786 for(k = p2-1; k > p1; k--) /* make k the biggest */ 787 for(l = k-1; l >= p1; --l) 788 if(l->pitem > k->pitem) { 789 int *s; 790 Lkset *ss; 791 792 s = k->pitem; 793 k->pitem = l->pitem; 794 l->pitem = s; 795 ss = k->look; 796 k->look = l->look; 797 l->look = ss; 798 } 799 size1 = p2 - p1; /* size of state */ 800 801 for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) { 802 /* get ith state */ 803 q1 = pstate[i]; 804 q2 = pstate[i+1]; 805 size2 = q2 - q1; 806 if(size1 != size2) 807 continue; 808 k = p1; 809 for(l = q1; l < q2; l++) { 810 if(l->pitem != k->pitem) 811 break; 812 k++; 813 } 814 if(l != q2) 815 continue; 816 /* found it */ 817 pstate[nstate+1] = pstate[nstate]; /* delete last state */ 818 /* fix up lookaheads */ 819 if(nolook) 820 return i; 821 for(l = q1, k = p1; l < q2; ++l, ++k ) { 822 int s; 823 824 SETLOOP(s) 825 clset.lset[s] = l->look->lset[s]; 826 if(setunion(clset.lset, k->look->lset)) { 827 tystate[i] = MUSTDO; 828 /* register the new set */ 829 l->look = flset( &clset ); 830 } 831 } 832 return i; 833 } 834 /* state is new */ 835 if(nolook) 836 error("yacc state/nolook error"); 837 pstate[nstate+2] = p2; 838 if(nstate+1 >= NSTATES) 839 error("too many states"); 840 if(c >= NTBASE) { 841 mstates[nstate] = ntstates[c-NTBASE]; 842 ntstates[c-NTBASE] = nstate; 843 } else { 844 mstates[nstate] = tstates[c]; 845 tstates[c] = nstate; 846 } 847 tystate[nstate] = MUSTDO; 848 return nstate++; 849 } 850 851 void 852 putitem(int *ptr, Lkset *lptr) 853 { 854 Item *j; 855 856 if(pidebug && foutput != 0) 857 Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate); 858 j = pstate[nstate+1]; 859 j->pitem = ptr; 860 if(!nolook) 861 j->look = flset(lptr); 862 pstate[nstate+1] = ++j; 863 if((int*)j > zzmemsz) { 864 zzmemsz = (int*)j; 865 if(zzmemsz >= &mem0[MEMSIZE]) 866 error("out of state space"); 867 } 868 } 869 870 /* 871 * mark nonterminals which derive the empty string 872 * also, look for nonterminals which don't derive any token strings 873 */ 874 void 875 cempty(void) 876 { 877 878 int i, *p; 879 880 /* first, use the array pempty to detect productions that can never be reduced */ 881 /* set pempty to WHONOWS */ 882 aryfil(pempty, nnonter+1, WHOKNOWS); 883 884 /* now, look at productions, marking nonterminals which derive something */ 885 more: 886 PLOOP(0, i) { 887 if(pempty[*prdptr[i] - NTBASE]) 888 continue; 889 for(p = prdptr[i]+1; *p >= 0; ++p) 890 if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS) 891 break; 892 /* production can be derived */ 893 if(*p < 0) { 894 pempty[*prdptr[i]-NTBASE] = OK; 895 goto more; 896 } 897 } 898 899 /* now, look at the nonterminals, to see if they are all OK */ 900 NTLOOP(i) { 901 /* the added production rises or falls as the start symbol ... */ 902 if(i == 0) 903 continue; 904 if(pempty[i] != OK) { 905 fatfl = 0; 906 error("nonterminal %s never derives any token string", nontrst[i].name); 907 } 908 } 909 910 if(nerrors) { 911 summary(); 912 cleantmp(); 913 exits("error"); 914 } 915 916 /* now, compute the pempty array, to see which nonterminals derive the empty string */ 917 /* set pempty to WHOKNOWS */ 918 aryfil( pempty, nnonter+1, WHOKNOWS); 919 920 /* loop as long as we keep finding empty nonterminals */ 921 922 again: 923 PLOOP(1, i) { 924 /* not known to be empty */ 925 if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) { 926 for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p) 927 ; 928 /* we have a nontrivially empty nonterminal */ 929 if(*p < 0) { 930 pempty[*prdptr[i]-NTBASE] = EMPTY; 931 /* got one ... try for another */ 932 goto again; 933 } 934 } 935 } 936 } 937 938 /* 939 * generate the states 940 */ 941 void 942 stagen(void) 943 { 944 945 int c, i, j, more; 946 Wset *p, *q; 947 948 /* initialize */ 949 nstate = 0; 950 951 /* THIS IS FUNNY from the standpoint of portability 952 * it represents the magic moment when the mem0 array, which has 953 * been holding the productions, starts to hold item pointers, of a 954 * different type... 955 * someday, alloc should be used to allocate all this stuff... for now, we 956 * accept that if pointers don't fit in integers, there is a problem... 957 */ 958 959 pstate[0] = pstate[1] = (Item*)mem; 960 aryfil(clset.lset, tbitset, 0); 961 putitem(prdptr[0]+1, &clset); 962 tystate[0] = MUSTDO; 963 nstate = 1; 964 pstate[2] = pstate[1]; 965 966 aryfil(amem, ACTSIZE, 0); 967 968 /* now, the main state generation loop */ 969 for(more=1; more;) { 970 more = 0; 971 SLOOP(i) { 972 if(tystate[i] != MUSTDO) 973 continue; 974 tystate[i] = DONE; 975 aryfil(temp1, nnonter+1, 0); 976 /* take state i, close it, and do gotos */ 977 closure(i); 978 /* generate goto's */ 979 WSLOOP(wsets, p) { 980 if(p->flag) 981 continue; 982 p->flag = 1; 983 c = *(p->pitem); 984 if(c <= 1) { 985 if(pstate[i+1]-pstate[i] <= p-wsets) 986 tystate[i] = MUSTLOOKAHEAD; 987 continue; 988 } 989 /* do a goto on c */ 990 WSLOOP(p, q) 991 /* this item contributes to the goto */ 992 if(c == *(q->pitem)) { 993 putitem(q->pitem+1, &q->ws); 994 q->flag = 1; 995 } 996 if(c < NTBASE) 997 state(c); /* register new state */ 998 else 999 temp1[c-NTBASE] = state(c); 1000 } 1001 if(gsdebug && foutput != 0) { 1002 Bprint(foutput, "%d: ", i); 1003 NTLOOP(j) 1004 if(temp1[j]) 1005 Bprint(foutput, "%s %d, ", 1006 nontrst[j].name, temp1[j]); 1007 Bprint(foutput, "\n"); 1008 } 1009 indgo[i] = apack(&temp1[1], nnonter-1) - 1; 1010 /* do some more */ 1011 more = 1; 1012 } 1013 } 1014 } 1015 1016 /* 1017 * generate the closure of state i 1018 */ 1019 void 1020 closure(int i) 1021 { 1022 1023 Wset *u, *v; 1024 Item *p, *q; 1025 int c, ch, work, k, *pi, **s, **t; 1026 1027 zzclose++; 1028 1029 /* first, copy kernel of state i to wsets */ 1030 cwp = wsets; 1031 ITMLOOP(i, p, q) { 1032 cwp->pitem = p->pitem; 1033 cwp->flag = 1; /* this item must get closed */ 1034 SETLOOP(k) 1035 cwp->ws.lset[k] = p->look->lset[k]; 1036 WSBUMP(cwp); 1037 } 1038 1039 /* now, go through the loop, closing each item */ 1040 work = 1; 1041 while(work) { 1042 work = 0; 1043 WSLOOP(wsets, u) { 1044 if(u->flag == 0) 1045 continue; 1046 /* dot is before c */ 1047 c = *(u->pitem); 1048 if(c < NTBASE) { 1049 u->flag = 0; 1050 /* only interesting case is where . is before nonterminal */ 1051 continue; 1052 } 1053 1054 /* compute the lookahead */ 1055 aryfil(clset.lset, tbitset, 0); 1056 1057 /* find items involving c */ 1058 WSLOOP(u, v) 1059 if(v->flag == 1 && *(pi=v->pitem) == c) { 1060 v->flag = 0; 1061 if(nolook) 1062 continue; 1063 while((ch = *++pi) > 0) { 1064 /* terminal symbol */ 1065 if(ch < NTBASE) { 1066 SETBIT(clset.lset, ch); 1067 break; 1068 } 1069 /* nonterminal symbol */ 1070 setunion(clset.lset, pfirst[ch-NTBASE]->lset); 1071 if(!pempty[ch-NTBASE]) 1072 break; 1073 } 1074 if(ch <= 0) 1075 setunion(clset.lset, v->ws.lset); 1076 } 1077 1078 /* 1079 * now loop over productions derived from c 1080 * c is now nonterminal number 1081 */ 1082 c -= NTBASE; 1083 t = pres[c+1]; 1084 for(s = pres[c]; s < t; ++s) { 1085 /* 1086 * put these items into the closure 1087 * is the item there 1088 */ 1089 WSLOOP(wsets, v) 1090 /* yes, it is there */ 1091 if(v->pitem == *s) { 1092 if(nolook) 1093 goto nexts; 1094 if(setunion(v->ws.lset, clset.lset)) 1095 v->flag = work = 1; 1096 goto nexts; 1097 } 1098 1099 /* not there; make a new entry */ 1100 if(cwp-wsets+1 >= WSETSIZE) 1101 error( "working set overflow"); 1102 cwp->pitem = *s; 1103 cwp->flag = 1; 1104 if(!nolook) { 1105 work = 1; 1106 SETLOOP(k) cwp->ws.lset[k] = clset.lset[k]; 1107 } 1108 WSBUMP(cwp); 1109 1110 nexts:; 1111 } 1112 } 1113 } 1114 1115 /* have computed closure; flags are reset; return */ 1116 if(cwp > zzcwp) 1117 zzcwp = cwp; 1118 if(cldebug && foutput != 0) { 1119 Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook); 1120 WSLOOP(wsets, u) { 1121 if(u->flag) 1122 Bprint(foutput, "flag set!\n"); 1123 u->flag = 0; 1124 Bprint(foutput, "\t%s", writem(u->pitem)); 1125 prlook(&u->ws); 1126 Bprint(foutput, "\n"); 1127 } 1128 } 1129 } 1130 1131 /* 1132 * decide if the lookahead set pointed to by p is known 1133 * return pointer to a perminent location for the set 1134 */ 1135 Lkset* 1136 flset(Lkset *p) 1137 { 1138 Lkset *q; 1139 int *u, *v, *w, j; 1140 1141 for(q = &lkst[nlset]; q-- > lkst;) { 1142 u = p->lset; 1143 v = q->lset; 1144 w = &v[tbitset]; 1145 while(v < w) 1146 if(*u++ != *v++) 1147 goto more; 1148 /* we have matched */ 1149 return q; 1150 more:; 1151 } 1152 /* add a new one */ 1153 q = &lkst[nlset++]; 1154 if(nlset >= LSETSIZE) 1155 error("too many lookahead sets"); 1156 SETLOOP(j) 1157 q->lset[j] = p->lset[j]; 1158 return q; 1159 } 1160 1161 void 1162 cleantmp(void) 1163 { 1164 ZAPFILE(actname); 1165 ZAPFILE(tempname); 1166 } 1167 1168 void 1169 intr(void) 1170 { 1171 cleantmp(); 1172 exits("interrupted"); 1173 } 1174 1175 void 1176 setup(int argc, char *argv[]) 1177 { 1178 long c, t; 1179 int i, j, fd, lev, ty, ytab, *p; 1180 int vflag, dflag, stem; 1181 char actnm[8], *stemc, *s, dirbuf[128]; 1182 Biobuf *fout; 1183 1184 ytab = 0; 1185 vflag = 0; 1186 dflag = 0; 1187 stem = 0; 1188 stemc = "y"; 1189 foutput = 0; 1190 fdefine = 0; 1191 fdebug = 0; 1192 ARGBEGIN{ 1193 case 'v': 1194 case 'V': 1195 vflag++; 1196 break; 1197 case 'D': 1198 yydebug = ARGF(); 1199 break; 1200 case 'a': 1201 yyarg = 1; 1202 break; 1203 case 'd': 1204 dflag++; 1205 break; 1206 case 'l': 1207 yyline = 0; 1208 break; 1209 case 'o': 1210 ytab++; 1211 ytabc = ARGF(); 1212 break; 1213 case 's': 1214 stem++; 1215 stemc = ARGF(); 1216 break; 1217 case 'S': 1218 parser = PARSERS; 1219 break; 1220 default: 1221 error("illegal option: %c", ARGC()); 1222 }ARGEND 1223 openup(stemc, dflag, vflag, ytab, ytabc); 1224 fout = dflag?fdefine:ftable; 1225 if(yyarg){ 1226 Bprint(ftable, "#define\tYYARG\t1\n\n"); 1227 } 1228 if((fd = mkstemp(ttempname)) >= 0){ 1229 tempname = ttempname; 1230 ftemp = Bfdopen(fd, OWRITE); 1231 } 1232 if((fd = mkstemp(tactname)) >= 0){ 1233 actname = tactname; 1234 faction = Bfdopen(fd, OWRITE); 1235 } 1236 if(ftemp == 0 || faction == 0) 1237 error("cannot open temp file"); 1238 if(argc < 1) 1239 error("no input file"); 1240 infile = argv[0]; 1241 if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){ 1242 i = strlen(infile)+1+strlen(dirbuf)+1+10; 1243 s = malloc(i); 1244 if(s != nil){ 1245 snprint(s, i, "%s/%s", dirbuf, infile); 1246 cleanname(s); 1247 infile = s; 1248 } 1249 } 1250 finput = Bopen(infile, OREAD); 1251 if(finput == 0) 1252 error("cannot open '%s'", argv[0]); 1253 cnamp = cnames; 1254 1255 defin(0, "$end"); 1256 extval = PRIVATE; /* tokens start in unicode 'private use' */ 1257 defin(0, "error"); 1258 defin(1, "$accept"); 1259 defin(0, "$unk"); 1260 mem = mem0; 1261 i = 0; 1262 1263 for(t = gettok(); t != MARK && t != ENDFILE;) 1264 switch(t) { 1265 case ';': 1266 t = gettok(); 1267 break; 1268 1269 case START: 1270 if(gettok() != IDENTIFIER) 1271 error("bad %%start construction"); 1272 start = chfind(1, tokname); 1273 t = gettok(); 1274 continue; 1275 1276 case TYPEDEF: 1277 if(gettok() != TYPENAME) 1278 error("bad syntax in %%type"); 1279 ty = numbval; 1280 for(;;) { 1281 t = gettok(); 1282 switch(t) { 1283 case IDENTIFIER: 1284 if((t=chfind(1, tokname)) < NTBASE) { 1285 j = TYPE(toklev[t]); 1286 if(j != 0 && j != ty) 1287 error("type redeclaration of token %s", 1288 tokset[t].name); 1289 else 1290 SETTYPE(toklev[t], ty); 1291 } else { 1292 j = nontrst[t-NTBASE].value; 1293 if(j != 0 && j != ty) 1294 error("type redeclaration of nonterminal %s", 1295 nontrst[t-NTBASE].name ); 1296 else 1297 nontrst[t-NTBASE].value = ty; 1298 } 1299 case ',': 1300 continue; 1301 case ';': 1302 t = gettok(); 1303 default: 1304 break; 1305 } 1306 break; 1307 } 1308 continue; 1309 1310 case UNION: 1311 /* copy the union declaration to the output */ 1312 cpyunion(); 1313 t = gettok(); 1314 continue; 1315 1316 case LEFT: 1317 case BINARY: 1318 case RIGHT: 1319 i++; 1320 1321 case TERM: 1322 /* nonzero means new prec. and assoc. */ 1323 lev = t-TERM; 1324 ty = 0; 1325 1326 /* get identifiers so defined */ 1327 t = gettok(); 1328 1329 /* there is a type defined */ 1330 if(t == TYPENAME) { 1331 ty = numbval; 1332 t = gettok(); 1333 } 1334 for(;;) { 1335 switch(t) { 1336 case ',': 1337 t = gettok(); 1338 continue; 1339 1340 case ';': 1341 break; 1342 1343 case IDENTIFIER: 1344 j = chfind(0, tokname); 1345 if(j >= NTBASE) 1346 error("%s defined earlier as nonterminal", tokname); 1347 if(lev) { 1348 if(ASSOC(toklev[j])) 1349 error("redeclaration of precedence of %s", tokname); 1350 SETASC(toklev[j], lev); 1351 SETPLEV(toklev[j], i); 1352 } 1353 if(ty) { 1354 if(TYPE(toklev[j])) 1355 error("redeclaration of type of %s", tokname); 1356 SETTYPE(toklev[j],ty); 1357 } 1358 t = gettok(); 1359 if(t == NUMBER) { 1360 tokset[j].value = numbval; 1361 if(j < ndefout && j > 3) 1362 error("please define type number of %s earlier", 1363 tokset[j].name); 1364 t = gettok(); 1365 } 1366 continue; 1367 } 1368 break; 1369 } 1370 continue; 1371 1372 case LCURLY: 1373 defout(0); 1374 cpycode(); 1375 t = gettok(); 1376 continue; 1377 1378 default: 1379 error("syntax error"); 1380 } 1381 if(t == ENDFILE) 1382 error("unexpected EOF before %%"); 1383 1384 /* t is MARK */ 1385 if(!yyarg) 1386 Bprint(ftable, "extern int yyerrflag;\n"); 1387 Bprint(ftable, "#ifndef YYMAXDEPTH\n"); 1388 Bprint(ftable, "#define YYMAXDEPTH 150\n"); 1389 Bprint(ftable, "#endif\n" ); 1390 if(!ntypes) { 1391 Bprint(ftable, "#ifndef YYSTYPE\n"); 1392 Bprint(ftable, "#define YYSTYPE int\n"); 1393 Bprint(ftable, "#endif\n"); 1394 } 1395 if(!yyarg){ 1396 Bprint(ftable, "YYSTYPE yylval;\n"); 1397 Bprint(ftable, "YYSTYPE yyval;\n"); 1398 }else{ 1399 if(dflag) 1400 Bprint(ftable, "#include \"%s.%s\"\n\n", stemc, FILED); 1401 Bprint(fout, "struct Yyarg {\n"); 1402 Bprint(fout, "\tint\tyynerrs;\n"); 1403 Bprint(fout, "\tint\tyyerrflag;\n"); 1404 Bprint(fout, "\tvoid*\targ;\n"); 1405 Bprint(fout, "\tYYSTYPE\tyyval;\n"); 1406 Bprint(fout, "\tYYSTYPE\tyylval;\n"); 1407 Bprint(fout, "};\n\n"); 1408 } 1409 prdptr[0] = mem; 1410 1411 /* added production */ 1412 *mem++ = NTBASE; 1413 1414 /* if start is 0, we will overwrite with the lhs of the first rule */ 1415 *mem++ = start; 1416 *mem++ = 1; 1417 *mem++ = 0; 1418 prdptr[1] = mem; 1419 while((t=gettok()) == LCURLY) 1420 cpycode(); 1421 if(t != IDENTCOLON) 1422 error("bad syntax on first rule"); 1423 1424 if(!start) 1425 prdptr[0][1] = chfind(1, tokname); 1426 1427 /* read rules */ 1428 while(t != MARK && t != ENDFILE) { 1429 /* process a rule */ 1430 rlines[nprod] = lineno; 1431 if(t == '|') 1432 *mem++ = *prdptr[nprod-1]; 1433 else 1434 if(t == IDENTCOLON) { 1435 *mem = chfind(1, tokname); 1436 if(*mem < NTBASE) 1437 error("token illegal on LHS of grammar rule"); 1438 mem++; 1439 } else 1440 error("illegal rule: missing semicolon or | ?"); 1441 /* read rule body */ 1442 t = gettok(); 1443 1444 more_rule: 1445 while(t == IDENTIFIER) { 1446 *mem = chfind(1, tokname); 1447 if(*mem < NTBASE) 1448 levprd[nprod] = toklev[*mem]; 1449 mem++; 1450 t = gettok(); 1451 } 1452 if(t == PREC) { 1453 if(gettok() != IDENTIFIER) 1454 error("illegal %%prec syntax"); 1455 j = chfind(2, tokname); 1456 if(j >= NTBASE) 1457 error("nonterminal %s illegal after %%prec", 1458 nontrst[j-NTBASE].name); 1459 levprd[nprod] = toklev[j]; 1460 t = gettok(); 1461 } 1462 if(t == '=') { 1463 levprd[nprod] |= ACTFLAG; 1464 Bprint(faction, "\ncase %d:", nprod); 1465 cpyact(mem-prdptr[nprod]-1); 1466 Bprint(faction, " break;"); 1467 if((t=gettok()) == IDENTIFIER) { 1468 1469 /* action within rule... */ 1470 sprint(actnm, "$$%d", nprod); 1471 1472 /* make it a nonterminal */ 1473 j = chfind(1, actnm); 1474 1475 /* 1476 * the current rule will become rule number nprod+1 1477 * move the contents down, and make room for the null 1478 */ 1479 for(p = mem; p >= prdptr[nprod]; --p) 1480 p[2] = *p; 1481 mem += 2; 1482 1483 /* enter null production for action */ 1484 p = prdptr[nprod]; 1485 *p++ = j; 1486 *p++ = -nprod; 1487 1488 /* update the production information */ 1489 levprd[nprod+1] = levprd[nprod] & ~ACTFLAG; 1490 levprd[nprod] = ACTFLAG; 1491 if(++nprod >= NPROD) 1492 error("more than %d rules", NPROD); 1493 prdptr[nprod] = p; 1494 1495 /* make the action appear in the original rule */ 1496 *mem++ = j; 1497 1498 /* get some more of the rule */ 1499 goto more_rule; 1500 } 1501 } 1502 1503 while(t == ';') 1504 t = gettok(); 1505 *mem++ = -nprod; 1506 1507 /* check that default action is reasonable */ 1508 if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) { 1509 1510 /* no explicit action, LHS has value */ 1511 int tempty; 1512 1513 tempty = prdptr[nprod][1]; 1514 if(tempty < 0) 1515 error("must return a value, since LHS has a type"); 1516 else 1517 if(tempty >= NTBASE) 1518 tempty = nontrst[tempty-NTBASE].value; 1519 else 1520 tempty = TYPE(toklev[tempty]); 1521 if(tempty != nontrst[*prdptr[nprod]-NTBASE].value) 1522 error("default action causes potential type clash"); 1523 } 1524 nprod++; 1525 if(nprod >= NPROD) 1526 error("more than %d rules", NPROD); 1527 prdptr[nprod] = mem; 1528 levprd[nprod] = 0; 1529 } 1530 1531 /* end of all rules */ 1532 defout(1); 1533 1534 finact(); 1535 if(t == MARK) { 1536 Bprint(ftable, "\n"); 1537 if(yyline) 1538 Bprint(ftable, "#line\t%d\t\"%s\"\n", lineno, infile); 1539 while((c=Bgetrune(finput)) != Beof) 1540 Bputrune(ftable, c); 1541 } 1542 Bterm(finput); 1543 } 1544 1545 /* 1546 * finish action routine 1547 */ 1548 void 1549 finact(void) 1550 { 1551 1552 Bterm(faction); 1553 Bprint(ftable, "#define YYEOFCODE %d\n", 1); 1554 Bprint(ftable, "#define YYERRCODE %d\n", 2); 1555 } 1556 1557 /* 1558 * define s to be a terminal if t=0 1559 * or a nonterminal if t=1 1560 */ 1561 int 1562 defin(int nt, char *s) 1563 { 1564 int val; 1565 Rune rune; 1566 1567 val = 0; 1568 if(nt) { 1569 nnonter++; 1570 if(nnonter >= NNONTERM) 1571 error("too many nonterminals, limit %d",NNONTERM); 1572 nontrst[nnonter].name = cstash(s); 1573 return NTBASE + nnonter; 1574 } 1575 1576 /* must be a token */ 1577 ntokens++; 1578 if(ntokens >= NTERMS) 1579 error("too many terminals, limit %d", NTERMS); 1580 tokset[ntokens].name = cstash(s); 1581 1582 /* establish value for token */ 1583 /* single character literal */ 1584 if(s[0] == ' ') { 1585 val = chartorune(&rune, &s[1]); 1586 if(s[val+1] == 0) { 1587 val = rune; 1588 goto out; 1589 } 1590 } 1591 1592 /* escape sequence */ 1593 if(s[0] == ' ' && s[1] == '\\') { 1594 if(s[3] == 0) { 1595 /* single character escape sequence */ 1596 switch(s[2]) { 1597 case 'n': val = '\n'; break; 1598 case 'r': val = '\r'; break; 1599 case 'b': val = '\b'; break; 1600 case 't': val = '\t'; break; 1601 case 'f': val = '\f'; break; 1602 case '\'': val = '\''; break; 1603 case '"': val = '"'; break; 1604 case '\\': val = '\\'; break; 1605 default: error("invalid escape"); 1606 } 1607 goto out; 1608 } 1609 1610 /* \nnn sequence */ 1611 if(s[2] >= '0' && s[2] <= '7') { 1612 if(s[3] < '0' || 1613 s[3] > '7' || 1614 s[4] < '0' || 1615 s[4] > '7' || 1616 s[5] != 0) 1617 error("illegal \\nnn construction"); 1618 val = 64*s[2] + 8*s[3] + s[4] - 73*'0'; 1619 if(val == 0) 1620 error("'\\000' is illegal"); 1621 goto out; 1622 } 1623 error("unknown escape"); 1624 } 1625 val = extval++; 1626 1627 out: 1628 tokset[ntokens].value = val; 1629 toklev[ntokens] = 0; 1630 return ntokens; 1631 } 1632 1633 /* 1634 * write out the defines (at the end of the declaration section) 1635 */ 1636 void 1637 defout(int last) 1638 { 1639 int i, c; 1640 char sar[NAMESIZE+10]; 1641 1642 for(i=ndefout; i<=ntokens; i++) { 1643 /* non-literals */ 1644 c = tokset[i].name[0]; 1645 if(c != ' ' && c != '$') { 1646 Bprint(ftable, "#define %s %d\n", 1647 tokset[i].name, tokset[i].value); 1648 if(fdefine) 1649 Bprint(fdefine, "#define\t%s\t%d\n", 1650 tokset[i].name, tokset[i].value); 1651 } 1652 } 1653 ndefout = ntokens+1; 1654 if(last && fdebug) { 1655 Bprint(fdebug, "static char* yytoknames[] =\n{\n"); 1656 TLOOP(i) { 1657 if(tokset[i].name) { 1658 chcopy(sar, tokset[i].name); 1659 Bprint(fdebug, "\t\"%s\",\n", sar); 1660 continue; 1661 } 1662 Bprint(fdebug, "\t0,\n"); 1663 } 1664 Bprint(fdebug, "};\n"); 1665 } 1666 } 1667 1668 char* 1669 cstash(char *s) 1670 { 1671 char *temp; 1672 1673 temp = cnamp; 1674 do { 1675 if(cnamp >= &cnames[cnamsz]) 1676 error("too many characters in id's and literals"); 1677 else 1678 *cnamp++ = *s; 1679 } while(*s++); 1680 return temp; 1681 } 1682 1683 long 1684 gettok(void) 1685 { 1686 long c; 1687 Rune rune; 1688 int i, base, match, reserve; 1689 static int peekline; 1690 1691 begin: 1692 reserve = 0; 1693 lineno += peekline; 1694 peekline = 0; 1695 c = Bgetrune(finput); 1696 while(c == ' ' || c == '\n' || c == '\t' || c == '\f') { 1697 if(c == '\n') 1698 lineno++; 1699 c = Bgetrune(finput); 1700 } 1701 1702 /* skip comment */ 1703 if(c == '/') { 1704 lineno += skipcom(); 1705 goto begin; 1706 } 1707 switch(c) { 1708 case Beof: 1709 return ENDFILE; 1710 1711 case '{': 1712 Bungetrune(finput); 1713 return '='; 1714 1715 case '<': 1716 /* get, and look up, a type name (union member name) */ 1717 i = 0; 1718 while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') { 1719 rune = c; 1720 c = runetochar(&tokname[i], &rune); 1721 if(i < NAMESIZE) 1722 i += c; 1723 } 1724 if(c != '>') 1725 error("unterminated < ... > clause"); 1726 tokname[i] = 0; 1727 for(i=1; i<=ntypes; i++) 1728 if(!strcmp(typeset[i], tokname)) { 1729 numbval = i; 1730 return TYPENAME; 1731 } 1732 ntypes++; 1733 numbval = ntypes; 1734 typeset[numbval] = cstash(tokname); 1735 return TYPENAME; 1736 1737 case '"': 1738 case '\'': 1739 match = c; 1740 tokname[0] = ' '; 1741 i = 1; 1742 for(;;) { 1743 c = Bgetrune(finput); 1744 if(c == '\n' || c <= 0) 1745 error("illegal or missing ' or \"" ); 1746 if(c == '\\') { 1747 tokname[i] = '\\'; 1748 if(i < NAMESIZE) 1749 i++; 1750 c = Bgetrune(finput); 1751 } else 1752 if(c == match) 1753 break; 1754 rune = c; 1755 c = runetochar(&tokname[i], &rune); 1756 if(i < NAMESIZE) 1757 i += c; 1758 } 1759 break; 1760 1761 case '%': 1762 case '\\': 1763 switch(c = Bgetrune(finput)) { 1764 case '0': return TERM; 1765 case '<': return LEFT; 1766 case '2': return BINARY; 1767 case '>': return RIGHT; 1768 case '%': 1769 case '\\': return MARK; 1770 case '=': return PREC; 1771 case '{': return LCURLY; 1772 default: reserve = 1; 1773 } 1774 1775 default: 1776 /* number */ 1777 if(isdigit(c)) { 1778 numbval = c-'0'; 1779 base = (c=='0')? 8: 10; 1780 for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput)) 1781 numbval = numbval*base + (c-'0'); 1782 Bungetrune(finput); 1783 return NUMBER; 1784 } 1785 if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$') { 1786 i = 0; 1787 while(islower(c) || isupper(c) || isdigit(c) || 1788 c == '-' || c=='_' || c=='.' || c=='$') { 1789 if(reserve && isupper(c)) 1790 c += 'a'-'A'; 1791 rune = c; 1792 c = runetochar(&tokname[i], &rune); 1793 if(i < NAMESIZE) 1794 i += c; 1795 c = Bgetrune(finput); 1796 } 1797 } else 1798 return c; 1799 Bungetrune(finput); 1800 } 1801 tokname[i] = 0; 1802 1803 /* find a reserved word */ 1804 if(reserve) { 1805 for(c=0; resrv[c].name; c++) 1806 if(strcmp(tokname, resrv[c].name) == 0) 1807 return resrv[c].value; 1808 error("invalid escape, or illegal reserved word: %s", tokname); 1809 } 1810 1811 /* look ahead to distinguish IDENTIFIER from IDENTCOLON */ 1812 c = Bgetrune(finput); 1813 while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') { 1814 if(c == '\n') 1815 peekline++; 1816 /* look for comments */ 1817 if(c == '/') 1818 peekline += skipcom(); 1819 c = Bgetrune(finput); 1820 } 1821 if(c == ':') 1822 return IDENTCOLON; 1823 Bungetrune(finput); 1824 return IDENTIFIER; 1825 } 1826 1827 /* 1828 * determine the type of a symbol 1829 */ 1830 int 1831 fdtype(int t) 1832 { 1833 int v; 1834 1835 if(t >= NTBASE) 1836 v = nontrst[t-NTBASE].value; 1837 else 1838 v = TYPE(toklev[t]); 1839 if(v <= 0) 1840 error("must specify type for %s", (t>=NTBASE)? 1841 nontrst[t-NTBASE].name: tokset[t].name); 1842 return v; 1843 } 1844 1845 int 1846 chfind(int t, char *s) 1847 { 1848 int i; 1849 1850 if(s[0] == ' ') 1851 t = 0; 1852 TLOOP(i) 1853 if(!strcmp(s, tokset[i].name)) 1854 return i; 1855 NTLOOP(i) 1856 if(!strcmp(s, nontrst[i].name)) 1857 return NTBASE+i; 1858 1859 /* cannot find name */ 1860 if(t > 1) 1861 error("%s should have been defined earlier", s); 1862 return defin(t, s); 1863 } 1864 1865 /* 1866 * copy the union declaration to the output, and the define file if present 1867 */ 1868 void 1869 cpyunion(void) 1870 { 1871 long c; 1872 int level; 1873 1874 Bprint(ftable, "\n"); 1875 if(yyline) 1876 Bprint(ftable, "#line\t%d\t\"%s\"\n", lineno, infile); 1877 Bprint(ftable, "typedef union "); 1878 if(fdefine != 0) 1879 Bprint(fdefine, "\ntypedef union "); 1880 1881 level = 0; 1882 for(;;) { 1883 if((c=Bgetrune(finput)) == Beof) 1884 error("EOF encountered while processing %%union"); 1885 Bputrune(ftable, c); 1886 if(fdefine != 0) 1887 Bputrune(fdefine, c); 1888 switch(c) { 1889 case '\n': 1890 lineno++; 1891 break; 1892 case '{': 1893 level++; 1894 break; 1895 case '}': 1896 level--; 1897 1898 /* we are finished copying */ 1899 if(level == 0) { 1900 Bprint(ftable, " YYSTYPE;\n"); 1901 if(fdefine != 0){ 1902 Bprint(fdefine, "\tYYSTYPE;\n"); 1903 if(!yyarg) 1904 Bprint(fdefine, "extern\tYYSTYPE\tyylval;\n"); 1905 } 1906 return; 1907 } 1908 } 1909 } 1910 } 1911 1912 /* 1913 * copies code between \{ and \} 1914 */ 1915 void 1916 cpycode(void) 1917 { 1918 long c; 1919 1920 c = Bgetrune(finput); 1921 if(c == '\n') { 1922 c = Bgetrune(finput); 1923 lineno++; 1924 } 1925 Bprint(ftable, "\n"); 1926 if(yyline) 1927 Bprint(ftable, "#line\t%d\t\"%s\"\n", lineno, infile); 1928 while(c != Beof) { 1929 if(c == '\\') { 1930 if((c=Bgetrune(finput)) == '}') 1931 return; 1932 Bputc(ftable, '\\'); 1933 } 1934 if(c == '%') { 1935 if((c=Bgetrune(finput)) == '}') 1936 return; 1937 Bputc(ftable, '%'); 1938 } 1939 Bputrune(ftable, c); 1940 if(c == '\n') 1941 lineno++; 1942 c = Bgetrune(finput); 1943 } 1944 error("eof before %%}"); 1945 } 1946 1947 /* 1948 * skip over comments 1949 * skipcom is called after reading a '/' 1950 */ 1951 int 1952 skipcom(void) 1953 { 1954 long c; 1955 int i; 1956 1957 /* i is the number of lines skipped */ 1958 i = 0; 1959 if(Bgetrune(finput) != '*') 1960 error("illegal comment"); 1961 c = Bgetrune(finput); 1962 while(c != Beof) { 1963 while(c == '*') 1964 if((c=Bgetrune(finput)) == '/') 1965 return i; 1966 if(c == '\n') 1967 i++; 1968 c = Bgetrune(finput); 1969 } 1970 error("EOF inside comment"); 1971 return 0; 1972 } 1973 1974 /* 1975 * copy C action to the next ; or closing } 1976 */ 1977 void 1978 cpyact(int offset) 1979 { 1980 long c; 1981 int brac, match, j, s, fnd, tok; 1982 1983 Bprint(faction, "\n"); 1984 if(yyline) 1985 Bprint(faction, "#line\t%d\t\"%s\"\n", lineno, infile); 1986 brac = 0; 1987 1988 loop: 1989 c = Bgetrune(finput); 1990 swt: 1991 switch(c) { 1992 case ';': 1993 if(brac == 0) { 1994 Bputrune(faction, c); 1995 return; 1996 } 1997 goto lcopy; 1998 1999 case '{': 2000 brac++; 2001 goto lcopy; 2002 2003 case '$': 2004 s = 1; 2005 tok = -1; 2006 c = Bgetrune(finput); 2007 2008 /* type description */ 2009 if(c == '<') { 2010 Bungetrune(finput); 2011 if(gettok() != TYPENAME) 2012 error("bad syntax on $<ident> clause"); 2013 tok = numbval; 2014 c = Bgetrune(finput); 2015 } 2016 if(c == '$') { 2017 Bprint(faction, "yyval"); 2018 2019 /* put out the proper tag... */ 2020 if(ntypes) { 2021 if(tok < 0) 2022 tok = fdtype(*prdptr[nprod]); 2023 Bprint(faction, ".%s", typeset[tok]); 2024 } 2025 goto loop; 2026 } 2027 if(c == '-') { 2028 s = -s; 2029 c = Bgetrune(finput); 2030 } 2031 if(isdigit(c)) { 2032 j = 0; 2033 while(isdigit(c)) { 2034 j = j*10 + (c-'0'); 2035 c = Bgetrune(finput); 2036 } 2037 Bungetrune(finput); 2038 j = j*s - offset; 2039 if(j > 0) 2040 error("Illegal use of $%d", j+offset); 2041 2042 dollar: 2043 Bprint(faction, "yypt[-%d].yyv", -j); 2044 2045 /* put out the proper tag */ 2046 if(ntypes) { 2047 if(j+offset <= 0 && tok < 0) 2048 error("must specify type of $%d", j+offset); 2049 if(tok < 0) 2050 tok = fdtype(prdptr[nprod][j+offset]); 2051 Bprint(faction, ".%s", typeset[tok]); 2052 } 2053 goto loop; 2054 } 2055 if(isupper(c) || islower(c) || c == '_' || c == '.') { 2056 int tok; /* tok used oustide for type info */ 2057 2058 /* look for $name */ 2059 Bungetrune(finput); 2060 if(gettok() != IDENTIFIER) 2061 error("$ must be followed by an identifier"); 2062 tok = chfind(2, tokname); 2063 if((c = Bgetrune(finput)) != '#') { 2064 Bungetrune(finput); 2065 fnd = -1; 2066 } else 2067 if(gettok() != NUMBER) { 2068 error("# must be followed by number"); 2069 fnd = -1; 2070 } else 2071 fnd = numbval; 2072 for(j=1; j<=offset; ++j) 2073 if(tok == prdptr[nprod][j]) { 2074 if(--fnd <= 0) { 2075 j -= offset; 2076 goto dollar; 2077 } 2078 } 2079 error("$name or $name#number not found"); 2080 } 2081 Bputc(faction, '$'); 2082 if(s < 0 ) 2083 Bputc(faction, '-'); 2084 goto swt; 2085 2086 case '}': 2087 brac--; 2088 if(brac) 2089 goto lcopy; 2090 Bputrune(faction, c); 2091 return; 2092 2093 case '/': 2094 /* look for comments */ 2095 Bputrune(faction, c); 2096 c = Bgetrune(finput); 2097 if(c != '*') 2098 goto swt; 2099 2100 /* it really is a comment */ 2101 Bputrune(faction, c); 2102 c = Bgetrune(finput); 2103 while(c >= 0) { 2104 while(c == '*') { 2105 Bputrune(faction, c); 2106 if((c=Bgetrune(finput)) == '/') 2107 goto lcopy; 2108 } 2109 Bputrune(faction, c); 2110 if(c == '\n') 2111 lineno++; 2112 c = Bgetrune(finput); 2113 } 2114 error("EOF inside comment"); 2115 2116 case '\'': 2117 /* character constant */ 2118 match = '\''; 2119 goto string; 2120 2121 case '"': 2122 /* character string */ 2123 match = '"'; 2124 2125 string: 2126 Bputrune(faction, c); 2127 while(c = Bgetrune(finput)) { 2128 if(c == '\\') { 2129 Bputrune(faction, c); 2130 c = Bgetrune(finput); 2131 if(c == '\n') 2132 lineno++; 2133 } else 2134 if(c == match) 2135 goto lcopy; 2136 if(c == '\n') 2137 error("newline in string or char. const."); 2138 Bputrune(faction, c); 2139 } 2140 error("EOF in string or character constant"); 2141 2142 case Beof: 2143 error("action does not terminate"); 2144 2145 case '\n': 2146 lineno++; 2147 goto lcopy; 2148 } 2149 2150 lcopy: 2151 Bputrune(faction, c); 2152 goto loop; 2153 } 2154 2155 void 2156 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc) 2157 { 2158 char buf[256]; 2159 2160 if(vflag) { 2161 sprint(buf, "%s.%s", stem, FILEU); 2162 foutput = Bopen(buf, OWRITE); 2163 if(foutput == 0) 2164 error("cannot open %s", buf); 2165 } 2166 if(yydebug) { 2167 sprint(buf, "%s.%s", stem, FILEDEBUG); 2168 if((fdebug = Bopen(buf, OWRITE)) == 0) 2169 error("can't open %s", buf); 2170 } 2171 if(dflag) { 2172 sprint(buf, "%s.%s", stem, FILED); 2173 fdefine = Bopen(buf, OWRITE); 2174 if(fdefine == 0) 2175 error("can't create %s", buf); 2176 } 2177 if(ytab == 0) 2178 sprint(buf, "%s.%s", stem, OFILE); 2179 else 2180 strcpy(buf, ytabc); 2181 ftable = Bopen(buf, OWRITE); 2182 if(ftable == 0) 2183 error("cannot open table file %s", buf); 2184 } 2185 2186 /* 2187 * print the output for the states 2188 */ 2189 void 2190 output(void) 2191 { 2192 int i, k, c; 2193 Wset *u, *v; 2194 2195 Bprint(ftable, "static\tconst\tshort yyexca[] =\n{"); 2196 if(fdebug) 2197 Bprint(fdebug, "static\tconst\tchar* yystates[] =\n{\n"); 2198 2199 /* output the stuff for state i */ 2200 SLOOP(i) { 2201 nolook = tystate[i]!=MUSTLOOKAHEAD; 2202 closure(i); 2203 2204 /* output actions */ 2205 nolook = 1; 2206 aryfil(temp1, ntokens+nnonter+1, 0); 2207 WSLOOP(wsets, u) { 2208 c = *(u->pitem); 2209 if(c > 1 && c < NTBASE && temp1[c] == 0) { 2210 WSLOOP(u, v) 2211 if(c == *(v->pitem)) 2212 putitem(v->pitem+1, (Lkset*)0); 2213 temp1[c] = state(c); 2214 } else 2215 if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0) 2216 temp1[c+ntokens] = amem[indgo[i]+c]; 2217 } 2218 if(i == 1) 2219 temp1[1] = ACCEPTCODE; 2220 2221 /* now, we have the shifts; look at the reductions */ 2222 lastred = 0; 2223 WSLOOP(wsets, u) { 2224 c = *u->pitem; 2225 2226 /* reduction */ 2227 if(c <= 0) { 2228 lastred = -c; 2229 TLOOP(k) 2230 if(BIT(u->ws.lset, k)) { 2231 if(temp1[k] == 0) 2232 temp1[k] = c; 2233 else 2234 if(temp1[k] < 0) { /* reduce/reduce conflict */ 2235 if(foutput) 2236 Bprint(foutput, 2237 "\n%d: reduce/reduce conflict" 2238 " (red'ns %d and %d ) on %s", 2239 i, -temp1[k], lastred, 2240 symnam(k)); 2241 if(-temp1[k] > lastred) 2242 temp1[k] = -lastred; 2243 zzrrconf++; 2244 } else 2245 /* potential shift/reduce conflict */ 2246 precftn( lastred, k, i ); 2247 } 2248 } 2249 } 2250 wract(i); 2251 } 2252 2253 if(fdebug) 2254 Bprint(fdebug, "};\n"); 2255 Bprint(ftable, "};\n"); 2256 Bprint(ftable, "#define YYNPROD %d\n", nprod); 2257 Bprint(ftable, "#define YYPRIVATE %d\n", PRIVATE); 2258 if(yydebug) 2259 Bprint(ftable, "#define yydebug %s\n", yydebug); 2260 } 2261 2262 /* 2263 * pack state i from temp1 into amem 2264 */ 2265 int 2266 apack(int *p, int n) 2267 { 2268 int *pp, *qq, *rr, off, *q, *r; 2269 2270 /* we don't need to worry about checking because 2271 * we will only look at entries known to be there... 2272 * eliminate leading and trailing 0's 2273 */ 2274 2275 q = p+n; 2276 for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off) 2277 ; 2278 /* no actions */ 2279 if(pp > q) 2280 return 0; 2281 p = pp; 2282 2283 /* now, find a place for the elements from p to q, inclusive */ 2284 r = &amem[ACTSIZE-1]; 2285 for(rr = amem; rr <= r; rr++, off++) { 2286 for(qq = rr, pp = p; pp <= q; pp++, qq++) 2287 if(*pp != 0) 2288 if(*pp != *qq && *qq != 0) 2289 goto nextk; 2290 2291 /* we have found an acceptable k */ 2292 if(pkdebug && foutput != 0) 2293 Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem)); 2294 for(qq = rr, pp = p; pp <= q; pp++, qq++) 2295 if(*pp) { 2296 if(qq > r) 2297 error("action table overflow"); 2298 if(qq > memp) 2299 memp = qq; 2300 *qq = *pp; 2301 } 2302 if(pkdebug && foutput != 0) 2303 for(pp = amem; pp <= memp; pp += 10) { 2304 Bprint(foutput, "\t"); 2305 for(qq = pp; qq <= pp+9; qq++) 2306 Bprint(foutput, "%d ", *qq); 2307 Bprint(foutput, "\n"); 2308 } 2309 return(off); 2310 nextk:; 2311 } 2312 error("no space in action table"); 2313 return 0; 2314 } 2315 2316 /* 2317 * output the gotos for the nontermninals 2318 */ 2319 void 2320 go2out(void) 2321 { 2322 int i, j, k, best, count, cbest, times; 2323 2324 /* mark begining of gotos */ 2325 Bprint(ftemp, "$\n"); 2326 for(i = 1; i <= nnonter; i++) { 2327 go2gen(i); 2328 2329 /* find the best one to make default */ 2330 best = -1; 2331 times = 0; 2332 2333 /* is j the most frequent */ 2334 for(j = 0; j <= nstate; j++) { 2335 if(tystate[j] == 0) 2336 continue; 2337 if(tystate[j] == best) 2338 continue; 2339 2340 /* is tystate[j] the most frequent */ 2341 count = 0; 2342 cbest = tystate[j]; 2343 for(k = j; k <= nstate; k++) 2344 if(tystate[k] == cbest) 2345 count++; 2346 if(count > times) { 2347 best = cbest; 2348 times = count; 2349 } 2350 } 2351 2352 /* best is now the default entry */ 2353 zzgobest += times-1; 2354 for(j = 0; j <= nstate; j++) 2355 if(tystate[j] != 0 && tystate[j] != best) { 2356 Bprint(ftemp, "%d,%d,", j, tystate[j]); 2357 zzgoent++; 2358 } 2359 2360 /* now, the default */ 2361 if(best == -1) 2362 best = 0; 2363 zzgoent++; 2364 Bprint(ftemp, "%d\n", best); 2365 } 2366 } 2367 2368 /* 2369 * output the gotos for nonterminal c 2370 */ 2371 void 2372 go2gen(int c) 2373 { 2374 int i, work, cc; 2375 Item *p, *q; 2376 2377 2378 /* first, find nonterminals with gotos on c */ 2379 aryfil(temp1, nnonter+1, 0); 2380 temp1[c] = 1; 2381 work = 1; 2382 while(work) { 2383 work = 0; 2384 PLOOP(0, i) 2385 2386 /* cc is a nonterminal */ 2387 if((cc=prdptr[i][1]-NTBASE) >= 0) 2388 /* cc has a goto on c */ 2389 if(temp1[cc] != 0) { 2390 2391 /* thus, the left side of production i does too */ 2392 cc = *prdptr[i]-NTBASE; 2393 if(temp1[cc] == 0) { 2394 work = 1; 2395 temp1[cc] = 1; 2396 } 2397 } 2398 } 2399 2400 /* now, we have temp1[c] = 1 if a goto on c in closure of cc */ 2401 if(g2debug && foutput != 0) { 2402 Bprint(foutput, "%s: gotos on ", nontrst[c].name); 2403 NTLOOP(i) 2404 if(temp1[i]) 2405 Bprint(foutput, "%s ", nontrst[i].name); 2406 Bprint(foutput, "\n"); 2407 } 2408 2409 /* now, go through and put gotos into tystate */ 2410 aryfil(tystate, nstate, 0); 2411 SLOOP(i) 2412 ITMLOOP(i, p, q) 2413 if((cc = *p->pitem) >= NTBASE) 2414 /* goto on c is possible */ 2415 if(temp1[cc-NTBASE]) { 2416 tystate[i] = amem[indgo[i]+c]; 2417 break; 2418 } 2419 } 2420 2421 /* 2422 * decide a shift/reduce conflict by precedence. 2423 * r is a rule number, t a token number 2424 * the conflict is in state s 2425 * temp1[t] is changed to reflect the action 2426 */ 2427 void 2428 precftn(int r, int t, int s) 2429 { 2430 int lp, lt, action; 2431 2432 lp = levprd[r]; 2433 lt = toklev[t]; 2434 if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) { 2435 2436 /* conflict */ 2437 if(foutput != 0) 2438 Bprint(foutput, 2439 "\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s", 2440 s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t)); 2441 zzsrconf++; 2442 return; 2443 } 2444 if(PLEVEL(lt) == PLEVEL(lp)) 2445 action = ASSOC(lt); 2446 else 2447 if(PLEVEL(lt) > PLEVEL(lp)) 2448 action = RASC; /* shift */ 2449 else 2450 action = LASC; /* reduce */ 2451 switch(action) { 2452 case BASC: /* error action */ 2453 temp1[t] = ERRCODE; 2454 break; 2455 case LASC: /* reduce */ 2456 temp1[t] = -r; 2457 break; 2458 } 2459 } 2460 2461 /* 2462 * output state i 2463 * temp1 has the actions, lastred the default 2464 */ 2465 void 2466 wract(int i) 2467 { 2468 int p, p0, p1, ntimes, tred, count, j, flag; 2469 2470 /* find the best choice for lastred */ 2471 lastred = 0; 2472 ntimes = 0; 2473 TLOOP(j) { 2474 if(temp1[j] >= 0) 2475 continue; 2476 if(temp1[j]+lastred == 0) 2477 continue; 2478 /* count the number of appearances of temp1[j] */ 2479 count = 0; 2480 tred = -temp1[j]; 2481 levprd[tred] |= REDFLAG; 2482 TLOOP(p) 2483 if(temp1[p]+tred == 0) 2484 count++; 2485 if(count > ntimes) { 2486 lastred = tred; 2487 ntimes = count; 2488 } 2489 } 2490 2491 /* 2492 * for error recovery, arrange that, if there is a shift on the 2493 * error recovery token, `error', that the default be the error action 2494 */ 2495 if(temp1[2] > 0) 2496 lastred = 0; 2497 2498 /* clear out entries in temp1 which equal lastred */ 2499 TLOOP(p) 2500 if(temp1[p]+lastred == 0) 2501 temp1[p] = 0; 2502 2503 wrstate(i); 2504 defact[i] = lastred; 2505 flag = 0; 2506 TLOOP(p0) 2507 if((p1=temp1[p0]) != 0) { 2508 if(p1 < 0) { 2509 p1 = -p1; 2510 goto exc; 2511 } 2512 if(p1 == ACCEPTCODE) { 2513 p1 = -1; 2514 goto exc; 2515 } 2516 if(p1 == ERRCODE) { 2517 p1 = 0; 2518 exc: 2519 if(flag++ == 0) 2520 Bprint(ftable, "-1, %d,\n", i); 2521 Bprint(ftable, "\t%d, %d,\n", p0, p1); 2522 zzexcp++; 2523 continue; 2524 } 2525 Bprint(ftemp, "%d,%d,", p0, p1); 2526 zzacent++; 2527 } 2528 if(flag) { 2529 defact[i] = -2; 2530 Bprint(ftable, "\t-2, %d,\n", lastred); 2531 } 2532 Bprint(ftemp, "\n"); 2533 } 2534 2535 /* 2536 * writes state i 2537 */ 2538 void 2539 wrstate(int i) 2540 { 2541 int j0, j1; 2542 Item *pp, *qq; 2543 Wset *u; 2544 2545 if(fdebug) { 2546 if(lastred) { 2547 Bprint(fdebug, " 0, /*%d*/\n", i); 2548 } else { 2549 Bprint(fdebug, " \""); 2550 ITMLOOP(i, pp, qq) 2551 Bprint(fdebug, "%s\\n", writem(pp->pitem)); 2552 if(tystate[i] == MUSTLOOKAHEAD) 2553 WSLOOP(wsets + (pstate[i+1] - pstate[i]), u) 2554 if(*u->pitem < 0) 2555 Bprint(fdebug, "%s\\n", writem(u->pitem)); 2556 Bprint(fdebug, "\", /*%d*/\n", i); 2557 } 2558 } 2559 if(foutput == 0) 2560 return; 2561 Bprint(foutput, "\nstate %d\n", i); 2562 ITMLOOP(i, pp, qq) 2563 Bprint(foutput, "\t%s\n", writem(pp->pitem)); 2564 if(tystate[i] == MUSTLOOKAHEAD) 2565 /* print out empty productions in closure */ 2566 WSLOOP(wsets+(pstate[i+1]-pstate[i]), u) 2567 if(*u->pitem < 0) 2568 Bprint(foutput, "\t%s\n", writem(u->pitem)); 2569 2570 /* check for state equal to another */ 2571 TLOOP(j0) 2572 if((j1=temp1[j0]) != 0) { 2573 Bprint(foutput, "\n\t%s ", symnam(j0)); 2574 /* shift, error, or accept */ 2575 if(j1 > 0) { 2576 if(j1 == ACCEPTCODE) 2577 Bprint(foutput, "accept"); 2578 else 2579 if(j1 == ERRCODE) 2580 Bprint(foutput, "error"); 2581 else 2582 Bprint(foutput, "shift %d", j1); 2583 } else 2584 Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]); 2585 } 2586 2587 /* output the final production */ 2588 if(lastred) 2589 Bprint(foutput, "\n\t. reduce %d (src line %d)\n\n", 2590 lastred, rlines[lastred]); 2591 else 2592 Bprint(foutput, "\n\t. error\n\n"); 2593 2594 /* now, output nonterminal actions */ 2595 j1 = ntokens; 2596 for(j0 = 1; j0 <= nnonter; j0++) { 2597 j1++; 2598 if(temp1[j1]) 2599 Bprint(foutput, "\t%s goto %d\n", symnam(j0+NTBASE), temp1[j1]); 2600 } 2601 } 2602 2603 void 2604 warray(char *s, int *v, int n) 2605 { 2606 int i; 2607 2608 Bprint(ftable, "static\tconst\tshort %s[] =\n{", s); 2609 for(i=0;;) { 2610 if(i%10 == 0) 2611 Bprint(ftable, "\n"); 2612 Bprint(ftable, "%4d", v[i]); 2613 i++; 2614 if(i >= n) { 2615 Bprint(ftable, "\n};\n"); 2616 break; 2617 } 2618 Bprint(ftable, ","); 2619 } 2620 } 2621 2622 /* 2623 * in order to free up the mem and amem arrays for the optimizer, 2624 * and still be able to output yyr1, etc., after the sizes of 2625 * the action array is known, we hide the nonterminals 2626 * derived by productions in levprd. 2627 */ 2628 2629 void 2630 hideprod(void) 2631 { 2632 int i, j; 2633 2634 j = 0; 2635 levprd[0] = 0; 2636 PLOOP(1, i) { 2637 if(!(levprd[i] & REDFLAG)) { 2638 j++; 2639 if(foutput != 0) 2640 Bprint(foutput, "Rule not reduced: %s\n", writem(prdptr[i])); 2641 } 2642 levprd[i] = *prdptr[i] - NTBASE; 2643 } 2644 if(j) 2645 print("%d rules never reduced\n", j); 2646 } 2647 2648 void 2649 callopt(void) 2650 { 2651 int i, *p, j, k, *q; 2652 2653 /* read the arrays from tempfile and set parameters */ 2654 finput = Bopen(tempname, OREAD); 2655 if(finput == 0) 2656 error("optimizer cannot open tempfile"); 2657 2658 pgo[0] = 0; 2659 temp1[0] = 0; 2660 nstate = 0; 2661 nnonter = 0; 2662 for(;;) { 2663 switch(gtnm()) { 2664 case '\n': 2665 nstate++; 2666 pmem--; 2667 temp1[nstate] = pmem - mem0; 2668 case ',': 2669 continue; 2670 case '$': 2671 break; 2672 default: 2673 error("bad tempfile %s", tempname); 2674 } 2675 break; 2676 } 2677 2678 pmem--; 2679 temp1[nstate] = yypgo[0] = pmem - mem0; 2680 for(;;) { 2681 switch(gtnm()) { 2682 case '\n': 2683 nnonter++; 2684 yypgo[nnonter] = pmem-mem0; 2685 case ',': 2686 continue; 2687 case -1: 2688 break; 2689 default: 2690 error("bad tempfile"); 2691 } 2692 break; 2693 } 2694 pmem--; 2695 yypgo[nnonter--] = pmem - mem0; 2696 for(i = 0; i < nstate; i++) { 2697 k = 32000; 2698 j = 0; 2699 q = mem0 + temp1[i+1]; 2700 for(p = mem0 + temp1[i]; p < q ; p += 2) { 2701 if(*p > j) 2702 j = *p; 2703 if(*p < k) 2704 k = *p; 2705 } 2706 /* nontrivial situation */ 2707 if(k <= j) { 2708 /* j is now the range */ 2709 /* j -= k; *//* call scj */ 2710 if(k > maxoff) 2711 maxoff = k; 2712 } 2713 tystate[i] = (temp1[i+1]-temp1[i]) + 2*j; 2714 if(j > maxspr) 2715 maxspr = j; 2716 } 2717 2718 /* initialize ggreed table */ 2719 for(i = 1; i <= nnonter; i++) { 2720 ggreed[i] = 1; 2721 j = 0; 2722 2723 /* minimum entry index is always 0 */ 2724 q = mem0 + yypgo[i+1] - 1; 2725 for(p = mem0+yypgo[i]; p < q ; p += 2) { 2726 ggreed[i] += 2; 2727 if(*p > j) 2728 j = *p; 2729 } 2730 ggreed[i] = ggreed[i] + 2*j; 2731 if(j > maxoff) 2732 maxoff = j; 2733 } 2734 2735 /* now, prepare to put the shift actions into the amem array */ 2736 for(i = 0; i < ACTSIZE; i++) 2737 amem[i] = 0; 2738 maxa = amem; 2739 for(i = 0; i < nstate; i++) { 2740 if(tystate[i] == 0 && adb > 1) 2741 Bprint(ftable, "State %d: null\n", i); 2742 indgo[i] = YYFLAG1; 2743 } 2744 while((i = nxti()) != NOMORE) 2745 if(i >= 0) 2746 stin(i); 2747 else 2748 gin(-i); 2749 2750 /* print amem array */ 2751 if(adb > 2 ) 2752 for(p = amem; p <= maxa; p += 10) { 2753 Bprint(ftable, "%4d ", (int)(p-amem)); 2754 for(i = 0; i < 10; ++i) 2755 Bprint(ftable, "%4d ", p[i]); 2756 Bprint(ftable, "\n"); 2757 } 2758 2759 /* write out the output appropriate to the language */ 2760 aoutput(); 2761 osummary(); 2762 ZAPFILE(tempname); 2763 } 2764 2765 void 2766 gin(int i) 2767 { 2768 int *p, *r, *s, *q1, *q2; 2769 2770 /* enter gotos on nonterminal i into array amem */ 2771 ggreed[i] = 0; 2772 2773 q2 = mem0+ yypgo[i+1] - 1; 2774 q1 = mem0 + yypgo[i]; 2775 2776 /* now, find amem place for it */ 2777 for(p = amem; p < &amem[ACTSIZE]; p++) { 2778 if(*p) 2779 continue; 2780 for(r = q1; r < q2; r += 2) { 2781 s = p + *r + 1; 2782 if(*s) 2783 goto nextgp; 2784 if(s > maxa) 2785 if((maxa = s) > &amem[ACTSIZE]) 2786 error("a array overflow"); 2787 } 2788 /* we have found amem spot */ 2789 *p = *q2; 2790 if(p > maxa) 2791 if((maxa = p) > &amem[ACTSIZE]) 2792 error("a array overflow"); 2793 for(r = q1; r < q2; r += 2) { 2794 s = p + *r + 1; 2795 *s = r[1]; 2796 } 2797 pgo[i] = p-amem; 2798 if(adb > 1) 2799 Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]); 2800 return; 2801 2802 nextgp:; 2803 } 2804 error("cannot place goto %d\n", i); 2805 } 2806 2807 void 2808 stin(int i) 2809 { 2810 int *r, *s, n, flag, j, *q1, *q2; 2811 2812 tystate[i] = 0; 2813 2814 /* enter state i into the amem array */ 2815 q2 = mem0+temp1[i+1]; 2816 q1 = mem0+temp1[i]; 2817 /* find an acceptable place */ 2818 for(n = -maxoff; n < ACTSIZE; n++) { 2819 flag = 0; 2820 for(r = q1; r < q2; r += 2) { 2821 if((s = *r + n + amem) < amem) 2822 goto nextn; 2823 if(*s == 0) 2824 flag++; 2825 else 2826 if(*s != r[1]) 2827 goto nextn; 2828 } 2829 2830 /* check that the position equals another only if the states are identical */ 2831 for(j=0; j<nstate; j++) { 2832 if(indgo[j] == n) { 2833 2834 /* we have some disagreement */ 2835 if(flag) 2836 goto nextn; 2837 if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) { 2838 2839 /* states are equal */ 2840 indgo[i] = n; 2841 if(adb > 1) 2842 Bprint(ftable, 2843 "State %d: entry at %d equals state %d\n", 2844 i, n, j); 2845 return; 2846 } 2847 2848 /* we have some disagreement */ 2849 goto nextn; 2850 } 2851 } 2852 2853 for(r = q1; r < q2; r += 2) { 2854 if((s = *r+n+amem) >= &amem[ACTSIZE]) 2855 error("out of space in optimizer a array"); 2856 if(s > maxa) 2857 maxa = s; 2858 if(*s != 0 && *s != r[1]) 2859 error("clobber of a array, pos'n %d, by %d", s-amem, r[1]); 2860 *s = r[1]; 2861 } 2862 indgo[i] = n; 2863 if(adb > 1) 2864 Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]); 2865 return; 2866 nextn:; 2867 } 2868 error("Error; failure to place state %d\n", i); 2869 } 2870 2871 /* 2872 * finds the next i 2873 */ 2874 int 2875 nxti(void) 2876 { 2877 int i, max, maxi; 2878 2879 max = 0; 2880 maxi = 0; 2881 for(i = 1; i <= nnonter; i++) 2882 if(ggreed[i] >= max) { 2883 max = ggreed[i]; 2884 maxi = -i; 2885 } 2886 for(i = 0; i < nstate; ++i) 2887 if(tystate[i] >= max) { 2888 max = tystate[i]; 2889 maxi = i; 2890 } 2891 if(nxdb) 2892 Bprint(ftable, "nxti = %d, max = %d\n", maxi, max); 2893 if(max == 0) 2894 return NOMORE; 2895 return maxi; 2896 } 2897 2898 /* 2899 * write summary 2900 */ 2901 void 2902 osummary(void) 2903 { 2904 2905 int i, *p; 2906 2907 if(foutput == 0) 2908 return; 2909 i = 0; 2910 for(p = maxa; p >= amem; p--) 2911 if(*p == 0) 2912 i++; 2913 2914 Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n", 2915 (int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE); 2916 Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i); 2917 Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff); 2918 } 2919 2920 /* 2921 * this version is for C 2922 * write out the optimized parser 2923 */ 2924 void 2925 aoutput(void) 2926 { 2927 Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1)); 2928 arout("yyact", amem, (maxa-amem)+1); 2929 arout("yypact", indgo, nstate); 2930 arout("yypgo", pgo, nnonter+1); 2931 } 2932 2933 void 2934 arout(char *s, int *v, int n) 2935 { 2936 int i; 2937 2938 Bprint(ftable, "static\tconst\tshort %s[] =\n{", s); 2939 for(i = 0; i < n;) { 2940 if(i%10 == 0) 2941 Bprint(ftable, "\n"); 2942 Bprint(ftable, "%4d", v[i]); 2943 i++; 2944 if(i == n) 2945 Bprint(ftable, "\n};\n"); 2946 else 2947 Bprint(ftable, ","); 2948 } 2949 } 2950 2951 /* 2952 * read and convert an integer from the standard input 2953 * return the terminating character 2954 * blanks, tabs, and newlines are ignored 2955 */ 2956 int 2957 gtnm(void) 2958 { 2959 int sign, val, c; 2960 2961 sign = 0; 2962 val = 0; 2963 while((c=Bgetrune(finput)) != Beof) { 2964 if(isdigit(c)) { 2965 val = val*10 + c-'0'; 2966 continue; 2967 } 2968 if(c == '-') { 2969 sign = 1; 2970 continue; 2971 } 2972 break; 2973 } 2974 if(sign) 2975 val = -val; 2976 *pmem++ = val; 2977 if(pmem >= &mem0[MEMSIZE]) 2978 error("out of space"); 2979 return c; 2980 }