grep.y (2815B)
%{
#include	"grep.h"
%}

/*
 * Grammar and lexer for grep patterns.
 * The parser builds a pair of (beg, end) node pointers (Re2) for each
 * sub-expression and leaves the finished program in the global `topre'.
 */

%union
{
	int	val;	/* character value carried by LCHAR */
	char*	str;	/* class text carried by LCLASS */
	Re2	re;	/* partially built expression */
}

%type	<re>	expr prog
%type	<re>	expr0 expr1 expr2 expr3 expr4
%token	<str>	LCLASS
%token	<val>	LCHAR
%token		LLPAREN LRPAREN LALT LSTAR LPLUS LQUES
%token		LBEGIN LEND LDOT LBAD LNEWLINE
%%

prog:
	expr newlines
	{
		/*
		 * Wrap the user's expression as
		 *	<any bytes>* expr <bytes other than '\n'>* Tend
		 * (assuming re2char(lo, hi) denotes the byte range lo..hi)
		 * and publish the result in topre.
		 */
		$$.beg = ral(Tend);
		$$.end = $$.beg;
		$$ = re2cat(re2star(re2or(re2char(0x00, '\n'-1), re2char('\n'+1, 0xff))), $$);
		$$ = re2cat($1, $$);
		$$ = re2cat(re2star(re2char(0x00, 0xff)), $$);
		topre = $$;
	}

/* patterns separated by newlines are alternatives */
expr:
	expr0
|	expr newlines expr0
	{
		$$ = re2or($1, $3);
	}

/*
 * A leading LSTAR switches the lexer into literal mode before the rest
 * of the pattern is read: yylex then returns every character up to the
 * next newline or NUL as a plain LCHAR.
 */
expr0:
	expr1
|	LSTAR { literal = 1; } expr1
	{
		/* $2 is the mid-rule action, so the expression is $3 */
		$$ = $3;
	}

/* alternation */
expr1:
	expr2
|	expr1 LALT expr2
	{
		$$ = re2or($1, $3);
	}

/* concatenation */
expr2:
	expr3
|	expr2 expr3
	{
		$$ = re2cat($1, $2);
	}

/* postfix operators: * + ? */
expr3:
	expr4
|	expr3 LSTAR
	{
		$$ = re2star($1);
	}
|	expr3 LPLUS
	{
		/*
		 * One or more: enter at the operand, then reach a new Talt
		 * node whose alternative branch loops back to the operand.
		 * The Talt node becomes the end of the expression.
		 */
		$$.beg = ral(Talt);
		patchnext($1.end, $$.beg);
		$$.beg->u.alt = $1.beg;
		$$.end = $$.beg;
		$$.beg = $1.beg;
	}
|	expr3 LQUES
	{
		/*
		 * Zero or one: enter at a new Talt node whose alternative
		 * branch is the operand; the Talt node is appended to the
		 * operand's end list (presumably so both exits are patched
		 * to the same successor -- see appendnext).
		 */
		$$.beg = ral(Talt);
		$$.beg->u.alt = $1.beg;
		$$.end = $1.end;
		appendnext($$.end, $$.beg);
	}

/* primaries */
expr4:
	LCHAR
	{
		/* a single character is a class with lo == hi */
		$$.beg = ral(Tclass);
		$$.beg->u.x.lo = $1;
		$$.beg->u.x.hi = $1;
		$$.end = $$.beg;
	}
|	LBEGIN
	{
		$$.beg = ral(Tbegin);
		$$.end = $$.beg;
	}
|	LEND
	{
		$$.beg = ral(Tend);
		$$.end = $$.beg;
	}
|	LDOT
	{
		/* '.' is handled as the class [^\n] */
		$$ = re2class("^\n");
	}
|	LCLASS
	{
		$$ = re2class($1);
	}
|	LLPAREN expr1 LRPAREN
	{
		$$ = $2;
	}

newlines:
	LNEWLINE
|	newlines LNEWLINE
%%

/*
 * Report a parse error on file descriptor 2 and terminate with exit
 * status "syntax".  When a pattern file name is known the message is
 * prefixed with it and the current line number.  Arguments after `e'
 * are ignored.
 */
void
yyerror(char *e, ...)
{
	if(filename)
		fprint(2, "grep: %s:%ld: %s\n", filename, lineno, e);
	else
		fprint(2, "grep: %s\n", e);
	exits("syntax");
}

/*
 * Lexer: read the next character(s) of the pattern with getrec() and
 * return the corresponding token.  LCHAR stores the character in
 * yylval.val; LCLASS stores the class text (held in u.string) in
 * yylval.str.
 *
 * End of input (getrec() returning 0) is reported as LNEWLINE, with -1
 * parked in peekc so that the following call returns -1 without
 * reading any further.
 */
int
yylex(void)
{
	char *q, *eq;
	int c, s;

	/* a token queued by a previous call takes precedence */
	if(peekc) {
		s = peekc;
		peekc = 0;
		return s;
	}
	c = getrec();
	if(literal) {
		/* literal mode: nothing is special until newline or NUL */
		if(c != 0 && c != '\n') {
			yylval.val = c;
			return LCHAR;
		}
		literal = 0;
	}
	switch(c) {
	default:
		yylval.val = c;
		s = LCHAR;
		break;
	case '\\':
		c = getrec();
		if(c == 0) {
			/*
			 * Backslash at end of input: there is nothing to
			 * escape.  Report it as a bad token, as getclass
			 * does for an unterminated class, and make sure
			 * getrec() is not called again after end of input.
			 */
			peekc = -1;
			s = LBAD;
			break;
		}
		yylval.val = c;
		s = LCHAR;
		if(c == '\n')
			s = LNEWLINE;
		break;
	case '[':
		goto getclass;
	case '(':
		s = LLPAREN;
		break;
	case ')':
		s = LRPAREN;
		break;
	case '|':
		s = LALT;
		break;
	case '*':
		s = LSTAR;
		break;
	case '+':
		s = LPLUS;
		break;
	case '?':
		s = LQUES;
		break;
	case '^':
		s = LBEGIN;
		break;
	case '$':
		s = LEND;
		break;
	case '.':
		s = LDOT;
		break;
	case 0:
		/* end of input: finish the current line, then return -1 */
		peekc = -1;
		/* fall through */
	case '\n':
		s = LNEWLINE;
		break;
	}
	return s;

getclass:
	/*
	 * Copy the text of a bracket expression, without the enclosing
	 * brackets, into u.string.  Each pass of the loop stores at most
	 * two bytes and a NUL is stored afterwards, so stopping 5 bytes
	 * short of the end keeps every store inside the buffer.
	 */
	q = u.string;
	eq = q + nelem(u.string) - 5;
	c = getrec();
	if(c == '^') {
		/*
		 * A negated class is emitted as "^\n-\n..." so that the
		 * newline is part of the negated set.
		 */
		q[0] = '^';
		q[1] = '\n';
		q[2] = '-';
		q[3] = '\n';
		q += 4;
		c = getrec();
	}
	for(;;) {
		if(q >= eq)
			error("class too long");
		if(c == ']' || c == 0)
			break;
		if(c == '\\') {
			/* keep the backslash together with the escaped char */
			*q++ = c;
			c = getrec();
			if(c == 0)
				break;
		}
		*q++ = c;
		c = getrec();
	}
	*q = 0;
	/* input ended before the closing ']' */
	if(c == 0)
		return LBAD;
	yylval.str = u.string;
	return LCLASS;
}