comp.c (4165B)
1 #include "grep.h" 2 3 /* 4 * incremental compiler. 5 * add the branch c to the 6 * state s. 7 */ 8 void 9 increment(State *s, int c) 10 { 11 int i; 12 State *t, **tt; 13 Re *re1, *re2; 14 15 nfollow = 0; 16 gen++; 17 matched = 0; 18 for(i=0; i<s->count; i++) 19 fol1(s->re[i], c); 20 qsort(follow, nfollow, sizeof(*follow), fcmp); 21 for(tt=&state0; t = *tt;) { 22 if(t->count > nfollow) { 23 tt = &t->linkleft; 24 goto cont; 25 } 26 if(t->count < nfollow) { 27 tt = &t->linkright; 28 goto cont; 29 } 30 for(i=0; i<nfollow; i++) { 31 re1 = t->re[i]; 32 re2 = follow[i]; 33 if(re1 > re2) { 34 tt = &t->linkleft; 35 goto cont; 36 } 37 if(re1 < re2) { 38 tt = &t->linkright; 39 goto cont; 40 } 41 } 42 if(!!matched && !t->match) { 43 tt = &t->linkleft; 44 goto cont; 45 } 46 if(!matched && !!t->match) { 47 tt = &t->linkright; 48 goto cont; 49 } 50 s->next[c] = t; 51 return; 52 cont:; 53 } 54 55 t = sal(nfollow); 56 *tt = t; 57 for(i=0; i<nfollow; i++) { 58 re1 = follow[i]; 59 t->re[i] = re1; 60 } 61 s->next[c] = t; 62 t->match = matched; 63 } 64 65 int 66 fcmp(const void *va, const void *vb) 67 { 68 Re **aa, **bb; 69 Re *a, *b; 70 71 aa = (Re**)va; 72 bb = (Re**)vb; 73 a = *aa; 74 b = *bb; 75 if(a > b) 76 return 1; 77 if(a < b) 78 return -1; 79 return 0; 80 } 81 82 void 83 fol1(Re *r, int c) 84 { 85 Re *r1; 86 87 loop: 88 if(r->gen == gen) 89 return; 90 if(nfollow >= maxfollow) 91 error("nfollow"); 92 r->gen = gen; 93 switch(r->type) { 94 default: 95 error("fol1"); 96 97 case Tcase: 98 if(c >= 0 && c < 256) 99 if(r1 = r->u.cases[c]) 100 follow[nfollow++] = r1; 101 if(r = r->next) 102 goto loop; 103 break; 104 105 case Talt: 106 case Tor: 107 fol1(r->u.alt, c); 108 r = r->next; 109 goto loop; 110 111 case Tbegin: 112 if(c == '\n' || c == Cbegin) 113 follow[nfollow++] = r->next; 114 break; 115 116 case Tend: 117 if(c == '\n') 118 matched = 1; 119 break; 120 121 case Tclass: 122 if(c >= r->u.x.lo && c <= r->u.x.hi) 123 follow[nfollow++] = r->next; 124 break; 125 } 126 } 127 128 Rune tab1[] = 129 { 130 0x007f, 131 0x07ff 132 }; 133 Rune tab2[] = 134 { 135 0x003f, 136 0x0fff 137 }; 138 139 Re2 140 rclass(Rune p0, Rune p1) 141 { 142 char xc0[6], xc1[6]; 143 int i, n, m; 144 Re2 x; 145 146 if(p0 > p1) 147 return re2char(0xff, 0xff); /* no match */ 148 149 /* 150 * bust range into same length 151 * character sequences 152 */ 153 for(i=0; i<nelem(tab1); i++) { 154 m = tab1[i]; 155 if(p0 <= m && p1 > m) 156 return re2or(rclass(p0, m), rclass(m+1, p1)); 157 } 158 159 /* 160 * bust range into part of a single page 161 * or into full pages 162 */ 163 for(i=0; i<nelem(tab2); i++) { 164 m = tab2[i]; 165 if((p0 & ~m) != (p1 & ~m)) { 166 if((p0 & m) != 0) 167 return re2or(rclass(p0, p0|m), rclass((p0|m)+1, p1)); 168 if((p1 & m) != m) 169 return re2or(rclass(p0, (p1&~m)-1), rclass(p1&~m, p1)); 170 } 171 } 172 173 n = runetochar(xc0, &p0); 174 i = runetochar(xc1, &p1); 175 if(i != n) 176 error("length"); 177 178 x = re2char(xc0[0], xc1[0]); 179 for(i=1; i<n; i++) 180 x = re2cat(x, re2char(xc0[i], xc1[i])); 181 return x; 182 } 183 184 int 185 pcmp(const void *va, const void *vb) 186 { 187 int n; 188 Rune *a, *b; 189 190 a = (Rune*)va; 191 b = (Rune*)vb; 192 193 n = a[0] - b[0]; 194 if(n) 195 return n; 196 return a[1] - b[1]; 197 } 198 199 /* 200 * convert character chass into 201 * run-pair ranges of matches. 202 * this is 10646/utf specific and 203 * needs to be changed for some 204 * other input character set. 205 * this is the key to a fast 206 * regular search of characters 207 * by looking at sequential bytes. 208 */ 209 Re2 210 re2class(char *s) 211 { 212 Rune pairs[200], *p, *q, ov; 213 int nc; 214 Re2 x; 215 216 nc = 0; 217 if(*s == '^') { 218 nc = 1; 219 s++; 220 } 221 222 p = pairs; 223 s += chartorune(p, s); 224 for(;;) { 225 if(*p == '\\') 226 s += chartorune(p, s); 227 if(*p == 0) 228 break; 229 p[1] = *p; 230 p += 2; 231 s += chartorune(p, s); 232 if(*p != '-') 233 continue; 234 s += chartorune(p, s); 235 if(*p == '\\') 236 s += chartorune(p, s); 237 if(*p == 0) 238 break; 239 p[-1] = *p; 240 s += chartorune(p, s); 241 } 242 *p = 0; 243 qsort(pairs, (p-pairs)/2, 2*sizeof(*pairs), pcmp); 244 245 q = pairs; 246 for(p=pairs+2; *p; p+=2) { 247 if(p[0] > p[1]) 248 continue; 249 if(p[0] > q[1] || p[1] < q[0]) { 250 q[2] = p[0]; 251 q[3] = p[1]; 252 q += 2; 253 continue; 254 } 255 if(p[0] < q[0]) 256 q[0] = p[0]; 257 if(p[1] > q[1]) 258 q[1] = p[1]; 259 } 260 q[2] = 0; 261 262 p = pairs; 263 if(nc) { 264 x = rclass(0, p[0]-1); 265 ov = p[1]+1; 266 for(p+=2; *p; p+=2) { 267 x = re2or(x, rclass(ov, p[0]-1)); 268 ov = p[1]+1; 269 } 270 x = re2or(x, rclass(ov, 0xffff)); 271 } else { 272 x = rclass(p[0], p[1]); 273 for(p+=2; *p; p+=2) 274 x = re2or(x, rclass(p[0], p[1])); 275 } 276 return x; 277 }