look.c (6122B)
1 #include <u.h> 2 #include <libc.h> 3 #include <bio.h> 4 /* Macros for Rune support of ctype.h-like functions */ 5 6 #undef isupper 7 #undef islower 8 #undef isalpha 9 #undef isdigit 10 #undef isalnum 11 #undef isspace 12 #undef tolower 13 #define isupper(r) ('A' <= (r) && (r) <= 'Z') 14 #define islower(r) ('a' <= (r) && (r) <= 'z') 15 #define isalpha(r) (isupper(r) || islower(r)) 16 #define islatin1(r) (0xC0 <= (r) && (r) <= 0xFF) 17 18 #define isdigit(r) ('0' <= (r) && (r) <= '9') 19 20 #define isalnum(r) (isalpha(r) || isdigit(r)) 21 22 #define isspace(r) ((r) == ' ' || (r) == '\t' \ 23 || (0x0A <= (r) && (r) <= 0x0D)) 24 25 #define tolower(r) ((r)-'A'+'a') 26 27 #define sgn(v) ((v) < 0 ? -1 : ((v) > 0 ? 1 : 0)) 28 29 #define WORDSIZ 4000 30 char *filename = "#9/lib/words"; 31 Biobuf *dfile; 32 Biobuf bout; 33 Biobuf bin; 34 35 int fold; 36 int direc; 37 int exact; 38 int iflag; 39 int rev = 1; /*-1 for reverse-ordered file, not implemented*/ 40 int (*compare)(Rune*, Rune*); 41 Rune tab = '\t'; 42 Rune entry[WORDSIZ]; 43 Rune word[WORDSIZ]; 44 Rune key[50], orig[50]; 45 Rune latin_fold_tab[] = 46 { 47 /* Table to fold latin 1 characters to ASCII equivalents 48 based at Rune value 0xc0 49 50 À Á Â Ã Ä Å Æ Ç 51 È É Ê Ë Ì Í Î Ï 52 Ð Ñ Ò Ó Ô Õ Ö × 53 Ø Ù Ú Û Ü Ý Þ ß 54 à á â ã ä å æ ç 55 è é ê ë ì í î ï 56 ð ñ ò ó ô õ ö ÷ 57 ø ù ú û ü ý þ ÿ 58 */ 59 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 60 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', 61 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 , 62 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 , 63 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 64 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', 65 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 , 66 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y', 67 }; 68 69 int locate(void); 70 int acomp(Rune*, Rune*); 71 int getword(Biobuf*, Rune *rp, int n); 72 void torune(char*, Rune*); 73 void rcanon(Rune*, Rune*); 74 int ncomp(Rune*, Rune*); 75 76 void 77 main(int argc, char *argv[]) 78 { 79 int n; 80 81 filename = unsharp(filename); 82 83 Binit(&bin, 0, OREAD); 84 Binit(&bout, 1, OWRITE); 85 compare = acomp; 86 ARGBEGIN{ 87 case 'd': 88 direc++; 89 break; 90 case 'f': 91 fold++; 92 break; 93 case 'i': 94 iflag++; 95 break; 96 case 'n': 97 compare = ncomp; 98 break; 99 case 't': 100 chartorune(&tab,ARGF()); 101 break; 102 case 'x': 103 exact++; 104 break; 105 default: 106 fprint(2, "%s: bad option %c\n", argv0, ARGC()); 107 fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0); 108 exits("usage"); 109 } ARGEND 110 if(!iflag){ 111 if(argc >= 1) { 112 torune(argv[0], orig); 113 argv++; 114 argc--; 115 } else 116 iflag++; 117 } 118 if(argc < 1) { 119 direc++; 120 fold++; 121 } else 122 filename = argv[0]; 123 if (!iflag) 124 rcanon(orig, key); 125 dfile = Bopen(filename, OREAD); 126 if(dfile == 0) { 127 fprint(2, "look: can't open %s\n", filename); 128 exits("no dictionary"); 129 } 130 if(!iflag) 131 if(!locate()) 132 exits("not found"); 133 do { 134 if(iflag) { 135 Bflush(&bout); 136 if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0]))) 137 exits(0); 138 rcanon(orig, key); 139 if(!locate()) 140 continue; 141 } 142 if (!exact || !acomp(word, key)) 143 Bprint(&bout, "%S\n", entry); 144 while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) { 145 rcanon(entry, word); 146 n = compare(key, word); 147 switch(n) { 148 case -1: 149 if(exact) 150 break; 151 case 0: 152 if (!exact || !acomp(word, orig)) 153 Bprint(&bout, "%S\n", entry); 154 continue; 155 } 156 break; 157 } 158 } while(iflag); 159 exits(0); 160 } 161 162 int 163 locate(void) 164 { 165 vlong top, bot, mid; 166 int c; 167 int n; 168 169 bot = 0; 170 top = Bseek(dfile, 0L, 2); 171 for(;;) { 172 mid = (top+bot) / 2; 173 Bseek(dfile, mid, 0); 174 do 175 c = Bgetrune(dfile); 176 while(c>=0 && c!='\n'); 177 mid = Boffset(dfile); 178 if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) 179 break; 180 rcanon(entry, word); 181 n = compare(key, word); 182 switch(n) { 183 case -2: 184 case -1: 185 case 0: 186 if(top <= mid) 187 break; 188 top = mid; 189 continue; 190 case 1: 191 case 2: 192 bot = mid; 193 continue; 194 } 195 break; 196 } 197 Bseek(dfile, bot, 0); 198 while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) { 199 rcanon(entry, word); 200 n = compare(key, word); 201 switch(n) { 202 case -2: 203 return 0; 204 case -1: 205 if(exact) 206 return 0; 207 case 0: 208 return 1; 209 case 1: 210 case 2: 211 continue; 212 } 213 } 214 return 0; 215 } 216 217 /* 218 * acomp(s, t) returns: 219 * -2 if s strictly precedes t 220 * -1 if s is a prefix of t 221 * 0 if s is the same as t 222 * 1 if t is a prefix of s 223 * 2 if t strictly precedes s 224 */ 225 226 int 227 acomp(Rune *s, Rune *t) 228 { 229 int cs, ct; 230 231 for(;;) { 232 cs = *s; 233 ct = *t; 234 if(cs != ct) 235 break; 236 if(cs == 0) 237 return 0; 238 s++; 239 t++; 240 } 241 if(cs == 0) 242 return -1; 243 if(ct == 0) 244 return 1; 245 if(cs < ct) 246 return -2; 247 return 2; 248 } 249 250 void 251 torune(char *old, Rune *new) 252 { 253 do old += chartorune(new, old); 254 while(*new++); 255 } 256 257 void 258 rcanon(Rune *old, Rune *new) 259 { 260 Rune r; 261 262 while((r = *old++) && r != tab) { 263 if (islatin1(r) && latin_fold_tab[r-0xc0]) 264 r = latin_fold_tab[r-0xc0]; 265 if(direc) 266 if(!(isalnum(r) || r == ' ' || r == '\t')) 267 continue; 268 if(fold) 269 if(isupper(r)) 270 r = tolower(r); 271 *new++ = r; 272 } 273 *new = 0; 274 } 275 276 int 277 ncomp(Rune *s, Rune *t) 278 { 279 Rune *is, *it, *js, *jt; 280 int a, b; 281 int ssgn, tsgn; 282 283 while(isspace(*s)) 284 s++; 285 while(isspace(*t)) 286 t++; 287 ssgn = tsgn = -2*rev; 288 if(*s == '-') { 289 s++; 290 ssgn = -ssgn; 291 } 292 if(*t == '-') { 293 t++; 294 tsgn = -tsgn; 295 } 296 for(is = s; isdigit(*is); is++) 297 ; 298 for(it = t; isdigit(*it); it++) 299 ; 300 js = is; 301 jt = it; 302 a = 0; 303 if(ssgn == tsgn) 304 while(it>t && is>s) 305 if(b = *--it - *--is) 306 a = b; 307 while(is > s) 308 if(*--is != '0') 309 return -ssgn; 310 while(it > t) 311 if(*--it != '0') 312 return tsgn; 313 if(a) 314 return sgn(a)*ssgn; 315 if(*(s=js) == '.') 316 s++; 317 if(*(t=jt) == '.') 318 t++; 319 if(ssgn == tsgn) 320 while(isdigit(*s) && isdigit(*t)) 321 if(a = *t++ - *s++) 322 return sgn(a)*ssgn; 323 while(isdigit(*s)) 324 if(*s++ != '0') 325 return -ssgn; 326 while(isdigit(*t)) 327 if(*t++ != '0') 328 return tsgn; 329 return 0; 330 } 331 332 int 333 getword(Biobuf *f, Rune *rp, int n) 334 { 335 long c; 336 337 while(n-- > 0) { 338 c = Bgetrune(f); 339 if(c < 0) 340 return 0; 341 if(c == '\n') { 342 *rp = '\0'; 343 return 1; 344 } 345 *rp++ = c; 346 } 347 fprint(2, "Look: word too long. Bailing out.\n"); 348 return 0; 349 }