9base

revived minimalist port of Plan 9 userland to Unix
git clone git://git.suckless.org/9base
Log | Files | Refs | README | LICENSE

look.c (6122B)


      1 #include <u.h>
      2 #include <libc.h>
      3 #include <bio.h>
      4 	/* Macros for Rune support of ctype.h-like functions */
      5 
      6 #undef isupper
      7 #undef islower
      8 #undef isalpha
      9 #undef isdigit
     10 #undef isalnum
     11 #undef isspace
     12 #undef tolower
     13 #define	isupper(r)	('A' <= (r) && (r) <= 'Z')
     14 #define	islower(r)	('a' <= (r) && (r) <= 'z')
     15 #define	isalpha(r)	(isupper(r) || islower(r))
     16 #define	islatin1(r)	(0xC0 <= (r) && (r) <= 0xFF)
     17 
     18 #define	isdigit(r)	('0' <= (r) && (r) <= '9')
     19 
     20 #define	isalnum(r)	(isalpha(r) || isdigit(r))
     21 
     22 #define	isspace(r)	((r) == ' ' || (r) == '\t' \
     23 			|| (0x0A <= (r) && (r) <= 0x0D))
     24 
     25 #define	tolower(r)	((r)-'A'+'a')
     26 
     27 #define	sgn(v)		((v) < 0 ? -1 : ((v) > 0 ? 1 : 0))
     28 
     29 #define	WORDSIZ	4000
     30 char	*filename = "#9/lib/words";
     31 Biobuf	*dfile;
     32 Biobuf	bout;
     33 Biobuf	bin;
     34 
     35 int	fold;
     36 int	direc;
     37 int	exact;
     38 int	iflag;
     39 int	rev = 1;	/*-1 for reverse-ordered file, not implemented*/
     40 int	(*compare)(Rune*, Rune*);
     41 Rune	tab = '\t';
     42 Rune	entry[WORDSIZ];
     43 Rune	word[WORDSIZ];
     44 Rune	key[50], orig[50];
     45 Rune	latin_fold_tab[] =
     46 {
     47 /*	Table to fold latin 1 characters to ASCII equivalents
     48 			based at Rune value 0xc0
     49 
     50 	 À    Á    Â    Ã    Ä    Å    Æ    Ç
     51 	 È    É    Ê    Ë    Ì    Í    Î    Ï
     52 	 Ð    Ñ    Ò    Ó    Ô    Õ    Ö    ×
     53 	 Ø    Ù    Ú    Û    Ü    Ý    Þ    ß
     54 	 à    á    â    ã    ä    å    æ    ç
     55 	 è    é    ê    ë    ì    í    î    ï
     56 	 ð    ñ    ò    ó    ô    õ    ö    ÷
     57 	 ø    ù    ú    û    ü    ý    þ    ÿ
     58 */
     59 	'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
     60 	'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
     61 	'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 ,
     62 	'o', 'u', 'u', 'u', 'u', 'y',  0 ,  0 ,
     63 	'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
     64 	'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
     65 	'd', 'n', 'o', 'o', 'o', 'o', 'o',  0 ,
     66 	'o', 'u', 'u', 'u', 'u', 'y',  0 , 'y',
     67 };
     68 
     69 int	locate(void);
     70 int	acomp(Rune*, Rune*);
     71 int	getword(Biobuf*, Rune *rp, int n);
     72 void	torune(char*, Rune*);
     73 void	rcanon(Rune*, Rune*);
     74 int	ncomp(Rune*, Rune*);
     75 
     76 void
     77 main(int argc, char *argv[])
     78 {
     79 	int n;
     80 
     81 	filename = unsharp(filename);
     82 
     83 	Binit(&bin, 0, OREAD);
     84 	Binit(&bout, 1, OWRITE);
     85 	compare = acomp;
     86 	ARGBEGIN{
     87 	case 'd':
     88 		direc++;
     89 		break;
     90 	case 'f':
     91 		fold++;
     92 		break;
     93 	case 'i': 
     94 		iflag++;
     95 		break;
     96 	case 'n':
     97 		compare = ncomp;
     98 		break;
     99 	case 't':
    100 		chartorune(&tab,ARGF());
    101 		break;
    102 	case 'x':
    103 		exact++;
    104 		break;
    105 	default:
    106 		fprint(2, "%s: bad option %c\n", argv0, ARGC());
    107 		fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0);
    108 		exits("usage");
    109 	} ARGEND
    110 	if(!iflag){
    111 		if(argc >= 1) {
    112 			torune(argv[0], orig);
    113 			argv++;
    114 			argc--;
    115 		} else
    116 			iflag++;
    117 	}
    118 	if(argc < 1) {
    119 		direc++;
    120 		fold++;
    121 	} else 
    122 		filename = argv[0];
    123 	if (!iflag)
    124 		rcanon(orig, key);
    125 	dfile = Bopen(filename, OREAD);
    126 	if(dfile == 0) {
    127 		fprint(2, "look: can't open %s\n", filename);
    128 		exits("no dictionary");
    129 	}
    130 	if(!iflag)
    131 		if(!locate())
    132 			exits("not found");
    133 	do {
    134 		if(iflag) {
    135 			Bflush(&bout);
    136 			if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0])))
    137 				exits(0);
    138 			rcanon(orig, key);
    139 			if(!locate())
    140 				continue;
    141 		}
    142 		if (!exact || !acomp(word, key))
    143 			Bprint(&bout, "%S\n", entry);
    144 		while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
    145 			rcanon(entry, word);
    146 			n = compare(key, word);
    147 			switch(n) {
    148 			case -1:
    149 				if(exact)
    150 					break;
    151 			case 0:
    152 				if (!exact || !acomp(word, orig))
    153 					Bprint(&bout, "%S\n", entry);
    154 				continue;
    155 			}
    156 			break;
    157 		}
    158 	} while(iflag);
    159 	exits(0);
    160 }
    161 
    162 int
    163 locate(void)
    164 {
    165 	vlong top, bot, mid;
    166 	int c;
    167 	int n;
    168 
    169 	bot = 0;
    170 	top = Bseek(dfile, 0L, 2);
    171 	for(;;) {
    172 		mid = (top+bot) / 2;
    173 		Bseek(dfile, mid, 0);
    174 		do
    175 			c = Bgetrune(dfile);
    176 		while(c>=0 && c!='\n');
    177 		mid = Boffset(dfile);
    178 		if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0])))
    179 			break;
    180 		rcanon(entry, word);
    181 		n = compare(key, word);
    182 		switch(n) {
    183 		case -2:
    184 		case -1:
    185 		case 0:
    186 			if(top <= mid)
    187 				break;
    188 			top = mid;
    189 			continue;
    190 		case 1:
    191 		case 2:
    192 			bot = mid;
    193 			continue;
    194 		}
    195 		break;
    196 	}
    197 	Bseek(dfile, bot, 0);
    198 	while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
    199 		rcanon(entry, word);
    200 		n = compare(key, word);
    201 		switch(n) {
    202 		case -2:
    203 			return 0;
    204 		case -1:
    205 			if(exact)
    206 				return 0;
    207 		case 0:
    208 			return 1;
    209 		case 1:
    210 		case 2:
    211 			continue;
    212 		}
    213 	}
    214 	return 0;
    215 }
    216 
    217 /*
    218  *	acomp(s, t) returns:
    219  *		-2 if s strictly precedes t
    220  *		-1 if s is a prefix of t
    221  *		0 if s is the same as t
    222  *		1 if t is a prefix of s
    223  *		2 if t strictly precedes s
    224  */
    225 
    226 int
    227 acomp(Rune *s, Rune *t)
    228 {
    229 	int cs, ct;
    230 
    231 	for(;;) {
    232 		cs = *s;
    233 		ct = *t;
    234 		if(cs != ct)
    235 			break;
    236 		if(cs == 0)
    237 			return 0;
    238 		s++;
    239 		t++;
    240 	}
    241 	if(cs == 0)
    242 		return -1;
    243 	if(ct == 0)
    244 		return 1;
    245 	if(cs < ct)
    246 		return -2;
    247 	return 2;
    248 }
    249 
    250 void
    251 torune(char *old, Rune *new)
    252 {
    253 	do old += chartorune(new, old);
    254 	while(*new++);
    255 }
    256 
    257 void
    258 rcanon(Rune *old, Rune *new)
    259 {
    260 	Rune r;
    261 
    262 	while((r = *old++) && r != tab) {
    263 		if (islatin1(r) && latin_fold_tab[r-0xc0])
    264 				r = latin_fold_tab[r-0xc0];
    265 		if(direc)
    266 			if(!(isalnum(r) || r == ' ' || r == '\t'))
    267 				continue;
    268 		if(fold)
    269 			if(isupper(r))
    270 				r = tolower(r);
    271 		*new++ = r;
    272 	}
    273 	*new = 0;
    274 }
    275 
    276 int
    277 ncomp(Rune *s, Rune *t)
    278 {
    279 	Rune *is, *it, *js, *jt;
    280 	int a, b;
    281 	int ssgn, tsgn;
    282 
    283 	while(isspace(*s))
    284 		s++;
    285 	while(isspace(*t))
    286 		t++;
    287 	ssgn = tsgn = -2*rev;
    288 	if(*s == '-') {
    289 		s++;
    290 		ssgn = -ssgn;
    291 	}
    292 	if(*t == '-') {
    293 		t++;
    294 		tsgn = -tsgn;
    295 	}
    296 	for(is = s; isdigit(*is); is++)
    297 		;
    298 	for(it = t; isdigit(*it); it++)
    299 		;
    300 	js = is;
    301 	jt = it;
    302 	a = 0;
    303 	if(ssgn == tsgn)
    304 		while(it>t && is>s)
    305 			if(b = *--it - *--is)
    306 				a = b;
    307 	while(is > s)
    308 		if(*--is != '0')
    309 			return -ssgn;
    310 	while(it > t)
    311 		if(*--it != '0')
    312 			return tsgn;
    313 	if(a)
    314 		return sgn(a)*ssgn;
    315 	if(*(s=js) == '.')
    316 		s++;
    317 	if(*(t=jt) == '.')
    318 		t++;
    319 	if(ssgn == tsgn)
    320 		while(isdigit(*s) && isdigit(*t))
    321 			if(a = *t++ - *s++)
    322 				return sgn(a)*ssgn;
    323 	while(isdigit(*s))
    324 		if(*s++ != '0')
    325 			return -ssgn;
    326 	while(isdigit(*t))
    327 		if(*t++ != '0')
    328 			return tsgn;
    329 	return 0;
    330 }
    331 
    332 int
    333 getword(Biobuf *f, Rune *rp, int n)
    334 {
    335 	long c;
    336 
    337 	while(n-- > 0) {
    338 		c = Bgetrune(f);
    339 		if(c < 0)
    340 			return 0;
    341 		if(c == '\n') {
    342 			*rp = '\0';
    343 			return 1;
    344 		}
    345 		*rp++ = c;
    346 	}
    347 	fprint(2, "Look: word too long.  Bailing out.\n");
    348 	return 0;
    349 }