9base

revived minimalist port of Plan 9 userland to Unix
git clone git://git.suckless.org/9base
Log | Files | Refs | README | LICENSE

sort.c (29541B)


      1 #include	<u.h>
      2 #include	<libc.h>
      3 #include	<bio.h>
      4 
      5 /*
      6 bugs:
      7 	00/ff for end of file can conflict with 00/ff characters
      8 */
      9 
     10 enum
     11 {
     12 	Nline	= 500000,		/* default max number of lines saved in memory */
     13 	Nmerge	= 10,			/* max number of temporary files merged */
     14 	Nfield	= 20,			/* max number of argument fields */
     15 
     16 	Bflag	= 1<<0,			/* flags per field */
     17 	B1flag	= 1<<1,
     18 
     19 	Dflag	= 1<<2,
     20 	Fflag	= 1<<3,
     21 	Gflag	= 1<<4,
     22 	Iflag	= 1<<5,
     23 	Mflag	= 1<<6,
     24 	Nflag	= 1<<7,
     25 	Rflag	= 1<<8,
     26 	Wflag	= 1<<9,
     27 
     28 	NSstart	= 0,			/* states for number to key decoding */
     29 	NSsign,
     30 	NSzero,
     31 	NSdigit,
     32 	NSpoint,
     33 	NSfract,
     34 	NSzerofract,
     35 	NSexp,
     36 	NSexpsign,
     37 	NSexpdigit
     38 };
     39 
     40 typedef	struct	Line	Line;
     41 typedef	struct	Key	Key;
     42 typedef	struct	Merge	Merge;
     43 typedef	struct	Field	Field;
     44 
     45 struct	Line
     46 {
     47 	Key*	key;
     48 	int	llen;		/* always >= 1 */
     49 	uchar	line[1];	/* always ends in '\n' */
     50 };
     51 
     52 struct	Merge
     53 {
     54 	Key*	key;		/* copy of line->key so (Line*) looks like (Merge*) */
     55 	Line*	line;		/* line at the head of a merged temp file */
     56 	int	fd;		/* file descriptor */
     57 	Biobuf	b;		/* iobuf for reading a temp file */
     58 };
     59 
     60 struct	Key
     61 {
     62 	int	klen;
     63 	uchar	key[1];
     64 };
     65 
     66 struct	Field
     67 {
     68 	int	beg1;
     69 	int	beg2;
     70 	int	end1;
     71 	int	end2;
     72 
     73 	long	flags;
     74 	uchar	mapto[256];
     75 
     76 	void	(*dokey)(Key*, uchar*, uchar*, Field*);
     77 };
     78 
     79 struct args
     80 {
     81 	char*	ofile;
     82 	char*	tname;
     83 	Rune	tabchar;
     84 	char	cflag;
     85 	char	uflag;
     86 	char	vflag;
     87 	int	nfield;
     88 	int	nfile;
     89 	Field	field[Nfield];
     90 
     91 	Line**	linep;
     92 	long	nline;			/* number of lines in this temp file */
     93 	long	lineno;			/* overall ordinal for -s option */
     94 	int	ntemp;
     95 	long	mline;			/* max lines per file */
     96 } args;
     97 
     98 extern	int	latinmap[];
     99 extern	Rune*	month[12];
    100 
    101 void	buildkey(Line*);
    102 void	doargs(int, char*[]);
    103 void	dofield(char*, int*, int*, int, int);
    104 void	dofile(Biobuf*);
    105 void	dokey_(Key*, uchar*, uchar*, Field*);
    106 void	dokey_dfi(Key*, uchar*, uchar*, Field*);
    107 void	dokey_gn(Key*, uchar*, uchar*, Field*);
    108 void	dokey_m(Key*, uchar*, uchar*, Field*);
    109 void	dokey_r(Key*, uchar*, uchar*, Field*);
    110 void	done(char*);
    111 int	kcmp(Key*, Key*);
    112 void	makemapd(Field*);
    113 void	makemapm(Field*);
    114 void	mergefiles(int, int, Biobuf*);
    115 void	mergeout(Biobuf*);
    116 void	newfield(void);
    117 Line*	newline(Biobuf*);
    118 void	nomem(void);
    119 void	notifyf(void*, char*);
    120 void	printargs(void);
    121 void	printout(Biobuf*);
    122 void	setfield(int, int);
    123 uchar*	skip(uchar*, int, int, int, int);
    124 void	sort4(void*, ulong);
    125 char*	tempfile(int);
    126 void	tempout(void);
    127 void	lineout(Biobuf*, Line*);
    128 
    129 void
    130 main(int argc, char *argv[])
    131 {
    132 	int i, f;
    133 	char *s;
    134 	Biobuf bbuf;
    135 
    136 	notify(notifyf);	/**/
    137 	doargs(argc, argv);
    138 	if(args.vflag)
    139 		printargs();
    140 
    141 	for(i=1; i<argc; i++) {
    142 		s = argv[i];
    143 		if(s == 0)
    144 			continue;
    145 		if(strcmp(s, "-") == 0) {
    146 			Binit(&bbuf, 0, OREAD);
    147 			dofile(&bbuf);
    148 			Bterm(&bbuf);
    149 			continue;
    150 		}
    151 		f = open(s, OREAD);
    152 		if(f < 0) {
    153 			fprint(2, "sort: open %s: %r\n", s);
    154 			done("open");
    155 		}
    156 		Binit(&bbuf, f, OREAD);
    157 		dofile(&bbuf);
    158 		Bterm(&bbuf);
    159 		close(f);
    160 	}
    161 	if(args.nfile == 0) {
    162 		Binit(&bbuf, 0, OREAD);
    163 		dofile(&bbuf);
    164 		Bterm(&bbuf);
    165 	}
    166 	if(args.cflag)
    167 		done(0);
    168 	if(args.vflag)
    169 		fprint(2, "=========\n");
    170 
    171 	f = 1;
    172 	if(args.ofile) {
    173 		f = create(args.ofile, OWRITE, 0666);
    174 		if(f < 0) {
    175 			fprint(2, "sort: create %s: %r\n", args.ofile);
    176 			done("create");
    177 		}
    178 	}
    179 
    180 	Binit(&bbuf, f, OWRITE);
    181 	if(args.ntemp) {
    182 		tempout();
    183 		mergeout(&bbuf);
    184 	} else {
    185 		printout(&bbuf);
    186 	}
    187 	Bterm(&bbuf);
    188 	done(0);
    189 }
    190 
    191 void
    192 dofile(Biobuf *b)
    193 {
    194 	Line *l, *ol;
    195 	int n;
    196 
    197 	if(args.cflag) {
    198 		ol = newline(b);
    199 		if(ol == 0)
    200 			return;
    201 		for(;;) {
    202 			l = newline(b);
    203 			if(l == 0)
    204 				break;
    205 			n = kcmp(ol->key, l->key);
    206 			if(n > 0 || (n == 0 && args.uflag)) {
    207 				fprint(2, "sort: -c file not in sort\n"); /**/
    208 				done("order");
    209 			}
    210 			free(ol->key);
    211 			free(ol);
    212 			ol = l;
    213 		}
    214 		return;
    215 	}
    216 
    217 	if(args.linep == 0) {
    218 		args.linep = malloc(args.mline * sizeof(args.linep));
    219 		if(args.linep == 0)
    220 			nomem();
    221 	}
    222 	for(;;) {
    223 		l = newline(b);
    224 		if(l == 0)
    225 			break;
    226 		if(args.nline >= args.mline)
    227 			tempout();
    228 		args.linep[args.nline] = l;
    229 		args.nline++;
    230 		args.lineno++;
    231 	}
    232 }
    233 
    234 void
    235 notifyf(void *a, char *s)
    236 {
    237 	USED(a);
    238 	if(strcmp(s, "interrupt") == 0)
    239 		done(0);
    240 	if(strcmp(s, "hangup") == 0)
    241 		done(0);
    242 	if(strcmp(s, "kill") == 0)
    243 		done(0);
    244 	if(strncmp(s, "sys: write on closed pipe", 25) == 0)
    245 		done(0);
    246 	noted(NDFLT);
    247 }
    248 
    249 Line*
    250 newline(Biobuf *b)
    251 {
    252 	Line *l;
    253 	char *p;
    254 	int n, c;
    255 
    256 	p = Brdline(b, '\n');
    257 	n = Blinelen(b);
    258 	if(p == 0) {
    259 		if(n == 0)
    260 			return 0;
    261 		l = 0;
    262 		for(n=0;;) {
    263 			if((n & 31) == 0) {
    264 				l = realloc(l, sizeof(Line) +
    265 					(n+31)*sizeof(l->line[0]));
    266 				if(l == 0)
    267 					nomem();
    268 			}
    269 			c = Bgetc(b);
    270 			if(c < 0) {
    271 				fprint(2, "sort: newline added\n");
    272 				c = '\n';
    273 			}
    274 			l->line[n++] = c;
    275 			if(c == '\n')
    276 				break;
    277 		}
    278 		l->llen = n;
    279 		buildkey(l);
    280 		return l;
    281 	}
    282 	l = malloc(sizeof(Line) +
    283 		(n-1)*sizeof(l->line[0]));
    284 	if(l == 0)
    285 		nomem();
    286 	l->llen = n;
    287 	memmove(l->line, p, n);
    288 	buildkey(l);
    289 	return l;
    290 }
    291 
    292 void
    293 lineout(Biobuf *b, Line *l)
    294 {
    295 	int n, m;
    296 
    297 	n = l->llen;
    298 	m = Bwrite(b, l->line, n);
    299 	if(n != m)
    300 		exits("write");
    301 }
    302 
    303 void
    304 tempout(void)
    305 {
    306 	long n;
    307 	Line **lp, *l;
    308 	char *tf;
    309 	int f;
    310 	Biobuf tb;
    311 
    312 	sort4(args.linep, args.nline);
    313 	tf = tempfile(args.ntemp);
    314 	args.ntemp++;
    315 	f = create(tf, OWRITE, 0666);
    316 	if(f < 0) {
    317 		fprint(2, "sort: create %s: %r\n", tf);
    318 		done("create");
    319 	}
    320 
    321 	Binit(&tb, f, OWRITE);
    322 	lp = args.linep;
    323 	for(n=args.nline; n>0; n--) {
    324 		l = *lp++;
    325 		lineout(&tb, l);
    326 		free(l->key);
    327 		free(l);
    328 	}
    329 	args.nline = 0;
    330 	Bterm(&tb);
    331 	close(f);
    332 }
    333 
    334 void
    335 done(char *xs)
    336 {
    337 	int i;
    338 
    339 	for(i=0; i<args.ntemp; i++)
    340 		remove(tempfile(i));
    341 	exits(xs);
    342 }
    343 
    344 void
    345 nomem(void)
    346 {
    347 	fprint(2, "sort: out of memory\n");
    348 	done("mem");
    349 }
    350 
    351 char*
    352 tempfile(int n)
    353 {
    354 	static char file[100];
    355 	static uint pid;
    356 	char *dir;
    357 
    358 	dir = "/var/tmp";
    359 	if(args.tname)
    360 		dir = args.tname;
    361 	if(strlen(dir) >= nelem(file)-20) {
    362 		fprint(2, "temp file directory name is too long: %s\n", dir);
    363 		done("tdir");
    364 	}
    365 
    366 	if(pid == 0) {
    367 		pid = getpid();
    368 		if(pid == 0) {
    369 			pid = time(0);
    370 			if(pid == 0)
    371 				pid = 1;
    372 		}
    373 	}
    374 
    375 	sprint(file, "%s/sort.%.4d.%.4d", dir, pid%10000, n);
    376 	return file;
    377 }
    378 
    379 void
    380 mergeout(Biobuf *b)
    381 {
    382 	int n, i, f;
    383 	char *tf;
    384 	Biobuf tb;
    385 
    386 	for(i=0; i<args.ntemp; i+=n) {
    387 		n = args.ntemp - i;
    388 		if(n > Nmerge) {
    389 			tf = tempfile(args.ntemp);
    390 			args.ntemp++;
    391 			f = create(tf, OWRITE, 0666);
    392 			if(f < 0) {
    393 				fprint(2, "sort: create %s: %r\n", tf);
    394 				done("create");
    395 			}
    396 			Binit(&tb, f, OWRITE);
    397 
    398 			n = Nmerge;
    399 			mergefiles(i, n, &tb);
    400 
    401 			Bterm(&tb);
    402 			close(f);
    403 		} else
    404 			mergefiles(i, n, b);
    405 	}
    406 }
    407 
    408 void
    409 mergefiles(int t, int n, Biobuf *b)
    410 {
    411 	Merge *m, *mp, **mmp;
    412 	Key *ok;
    413 	Line *l;
    414 	char *tf;
    415 	int i, f, nn;
    416 
    417 	mmp = malloc(n*sizeof(*mmp));
    418 	mp = malloc(n*sizeof(*mp));
    419 	if(mmp == 0 || mp == 0)
    420 		nomem();
    421 
    422 	nn = 0;
    423 	m = mp;
    424 	for(i=0; i<n; i++,m++) {
    425 		tf = tempfile(t+i);
    426 		f = open(tf, OREAD);
    427 		if(f < 0) {
    428 			fprint(2, "sort: reopen %s: %r\n", tf);
    429 			done("open");
    430 		}
    431 		m->fd = f;
    432 		Binit(&m->b, f, OREAD);
    433 		mmp[nn] = m;
    434 
    435 		l = newline(&m->b);
    436 		if(l == 0)
    437 			continue;
    438 		nn++;
    439 		m->line = l;
    440 		m->key = l->key;
    441 	}
    442 
    443 	ok = 0;
    444 	for(;;) {
    445 		sort4(mmp, nn);
    446 		m = *mmp;
    447 		if(nn == 0)
    448 			break;
    449 		for(;;) {
    450 			l = m->line;
    451 			if(args.uflag && ok && kcmp(ok, l->key) == 0) {
    452 				free(l->key);
    453 				free(l);
    454 			} else {
    455 				lineout(b, l);
    456 				if(ok)
    457 					free(ok);
    458 				ok = l->key;
    459 				free(l);
    460 			}
    461 
    462 			l = newline(&m->b);
    463 			if(l == 0) {
    464 				nn--;
    465 				mmp[0] = mmp[nn];
    466 				break;
    467 			}
    468 			m->line = l;
    469 			m->key = l->key;
    470 			if(nn > 1 && kcmp(mmp[0]->key, mmp[1]->key) > 0)
    471 				break;
    472 		}
    473 	}
    474 	if(ok)
    475 		free(ok);
    476 
    477 	m = mp;
    478 	for(i=0; i<n; i++,m++) {
    479 		Bterm(&m->b);
    480 		close(m->fd);
    481 	}
    482 
    483 	free(mp);
    484 	free(mmp);
    485 }
    486 
    487 int
    488 kcmp(Key *ka, Key *kb)
    489 {
    490 	int n, m;
    491 
    492 	/*
    493 	 * set n to length of smaller key
    494 	 */
    495 	n = ka->klen;
    496 	m = kb->klen;
    497 	if(n > m)
    498 		n = m;
    499 	return memcmp(ka->key, kb->key, n);
    500 }
    501 
    502 void
    503 printout(Biobuf *b)
    504 {
    505 	long n;
    506 	Line **lp, *l;
    507 	Key *ok;
    508 
    509 	sort4(args.linep, args.nline);
    510 	lp = args.linep;
    511 	ok = 0;
    512 	for(n=args.nline; n>0; n--) {
    513 		l = *lp++;
    514 		if(args.uflag && ok && kcmp(ok, l->key) == 0)
    515 			continue;
    516 		lineout(b, l);
    517 		ok = l->key;
    518 	}
    519 }
    520 
    521 void
    522 setfield(int n, int c)
    523 {
    524 	Field *f;
    525 
    526 	f = &args.field[n];
    527 	switch(c) {
    528 	default:
    529 		fprint(2, "sort: unknown option: field.%C\n", c);
    530 		done("option");
    531 	case 'b':	/* skip blanks */
    532 		f->flags |= Bflag;
    533 		break;
    534 	case 'd':	/* directory order */
    535 		f->flags |= Dflag;
    536 		break;
    537 	case 'f':	/* fold case */
    538 		f->flags |= Fflag;
    539 		break;
    540 	case 'g':	/* floating point -n case */
    541 		f->flags |= Gflag;
    542 		break;
    543 	case 'i':	/* ignore non-ascii */
    544 		f->flags |= Iflag;
    545 		break;
    546 	case 'M':	/* month */
    547 		f->flags |= Mflag;
    548 		break;
    549 	case 'n':	/* numbers */
    550 		f->flags |= Nflag;
    551 		break;
    552 	case 'r':	/* reverse */
    553 		f->flags |= Rflag;
    554 		break;
    555 	case 'w':	/* ignore white */
    556 		f->flags |= Wflag;
    557 		break;
    558 	}
    559 }
    560 
    561 void
    562 dofield(char *s, int *n1, int *n2, int off1, int off2)
    563 {
    564 	int c, n;
    565 
    566 	c = *s++;
    567 	if(c >= '0' && c <= '9') {
    568 		n = 0;
    569 		while(c >= '0' && c <= '9') {
    570 			n = n*10 + (c-'0');
    571 			c = *s++;
    572 		}
    573 		n -= off1;	/* posix committee: rot in hell */
    574 		if(n < 0) {
    575 			fprint(2, "sort: field offset must be positive\n");
    576 			done("option");
    577 		}
    578 		*n1 = n;
    579 	}
    580 	if(c == '.') {
    581 		c = *s++;
    582 		if(c >= '0' && c <= '9') {
    583 			n = 0;
    584 			while(c >= '0' && c <= '9') {
    585 				n = n*10 + (c-'0');
    586 				c = *s++;
    587 			}
    588 			n -= off2;
    589 			if(n < 0) {
    590 				fprint(2, "sort: character offset must be positive\n");
    591 				done("option");
    592 			}
    593 			*n2 = n;
    594 		}
    595 	}
    596 	while(c != 0) {
    597 		setfield(args.nfield, c);
    598 		c = *s++;
    599 	}
    600 }
    601 
    602 void
    603 printargs(void)
    604 {
    605 	int i, n;
    606 	Field *f;
    607 	char *prefix;
    608 
    609 	fprint(2, "sort");
    610 	for(i=0; i<=args.nfield; i++) {
    611 		f = &args.field[i];
    612 		prefix = " -";
    613 		if(i) {
    614 			n = f->beg1;
    615 			if(n >= 0)
    616 				fprint(2, " +%d", n);
    617 			else
    618 				fprint(2, " +*");
    619 			n = f->beg2;
    620 			if(n >= 0)
    621 				fprint(2, ".%d", n);
    622 			else
    623 				fprint(2, ".*");
    624 
    625 			if(f->flags & B1flag)
    626 				fprint(2, "b");
    627 
    628 			n = f->end1;
    629 			if(n >= 0)
    630 				fprint(2, " -%d", n);
    631 			else
    632 				fprint(2, " -*");
    633 			n = f->end2;
    634 			if(n >= 0)
    635 				fprint(2, ".%d", n);
    636 			else
    637 				fprint(2, ".*");
    638 			prefix = "";
    639 		}
    640 		if(f->flags & Bflag)
    641 			fprint(2, "%sb", prefix);
    642 		if(f->flags & Dflag)
    643 			fprint(2, "%sd", prefix);
    644 		if(f->flags & Fflag)
    645 			fprint(2, "%sf", prefix);
    646 		if(f->flags & Gflag)
    647 			fprint(2, "%sg", prefix);
    648 		if(f->flags & Iflag)
    649 			fprint(2, "%si", prefix);
    650 		if(f->flags & Mflag)
    651 			fprint(2, "%sM", prefix);
    652 		if(f->flags & Nflag)
    653 			fprint(2, "%sn", prefix);
    654 		if(f->flags & Rflag)
    655 			fprint(2, "%sr", prefix);
    656 		if(f->flags & Wflag)
    657 			fprint(2, "%sw", prefix);
    658 	}
    659 	if(args.cflag)
    660 		fprint(2, " -c");
    661 	if(args.uflag)
    662 		fprint(2, " -u");
    663 	if(args.ofile)
    664 		fprint(2, " -o %s", args.ofile);
    665 	if(args.mline != Nline)
    666 		fprint(2, " -l %ld", args.mline);
    667 	fprint(2, "\n");
    668 }
    669 
    670 void
    671 newfield(void)
    672 {
    673 	int n;
    674 	Field *f;
    675 
    676 	n = args.nfield + 1;
    677 	if(n >= Nfield) {
    678 		fprint(2, "sort: too many fields specified\n");
    679 		done("option");
    680 	}
    681 	args.nfield = n;
    682 	f = &args.field[n];
    683 	f->beg1 = -1;
    684 	f->beg2 = -1;
    685 	f->end1 = -1;
    686 	f->end2 = -1;
    687 }
    688 
    689 void
    690 doargs(int argc, char *argv[])
    691 {
    692 	int i, c, hadplus;
    693 	char *s, *p, *q;
    694 	Field *f;
    695 
    696 	hadplus = 0;
    697 	args.mline = Nline;
    698 	for(i=1; i<argc; i++) {
    699 		s = argv[i];
    700 		c = *s++;
    701 		if(c == '-') {
    702 			c = *s;
    703 			if(c == 0)		/* forced end of arg marker */
    704 				break;
    705 			argv[i] = 0;		/* clobber args processed */
    706 			if(c == '.' || (c >= '0' && c <= '9')) {
    707 				if(!hadplus)
    708 					newfield();
    709 				f = &args.field[args.nfield];
    710 				dofield(s, &f->end1, &f->end2, 0, 0);
    711 				hadplus = 0;
    712 				continue;
    713 			}
    714 
    715 			while(c = *s++)
    716 			switch(c) {
    717 			case '-':	/* end of options */
    718 				i = argc;
    719 				continue;
    720 			case 'T':	/* temp directory */
    721 				if(*s == 0) {
    722 					i++;
    723 					if(i < argc) {
    724 						args.tname = argv[i];
    725 						argv[i] = 0;
    726 					}
    727 				} else
    728 					args.tname = s;
    729 				s = strchr(s, 0);
    730 				break;
    731 			case 'o':	/* output file */
    732 				if(*s == 0) {
    733 					i++;
    734 					if(i < argc) {
    735 						args.ofile = argv[i];
    736 						argv[i] = 0;
    737 					}
    738 				} else
    739 					args.ofile = s;
    740 				s = strchr(s, 0);
    741 				break;
    742 			case 'k':	/* posix key (what were they thinking?) */
    743 				p = 0;
    744 				if(*s == 0) {
    745 					i++;
    746 					if(i < argc) {
    747 						p = argv[i];
    748 						argv[i] = 0;
    749 					}
    750 				} else
    751 					p = s;
    752 				s = strchr(s, 0);
    753 				if(p == 0)
    754 					break;
    755 
    756 				newfield();
    757 				q = strchr(p, ',');
    758 				if(q)
    759 					*q++ = 0;
    760 				f = &args.field[args.nfield];
    761 				dofield(p, &f->beg1, &f->beg2, 1, 1);
    762 				if(f->flags & Bflag) {
    763 					f->flags |= B1flag;
    764 					f->flags &= ~Bflag;
    765 				}
    766 				if(q) {
    767 					dofield(q, &f->end1, &f->end2, 1, 0);
    768 					if(f->end2 <= 0)
    769 						f->end1++;
    770 				}
    771 				hadplus = 0;
    772 				break;
    773 			case 't':	/* tab character */
    774 				if(*s == 0) {
    775 					i++;
    776 					if(i < argc) {
    777 						chartorune(&args.tabchar, argv[i]);
    778 						argv[i] = 0;
    779 					}
    780 				} else
    781 					s += chartorune(&args.tabchar, s);
    782 				if(args.tabchar == '\n') {
    783 					fprint(2, "aw come on, rob\n");
    784 					done("rob");
    785 				}
    786 				break;
    787 			case 'c':	/* check order */
    788 				args.cflag = 1;
    789 				break;
    790 			case 'u':	/* unique */
    791 				args.uflag = 1;
    792 				break;
    793 			case 'v':	/* debugging noise */
    794 				args.vflag = 1;
    795 				break;
    796 			case 'l':
    797 				if(*s == 0) {
    798 					i++;
    799 					if(i < argc) {
    800 						args.mline = atol(argv[i]);
    801 						argv[i] = 0;
    802 					}
    803 				} else
    804 					args.mline = atol(s);
    805 				s = strchr(s, 0);
    806 				break;
    807 
    808 			case 'M':	/* month */
    809 			case 'b':	/* skip blanks */
    810 			case 'd':	/* directory order */
    811 			case 'f':	/* fold case */
    812 			case 'g':	/* floating numbers */
    813 			case 'i':	/* ignore non-ascii */
    814 			case 'n':	/* numbers */
    815 			case 'r':	/* reverse */
    816 			case 'w':	/* ignore white */
    817 				if(args.nfield > 0)
    818 					fprint(2, "sort: global field set after -k\n");
    819 				setfield(0, c);
    820 				break;
    821 			case 'm':
    822 				/* option m silently ignored but required by posix */
    823 				break;
    824 			default:
    825 				fprint(2, "sort: unknown option: -%C\n", c);
    826 				done("option");
    827 			}
    828 			continue;
    829 		}
    830 		if(c == '+') {
    831 			argv[i] = 0;		/* clobber args processed */
    832 			c = *s;
    833 			if(c == '.' || (c >= '0' && c <= '9')) {
    834 				newfield();
    835 				f = &args.field[args.nfield];
    836 				dofield(s, &f->beg1, &f->beg2, 0, 0);
    837 				if(f->flags & Bflag) {
    838 					f->flags |= B1flag;
    839 					f->flags &= ~Bflag;
    840 				}
    841 				hadplus = 1;
    842 				continue;
    843 			}
    844 			fprint(2, "sort: unknown option: +%C\n", c);
    845 			done("option");
    846 		}
    847 		args.nfile++;
    848 	}
    849 
    850 	for(i=0; i<=args.nfield; i++) {
    851 		f = &args.field[i];
    852 
    853 		/*
    854 		 * global options apply to fields that
    855 		 * specify no options
    856 		 */
    857 		if(f->flags == 0) {
    858 			f->flags = args.field[0].flags;
    859 			if(args.field[0].flags & Bflag)
    860 				f->flags |= B1flag;
    861 		}
    862 
    863 
    864 		/*
    865 		 * build buildkey specification
    866 		 */
    867 		switch(f->flags & ~(Bflag|B1flag)) {
    868 		default:
    869 			fprint(2, "sort: illegal combination of flags: %lx\n", f->flags);
    870 			done("option");
    871 		case 0:
    872 			f->dokey = dokey_;
    873 			break;
    874 		case Rflag:
    875 			f->dokey = dokey_r;
    876 			break;
    877 		case Gflag:
    878 		case Nflag:
    879 		case Gflag|Nflag:
    880 		case Gflag|Rflag:
    881 		case Nflag|Rflag:
    882 		case Gflag|Nflag|Rflag:
    883 			f->dokey = dokey_gn;
    884 			break;
    885 		case Mflag:
    886 		case Mflag|Rflag:
    887 			f->dokey = dokey_m;
    888 			makemapm(f);
    889 			break;
    890 		case Dflag:
    891 		case Dflag|Fflag:
    892 		case Dflag|Fflag|Iflag:
    893 		case Dflag|Fflag|Iflag|Rflag:
    894 		case Dflag|Fflag|Iflag|Rflag|Wflag:
    895 		case Dflag|Fflag|Iflag|Wflag:
    896 		case Dflag|Fflag|Rflag:
    897 		case Dflag|Fflag|Rflag|Wflag:
    898 		case Dflag|Fflag|Wflag:
    899 		case Dflag|Iflag:
    900 		case Dflag|Iflag|Rflag:
    901 		case Dflag|Iflag|Rflag|Wflag:
    902 		case Dflag|Iflag|Wflag:
    903 		case Dflag|Rflag:
    904 		case Dflag|Rflag|Wflag:
    905 		case Dflag|Wflag:
    906 		case Fflag:
    907 		case Fflag|Iflag:
    908 		case Fflag|Iflag|Rflag:
    909 		case Fflag|Iflag|Rflag|Wflag:
    910 		case Fflag|Iflag|Wflag:
    911 		case Fflag|Rflag:
    912 		case Fflag|Rflag|Wflag:
    913 		case Fflag|Wflag:
    914 		case Iflag:
    915 		case Iflag|Rflag:
    916 		case Iflag|Rflag|Wflag:
    917 		case Iflag|Wflag:
    918 		case Wflag:
    919 			f->dokey = dokey_dfi;
    920 			makemapd(f);
    921 			break;
    922 		}
    923 	}
    924 
    925 	/*
    926 	 * random spot checks
    927 	 */
    928 	if(args.nfile > 1 && args.cflag) {
    929 		fprint(2, "sort: -c can have at most one input file\n");
    930 		done("option");
    931 	}
    932 	return;
    933 }
    934 
    935 uchar*
    936 skip(uchar *l, int n1, int n2, int bflag, int endfield)
    937 {
    938 	int i, c, tc;
    939 	Rune r;
    940 
    941 	if(endfield && n1 < 0)
    942 		return 0;
    943 
    944 	c = *l++;
    945 	tc = args.tabchar;
    946 	if(tc) {
    947 		if(tc < Runeself) {
    948 			for(i=n1; i>0; i--) {
    949 				while(c != tc) {
    950 					if(c == '\n')
    951 						return 0;
    952 					c = *l++;
    953 				}
    954 				if(!(endfield && i == 1))
    955 					c = *l++;
    956 			}
    957 		} else {
    958 			l--;
    959 			l += chartorune(&r, (char*)l);
    960 			for(i=n1; i>0; i--) {
    961 				while(r != tc) {
    962 					if(r == '\n')
    963 						return 0;
    964 					l += chartorune(&r, (char*)l);
    965 				}
    966 				if(!(endfield && i == 1))
    967 					l += chartorune(&r, (char*)l);
    968 			}
    969 			c = r;
    970 		}
    971 	} else {
    972 		for(i=n1; i>0; i--) {
    973 			while(c == ' ' || c == '\t')
    974 				c = *l++;
    975 			while(c != ' ' && c != '\t') {
    976 				if(c == '\n')
    977 					return 0;
    978 				c = *l++;
    979 			}
    980 		}
    981 	}
    982 
    983 	if(bflag)
    984 		while(c == ' ' || c == '\t')
    985 			c = *l++;
    986 
    987 	l--;
    988 	for(i=n2; i>0; i--) {
    989 		c = *l;
    990 		if(c < Runeself) {
    991 			if(c == '\n')
    992 				return 0;
    993 			l++;
    994 			continue;
    995 		}
    996 		l += chartorune(&r, (char*)l);
    997 	}
    998 	return l;
    999 }
   1000 
   1001 void
   1002 dokey_gn(Key *k, uchar *lp, uchar *lpe, Field *f)
   1003 {
   1004 	uchar *kp;
   1005 	int c, cl, dp;
   1006 	int state, nzero, exp, expsign, rflag;
   1007 
   1008 	cl = k->klen + 3;
   1009 	kp = k->key + cl;	/* skip place for sign, exponent[2] */
   1010 
   1011 	nzero = 0;		/* number of trailing zeros */
   1012 	exp = 0;		/* value of the exponent */
   1013 	expsign = 0;		/* sign of the exponent */
   1014 	dp = 0x4040;		/* location of decimal point */
   1015 	rflag = f->flags&Rflag;	/* xor of rflag and - sign */
   1016 	state = NSstart;
   1017 
   1018 	for(;; lp++) {
   1019 		if(lp >= lpe)
   1020 			break;
   1021 		c = *lp;
   1022 
   1023 		if(c == ' ' || c == '\t') {
   1024 			switch(state) {
   1025 			case NSstart:
   1026 			case NSsign:
   1027 				continue;
   1028 			}
   1029 			break;
   1030 		}
   1031 		if(c == '+' || c == '-') {
   1032 			switch(state) {
   1033 			case NSstart:
   1034 				state = NSsign;
   1035 				if(c == '-')
   1036 					rflag = !rflag;
   1037 				continue;
   1038 			case NSexp:
   1039 				state = NSexpsign;
   1040 				if(c == '-')
   1041 					expsign = 1;
   1042 				continue;
   1043 			}
   1044 			break;
   1045 		}
   1046 		if(c == '0') {
   1047 			switch(state) {
   1048 			case NSdigit:
   1049 				if(rflag)
   1050 					c = ~c;
   1051 				*kp++ = c;
   1052 				cl++;
   1053 				nzero++;
   1054 				dp++;
   1055 				state = NSdigit;
   1056 				continue;
   1057 			case NSfract:
   1058 				if(rflag)
   1059 					c = ~c;
   1060 				*kp++ = c;
   1061 				cl++;
   1062 				nzero++;
   1063 				state = NSfract;
   1064 				continue;
   1065 			case NSstart:
   1066 			case NSsign:
   1067 			case NSzero:
   1068 				state = NSzero;
   1069 				continue;
   1070 			case NSzerofract:
   1071 			case NSpoint:
   1072 				dp--;
   1073 				state = NSzerofract;
   1074 				continue;
   1075 			case NSexpsign:
   1076 			case NSexp:
   1077 			case NSexpdigit:
   1078 				exp = exp*10 + (c - '0');
   1079 				state = NSexpdigit;
   1080 				continue;
   1081 			}
   1082 			break;
   1083 		}
   1084 		if(c >= '1' && c <= '9') {
   1085 			switch(state) {
   1086 			case NSzero:
   1087 			case NSstart:
   1088 			case NSsign:
   1089 			case NSdigit:
   1090 				if(rflag)
   1091 					c = ~c;
   1092 				*kp++ = c;
   1093 				cl++;
   1094 				nzero = 0;
   1095 				dp++;
   1096 				state = NSdigit;
   1097 				continue;
   1098 			case NSzerofract:
   1099 			case NSpoint:
   1100 			case NSfract:
   1101 				if(rflag)
   1102 					c = ~c;
   1103 				*kp++ = c;
   1104 				cl++;
   1105 				nzero = 0;
   1106 				state = NSfract;
   1107 				continue;
   1108 			case NSexpsign:
   1109 			case NSexp:
   1110 			case NSexpdigit:
   1111 				exp = exp*10 + (c - '0');
   1112 				state = NSexpdigit;
   1113 				continue;
   1114 			}
   1115 			break;
   1116 		}
   1117 		if(c == '.') {
   1118 			switch(state) {
   1119 			case NSstart:
   1120 			case NSsign:
   1121 				state = NSpoint;
   1122 				continue;
   1123 			case NSzero:
   1124 				state = NSzerofract;
   1125 				continue;
   1126 			case NSdigit:
   1127 				state = NSfract;
   1128 				continue;
   1129 			}
   1130 			break;
   1131 		}
   1132 		if((f->flags & Gflag) && (c == 'e' || c == 'E')) {
   1133 			switch(state) {
   1134 			case NSdigit:
   1135 			case NSfract:
   1136 				state = NSexp;
   1137 				continue;
   1138 			}
   1139 			break;
   1140 		}
   1141 		break;
   1142 	}
   1143 
   1144 	switch(state) {
   1145 	/*
   1146 	 * result is zero
   1147 	 */
   1148 	case NSstart:
   1149 	case NSsign:
   1150 	case NSzero:
   1151 	case NSzerofract:
   1152 	case NSpoint:
   1153 		kp = k->key + k->klen;
   1154 		k->klen += 2;
   1155 		kp[0] = 0x20;	/* between + and - */
   1156 		kp[1] = 0;
   1157 		return;
   1158 	/*
   1159 	 * result has exponent
   1160 	 */
   1161 	case NSexpsign:
   1162 	case NSexp:
   1163 	case NSexpdigit:
   1164 		if(expsign)
   1165 			exp = -exp;
   1166 		dp += exp;
   1167 
   1168 	/*
   1169 	 * result is fixed point number
   1170 	 */
   1171 	case NSdigit:
   1172 	case NSfract:
   1173 		kp -= nzero;
   1174 		cl -= nzero;
   1175 		break;
   1176 	}
   1177 
   1178 	/*
   1179 	 * end of number
   1180 	 */
   1181 	c = 0;
   1182 	if(rflag)
   1183 		c = ~c;
   1184 	*kp = c;
   1185 
   1186 	/*
   1187 	 * sign and exponent
   1188 	 */
   1189 	c = 0x30;
   1190 	if(rflag) {
   1191 		c = 0x10;
   1192 		dp = ~dp;
   1193 	}
   1194 	kp = k->key + k->klen;
   1195 	kp[0] = c;
   1196 	kp[1] = (dp >> 8);
   1197 	kp[2] = dp;
   1198 	k->klen = cl+1;
   1199 }
   1200 
   1201 void
   1202 dokey_m(Key *k, uchar *lp, uchar *lpe, Field *f)
   1203 {
   1204 	uchar *kp;
   1205 	Rune r, place[3];
   1206 	int c, cl, pc;
   1207 	int rflag;
   1208 
   1209 	rflag = f->flags&Rflag;
   1210 	pc = 0;
   1211 
   1212 	cl = k->klen;
   1213 	kp = k->key + cl;
   1214 
   1215 	for(;;) {
   1216 		/*
   1217 		 * get the character
   1218 		 */
   1219 		if(lp >= lpe)
   1220 			break;
   1221 		c = *lp;
   1222 		if(c >= Runeself) {
   1223 			lp += chartorune(&r, (char*)lp);
   1224 			c = r;
   1225 		} else
   1226 			lp++;
   1227 
   1228 		if(c < nelem(f->mapto)) {
   1229 			c = f->mapto[c];
   1230 			if(c == 0)
   1231 				continue;
   1232 		}
   1233 		place[pc++] = c;
   1234 		if(pc < 3)
   1235 			continue;
   1236 		for(c=11; c>=0; c--)
   1237 			if(memcmp(month[c], place, sizeof(place)) == 0)
   1238 				break;
   1239 		c += 10;
   1240 		if(rflag)
   1241 			c = ~c;
   1242 		*kp++ = c;
   1243 		cl++;
   1244 		break;
   1245 	}
   1246 
   1247 	c = 0;
   1248 	if(rflag)
   1249 		c = ~c;
   1250 	*kp = c;
   1251 	k->klen = cl+1;
   1252 }
   1253 
   1254 void
   1255 dokey_dfi(Key *k, uchar *lp, uchar *lpe, Field *f)
   1256 {
   1257 	uchar *kp;
   1258 	Rune r;
   1259 	int c, cl, n, rflag;
   1260 
   1261 	cl = k->klen;
   1262 	kp = k->key + cl;
   1263 	rflag = f->flags & Rflag;
   1264 
   1265 	for(;;) {
   1266 		/*
   1267 		 * get the character
   1268 		 */
   1269 		if(lp >= lpe)
   1270 			break;
   1271 		c = *lp;
   1272 		if(c >= Runeself) {
   1273 			lp += chartorune(&r, (char*)lp);
   1274 			c = r;
   1275 		} else
   1276 			lp++;
   1277 
   1278 		/*
   1279 		 * do the various mappings.
   1280 		 * the common case is handled
   1281 		 * completely by the table.
   1282 		 */
   1283 		if(c != 0 && c < Runeself) {
   1284 			c = f->mapto[c];
   1285 			if(c) {
   1286 				*kp++ = c;
   1287 				cl++;
   1288 			}
   1289 			continue;
   1290 		}
   1291 
   1292 		/*
   1293 		 * for characters out of range,
   1294 		 * the table does not do Rflag.
   1295 		 * ignore is based on mapto[255]
   1296 		 */
   1297 		if(c != 0 && c < nelem(f->mapto)) {
   1298 			c = f->mapto[c];
   1299 			if(c == 0)
   1300 				continue;
   1301 		} else
   1302 			if(f->mapto[nelem(f->mapto)-1] == 0)
   1303 				continue;
   1304 
   1305 		/*
   1306 		 * put it in the key
   1307 		 */
   1308 		r = c;
   1309 		n = runetochar((char*)kp, &r);
   1310 		kp += n;
   1311 		cl += n;
   1312 		if(rflag)
   1313 			while(n > 0) {
   1314 				kp[-n] = ~kp[-n];
   1315 				n--;
   1316 			}
   1317 	}
   1318 
   1319 	/*
   1320 	 * end of key
   1321 	 */
   1322 	k->klen = cl+1;
   1323 	if(rflag) {
   1324 		*kp = ~0;
   1325 		return;
   1326 	}
   1327 	*kp = 0;
   1328 }
   1329 
   1330 void
   1331 dokey_r(Key *k, uchar *lp, uchar *lpe, Field *f)
   1332 {
   1333 	int cl, n;
   1334 	uchar *kp;
   1335 
   1336 	USED(f);
   1337 	n = lpe - lp;
   1338 	if(n < 0)
   1339 		n = 0;
   1340 	cl = k->klen;
   1341 	kp = k->key + cl;
   1342 	k->klen = cl+n+1;
   1343 
   1344 	lpe -= 3;
   1345 	while(lp < lpe) {
   1346 		kp[0] = ~lp[0];
   1347 		kp[1] = ~lp[1];
   1348 		kp[2] = ~lp[2];
   1349 		kp[3] = ~lp[3];
   1350 		kp += 4;
   1351 		lp += 4;
   1352 	}
   1353 
   1354 	lpe += 3;
   1355 	while(lp < lpe)
   1356 		*kp++ = ~*lp++;
   1357 	*kp = ~0;
   1358 }
   1359 
   1360 void
   1361 dokey_(Key *k, uchar *lp, uchar *lpe, Field *f)
   1362 {
   1363 	int n, cl;
   1364 	uchar *kp;
   1365 
   1366 	USED(f);
   1367 	n = lpe - lp;
   1368 	if(n < 0)
   1369 		n = 0;
   1370 	cl = k->klen;
   1371 	kp = k->key + cl;
   1372 	k->klen = cl+n+1;
   1373 	memmove(kp, lp, n);
   1374 	kp[n] = 0;
   1375 }
   1376 
   1377 void
   1378 buildkey(Line *l)
   1379 {
   1380 	Key *k;
   1381 	uchar *lp, *lpe;
   1382 	int ll, kl, cl, i, n;
   1383 	Field *f;
   1384 
   1385 	ll = l->llen - 1;
   1386 	kl = 0;			/* allocated length */
   1387 	cl = 0;			/* current length */
   1388 	k = 0;
   1389 
   1390 	for(i=1; i<=args.nfield; i++) {
   1391 		f = &args.field[i];
   1392 		lp = skip(l->line, f->beg1, f->beg2, f->flags&B1flag, 0);
   1393 		if(lp == 0)
   1394 			lp = l->line + ll;
   1395 		lpe = skip(l->line, f->end1, f->end2, f->flags&Bflag, 1);
   1396 		if(lpe == 0)
   1397 			lpe = l->line + ll;
   1398 		n = (lpe - lp) + 1;
   1399 		if(n <= 0)
   1400 			n = 1;
   1401 		if(cl+(n+4) > kl) {
   1402 			kl = cl+(n+4);
   1403 			k = realloc(k, sizeof(Key) +
   1404 				(kl-1)*sizeof(k->key[0]));
   1405 			if(k == 0)
   1406 				nomem();
   1407 		}
   1408 		k->klen = cl;
   1409 		(*f->dokey)(k, lp, lpe, f);
   1410 		cl = k->klen;
   1411 	}
   1412 
   1413 	/*
   1414 	 * global comparisons
   1415 	 */
   1416 	if(!(args.uflag && cl > 0)) {
   1417 		f = &args.field[0];
   1418 		if(cl+(ll+4) > kl) {
   1419 			kl = cl+(ll+4);
   1420 			k = realloc(k, sizeof(Key) +
   1421 				(kl-1)*sizeof(k->key[0]));
   1422 			if(k == 0)
   1423 				nomem();
   1424 		}
   1425 		k->klen = cl;
   1426 		(*f->dokey)(k, l->line, l->line+ll, f);
   1427 		cl = k->klen;
   1428 	}
   1429 
   1430 	l->key = k;
   1431 	k->klen = cl;
   1432 
   1433 	if(args.vflag) {
   1434 		write(2, l->line, l->llen);
   1435 		for(i=0; i<k->klen; i++) {
   1436 			fprint(2, " %.2x", k->key[i]);
   1437 			if(k->key[i] == 0x00 || k->key[i] == 0xff)
   1438 				fprint(2, "\n");
   1439 		}
   1440 	}
   1441 }
   1442 
   1443 void
   1444 makemapm(Field *f)
   1445 {
   1446 	int i, c;
   1447 
   1448 	for(i=0; i<nelem(f->mapto); i++) {
   1449 		c = 1;
   1450 		if(i == ' ' || i == '\t')
   1451 			c = 0;
   1452 		if(i >= 'a' && i <= 'z')
   1453 			c = i + ('A' - 'a');
   1454 		if(i >= 'A' && i <= 'Z')
   1455 			c = i;
   1456 		f->mapto[i] = c;
   1457 		if(args.vflag) {
   1458 			if((i & 15) == 0)
   1459 				fprint(2, "	");
   1460 			fprint(2, " %.2x", c);
   1461 			if((i & 15) == 15)
   1462 				fprint(2, "\n");
   1463 		}
   1464 	}
   1465 }
   1466 
   1467 void
   1468 makemapd(Field *f)
   1469 {
   1470 	int i, j, c;
   1471 
   1472 	for(i=0; i<nelem(f->mapto); i++) {
   1473 		c = i;
   1474 		if(f->flags & Iflag)
   1475 			if(c < 040 || c > 0176)
   1476 				c = -1;
   1477 		if((f->flags & Wflag) && c >= 0)
   1478 			if(c == ' ' || c == '\t')
   1479 				c = -1;
   1480 		if((f->flags & Dflag) && c >= 0)
   1481 			if(!(c == ' ' || c == '\t' ||
   1482 			    (c >= 'a' && c <= 'z') ||
   1483 			    (c >= 'A' && c <= 'Z') ||
   1484 			    (c >= '0' && c <= '9'))) {
   1485 				for(j=0; latinmap[j]; j+=3)
   1486 					if(c == latinmap[j+0] ||
   1487 					   c == latinmap[j+1])
   1488 						break;
   1489 				if(latinmap[j] == 0)
   1490 					c = -1;
   1491 			}
   1492 		if((f->flags & Fflag) && c >= 0) {
   1493 			if(c >= 'a' && c <= 'z')
   1494 				c += 'A' - 'a';
   1495 			for(j=0; latinmap[j]; j+=3)
   1496 				if(c == latinmap[j+0] ||
   1497 				   c == latinmap[j+1]) {
   1498 					c = latinmap[j+2];
   1499 					break;
   1500 				}
   1501 		}
   1502 		if((f->flags & Rflag) && c >= 0 && i > 0 && i < Runeself)
   1503 			c = ~c & 0xff;
   1504 		if(c < 0)
   1505 			c = 0;
   1506 		f->mapto[i] = c;
   1507 		if(args.vflag) {
   1508 			if((i & 15) == 0)
   1509 				fprint(2, "	");
   1510 			fprint(2, " %.2x", c);
   1511 			if((i & 15) == 15)
   1512 				fprint(2, "\n");
   1513 		}
   1514 	}
   1515 }
   1516 
   1517 int	latinmap[] =
   1518 {
   1519 /*	lcase	ucase	fold	*/
   1520 	0xe0,	0xc0,	0x41,		/*	 L'à',	L'À',	L'A',	 */
   1521 	0xe1,	0xc1,	0x41,		/*	 L'á',	L'Á',	L'A',	 */
   1522 	0xe2,	0xc2,	0x41,		/*	 L'â',	L'Â',	L'A',	 */
   1523 	0xe4,	0xc4,	0x41,		/*	 L'ä',	L'Ä',	L'A',	 */
   1524 	0xe3,	0xc3,	0x41,		/*	 L'ã',	L'Ã',	L'A',	 */
   1525 	0xe5,	0xc5,	0x41,		/*	 L'å',	L'Å',	L'A',	 */
   1526 	0xe8,	0xc8,	0x45,		/*	 L'è',	L'È',	L'E',	 */
   1527 	0xe9,	0xc9,	0x45,		/*	 L'é',	L'É',	L'E',	 */
   1528 	0xea,	0xca,	0x45,		/*	 L'ê',	L'Ê',	L'E',	 */
   1529 	0xeb,	0xcb,	0x45,		/*	 L'ë',	L'Ë',	L'E',	 */
   1530 	0xec,	0xcc,	0x49,		/*	 L'ì',	L'Ì',	L'I',	 */
   1531 	0xed,	0xcd,	0x49,		/*	 L'í',	L'Í',	L'I',	 */
   1532 	0xee,	0xce,	0x49,		/*	 L'î',	L'Î',	L'I',	 */
   1533 	0xef,	0xcf,	0x49,		/*	 L'ï',	L'Ï',	L'I',	 */
   1534 	0xf2,	0xd2,	0x4f,		/*	 L'ò',	L'Ò',	L'O',	 */
   1535 	0xf3,	0xd3,	0x4f,		/*	 L'ó',	L'Ó',	L'O',	 */
   1536 	0xf4,	0xd4,	0x4f,		/*	 L'ô',	L'Ô',	L'O',	 */
   1537 	0xf6,	0xd6,	0x4f,		/*	 L'ö',	L'Ö',	L'O',	 */
   1538 	0xf5,	0xd5,	0x4f,		/*	 L'õ',	L'Õ',	L'O',	 */
   1539 	0xf8,	0xd8,	0x4f,		/*	 L'ø',	L'Ø',	L'O',	 */
   1540 	0xf9,	0xd9,	0x55,		/*	 L'ù',	L'Ù',	L'U',	 */
   1541 	0xfa,	0xda,	0x55,		/*	 L'ú',	L'Ú',	L'U',	 */
   1542 	0xfb,	0xdb,	0x55,		/*	 L'û',	L'Û',	L'U',	 */
   1543 	0xfc,	0xdc,	0x55,		/*	 L'ü',	L'Ü',	L'U',	 */
   1544 	0xe6,	0xc6,	0x41,		/*	 L'æ',	L'Æ',	L'A',	 */
   1545 	0xf0,	0xd0,	0x44,		/*	 L'ð',	L'Ð',	L'D',	 */
   1546 	0xf1,	0xd1,	0x4e,		/*	 L'ñ',	L'Ñ',	L'N',	 */
   1547 	0xfd,	0xdd,	0x59,		/*	 L'ý',	L'Ý',	L'Y',	 */
   1548 	0xe7,	0xc7,	0x43,		/*	 L'ç',	L'Ç',	L'C',	 */
   1549 	0,
   1550 };
   1551 
   1552 Rune LJAN[] = { 'J', 'A', 'N', 0 };
   1553 Rune LFEB[] = { 'F', 'E', 'B', 0 };
   1554 Rune LMAR[] = { 'M', 'A', 'R', 0 };
   1555 Rune LAPR[] = { 'A', 'P', 'R', 0 };
   1556 Rune LMAY[] = { 'M', 'A', 'Y', 0 };
   1557 Rune LJUN[] = { 'J', 'U', 'N', 0 };
   1558 Rune LJUL[] = { 'J', 'U', 'L', 0 };
   1559 Rune LAUG[] = { 'A', 'U', 'G', 0 };
   1560 Rune LSEP[] = { 'S', 'E', 'P', 0 };
   1561 Rune LOCT[] = { 'O', 'C', 'T', 0 };
   1562 Rune LNOV[] = { 'N', 'O', 'V', 0 };
   1563 Rune LDEC[] = { 'D', 'E', 'C', 0 };
   1564 
   1565 Rune*	month[12] =
   1566 {
   1567 	LJAN,
   1568 	LFEB,
   1569 	LMAR,
   1570 	LAPR,
   1571 	LMAY,
   1572 	LJUN,
   1573 	LJUL,
   1574 	LAUG,
   1575 	LSEP,
   1576 	LOCT,
   1577 	LNOV,
   1578 	LDEC,
   1579 };
   1580 
   1581 /************** radix sort ***********/
   1582 
   1583 enum
   1584 {
   1585 	Threshold	= 14
   1586 };
   1587 
   1588 void	rsort4(Key***, ulong, int);
   1589 void	bsort4(Key***, ulong, int);
   1590 
   1591 void
   1592 sort4(void *a, ulong n)
   1593 {
   1594 	if(n > Threshold)
   1595 		rsort4((Key***)a, n, 0);
   1596 	else
   1597 		bsort4((Key***)a, n, 0);
   1598 }
   1599 
   1600 void
   1601 rsort4(Key ***a, ulong n, int b)
   1602 {
   1603 	Key ***ea, ***t, ***u, **t1, **u1, *k;
   1604 	Key ***part[257];
   1605 	static long count[257];
   1606 	long clist[257+257], *cp, *cp1;
   1607 	int c, lowc, higc;
   1608 
   1609 	/*
   1610 	 * pass 1 over all keys,
   1611 	 * count the number of each key[b].
   1612 	 * find low count and high count.
   1613 	 */
   1614 	lowc = 256;
   1615 	higc = 0;
   1616 	ea = a+n;
   1617 	for(t=a; t<ea; t++) {
   1618 		k = **t;
   1619 		n = k->klen;
   1620 		if(b >= n) {
   1621 			count[256]++;
   1622 			continue;
   1623 		}
   1624 		c = k->key[b];
   1625 		n = count[c]++;
   1626 		if(n == 0) {
   1627 			if(c < lowc)
   1628 				lowc = c;
   1629 			if(c > higc)
   1630 				higc = c;
   1631 		}
   1632 	}
   1633 
   1634 	/*
   1635 	 * pass 2 over all counts,
   1636 	 * put partition pointers in part[c].
   1637 	 * save compacted indexes and counts
   1638 	 * in clist[].
   1639 	 */
   1640 	t = a;
   1641 	n = count[256];
   1642 	clist[0] = n;
   1643 	part[256] = t;
   1644 	t += n;
   1645 
   1646 	cp1 = clist+1;
   1647 	cp = count+lowc;
   1648 	for(c=lowc; c<=higc; c++,cp++) {
   1649 		n = *cp;
   1650 		if(n) {
   1651 			cp1[0] = n;
   1652 			cp1[1] = c;
   1653 			cp1 += 2;
   1654 			part[c] = t;
   1655 			t += n;
   1656 		}
   1657 	}
   1658 	*cp1 = 0;
   1659 
   1660 	/*
   1661 	 * pass 3 over all counts.
   1662 	 * chase lowest pointer in each partition
   1663 	 * around a permutation until it comes
   1664 	 * back and is stored where it started.
   1665 	 * static array, count[], should be
   1666 	 * reduced to zero entries except maybe
   1667 	 * count[256].
   1668 	 */
   1669 	for(cp1=clist+1; cp1[0]; cp1+=2) {
   1670 		c = cp1[1];
   1671 		cp = count+c;
   1672 		while(*cp) {
   1673 			t1 = *part[c];
   1674 			for(;;) {
   1675 				k = *t1;
   1676 				n = 256;
   1677 				if(b < k->klen)
   1678 					n = k->key[b];
   1679 				u = part[n]++;
   1680 				count[n]--;
   1681 				u1 = *u;
   1682 				*u = t1;
   1683 				if(n == c)
   1684 					break;
   1685 				t1 = u1;
   1686 			}
   1687 		}
   1688 	}
   1689 
   1690 	/*
   1691 	 * pass 4 over all partitions.
   1692 	 * call recursively.
   1693 	 */
   1694 	b++;
   1695 	t = a + clist[0];
   1696 	count[256] = 0;
   1697 	for(cp1=clist+1; n=cp1[0]; cp1+=2) {
   1698 		if(n > Threshold)
   1699 			rsort4(t, n, b);
   1700 		else
   1701 		if(n > 1)
   1702 			bsort4(t, n, b);
   1703 		t += n;
   1704 	}
   1705 }
   1706 
   1707 /*
   1708  * bubble sort to pick up
   1709  * the pieces.
   1710  */
   1711 void
   1712 bsort4(Key ***a, ulong n, int b)
   1713 {
   1714 	Key ***i, ***j, ***k, ***l, **t;
   1715 	Key *ka, *kb;
   1716 	int n1, n2;
   1717 
   1718 	l = a+n;
   1719 	j = a;
   1720 
   1721 loop:
   1722 	i = j;
   1723 	j++;
   1724 	if(j >= l)
   1725 		return;
   1726 
   1727 	ka = **i;
   1728 	kb = **j;
   1729 	n1 = ka->klen - b;
   1730 	n2 = kb->klen - b;
   1731 	if(n1 > n2)
   1732 		n1 = n2;
   1733 	if(n1 <= 0)
   1734 		goto loop;
   1735 	n2 = ka->key[b] - kb->key[b];
   1736 	if(n2 == 0)
   1737 		n2 = memcmp(ka->key+b, kb->key+b, n1);
   1738 	if(n2 <= 0)
   1739 		goto loop;
   1740 
   1741 	for(;;) {
   1742 		k = i+1;
   1743 
   1744 		t = *k;
   1745 		*k = *i;
   1746 		*i = t;
   1747 
   1748 		if(i <= a)
   1749 			goto loop;
   1750 
   1751 		i--;
   1752 		ka = **i;
   1753 		kb = *t;
   1754 		n1 = ka->klen - b;
   1755 		n2 = kb->klen - b;
   1756 		if(n1 > n2)
   1757 			n1 = n2;
   1758 		if(n1 <= 0)
   1759 			goto loop;
   1760 		n2 = ka->key[b] - kb->key[b];
   1761 		if(n2 == 0)
   1762 			n2 = memcmp(ka->key+b, kb->key+b, n1);
   1763 		if(n2 <= 0)
   1764 			goto loop;
   1765 	}
   1766 }