sbase

suckless unix tools
git clone git://git.suckless.org/sbase
Log | Files | Refs | README | LICENSE

sort.c (9340B)


      1 /* See LICENSE file for copyright and license details. */
      2 #include <ctype.h>
      3 #include <stdio.h>
      4 #include <stdlib.h>
      5 #include <string.h>
      6 
      7 #include "queue.h"
      8 #include "text.h"
      9 #include "utf.h"
     10 #include "util.h"
     11 
     12 struct keydef {
     13 	int start_column;
     14 	int end_column;
     15 	int start_char;
     16 	int end_char;
     17 	int flags;
     18 	TAILQ_ENTRY(keydef) entry;
     19 };
     20 
     21 struct column {
     22 	struct line line;
     23 	size_t cap;
     24 };
     25 
     26 enum {
     27 	MOD_N      = 1 << 0,
     28 	MOD_STARTB = 1 << 1,
     29 	MOD_ENDB   = 1 << 2,
     30 	MOD_R      = 1 << 3,
     31 	MOD_D      = 1 << 4,
     32 	MOD_F      = 1 << 5,
     33 	MOD_I      = 1 << 6,
     34 };
     35 
     36 static TAILQ_HEAD(kdhead, keydef) kdhead = TAILQ_HEAD_INITIALIZER(kdhead);
     37 
     38 static int Cflag = 0, cflag = 0, uflag = 0;
     39 static char *fieldsep = NULL;
     40 static size_t fieldseplen = 0;
     41 static struct column col1, col2;
     42 
     43 static void
     44 skipblank(struct line *a)
     45 {
     46 	while (a->len && (*(a->data) == ' ' || *(a->data) == '\t')) {
     47 		a->data++;
     48 		a->len--;
     49 	}
     50 }
     51 
     52 static void
     53 skipnonblank(struct line *a)
     54 {
     55 	while (a->len && (*(a->data) != '\n' && *(a->data) != ' ' &&
     56 	                  *(a->data) != '\t')) {
     57 		a->data++;
     58 		a->len--;
     59 	}
     60 }
     61 
     62 static void
     63 skipcolumn(struct line *a, int skip_to_next_col)
     64 {
     65 	char *s;
     66 
     67 	if (fieldsep) {
     68 		if ((s = memmem(a->data, a->len, fieldsep, fieldseplen))) {
     69 			if (skip_to_next_col)
     70 				s += fieldseplen;
     71 			a->len -= s - a->data;
     72 			a->data = s;
     73 		} else {
     74 			a->data += a->len - 1;
     75 			a->len = 1;
     76 		}
     77 	} else {
     78 		skipblank(a);
     79 		skipnonblank(a);
     80 	}
     81 }
     82 
     83 static void
     84 columns(struct line *line, const struct keydef *kd, struct column *col)
     85 {
     86 	Rune r;
     87 	struct line start, end;
     88 	size_t utflen, rlen;
     89 	int i;
     90 
     91 	start.data = line->data;
     92 	start.len = line->len;
     93 	for (i = 1; i < kd->start_column; i++)
     94 		skipcolumn(&start, 1);
     95 	if (kd->flags & MOD_STARTB)
     96 		skipblank(&start);
     97 	for (utflen = 0; start.len > 1 && utflen < kd->start_char - 1;) {
     98 		rlen = chartorune(&r, start.data);
     99 		start.data += rlen;
    100 		start.len -= rlen;
    101 		utflen++;
    102 	}
    103 
    104 	end.data = line->data;
    105 	end.len = line->len;
    106 	if (kd->end_column) {
    107 		for (i = 1; i < kd->end_column; i++)
    108 			skipcolumn(&end, 1);
    109 		if (kd->flags & MOD_ENDB)
    110 			skipblank(&end);
    111 		if (kd->end_char) {
    112 			for (utflen = 0; end.len > 1 && utflen < kd->end_char;) {
    113 				rlen = chartorune(&r, end.data);
    114 				end.data += rlen;
    115 				end.len -= rlen;
    116 				utflen++;
    117 			}
    118 		} else {
    119 			skipcolumn(&end, 0);
    120 		}
    121 	} else {
    122 		end.data += end.len - 1;
    123 		end.len = 1;
    124 	}
    125 	col->line.len = MAX(0, end.data - start.data);
    126 	col->line.data = start.data;
    127 	col->line.data[col->line.len] = '\0';
    128 }
    129 
    130 static int
    131 skipmodcmp(struct line *a, struct line *b, int flags)
    132 {
    133 	Rune r1, r2;
    134 	size_t offa = 0, offb = 0;
    135 
    136 	do {
    137 		offa += chartorune(&r1, a->data + offa);
    138 		offb += chartorune(&r2, b->data + offb);
    139 
    140 		if (flags & MOD_D && flags & MOD_I) {
    141 			while (offa < a->len && ((!isblankrune(r1) &&
    142 			       !isalnumrune(r1)) || (!isprintrune(r1))))
    143 				offa += chartorune(&r1, a->data + offa);
    144 			while (offb < b->len && ((!isblankrune(r2) &&
    145 			       !isalnumrune(r2)) || (!isprintrune(r2))))
    146 				offb += chartorune(&r2, b->data + offb);
    147 		}
    148 		else if (flags & MOD_D) {
    149 			while (offa < a->len && !isblankrune(r1) &&
    150 			       !isalnumrune(r1))
    151 				offa += chartorune(&r1, a->data + offa);
    152 			while (offb < b->len && !isblankrune(r2) &&
    153 			       !isalnumrune(r2))
    154 				offb += chartorune(&r2, b->data + offb);
    155 		}
    156 		else if (flags & MOD_I) {
    157 			while (offa < a->len && !isprintrune(r1))
    158 				offa += chartorune(&r1, a->data + offa);
    159 			while (offb < b->len && !isprintrune(r2))
    160 				offb += chartorune(&r2, b->data + offb);
    161 		}
    162 		if (flags & MOD_F) {
    163 			r1 = toupperrune(r1);
    164 			r2 = toupperrune(r2);
    165 		}
    166 	} while (r1 && r1 == r2);
    167 
    168 	return r1 - r2;
    169 }
    170 
    171 static int
    172 slinecmp(struct line *a, struct line *b)
    173 {
    174 	int res = 0;
    175 	double x, y;
    176 	struct keydef *kd;
    177 
    178 	TAILQ_FOREACH(kd, &kdhead, entry) {
    179 		columns(a, kd, &col1);
    180 		columns(b, kd, &col2);
    181 
    182 		/* if -u is given, don't use default key definition
    183 		 * unless it is the only one */
    184 		if (uflag && kd == TAILQ_LAST(&kdhead, kdhead) &&
    185 		    TAILQ_LAST(&kdhead, kdhead) != TAILQ_FIRST(&kdhead)) {
    186 			res = 0;
    187 		} else if (kd->flags & MOD_N) {
    188 			x = strtod(col1.line.data, NULL);
    189 			y = strtod(col2.line.data, NULL);
    190 			res = (x < y) ? -1 : (x > y);
    191 		} else if (kd->flags & (MOD_D | MOD_F | MOD_I)) {
    192 			res = skipmodcmp(&col1.line, &col2.line, kd->flags);
    193 		} else {
    194 			res = linecmp(&col1.line, &col2.line);
    195 		}
    196 
    197 		if (kd->flags & MOD_R)
    198 			res = -res;
    199 		if (res)
    200 			break;
    201 	}
    202 
    203 	return res;
    204 }
    205 
    206 static int
    207 check(FILE *fp, const char *fname)
    208 {
    209 	static struct line prev, cur, tmp;
    210 	static size_t prevsize, cursize, tmpsize;
    211 	ssize_t len;
    212 
    213 	if (!prev.data) {
    214 		if ((len = getline(&prev.data, &prevsize, fp)) < 0)
    215 			eprintf("getline:");
    216 		prev.len = len;
    217 	}
    218 	while ((len = getline(&cur.data, &cursize, fp)) > 0) {
    219 		cur.len = len;
    220 		if (uflag > slinecmp(&cur, &prev)) {
    221 			if (!Cflag) {
    222 				weprintf("disorder %s: ", fname);
    223 				fwrite(cur.data, 1, cur.len, stderr);
    224 			}
    225 			return 1;
    226 		}
    227 		tmp = cur;
    228 		tmpsize = cursize;
    229 		cur = prev;
    230 		cursize = prevsize;
    231 		prev = tmp;
    232 		prevsize = tmpsize;
    233 	}
    234 
    235 	return 0;
    236 }
    237 
    238 static int
    239 parse_flags(char **s, int *flags, int bflag)
    240 {
    241 	while (isalpha((int)**s)) {
    242 		switch (*((*s)++)) {
    243 		case 'b':
    244 			*flags |= bflag;
    245 			break;
    246 		case 'd':
    247 			*flags |= MOD_D;
    248 			break;
    249 		case 'f':
    250 			*flags |= MOD_F;
    251 			break;
    252 		case 'i':
    253 			*flags |= MOD_I;
    254 			break;
    255 		case 'n':
    256 			*flags |= MOD_N;
    257 			break;
    258 		case 'r':
    259 			*flags |= MOD_R;
    260 			break;
    261 		default:
    262 			return -1;
    263 		}
    264 	}
    265 
    266 	return 0;
    267 }
    268 
    269 static void
    270 addkeydef(char *kdstr, int flags)
    271 {
    272 	struct keydef *kd;
    273 
    274 	kd = enmalloc(2, sizeof(*kd));
    275 
    276 	/* parse key definition kdstr with format
    277 	 * start_column[.start_char][flags][,end_column[.end_char][flags]]
    278 	 */
    279 	kd->start_column = 1;
    280 	kd->start_char = 1;
    281 	kd->end_column = 0; /* 0 means end of line */
    282 	kd->end_char = 0;   /* 0 means end of column */
    283 	kd->flags = flags;
    284 
    285 	if ((kd->start_column = strtol(kdstr, &kdstr, 10)) < 1)
    286 		enprintf(2, "invalid start column in key definition\n");
    287 
    288 	if (*kdstr == '.') {
    289 		if ((kd->start_char = strtol(kdstr + 1, &kdstr, 10)) < 1)
    290 			enprintf(2, "invalid start character in key "
    291 			         "definition\n");
    292 	}
    293 	if (parse_flags(&kdstr, &kd->flags, MOD_STARTB) < 0)
    294 		enprintf(2, "invalid start flags in key definition\n");
    295 
    296 	if (*kdstr == ',') {
    297 		if ((kd->end_column = strtol(kdstr + 1, &kdstr, 10)) < 0)
    298 			enprintf(2, "invalid end column in key definition\n");
    299 		if (*kdstr == '.') {
    300 			if ((kd->end_char = strtol(kdstr + 1, &kdstr, 10)) < 0)
    301 				enprintf(2, "invalid end character in key "
    302 				         "definition\n");
    303 		}
    304 		if (parse_flags(&kdstr, &kd->flags, MOD_ENDB) < 0)
    305 			enprintf(2, "invalid end flags in key definition\n");
    306 	}
    307 
    308 	if (*kdstr != '\0')
    309 		enprintf(2, "invalid key definition\n");
    310 
    311 	TAILQ_INSERT_TAIL(&kdhead, kd, entry);
    312 }
    313 
    314 static void
    315 usage(void)
    316 {
    317 	enprintf(2, "usage: %s [-Cbcdfimnru] [-o outfile] [-t delim] "
    318 	         "[-k def]... [file ...]\n", argv0);
    319 }
    320 
    321 int
    322 main(int argc, char *argv[])
    323 {
    324 	FILE *fp, *ofp = stdout;
    325 	struct linebuf linebuf = EMPTY_LINEBUF;
    326 	size_t i;
    327 	int global_flags = 0, ret = 0;
    328 	char *outfile = NULL;
    329 
    330 	ARGBEGIN {
    331 	case 'C':
    332 		Cflag = 1;
    333 		break;
    334 	case 'b':
    335 		global_flags |= MOD_STARTB | MOD_ENDB;
    336 		break;
    337 	case 'c':
    338 		cflag = 1;
    339 		break;
    340 	case 'd':
    341 		global_flags |= MOD_D;
    342 		break;
    343 	case 'f':
    344 		global_flags |= MOD_F;
    345 		break;
    346 	case 'i':
    347 		global_flags |= MOD_I;
    348 		break;
    349 	case 'k':
    350 		addkeydef(EARGF(usage()), global_flags);
    351 		break;
    352 	case 'm':
    353 		/* more or less for free, but for performance-reasons,
    354 		 * we should keep this flag in mind and maybe some later
    355 		 * day implement it properly so we don't run out of memory
    356 		 * while merging large sorted files.
    357 		 */
    358 		break;
    359 	case 'n':
    360 		global_flags |= MOD_N;
    361 		break;
    362 	case 'o':
    363 		outfile = EARGF(usage());
    364 		break;
    365 	case 'r':
    366 		global_flags |= MOD_R;
    367 		break;
    368 	case 't':
    369 		fieldsep = EARGF(usage());
    370 		if (!*fieldsep)
    371 			eprintf("empty delimiter\n");
    372 		fieldseplen = unescape(fieldsep);
    373 		break;
    374 	case 'u':
    375 		uflag = 1;
    376 		break;
    377 	default:
    378 		usage();
    379 	} ARGEND
    380 
    381 	/* -b shall only apply to custom key definitions */
    382 	if (TAILQ_EMPTY(&kdhead) && global_flags)
    383 		addkeydef("1", global_flags & ~(MOD_STARTB | MOD_ENDB));
    384 	if (TAILQ_EMPTY(&kdhead) || (!Cflag && !cflag))
    385 		addkeydef("1", global_flags & MOD_R);
    386 
    387 	if (!argc) {
    388 		if (Cflag || cflag) {
    389 			if (check(stdin, "<stdin>") && !ret)
    390 				ret = 1;
    391 		} else {
    392 			getlines(stdin, &linebuf);
    393 		}
    394 	} else for (; *argv; argc--, argv++) {
    395 		if (!strcmp(*argv, "-")) {
    396 			*argv = "<stdin>";
    397 			fp = stdin;
    398 		} else if (!(fp = fopen(*argv, "r"))) {
    399 			enprintf(2, "fopen %s:", *argv);
    400 			continue;
    401 		}
    402 		if (Cflag || cflag) {
    403 			if (check(fp, *argv) && !ret)
    404 				ret = 1;
    405 		} else {
    406 			getlines(fp, &linebuf);
    407 		}
    408 		if (fp != stdin && fshut(fp, *argv))
    409 			ret = 2;
    410 	}
    411 
    412 	if (!Cflag && !cflag) {
    413 		if (outfile && !(ofp = fopen(outfile, "w")))
    414 			eprintf("fopen %s:", outfile);
    415 
    416 		qsort(linebuf.lines, linebuf.nlines, sizeof(*linebuf.lines),
    417 				(int (*)(const void *, const void *))slinecmp);
    418 
    419 		for (i = 0; i < linebuf.nlines; i++) {
    420 			if (!uflag || i == 0 ||
    421 			    slinecmp(&linebuf.lines[i], &linebuf.lines[i - 1])) {
    422 				linebuf.lines[i].data[linebuf.lines[i].len-1] = '\n';
    423 				fwrite(linebuf.lines[i].data, 1,
    424 				       linebuf.lines[i].len, ofp);
    425 			}
    426 		}
    427 	}
    428 
    429 	if (fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>") |
    430 	    fshut(stderr, "<stderr>"))
    431 		ret = 2;
    432 
    433 	return ret;
    434 }