sbase

suckless unix tools
git clone git://git.suckless.org/sbase
Log | Files | Refs | README | LICENSE

sort.c (9436B)


      1 /* See LICENSE file for copyright and license details. */
      2 #include <ctype.h>
      3 #include <stdio.h>
      4 #include <stdlib.h>
      5 #include <string.h>
      6 
      7 #include "queue.h"
      8 #include "text.h"
      9 #include "utf.h"
     10 #include "util.h"
     11 
     12 struct keydef {
     13 	int start_column;
     14 	int end_column;
     15 	int start_char;
     16 	int end_char;
     17 	int flags;
     18 	TAILQ_ENTRY(keydef) entry;
     19 };
     20 
     21 struct column {
     22 	struct line line;
     23 	size_t cap;
     24 };
     25 
     26 enum {
     27 	MOD_N      = 1 << 0,
     28 	MOD_STARTB = 1 << 1,
     29 	MOD_ENDB   = 1 << 2,
     30 	MOD_R      = 1 << 3,
     31 	MOD_D      = 1 << 4,
     32 	MOD_F      = 1 << 5,
     33 	MOD_I      = 1 << 6,
     34 };
     35 
     36 static TAILQ_HEAD(kdhead, keydef) kdhead = TAILQ_HEAD_INITIALIZER(kdhead);
     37 
     38 static int Cflag = 0, cflag = 0, uflag = 0;
     39 static char *fieldsep = NULL;
     40 static size_t fieldseplen = 0;
     41 static struct column col1, col2;
     42 
     43 static void
     44 skipblank(struct line *a)
     45 {
     46 	while (a->len && (*(a->data) == ' ' || *(a->data) == '\t')) {
     47 		a->data++;
     48 		a->len--;
     49 	}
     50 }
     51 
     52 static void
     53 skipnonblank(struct line *a)
     54 {
     55 	while (a->len && (*(a->data) != '\n' && *(a->data) != ' ' &&
     56 	                  *(a->data) != '\t')) {
     57 		a->data++;
     58 		a->len--;
     59 	}
     60 }
     61 
     62 static void
     63 skipcolumn(struct line *a, int skip_to_next_col)
     64 {
     65 	char *s;
     66 
     67 	if (fieldsep) {
     68 		if ((s = memmem(a->data, a->len, fieldsep, fieldseplen))) {
     69 			if (skip_to_next_col)
     70 				s += fieldseplen;
     71 			a->len -= s - a->data;
     72 			a->data = s;
     73 		} else {
     74 			a->data += a->len - 1;
     75 			a->len = 1;
     76 		}
     77 	} else {
     78 		skipblank(a);
     79 		skipnonblank(a);
     80 	}
     81 }
     82 
     83 static void
     84 columns(struct line *line, const struct keydef *kd, struct column *col)
     85 {
     86 	Rune r;
     87 	struct line start, end;
     88 	size_t utflen, rlen;
     89 	int i;
     90 
     91 	start.data = line->data;
     92 	start.len = line->len;
     93 	for (i = 1; i < kd->start_column; i++)
     94 		skipcolumn(&start, 1);
     95 	if (kd->flags & MOD_STARTB)
     96 		skipblank(&start);
     97 	for (utflen = 0; start.len > 1 && utflen < kd->start_char - 1;) {
     98 		rlen = chartorune(&r, start.data);
     99 		start.data += rlen;
    100 		start.len -= rlen;
    101 		utflen++;
    102 	}
    103 
    104 	end.data = line->data;
    105 	end.len = line->len;
    106 	if (kd->end_column) {
    107 		for (i = 1; i < kd->end_column; i++)
    108 			skipcolumn(&end, 1);
    109 		if (kd->flags & MOD_ENDB)
    110 			skipblank(&end);
    111 		if (kd->end_char) {
    112 			for (utflen = 0; end.len > 1 && utflen < kd->end_char;) {
    113 				rlen = chartorune(&r, end.data);
    114 				end.data += rlen;
    115 				end.len -= rlen;
    116 				utflen++;
    117 			}
    118 		} else {
    119 			skipcolumn(&end, 0);
    120 		}
    121 	} else {
    122 		end.data += end.len - 1;
    123 		end.len = 1;
    124 	}
    125 	col->line.len = MAX(0, end.data - start.data);
    126 	if (!(col->line.data) || col->cap < col->line.len + 1) {
    127 		free(col->line.data);
    128 		col->line.data = emalloc(col->line.len + 1);
    129 	}
    130 	memcpy(col->line.data, start.data, col->line.len);
    131 	col->line.data[col->line.len] = '\0';
    132 }
    133 
    134 static int
    135 skipmodcmp(struct line *a, struct line *b, int flags)
    136 {
    137 	Rune r1, r2;
    138 	size_t offa = 0, offb = 0;
    139 
    140 	do {
    141 		offa += chartorune(&r1, a->data + offa);
    142 		offb += chartorune(&r2, b->data + offb);
    143 
    144 		if (flags & MOD_D && flags & MOD_I) {
    145 			while (offa < a->len && ((!isblankrune(r1) &&
    146 			       !isalnumrune(r1)) || (!isprintrune(r1))))
    147 				offa += chartorune(&r1, a->data + offa);
    148 			while (offb < b->len && ((!isblankrune(r2) &&
    149 			       !isalnumrune(r2)) || (!isprintrune(r2))))
    150 				offb += chartorune(&r2, b->data + offb);
    151 		}
    152 		else if (flags & MOD_D) {
    153 			while (offa < a->len && !isblankrune(r1) &&
    154 			       !isalnumrune(r1))
    155 				offa += chartorune(&r1, a->data + offa);
    156 			while (offb < b->len && !isblankrune(r2) &&
    157 			       !isalnumrune(r2))
    158 				offb += chartorune(&r2, b->data + offb);
    159 		}
    160 		else if (flags & MOD_I) {
    161 			while (offa < a->len && !isprintrune(r1))
    162 				offa += chartorune(&r1, a->data + offa);
    163 			while (offb < b->len && !isprintrune(r2))
    164 				offb += chartorune(&r2, b->data + offb);
    165 		}
    166 		if (flags & MOD_F) {
    167 			r1 = toupperrune(r1);
    168 			r2 = toupperrune(r2);
    169 		}
    170 	} while (r1 && r1 == r2);
    171 
    172 	return r1 - r2;
    173 }
    174 
    175 static int
    176 slinecmp(struct line *a, struct line *b)
    177 {
    178 	int res = 0;
    179 	double x, y;
    180 	struct keydef *kd;
    181 
    182 	TAILQ_FOREACH(kd, &kdhead, entry) {
    183 		columns(a, kd, &col1);
    184 		columns(b, kd, &col2);
    185 
    186 		/* if -u is given, don't use default key definition
    187 		 * unless it is the only one */
    188 		if (uflag && kd == TAILQ_LAST(&kdhead, kdhead) &&
    189 		    TAILQ_LAST(&kdhead, kdhead) != TAILQ_FIRST(&kdhead)) {
    190 			res = 0;
    191 		} else if (kd->flags & MOD_N) {
    192 			x = strtod(col1.line.data, NULL);
    193 			y = strtod(col2.line.data, NULL);
    194 			res = (x < y) ? -1 : (x > y);
    195 		} else if (kd->flags & (MOD_D | MOD_F | MOD_I)) {
    196 			res = skipmodcmp(&col1.line, &col2.line, kd->flags);
    197 		} else {
    198 			res = linecmp(&col1.line, &col2.line);
    199 		}
    200 
    201 		if (kd->flags & MOD_R)
    202 			res = -res;
    203 		if (res)
    204 			break;
    205 	}
    206 
    207 	return res;
    208 }
    209 
    210 static int
    211 check(FILE *fp, const char *fname)
    212 {
    213 	static struct line prev, cur, tmp;
    214 	static size_t prevsize, cursize, tmpsize;
    215 	ssize_t len;
    216 
    217 	if (!prev.data) {
    218 		if ((len = getline(&prev.data, &prevsize, fp)) < 0)
    219 			eprintf("getline:");
    220 		prev.len = len;
    221 	}
    222 	while ((len = getline(&cur.data, &cursize, fp)) > 0) {
    223 		cur.len = len;
    224 		if (uflag > slinecmp(&cur, &prev)) {
    225 			if (!Cflag) {
    226 				weprintf("disorder %s: ", fname);
    227 				fwrite(cur.data, 1, cur.len, stderr);
    228 			}
    229 			return 1;
    230 		}
    231 		tmp = cur;
    232 		tmpsize = cursize;
    233 		cur = prev;
    234 		cursize = prevsize;
    235 		prev = tmp;
    236 		prevsize = tmpsize;
    237 	}
    238 
    239 	return 0;
    240 }
    241 
    242 static int
    243 parse_flags(char **s, int *flags, int bflag)
    244 {
    245 	while (isalpha((int)**s)) {
    246 		switch (*((*s)++)) {
    247 		case 'b':
    248 			*flags |= bflag;
    249 			break;
    250 		case 'd':
    251 			*flags |= MOD_D;
    252 			break;
    253 		case 'f':
    254 			*flags |= MOD_F;
    255 			break;
    256 		case 'i':
    257 			*flags |= MOD_I;
    258 			break;
    259 		case 'n':
    260 			*flags |= MOD_N;
    261 			break;
    262 		case 'r':
    263 			*flags |= MOD_R;
    264 			break;
    265 		default:
    266 			return -1;
    267 		}
    268 	}
    269 
    270 	return 0;
    271 }
    272 
    273 static void
    274 addkeydef(char *kdstr, int flags)
    275 {
    276 	struct keydef *kd;
    277 
    278 	kd = enmalloc(2, sizeof(*kd));
    279 
    280 	/* parse key definition kdstr with format
    281 	 * start_column[.start_char][flags][,end_column[.end_char][flags]]
    282 	 */
    283 	kd->start_column = 1;
    284 	kd->start_char = 1;
    285 	kd->end_column = 0; /* 0 means end of line */
    286 	kd->end_char = 0;   /* 0 means end of column */
    287 	kd->flags = flags;
    288 
    289 	if ((kd->start_column = strtol(kdstr, &kdstr, 10)) < 1)
    290 		enprintf(2, "invalid start column in key definition\n");
    291 
    292 	if (*kdstr == '.') {
    293 		if ((kd->start_char = strtol(kdstr + 1, &kdstr, 10)) < 1)
    294 			enprintf(2, "invalid start character in key "
    295 			         "definition\n");
    296 	}
    297 	if (parse_flags(&kdstr, &kd->flags, MOD_STARTB) < 0)
    298 		enprintf(2, "invalid start flags in key definition\n");
    299 
    300 	if (*kdstr == ',') {
    301 		if ((kd->end_column = strtol(kdstr + 1, &kdstr, 10)) < 0)
    302 			enprintf(2, "invalid end column in key definition\n");
    303 		if (*kdstr == '.') {
    304 			if ((kd->end_char = strtol(kdstr + 1, &kdstr, 10)) < 0)
    305 				enprintf(2, "invalid end character in key "
    306 				         "definition\n");
    307 		}
    308 		if (parse_flags(&kdstr, &kd->flags, MOD_ENDB) < 0)
    309 			enprintf(2, "invalid end flags in key definition\n");
    310 	}
    311 
    312 	if (*kdstr != '\0')
    313 		enprintf(2, "invalid key definition\n");
    314 
    315 	TAILQ_INSERT_TAIL(&kdhead, kd, entry);
    316 }
    317 
    318 static void
    319 usage(void)
    320 {
    321 	enprintf(2, "usage: %s [-Cbcdfimnru] [-o outfile] [-t delim] "
    322 	         "[-k def]... [file ...]\n", argv0);
    323 }
    324 
    325 int
    326 main(int argc, char *argv[])
    327 {
    328 	FILE *fp, *ofp = stdout;
    329 	struct linebuf linebuf = EMPTY_LINEBUF;
    330 	size_t i;
    331 	int global_flags = 0, ret = 0;
    332 	char *outfile = NULL;
    333 
    334 	ARGBEGIN {
    335 	case 'C':
    336 		Cflag = 1;
    337 		break;
    338 	case 'b':
    339 		global_flags |= MOD_STARTB | MOD_ENDB;
    340 		break;
    341 	case 'c':
    342 		cflag = 1;
    343 		break;
    344 	case 'd':
    345 		global_flags |= MOD_D;
    346 		break;
    347 	case 'f':
    348 		global_flags |= MOD_F;
    349 		break;
    350 	case 'i':
    351 		global_flags |= MOD_I;
    352 		break;
    353 	case 'k':
    354 		addkeydef(EARGF(usage()), global_flags);
    355 		break;
    356 	case 'm':
    357 		/* more or less for free, but for performance-reasons,
    358 		 * we should keep this flag in mind and maybe some later
    359 		 * day implement it properly so we don't run out of memory
    360 		 * while merging large sorted files.
    361 		 */
    362 		break;
    363 	case 'n':
    364 		global_flags |= MOD_N;
    365 		break;
    366 	case 'o':
    367 		outfile = EARGF(usage());
    368 		break;
    369 	case 'r':
    370 		global_flags |= MOD_R;
    371 		break;
    372 	case 't':
    373 		fieldsep = EARGF(usage());
    374 		if (!*fieldsep)
    375 			eprintf("empty delimiter\n");
    376 		fieldseplen = unescape(fieldsep);
    377 		break;
    378 	case 'u':
    379 		uflag = 1;
    380 		break;
    381 	default:
    382 		usage();
    383 	} ARGEND
    384 
    385 	/* -b shall only apply to custom key definitions */
    386 	if (TAILQ_EMPTY(&kdhead) && global_flags)
    387 		addkeydef("1", global_flags & ~(MOD_STARTB | MOD_ENDB));
    388 	if (TAILQ_EMPTY(&kdhead) || (!Cflag && !cflag))
    389 		addkeydef("1", global_flags & MOD_R);
    390 
    391 	if (!argc) {
    392 		if (Cflag || cflag) {
    393 			if (check(stdin, "<stdin>") && !ret)
    394 				ret = 1;
    395 		} else {
    396 			getlines(stdin, &linebuf);
    397 		}
    398 	} else for (; *argv; argc--, argv++) {
    399 		if (!strcmp(*argv, "-")) {
    400 			*argv = "<stdin>";
    401 			fp = stdin;
    402 		} else if (!(fp = fopen(*argv, "r"))) {
    403 			enprintf(2, "fopen %s:", *argv);
    404 			continue;
    405 		}
    406 		if (Cflag || cflag) {
    407 			if (check(fp, *argv) && !ret)
    408 				ret = 1;
    409 		} else {
    410 			getlines(fp, &linebuf);
    411 		}
    412 		if (fp != stdin && fshut(fp, *argv))
    413 			ret = 2;
    414 	}
    415 
    416 	if (!Cflag && !cflag) {
    417 		if (outfile && !(ofp = fopen(outfile, "w")))
    418 			eprintf("fopen %s:", outfile);
    419 
    420 		qsort(linebuf.lines, linebuf.nlines, sizeof(*linebuf.lines),
    421 				(int (*)(const void *, const void *))slinecmp);
    422 
    423 		for (i = 0; i < linebuf.nlines; i++) {
    424 			if (!uflag || i == 0 ||
    425 			    slinecmp(&linebuf.lines[i], &linebuf.lines[i - 1])) {
    426 				fwrite(linebuf.lines[i].data, 1,
    427 				       linebuf.lines[i].len, ofp);
    428 			}
    429 		}
    430 	}
    431 
    432 	if (fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>") |
    433 	    fshut(stderr, "<stderr>"))
    434 		ret = 2;
    435 
    436 	return ret;
    437 }