9base

revived minimalist port of Plan 9 userland to Unix
git clone git://git.suckless.org/9base
Log | Files | Refs | README | LICENSE

uniq.c (2243B)


      1 /*
      2  * Deal with duplicated lines in a file
      3  */
      4 #include <u.h>
      5 #include <libc.h>
      6 #include <bio.h>
      7 #include <ctype.h>
      8 
      9 #define	SIZE	8000
     10 
     11 int	fields	= 0;
     12 int	letters	= 0;
     13 int	linec	= 0;
     14 char	mode;
     15 int	uniq;
     16 char	*b1, *b2;
     17 long	bsize;
     18 Biobuf	fin;
     19 Biobuf	fout;
     20 
     21 int	gline(char *buf);
     22 void	pline(char *buf);
     23 int	equal(char *b1, char *b2);
     24 char*	skip(char *s);
     25 
     26 void
     27 main(int argc, char *argv[])
     28 {
     29 	int f;
     30 
     31 	bsize = SIZE;
     32 	b1 = malloc(bsize);
     33 	b2 = malloc(bsize);
     34 	f = 0;
     35 	while(argc > 1) {
     36 		if(*argv[1] == '-') {
     37 			if(isdigit((uchar)argv[1][1]))
     38 				fields = atoi(&argv[1][1]);
     39 			else
     40 				mode = argv[1][1];
     41 			argc--;
     42 			argv++;
     43 			continue;
     44 		}
     45 		if(*argv[1] == '+') {
     46 			letters = atoi(&argv[1][1]);
     47 			argc--;
     48 			argv++;
     49 			continue;
     50 		}
     51 		f = open(argv[1], 0);
     52 		if(f < 0) {
     53 			fprint(2, "cannot open %s\n", argv[1]);
     54 			exits("open");
     55 		}
     56 		break;
     57 	}
     58 	if(argc > 2) {
     59 		fprint(2, "unexpected argument %s\n", argv[2]);
     60 		exits("arg");
     61 	}
     62 	Binit(&fin, f, OREAD);
     63 	Binit(&fout, 1, OWRITE);
     64 
     65 	if(gline(b1))
     66 		exits(0);
     67 	for(;;) {
     68 		linec++;
     69 		if(gline(b2)) {
     70 			pline(b1);
     71 			exits(0);
     72 		}
     73 		if(!equal(b1, b2)) {
     74 			pline(b1);
     75 			linec = 0;
     76 			do {
     77 				linec++;
     78 				if(gline(b1)) {
     79 					pline(b2);
     80 					exits(0);
     81 				}
     82 			} while(equal(b2, b1));
     83 			pline(b2);
     84 			linec = 0;
     85 		}
     86 	}
     87 }
     88 
     89 int
     90 gline(char *buf)
     91 {
     92 	char *p;
     93 
     94 	p = Brdline(&fin, '\n');
     95 	if(p == 0)
     96 		return 1;
     97 	if(fin.rdline >= bsize-1) {
     98 		fprint(2, "line too long\n");
     99 		exits("too long");
    100 	}
    101 	memmove(buf, p, fin.rdline);
    102 	buf[fin.rdline-1] = 0;
    103 	return 0;
    104 }
    105 
    106 void
    107 pline(char *buf)
    108 {
    109 
    110 	switch(mode) {
    111 
    112 	case 'u':
    113 		if(uniq) {
    114 			uniq = 0;
    115 			return;
    116 		}
    117 		break;
    118 
    119 	case 'd':
    120 		if(uniq)
    121 			break;
    122 		return;
    123 
    124 	case 'c':
    125 		Bprint(&fout, "%4d ", linec);
    126 	}
    127 	uniq = 0;
    128 	Bprint(&fout, "%s\n", buf);
    129 }
    130 
    131 int
    132 equal(char *b1, char *b2)
    133 {
    134 	char c;
    135 
    136 	if(fields || letters) {
    137 		b1 = skip(b1);
    138 		b2 = skip(b2);
    139 	}
    140 	for(;;) {
    141 		c = *b1++;
    142 		if(c != *b2++) {
    143 			if(c == 0 && mode == 's')
    144 				return 1;
    145 			return 0;
    146 		}
    147 		if(c == 0) {
    148 			uniq++;
    149 			return 1;
    150 		}
    151 	}
    152 }
    153 
    154 char*
    155 skip(char *s)
    156 {
    157 	int nf, nl;
    158 
    159 	nf = nl = 0;
    160 	while(nf++ < fields) {
    161 		while(*s == ' ' || *s == '\t')
    162 			s++;
    163 		while(!(*s == ' ' || *s == '\t' || *s == 0) ) 
    164 			s++;
    165 	}
    166 	while(nl++ < letters && *s != 0) 
    167 			s++;
    168 	return s;
    169 }