uniq.c (2243B)
1 /* 2 * Deal with duplicated lines in a file 3 */ 4 #include <u.h> 5 #include <libc.h> 6 #include <bio.h> 7 #include <ctype.h> 8 9 #define SIZE 8000 10 11 int fields = 0; 12 int letters = 0; 13 int linec = 0; 14 char mode; 15 int uniq; 16 char *b1, *b2; 17 long bsize; 18 Biobuf fin; 19 Biobuf fout; 20 21 int gline(char *buf); 22 void pline(char *buf); 23 int equal(char *b1, char *b2); 24 char* skip(char *s); 25 26 void 27 main(int argc, char *argv[]) 28 { 29 int f; 30 31 bsize = SIZE; 32 b1 = malloc(bsize); 33 b2 = malloc(bsize); 34 f = 0; 35 while(argc > 1) { 36 if(*argv[1] == '-') { 37 if(isdigit((uchar)argv[1][1])) 38 fields = atoi(&argv[1][1]); 39 else 40 mode = argv[1][1]; 41 argc--; 42 argv++; 43 continue; 44 } 45 if(*argv[1] == '+') { 46 letters = atoi(&argv[1][1]); 47 argc--; 48 argv++; 49 continue; 50 } 51 f = open(argv[1], 0); 52 if(f < 0) { 53 fprint(2, "cannot open %s\n", argv[1]); 54 exits("open"); 55 } 56 break; 57 } 58 if(argc > 2) { 59 fprint(2, "unexpected argument %s\n", argv[2]); 60 exits("arg"); 61 } 62 Binit(&fin, f, OREAD); 63 Binit(&fout, 1, OWRITE); 64 65 if(gline(b1)) 66 exits(0); 67 for(;;) { 68 linec++; 69 if(gline(b2)) { 70 pline(b1); 71 exits(0); 72 } 73 if(!equal(b1, b2)) { 74 pline(b1); 75 linec = 0; 76 do { 77 linec++; 78 if(gline(b1)) { 79 pline(b2); 80 exits(0); 81 } 82 } while(equal(b2, b1)); 83 pline(b2); 84 linec = 0; 85 } 86 } 87 } 88 89 int 90 gline(char *buf) 91 { 92 char *p; 93 94 p = Brdline(&fin, '\n'); 95 if(p == 0) 96 return 1; 97 if(fin.rdline >= bsize-1) { 98 fprint(2, "line too long\n"); 99 exits("too long"); 100 } 101 memmove(buf, p, fin.rdline); 102 buf[fin.rdline-1] = 0; 103 return 0; 104 } 105 106 void 107 pline(char *buf) 108 { 109 110 switch(mode) { 111 112 case 'u': 113 if(uniq) { 114 uniq = 0; 115 return; 116 } 117 break; 118 119 case 'd': 120 if(uniq) 121 break; 122 return; 123 124 case 'c': 125 Bprint(&fout, "%4d ", linec); 126 } 127 uniq = 0; 128 Bprint(&fout, "%s\n", buf); 129 } 130 131 int 132 equal(char *b1, char *b2) 133 { 134 char c; 135 136 if(fields || letters) { 137 b1 = skip(b1); 138 b2 = skip(b2); 139 } 140 for(;;) { 141 c = *b1++; 142 if(c != *b2++) { 143 if(c == 0 && mode == 's') 144 return 1; 145 return 0; 146 } 147 if(c == 0) { 148 uniq++; 149 return 1; 150 } 151 } 152 } 153 154 char* 155 skip(char *s) 156 { 157 int nf, nl; 158 159 nf = nl = 0; 160 while(nf++ < fields) { 161 while(*s == ' ' || *s == '\t') 162 s++; 163 while(!(*s == ' ' || *s == '\t' || *s == 0) ) 164 s++; 165 } 166 while(nl++ < letters && *s != 0) 167 s++; 168 return s; 169 }