uniq.c (2526B)
1 /* See LICENSE file for copyright and license details. */ 2 #include <ctype.h> 3 #include <stdio.h> 4 #include <stdlib.h> 5 #include <string.h> 6 7 #include "text.h" 8 #include "util.h" 9 10 static const char *countfmt = ""; 11 static int dflag = 0; 12 static int uflag = 0; 13 static int fskip = 0; 14 static int sskip = 0; 15 16 static struct line prevl; 17 static ssize_t prevoff = -1; 18 static long prevlinecount = 0; 19 20 static size_t 21 uniqskip(struct line *l) 22 { 23 size_t i; 24 int f = fskip, s = sskip; 25 26 for (i = 0; i < l->len && f; --f) { 27 while (isblank(l->data[i])) 28 i++; 29 while (i < l->len && !isblank(l->data[i])) 30 i++; 31 } 32 for (; s && i < l->len && l->data[i] != '\n'; --s, i++) 33 ; 34 35 return i; 36 } 37 38 static void 39 uniqline(FILE *ofp, struct line *l) 40 { 41 size_t loff; 42 43 if (l) { 44 loff = uniqskip(l); 45 46 if (prevoff >= 0 && (l->len - loff) == (prevl.len - prevoff) && 47 !memcmp(l->data + loff, prevl.data + prevoff, l->len - loff)) { 48 ++prevlinecount; 49 return; 50 } 51 } 52 53 if (prevoff >= 0) { 54 if ((prevlinecount == 1 && !dflag) || 55 (prevlinecount != 1 && !uflag)) { 56 if (*countfmt) 57 fprintf(ofp, countfmt, prevlinecount); 58 fwrite(prevl.data, 1, prevl.len, ofp); 59 } 60 prevoff = -1; 61 } 62 63 if (l) { 64 if (!prevl.data || l->len >= prevl.len) { 65 prevl.data = erealloc(prevl.data, l->len); 66 } 67 prevl.len = l->len; 68 memcpy(prevl.data, l->data, prevl.len); 69 prevoff = loff; 70 } 71 prevlinecount = 1; 72 } 73 74 static void 75 uniq(FILE *fp, FILE *ofp) 76 { 77 static struct line line; 78 static size_t size; 79 ssize_t len; 80 81 while ((len = getline(&line.data, &size, fp)) > 0) { 82 line.len = len; 83 uniqline(ofp, &line); 84 } 85 } 86 87 static void 88 uniqfinish(FILE *ofp) 89 { 90 uniqline(ofp, NULL); 91 } 92 93 static void 94 usage(void) 95 { 96 eprintf("usage: %s [-c] [-d | -u] [-f fields] [-s chars]" 97 " [input [output]]\n", argv0); 98 } 99 100 int 101 main(int argc, char *argv[]) 102 { 103 FILE *fp[2] = { stdin, stdout }; 104 int ret = 0, i; 105 char *fname[2] = { "<stdin>", "<stdout>" }; 106 107 ARGBEGIN { 108 case 'c': 109 countfmt = "%7ld "; 110 break; 111 case 'd': 112 dflag = 1; 113 break; 114 case 'u': 115 uflag = 1; 116 break; 117 case 'f': 118 fskip = estrtonum(EARGF(usage()), 0, INT_MAX); 119 break; 120 case 's': 121 sskip = estrtonum(EARGF(usage()), 0, INT_MAX); 122 break; 123 default: 124 usage(); 125 } ARGEND 126 127 if (argc > 2) 128 usage(); 129 130 for (i = 0; i < argc; i++) { 131 if (strcmp(argv[i], "-")) { 132 fname[i] = argv[i]; 133 if (!(fp[i] = fopen(argv[i], (i == 0) ? "r" : "w"))) 134 eprintf("fopen %s:", argv[i]); 135 } 136 } 137 138 uniq(fp[0], fp[1]); 139 uniqfinish(fp[1]); 140 141 ret |= fshut(fp[0], fname[0]) | fshut(fp[1], fname[1]); 142 143 return ret; 144 }