join.c (7544B)
1 /* join F1 F2 on stuff */ 2 #include <u.h> 3 #include <stdio.h> 4 #include <libc.h> 5 #include <ctype.h> 6 #define F1 0 7 #define F2 1 8 #define F0 3 9 #define NFLD 100 /* max field per line */ 10 #define comp() runecmp(ppi[F1][j1],ppi[F2][j2]) 11 FILE *f[2]; 12 Rune buf[2][BUFSIZ]; /*input lines */ 13 Rune *ppi[2][NFLD+1]; /* pointers to fields in lines */ 14 Rune *s1,*s2; 15 #define j1 joinj1 16 #define j2 joinj2 17 18 int j1 = 1; /* join of this field of file 1 */ 19 int j2 = 1; /* join of this field of file 2 */ 20 int olist[2*NFLD]; /* output these fields */ 21 int olistf[2*NFLD]; /* from these files */ 22 int no; /* number of entries in olist */ 23 Rune sep1 = ' '; /* default field separator */ 24 Rune sep2 = '\t'; 25 char *sepstr=" "; 26 int discard; /* count of truncated lines */ 27 Rune null[BUFSIZ]/* = L""*/; 28 int a1; 29 int a2; 30 31 char *getoptarg(int*, char***); 32 void output(int, int); 33 int input(int); 34 void oparse(char*); 35 void error(char*, char*); 36 void seek1(void), seek2(void); 37 Rune *strtorune(Rune *, char *); 38 39 40 void 41 main(int argc, char **argv) 42 { 43 int i; 44 45 while (argc > 1 && argv[1][0] == '-') { 46 if (argv[1][1] == '\0') 47 break; 48 switch (argv[1][1]) { 49 case '-': 50 argc--; 51 argv++; 52 goto proceed; 53 case 'a': 54 switch(*getoptarg(&argc, &argv)) { 55 case '1': 56 a1++; 57 break; 58 case '2': 59 a2++; 60 break; 61 default: 62 error("incomplete option -a",""); 63 } 64 break; 65 case 'e': 66 strtorune(null, getoptarg(&argc, &argv)); 67 break; 68 case 't': 69 sepstr=getoptarg(&argc, &argv); 70 chartorune(&sep1, sepstr); 71 sep2 = sep1; 72 break; 73 case 'o': 74 if(argv[1][2]!=0 || 75 argc>2 && strchr(argv[2],',')!=0) 76 oparse(getoptarg(&argc, &argv)); 77 else for (no = 0; no<2*NFLD && argc>2; no++){ 78 if (argv[2][0] == '1' && argv[2][1] == '.') { 79 olistf[no] = F1; 80 olist[no] = atoi(&argv[2][2]); 81 } else if (argv[2][0] == '2' && argv[2][1] == '.') { 82 olist[no] = atoi(&argv[2][2]); 83 olistf[no] = F2; 84 } else if (argv[2][0] == '0') 85 olistf[no] = F0; 86 else 87 break; 88 argc--; 89 argv++; 90 } 91 break; 92 case 'j': 93 if(argc <= 2) 94 break; 95 if (argv[1][2] == '1') 96 j1 = atoi(argv[2]); 97 else if (argv[1][2] == '2') 98 j2 = atoi(argv[2]); 99 else 100 j1 = j2 = atoi(argv[2]); 101 argc--; 102 argv++; 103 break; 104 case '1': 105 j1 = atoi(getoptarg(&argc, &argv)); 106 break; 107 case '2': 108 j2 = atoi(getoptarg(&argc, &argv)); 109 break; 110 } 111 argc--; 112 argv++; 113 } 114 proceed: 115 for (i = 0; i < no; i++) 116 if (olist[i]-- > NFLD) /* 0 origin */ 117 error("field number too big in -o",""); 118 if (argc != 3) 119 error("usage: join [-1 x -2 y] [-o list] file1 file2",""); 120 j1--; 121 j2--; /* everyone else believes in 0 origin */ 122 s1 = ppi[F1][j1]; 123 s2 = ppi[F2][j2]; 124 if (strcmp(argv[1], "-") == 0) 125 f[F1] = stdin; 126 else if ((f[F1] = fopen(argv[1], "r")) == 0) 127 error("can't open %s", argv[1]); 128 if(strcmp(argv[2], "-") == 0) { 129 f[F2] = stdin; 130 } else if ((f[F2] = fopen(argv[2], "r")) == 0) 131 error("can't open %s", argv[2]); 132 133 if(ftell(f[F2]) >= 0) 134 seek2(); 135 else if(ftell(f[F1]) >= 0) 136 seek1(); 137 else 138 error("neither file is randomly accessible",""); 139 if (discard) 140 error("some input line was truncated", ""); 141 exits(""); 142 } 143 int runecmp(Rune *a, Rune *b){ 144 while(*a==*b){ 145 if(*a=='\0') return 0; 146 a++; 147 b++; 148 } 149 if(*a<*b) return -1; 150 return 1; 151 } 152 char *runetostr(char *buf, Rune *r){ 153 char *s; 154 for(s=buf;*r;r++) s+=runetochar(s, r); 155 *s='\0'; 156 return buf; 157 } 158 Rune *strtorune(Rune *buf, char *s){ 159 Rune *r; 160 for(r=buf;*s;r++) s+=chartorune(r, s); 161 *r='\0'; 162 return buf; 163 } 164 /* lazy. there ought to be a clean way to combine seek1 & seek2 */ 165 #define get1() n1=input(F1) 166 #define get2() n2=input(F2) 167 void 168 seek2(void) 169 { 170 int n1, n2; 171 int top2=0; 172 int bot2 = ftell(f[F2]); 173 get1(); 174 get2(); 175 while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) { 176 if(n1>0 && n2>0 && comp()>0 || n1==0) { 177 if(a2) output(0, n2); 178 bot2 = ftell(f[F2]); 179 get2(); 180 } else if(n1>0 && n2>0 && comp()<0 || n2==0) { 181 if(a1) output(n1, 0); 182 get1(); 183 } else /*(n1>0 && n2>0 && comp()==0)*/ { 184 while(n2>0 && comp()==0) { 185 output(n1, n2); 186 top2 = ftell(f[F2]); 187 get2(); 188 } 189 fseek(f[F2], bot2, 0); 190 get2(); 191 get1(); 192 for(;;) { 193 if(n1>0 && n2>0 && comp()==0) { 194 output(n1, n2); 195 get2(); 196 } else if(n1>0 && n2>0 && comp()<0 || n2==0) { 197 fseek(f[F2], bot2, 0); 198 get2(); 199 get1(); 200 } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{ 201 fseek(f[F2], top2, 0); 202 bot2 = top2; 203 get2(); 204 break; 205 } 206 } 207 } 208 } 209 } 210 void 211 seek1(void) 212 { 213 int n1, n2; 214 int top1=0; 215 int bot1 = ftell(f[F1]); 216 get1(); 217 get2(); 218 while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) { 219 if(n1>0 && n2>0 && comp()>0 || n1==0) { 220 if(a2) output(0, n2); 221 get2(); 222 } else if(n1>0 && n2>0 && comp()<0 || n2==0) { 223 if(a1) output(n1, 0); 224 bot1 = ftell(f[F1]); 225 get1(); 226 } else /*(n1>0 && n2>0 && comp()==0)*/ { 227 while(n2>0 && comp()==0) { 228 output(n1, n2); 229 top1 = ftell(f[F1]); 230 get1(); 231 } 232 fseek(f[F1], bot1, 0); 233 get2(); 234 get1(); 235 for(;;) { 236 if(n1>0 && n2>0 && comp()==0) { 237 output(n1, n2); 238 get1(); 239 } else if(n1>0 && n2>0 && comp()>0 || n1==0) { 240 fseek(f[F1], bot1, 0); 241 get2(); 242 get1(); 243 } else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{ 244 fseek(f[F1], top1, 0); 245 bot1 = top1; 246 get1(); 247 break; 248 } 249 } 250 } 251 } 252 } 253 254 int 255 input(int n) /* get input line and split into fields */ 256 { 257 register int i, c; 258 Rune *bp; 259 Rune **pp; 260 char line[BUFSIZ]; 261 262 bp = buf[n]; 263 pp = ppi[n]; 264 if (fgets(line, BUFSIZ, f[n]) == 0) 265 return(0); 266 strtorune(bp, line); 267 i = 0; 268 do { 269 i++; 270 if (sep1 == ' ') /* strip multiples */ 271 while ((c = *bp) == sep1 || c == sep2) 272 bp++; /* skip blanks */ 273 *pp++ = bp; /* record beginning */ 274 while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0') 275 bp++; 276 *bp++ = '\0'; /* mark end by overwriting blank */ 277 } while (c != '\n' && c != '\0' && i < NFLD-1); 278 if (c != '\n') 279 discard++; 280 281 *pp = 0; 282 return(i); 283 } 284 285 void 286 output(int on1, int on2) /* print items from olist */ 287 { 288 int i; 289 Rune *temp; 290 char buf[BUFSIZ]; 291 292 if (no <= 0) { /* default case */ 293 printf("%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2])); 294 for (i = 0; i < on1; i++) 295 if (i != j1) 296 printf("%s%s", sepstr, runetostr(buf, ppi[F1][i])); 297 for (i = 0; i < on2; i++) 298 if (i != j2) 299 printf("%s%s", sepstr, runetostr(buf, ppi[F2][i])); 300 printf("\n"); 301 } else { 302 for (i = 0; i < no; i++) { 303 if (olistf[i]==F0 && on1>j1) 304 temp = ppi[F1][j1]; 305 else if (olistf[i]==F0 && on2>j2) 306 temp = ppi[F2][j2]; 307 else { 308 temp = ppi[olistf[i]][olist[i]]; 309 if(olistf[i]==F1 && on1<=olist[i] || 310 olistf[i]==F2 && on2<=olist[i] || 311 *temp==0) 312 temp = null; 313 } 314 printf("%s", runetostr(buf, temp)); 315 if (i == no - 1) 316 printf("\n"); 317 else 318 printf("%s", sepstr); 319 } 320 } 321 } 322 323 void 324 error(char *s1, char *s2) 325 { 326 fprintf(stderr, "join: "); 327 fprintf(stderr, s1, s2); 328 fprintf(stderr, "\n"); 329 exits(s1); 330 } 331 332 char * 333 getoptarg(int *argcp, char ***argvp) 334 { 335 int argc = *argcp; 336 char **argv = *argvp; 337 if(argv[1][2] != 0) 338 return &argv[1][2]; 339 if(argc<=2 || argv[2][0]=='-') 340 error("incomplete option %s", argv[1]); 341 *argcp = argc-1; 342 *argvp = ++argv; 343 return argv[1]; 344 } 345 346 void 347 oparse(char *s) 348 { 349 for (no = 0; no<2*NFLD && *s; no++, s++) { 350 switch(*s) { 351 case 0: 352 return; 353 case '0': 354 olistf[no] = F0; 355 break; 356 case '1': 357 case '2': 358 if(s[1] == '.' && isdigit((uchar)s[2])) { 359 olistf[no] = *s=='1'? F1: F2; 360 olist[no] = atoi(s += 2); 361 break; 362 } /* fall thru */ 363 default: 364 error("invalid -o list", ""); 365 } 366 if(s[1] == ',') 367 s++; 368 } 369 }