9base

revived minimalist port of Plan 9 userland to Unix
git clone git://git.suckless.org/9base
Log | Files | Refs | README | LICENSE

join.c (7544B)


      1 /*	join F1 F2 on stuff */
      2 #include <u.h>
      3 #include <stdio.h>
      4 #include <libc.h>
      5 #include <ctype.h>
      6 #define F1 0
      7 #define F2 1
      8 #define F0 3
      9 #define	NFLD	100	/* max field per line */
     10 #define comp() runecmp(ppi[F1][j1],ppi[F2][j2])
     11 FILE *f[2];
     12 Rune buf[2][BUFSIZ];	/*input lines */
     13 Rune *ppi[2][NFLD+1];	/* pointers to fields in lines */
     14 Rune *s1,*s2;
     15 #define j1 joinj1
     16 #define j2 joinj2
     17 
     18 int	j1	= 1;	/* join of this field of file 1 */
     19 int	j2	= 1;	/* join of this field of file 2 */
     20 int	olist[2*NFLD];	/* output these fields */
     21 int	olistf[2*NFLD];	/* from these files */
     22 int	no;		/* number of entries in olist */
     23 Rune	sep1	= ' ';	/* default field separator */
     24 Rune	sep2	= '\t';
     25 char *sepstr=" ";
     26 int	discard;	/* count of truncated lines */
     27 Rune	null[BUFSIZ]/*	= L""*/;
     28 int	a1;
     29 int 	a2;
     30 
     31 char *getoptarg(int*, char***);
     32 void output(int, int);
     33 int input(int);
     34 void oparse(char*);
     35 void error(char*, char*);
     36 void seek1(void), seek2(void);
     37 Rune *strtorune(Rune *, char *);
     38 
     39 
     40 void
     41 main(int argc, char **argv)
     42 {
     43 	int i;
     44 
     45 	while (argc > 1 && argv[1][0] == '-') {
     46 		if (argv[1][1] == '\0')
     47 			break;
     48 		switch (argv[1][1]) {
     49 		case '-':
     50 			argc--;
     51 			argv++;
     52 			goto proceed;
     53 		case 'a':
     54 			switch(*getoptarg(&argc, &argv)) {
     55 			case '1':
     56 				a1++;
     57 				break;
     58 			case '2':
     59 				a2++;
     60 				break;
     61 			default:
     62 				error("incomplete option -a","");
     63 			}
     64 			break;
     65 		case 'e':
     66 			strtorune(null, getoptarg(&argc, &argv));
     67 			break;
     68 		case 't':
     69 			sepstr=getoptarg(&argc, &argv);
     70 			chartorune(&sep1, sepstr);
     71 			sep2 = sep1;
     72 			break;
     73 		case 'o':
     74 			if(argv[1][2]!=0 ||
     75 			   argc>2 && strchr(argv[2],',')!=0)
     76 				oparse(getoptarg(&argc, &argv));
     77 			else for (no = 0; no<2*NFLD && argc>2; no++){
     78 				if (argv[2][0] == '1' && argv[2][1] == '.') {
     79 					olistf[no] = F1;
     80 					olist[no] = atoi(&argv[2][2]);
     81 				} else if (argv[2][0] == '2' && argv[2][1] == '.') {
     82 					olist[no] = atoi(&argv[2][2]);
     83 					olistf[no] = F2;
     84 				} else if (argv[2][0] == '0')
     85 					olistf[no] = F0;
     86 				else
     87 					break;
     88 				argc--;
     89 				argv++;
     90 			}
     91 			break;
     92 		case 'j':
     93 			if(argc <= 2)
     94 				break;
     95 			if (argv[1][2] == '1')
     96 				j1 = atoi(argv[2]);
     97 			else if (argv[1][2] == '2')
     98 				j2 = atoi(argv[2]);
     99 			else
    100 				j1 = j2 = atoi(argv[2]);
    101 			argc--;
    102 			argv++;
    103 			break;
    104 		case '1':
    105 			j1 = atoi(getoptarg(&argc, &argv));
    106 			break;
    107 		case '2':
    108 			j2 = atoi(getoptarg(&argc, &argv));
    109 			break;
    110 		}
    111 		argc--;
    112 		argv++;
    113 	}
    114 proceed:
    115 	for (i = 0; i < no; i++)
    116 		if (olist[i]-- > NFLD)	/* 0 origin */
    117 			error("field number too big in -o","");
    118 	if (argc != 3)
    119 		error("usage: join [-1 x -2 y] [-o list] file1 file2","");
    120 	j1--;
    121 	j2--;	/* everyone else believes in 0 origin */
    122 	s1 = ppi[F1][j1];
    123 	s2 = ppi[F2][j2];
    124 	if (strcmp(argv[1], "-") == 0)
    125 		f[F1] = stdin;
    126 	else if ((f[F1] = fopen(argv[1], "r")) == 0)
    127 		error("can't open %s", argv[1]);
    128 	if(strcmp(argv[2], "-") == 0) {
    129 		f[F2] = stdin;
    130 	} else if ((f[F2] = fopen(argv[2], "r")) == 0)
    131 		error("can't open %s", argv[2]);
    132 
    133 	if(ftell(f[F2]) >= 0)
    134 		seek2();
    135 	else if(ftell(f[F1]) >= 0)
    136 		seek1();
    137 	else
    138 		error("neither file is randomly accessible","");
    139 	if (discard)
    140 		error("some input line was truncated", "");
    141 	exits("");
    142 }
    143 int runecmp(Rune *a, Rune *b){
    144 	while(*a==*b){
    145 		if(*a=='\0') return 0;
    146 		a++;
    147 		b++;
    148 	}
    149 	if(*a<*b) return -1;
    150 	return 1;
    151 }
    152 char *runetostr(char *buf, Rune *r){
    153 	char *s;
    154 	for(s=buf;*r;r++) s+=runetochar(s, r);
    155 	*s='\0';
    156 	return buf;
    157 }
    158 Rune *strtorune(Rune *buf, char *s){
    159 	Rune *r;
    160 	for(r=buf;*s;r++) s+=chartorune(r, s);
    161 	*r='\0';
    162 	return buf;
    163 }
    164 /* lazy.  there ought to be a clean way to combine seek1 & seek2 */
    165 #define get1() n1=input(F1)
    166 #define get2() n2=input(F2)
    167 void
    168 seek2(void)
    169 {
    170 	int n1, n2;
    171 	int top2=0;
    172 	int bot2 = ftell(f[F2]);
    173 	get1();
    174 	get2();
    175 	while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
    176 		if(n1>0 && n2>0 && comp()>0 || n1==0) {
    177 			if(a2) output(0, n2);
    178 			bot2 = ftell(f[F2]);
    179 			get2();
    180 		} else if(n1>0 && n2>0 && comp()<0 || n2==0) {
    181 			if(a1) output(n1, 0);
    182 			get1();
    183 		} else /*(n1>0 && n2>0 && comp()==0)*/ {
    184 			while(n2>0 && comp()==0) {
    185 				output(n1, n2);
    186 				top2 = ftell(f[F2]);
    187 				get2();
    188 			}
    189 			fseek(f[F2], bot2, 0);
    190 			get2();
    191 			get1();
    192 			for(;;) {
    193 				if(n1>0 && n2>0 && comp()==0) {
    194 					output(n1, n2);
    195 					get2();
    196 				} else if(n1>0 && n2>0 && comp()<0 || n2==0) {
    197 					fseek(f[F2], bot2, 0);
    198 					get2();
    199 					get1();
    200 				} else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{
    201 					fseek(f[F2], top2, 0);
    202 					bot2 = top2;
    203 					get2();
    204 					break;
    205 				}
    206 			}
    207 		}
    208 	}
    209 }
    210 void
    211 seek1(void)
    212 {
    213 	int n1, n2;
    214 	int top1=0;
    215 	int bot1 = ftell(f[F1]);
    216 	get1();
    217 	get2();
    218 	while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
    219 		if(n1>0 && n2>0 && comp()>0 || n1==0) {
    220 			if(a2) output(0, n2);
    221 			get2();
    222 		} else if(n1>0 && n2>0 && comp()<0 || n2==0) {
    223 			if(a1) output(n1, 0);
    224 			bot1 = ftell(f[F1]);
    225 			get1();
    226 		} else /*(n1>0 && n2>0 && comp()==0)*/ {
    227 			while(n2>0 && comp()==0) {
    228 				output(n1, n2);
    229 				top1 = ftell(f[F1]);
    230 				get1();
    231 			}
    232 			fseek(f[F1], bot1, 0);
    233 			get2();
    234 			get1();
    235 			for(;;) {
    236 				if(n1>0 && n2>0 && comp()==0) {
    237 					output(n1, n2);
    238 					get1();
    239 				} else if(n1>0 && n2>0 && comp()>0 || n1==0) {
    240 					fseek(f[F1], bot1, 0);
    241 					get2();
    242 					get1();
    243 				} else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{
    244 					fseek(f[F1], top1, 0);
    245 					bot1 = top1;
    246 					get1();
    247 					break;
    248 				}
    249 			}
    250 		}
    251 	}
    252 }
    253 
    254 int
    255 input(int n)		/* get input line and split into fields */
    256 {
    257 	register int i, c;
    258 	Rune *bp;
    259 	Rune **pp;
    260 	char line[BUFSIZ];
    261 
    262 	bp = buf[n];
    263 	pp = ppi[n];
    264 	if (fgets(line, BUFSIZ, f[n]) == 0)
    265 		return(0);
    266 	strtorune(bp, line);
    267 	i = 0;
    268 	do {
    269 		i++;
    270 		if (sep1 == ' ')	/* strip multiples */
    271 			while ((c = *bp) == sep1 || c == sep2)
    272 				bp++;	/* skip blanks */
    273 		*pp++ = bp;	/* record beginning */
    274 		while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0')
    275 			bp++;
    276 		*bp++ = '\0';	/* mark end by overwriting blank */
    277 	} while (c != '\n' && c != '\0' && i < NFLD-1);
    278 	if (c != '\n')
    279 		discard++;
    280 
    281 	*pp = 0;
    282 	return(i);
    283 }
    284 
    285 void
    286 output(int on1, int on2)	/* print items from olist */
    287 {
    288 	int i;
    289 	Rune *temp;
    290 	char buf[BUFSIZ];
    291 
    292 	if (no <= 0) {	/* default case */
    293 		printf("%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2]));
    294 		for (i = 0; i < on1; i++)
    295 			if (i != j1)
    296 				printf("%s%s", sepstr, runetostr(buf, ppi[F1][i]));
    297 		for (i = 0; i < on2; i++)
    298 			if (i != j2)
    299 				printf("%s%s", sepstr, runetostr(buf, ppi[F2][i]));
    300 		printf("\n");
    301 	} else {
    302 		for (i = 0; i < no; i++) {
    303 			if (olistf[i]==F0 && on1>j1)
    304 				temp = ppi[F1][j1];
    305 			else if (olistf[i]==F0 && on2>j2)
    306 				temp = ppi[F2][j2];
    307 			else {
    308 				temp = ppi[olistf[i]][olist[i]];
    309 				if(olistf[i]==F1 && on1<=olist[i] ||
    310 				   olistf[i]==F2 && on2<=olist[i] ||
    311 				   *temp==0)
    312 					temp = null;
    313 			}
    314 			printf("%s", runetostr(buf, temp));
    315 			if (i == no - 1)
    316 				printf("\n");
    317 			else
    318 				printf("%s", sepstr);
    319 		}
    320 	}
    321 }
    322 
    323 void
    324 error(char *s1, char *s2)
    325 {
    326 	fprintf(stderr, "join: ");
    327 	fprintf(stderr, s1, s2);
    328 	fprintf(stderr, "\n");
    329 	exits(s1);
    330 }
    331 
    332 char *
    333 getoptarg(int *argcp, char ***argvp)
    334 {
    335 	int argc = *argcp;
    336 	char **argv = *argvp;
    337 	if(argv[1][2] != 0)
    338 		return &argv[1][2];
    339 	if(argc<=2 || argv[2][0]=='-')
    340 		error("incomplete option %s", argv[1]);
    341 	*argcp = argc-1;
    342 	*argvp = ++argv;
    343 	return argv[1];
    344 }
    345 
    346 void
    347 oparse(char *s)
    348 {
    349 	for (no = 0; no<2*NFLD && *s; no++, s++) {
    350 		switch(*s) {
    351 		case 0:
    352 			return;
    353 		case '0':
    354 			olistf[no] = F0;
    355 			break;
    356 		case '1':
    357 		case '2':
    358 			if(s[1] == '.' && isdigit((uchar)s[2])) {
    359 				olistf[no] = *s=='1'? F1: F2;
    360 				olist[no] = atoi(s += 2);
    361 				break;
    362 			} /* fall thru */
    363 		default:
    364 			error("invalid -o list", "");
    365 		}
    366 		if(s[1] == ',')
    367 			s++;
    368 	}
    369 }