9base

revived minimalist port of Plan 9 userland to Unix
git clone git://git.suckless.org/9base
Log | Files | Refs | README | LICENSE

commit fa62640154da08c5fd229af50efde0d33871a0aa
parent 85bacddf7706d2c89c30c2433fb8c43cd794cdb5
Author: Anselm R Garbe <anselm@garbe.us>
Date:   Fri, 28 May 2010 11:30:17 +0100

added commands as discussed with Uriel yesterday
Diffstat:
Makefile | 53++++++++++++++++++++++++++++++++++++++++++++++++++---
TODO | 11-----------
ascii/Makefile | 10++++++++++
ascii/ascii.1 | 160+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ascii/ascii.c | 181+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
cmp/Makefile | 10++++++++++
cmp/cmp.1 | 57+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
cmp/cmp.c | 112+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dd/Makefile | 10++++++++++
dd/dd.1 | 0
dd/dd.c | 660+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
diff/Makefile | 35+++++++++++++++++++++++++++++++++++
diff/diff.1 | 163+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
diff/diff.h | 27+++++++++++++++++++++++++++
diff/diffdir.c | 113+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
diff/diffio.c | 387+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
diff/diffreg.c | 420+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
diff/main.c | 270+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
join/Makefile | 10++++++++++
join/join.1 | 147+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
join/join.c | 369+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
lib9/utf.h | 3++-
look/Makefile | 10++++++++++
look/look.1 | 85+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
look/look.c | 349+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
pbd/Makefile | 10++++++++++
pbd/pbd.1 | 0
pbd/pbd.c | 19+++++++++++++++++++
rc/Makefile | 2+-
split/Makefile | 10++++++++++
split/split.1 | 82+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
split/split.c | 189+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
strings/Makefile | 10++++++++++
strings/strings.1 | 28++++++++++++++++++++++++++++
strings/strings.c | 90+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
unicode/Makefile | 10++++++++++
unicode/unicode.1 | 0
unicode/unicode.c | 122+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
unutf/Makefile | 10++++++++++
unutf/unutf.1 | 0
unutf/unutf.c | 20++++++++++++++++++++
41 files changed, 4238 insertions(+), 16 deletions(-)

diff --git a/Makefile b/Makefile @@ -2,9 +2,56 @@ include config.mk -SUBDIRS = lib9 yacc awk basename bc cal cat cleanname date dc du dd echo ed \ - factor fortune fmt freq getflags grep hoc ls mk mkdir mtime primes \ - rc read sha1sum sed seq sleep sort tail tee test touch tr troff uniq +SUBDIRS = lib9\ + yacc\ + ascii\ + awk\ + basename\ + bc\ + cal\ + cat\ + cleanname\ + cmp\ + date\ + dc\ + du\ + dd\ + diff\ + echo\ + ed\ + factor\ + fortune\ + fmt\ + freq\ + getflags\ + grep\ + hoc\ + join\ + look\ + ls\ + mk\ + mkdir\ + mtime\ + pbd\ + primes\ + rc\ + read\ + sha1sum\ + sed\ + seq\ + sleep\ + sort\ + split\ + strings\ + tail\ + tee\ + test\ + touch\ + tr\ + troff\ + unicode\ + uniq\ + unutf\ all: @echo 9base build options: diff --git a/TODO b/TODO @@ -1,11 +0,0 @@ -12:13 < uriel> garbeam: add dd and diff too -12:13 < uriel> and split -12:14 < uriel> (and join) -12:15 < uriel> and unutf (which I just noticed, seems to be undocumented, but seems quite useful too) -12:15 < uriel> and tcs -12:16 < uriel> and strings -12:18 < uriel> oh, oh, I'm finding some great bits: -12:18 < uriel> look(1), ascii(1) and unicode(1) -12:19 < uriel> ok, and cmp(1) is missing too -12:23 < uriel> hah! plan9/src/cmd/index/ is really interesting (but not worth including) -12:26 < uriel> oh! pbd! what a wonderful discovery, we certainly should add it too diff --git a/ascii/Makefile b/ascii/Makefile @@ -0,0 +1,10 @@ +# ascii - ascii unix port from plan9 +# Depends on ../lib9 + +TARG = ascii + +include ../std.mk + +pre-uninstall: + +post-install: diff --git a/ascii/ascii.1 b/ascii/ascii.1 @@ -0,0 +1,160 @@ +.TH ASCII 1 +.SH NAME +ascii, unicode \- interpret ASCII, Unicode characters +.SH SYNOPSIS +.B ascii +[ +.B -8 +] +[ +.BI -oxdb n +] +[ +.B -nct +] +[ +.I text +] +.PP +.B unicode +[ +.B -nt +] +.IB hexmin - hexmax +.PP +.B unicode +[ +.B -t +] +.I hex +[ +\&... 
+] +.PP +.B unicode +[ +.B -n +] +.I characters +.PP +.B look +.I hex +.B \*9/lib/unicode +.SH DESCRIPTION +.I Ascii +prints the +.SM ASCII +values corresponding to characters and +.I vice +.IR versa ; +under the +.B -8 +option, the +.SM ISO +Latin-1 extensions (codes 0200-0377) are included. +The values are interpreted in a settable numeric base; +.B -o +specifies octal, +.B -d +decimal, +.B -x +hexadecimal (the default), and +.BI -b n +base +.IR n . +.PP +With no arguments, +.I ascii +prints a table of the character set in the specified base. +Characters of +.I text +are converted to their +.SM ASCII +values, one per line. If, however, the first +.I text +argument is a valid number in the specified base, conversion +goes the opposite way. +Control characters are printed as two- or three-character mnemonics. +Other options are: +.TP +.B -n +Force numeric output. +.TP +.B -c +Force character output. +.TP +.B -t +Convert from numbers to running text; do not interpret +control characters or insert newlines. +.PP +.I Unicode +is similar; it converts between +.SM UTF +and character values from the Unicode Standard (see +.IR utf (7)). +If given a range of hexadecimal numbers, +.I unicode +prints a table of the specified Unicode characters \(em their values and +.SM UTF +representations. +Otherwise it translates from +.SM UTF +to numeric value or vice versa, +depending on the appearance of the supplied text; +the +.B -n +option forces numeric output to avoid ambiguity with numeric characters. +If converting to +.SM UTF , +the characters are printed one per line unless the +.B -t +flag is set, in which case the output is a single string +containing only the specified characters. +Unlike +.IR ascii , +.I unicode +treats no characters specially. +.PP +The output of +.I ascii +and +.I unicode +may be unhelpful if the characters printed are not available in the current font. 
+.PP +The file +.B \*9/lib/unicode +contains a +table of characters and descriptions, sorted in hexadecimal order, +suitable for +.IR look (1) +on the lower case +.I hex +values of characters. +.SH EXAMPLES +.TP +.B "ascii -d" +Print the +.SM ASCII +table base 10. +.TP +.B "unicode p" +Print the hex value of `p'. +.TP +.B "unicode 2200-22f1" +Print a table of miscellaneous mathematical symbols. +.TP +.B "look 039 \*9/lib/unicode" +See the start of the Greek alphabet's encoding in the Unicode Standard. +.SH FILES +.TP +.B \*9/lib/unicode +table of characters and descriptions. +.SH SOURCE +.B \*9/src/cmd/ascii.c +.br +.B \*9/src/cmd/unicode.c +.SH "SEE ALSO" +.IR look (1), +.IR tcs (1), +.IR utf (7), +.IR font (7) diff --git a/ascii/ascii.c b/ascii/ascii.c @@ -0,0 +1,181 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> + +#define MAXBASE 36 + +void usage(void); +void put(int); +void putn(int, int); +void puttext(char *); +void putnum(char *); +int btoi(char *); +int value(int, int); +int isnum(char *); + +char *str[256]={ + "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel", + "bs ", "ht ", "nl ", "vt ", "np ", "cr ", "so ", "si ", + "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb", + "can", "em ", "sub", "esc", "fs ", "gs ", "rs ", "us ", + "sp ", " ! ", " \" ", " # ", " $ ", " % ", " & ", " ' ", + " ( ", " ) ", " * ", " + ", " , ", " - ", " . ", " / ", + " 0 ", " 1 ", " 2 ", " 3 ", " 4 ", " 5 ", " 6 ", " 7 ", + " 8 ", " 9 ", " : ", " ; ", " < ", " = ", " > ", " ? 
", + " @ ", " A ", " B ", " C ", " D ", " E ", " F ", " G ", + " H ", " I ", " J ", " K ", " L ", " M ", " N ", " O ", + " P ", " Q ", " R ", " S ", " T ", " U ", " V ", " W ", + " X ", " Y ", " Z ", " [ ", " \\ ", " ] ", " ^ ", " _ ", + " ` ", " a ", " b ", " c ", " d ", " e ", " f ", " g ", + " h ", " i ", " j ", " k ", " l ", " m ", " n ", " o ", + " p ", " q ", " r ", " s ", " t ", " u ", " v ", " w ", + " x ", " y ", " z ", " { ", " | ", " } ", " ~ ", "del", + "x80", "x81", "x82", "x83", "x84", "x85", "x86", "x87", + "x88", "x89", "x8a", "x8b", "x8c", "x8d", "x8e", "x8f", + "x90", "x91", "x92", "x93", "x94", "x95", "x96", "x97", + "x98", "x99", "x9a", "x9b", "x9c", "x9d", "x9e", "x9f", + "xa0", " ¡ ", " ¢ ", " £ ", " ¤ ", " ¥ ", " ¦ ", " § ", + " ¨ ", " © ", " ª ", " « ", " ¬ ", " ­ ", " ® ", " ¯ ", + " ° ", " ± ", " ² ", " ³ ", " ´ ", " µ ", " ¶ ", " · ", + " ¸ ", " ¹ ", " º ", " » ", " ¼ ", " ½ ", " ¾ ", " ¿ ", + " À ", " Á ", " Â ", " Ã ", " Ä ", " Å ", " Æ ", " Ç ", + " È ", " É ", " Ê ", " Ë ", " Ì ", " Í ", " Î ", " Ï ", + " Ð ", " Ñ ", " Ò ", " Ó ", " Ô ", " Õ ", " Ö ", " × ", + " Ø ", " Ù ", " Ú ", " Û ", " Ü ", " Ý ", " Þ ", " ß ", + " à ", " á ", " â ", " ã ", " ä ", " å ", " æ ", " ç ", + " è ", " é ", " ê ", " ë ", " ì ", " í ", " î ", " ï ", + " ð ", " ñ ", " ò ", " ó ", " ô ", " õ ", " ö ", " ÷ ", + " ø ", " ù ", " ú ", " û ", " ü ", " ý ", " þ ", " ÿ " +}; + +char Ncol[]={ + 0,0,7,5,4,4,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +}; + +int nchars=128; +int base=16; +int ncol; +int text=1; +int strip=0; +Biobuf bin; + +void +main(int argc, char **argv) +{ + int i; + + Binit(&bin, 1, OWRITE); + ARGBEGIN{ + case '8': + nchars=256; break; + case 'x': + base=16; break; + case 'o': + base=8; break; + case 'd': + base=10; break; + case 'b': + base=strtoul(EARGF(usage()), 0, 0); + if(base<2||base>MAXBASE) + usage(); + break; + case 'n': + text=0; break; + case 't': + strip=1; + /* fall through */ + case 'c': + text=2; break; + 
default: + usage(); + }ARGEND + + ncol=Ncol[base]; + if(argc==0){ + for(i=0;i<nchars;i++){ + put(i); + if((i&7)==7) + Bprint(&bin, "|\n"); + } + }else{ + if(text==1) + text=isnum(argv[0]); + while(argc--) + if(text) + puttext(*argv++); + else + putnum(*argv++); + } + Bputc(&bin, '\n'); + exits(0); +} +void +usage(void) +{ + fprint(2, "usage: %s [-8] [-xod | -b8] [-ncst] [--] [text]\n", argv0); + exits("usage"); +} +void +put(int i) +{ + Bputc(&bin, '|'); + putn(i, ncol); + Bprint(&bin, " %s", str[i]); +} +char dig[]="0123456789abcdefghijklmnopqrstuvwxyz"; +void +putn(int n, int ndig) +{ + if(ndig==0) + return; + putn(n/base, ndig-1); + Bputc(&bin, dig[n%base]); +} +void +puttext(char *s) +{ + int n; + n=btoi(s)&0377; + if(strip) + Bputc(&bin, n); + else + Bprint(&bin, "%s\n", str[n]); +} +void +putnum(char *s) +{ + while(*s){ + putn(*s++&0377, ncol); + Bputc(&bin, '\n'); + } +} +int +btoi(char *s) +{ + int n; + n=0; + while(*s) + n=n*base+value(*s++, 0); + return(n); +} +int +value(int c, int f) +{ + char *s; + for(s=dig; s<dig+base; s++) + if(*s==c) + return(s-dig); + if(f) + return(-1); + fprint(2, "%s: bad input char %c\n", argv0, c); + exits("bad"); + return 0; /* to keep ken happy */ +} +int +isnum(char *s) +{ + while(*s) + if(value(*s++, 1)==-1) + return(0); + return(1); +} diff --git a/cmp/Makefile b/cmp/Makefile @@ -0,0 +1,10 @@ +# cmp - cmp unix port from plan9 +# Depends on ../lib9 + +TARG = cmp + +include ../std.mk + +pre-uninstall: + +post-install: diff --git a/cmp/cmp.1 b/cmp/cmp.1 @@ -0,0 +1,57 @@ +.TH CMP 1 +.SH NAME +cmp \- compare two files +.SH SYNOPSIS +.B cmp +[ +.B -lsL +] +.I file1 file2 +[ +.I offset1 +[ +.I offset2 +] +] +.SH DESCRIPTION +The two files are +compared. +A diagnostic results if the contents differ, otherwise +there is no output. +.PP +The options are: +.TP +.B l +Print the byte number (decimal) and the +differing bytes (hexadecimal) for each difference. +.TP +.B s +Print nothing for differing files, +but set the exit status. 
+.TP +.B L +Print the line number of the first differing byte. +.PP +If offsets are given, +comparison starts at the designated byte position +of the corresponding file. +Offsets that begin with +.B 0x +are hexadecimal; +with +.BR 0 , +octal; with anything else, decimal. +.SH SOURCE +.B \*9/src/cmd/cmp.c +.SH "SEE ALSO" +.IR diff (1) +.SH DIAGNOSTICS +If a file is inaccessible or missing, the exit status is +.LR open . +If the files are the same, the exit status is empty (true). +If they are the same except that one is longer than the other, the exit status is +.LR EOF . +Otherwise +.I cmp +reports the position of the first disagreeing byte and the exit status is +.LR differ . diff --git a/cmp/cmp.c b/cmp/cmp.c @@ -0,0 +1,112 @@ +#include <u.h> +#include <libc.h> + +#define BUF 65536 + +int sflag = 0; +int lflag = 0; +int Lflag = 0; + +static void usage(void); + +void +main(int argc, char *argv[]) +{ + int n, i; + uchar *p, *q; + uchar buf1[BUF], buf2[BUF]; + int f1, f2; + vlong nc = 1, o, l = 1; + char *name1, *name2; + uchar *b1s, *b1e, *b2s, *b2e; + + ARGBEGIN{ + case 's': sflag = 1; break; + case 'l': lflag = 1; break; + case 'L': Lflag = 1; break; + default: usage(); + }ARGEND + if(argc < 2) + usage(); + if((f1 = open(name1 = *argv++, OREAD)) == -1){ + if(!sflag) perror(name1); + exits("open"); + } + if((f2 = open(name2 = *argv++, OREAD)) == -1){ + if(!sflag) perror(name2); + exits("open"); + } + if(*argv){ + o = strtoll(*argv++, 0, 0); + if(seek(f1, o, 0) < 0){ + if(!sflag) perror("cmp: seek by offset1"); + exits("seek 1"); + } + } + if(*argv){ + o = strtoll(*argv++, 0, 0); + if(seek(f2, o, 0) < 0){ + if(!sflag) perror("cmp: seek by offset2"); + exits("seek 2"); + } + } + if(*argv) + usage(); + b1s = b1e = buf1; + b2s = b2e = buf2; + for(;;){ + if(b1s >= b1e){ + if(b1s >= &buf1[BUF]) + b1s = buf1; + n = read(f1, b1s, &buf1[BUF] - b1s); + b1e = b1s + n; + } + if(b2s >= b2e){ + if(b2s >= &buf2[BUF]) + b2s = buf2; + n = read(f2, b2s, &buf2[BUF] - b2s); + b2e = 
b2s + n; + } + n = b2e - b2s; + if(n > b1e - b1s) + n = b1e - b1s; + if(n <= 0) + break; + if(memcmp((void *)b1s, (void *)b2s, n) != 0){ + if(sflag) + exits("differ"); + for(p = b1s, q = b2s, i = 0; i < n; p++, q++, i++) { + if(*p == '\n') + l++; + if(*p != *q){ + if(!lflag){ + print("%s %s differ: char %lld", + name1, name2, nc+i); + print(Lflag?" line %lld\n":"\n", l); + exits("differ"); + } + print("%6lld 0x%.2x 0x%.2x\n", nc+i, *p, *q); + } + } + } + if(Lflag) + for(p = b1s; p < b1e;) + if(*p++ == '\n') + l++; + nc += n; + b1s += n; + b2s += n; + } + if(b1e - b1s == b2e - b2s) + exits((char *)0); + if(!sflag) + print("EOF on %s\n", (b1e - b1s > b2e - b2s)? name2 : name1); + exits("EOF"); +} + +static void +usage(void) +{ + print("Usage: cmp [-lsL] file1 file2 [offset1 [offset2] ]\n"); + exits("usage"); +} diff --git a/dd/Makefile b/dd/Makefile @@ -0,0 +1,10 @@ +# dd - dd unix port from plan9 +# Depends on ../lib9 + +TARG = dd + +include ../std.mk + +pre-uninstall: + +post-install: diff --git a/dd/dd.1 b/dd/dd.1 diff --git a/dd/dd.c b/dd/dd.c @@ -0,0 +1,660 @@ +#include <u.h> +#include <libc.h> + +#define BIG 2147483647 +#define LCASE (1<<0) +#define UCASE (1<<1) +#define SWAB (1<<2) +#define NERR (1<<3) +#define SYNC (1<<4) +int cflag; +int fflag; +char *string; +char *ifile; +char *ofile; +char *ibuf; +char *obuf; +vlong skip; +vlong oseekn; +vlong iseekn; +vlong count; +long files = 1; +long ibs = 512; +long obs = 512; +long bs; +long cbs; +long ibc; +long obc; +long cbc; +long nifr; +long nipr; +long nofr; +long nopr; +long ntrunc; +int dotrunc = 1; +int ibf; +int obf; +char *op; +int nspace; +uchar etoa[256]; +uchar atoe[256]; +uchar atoibm[256]; + +void flsh(void); +int match(char *s); +vlong number(long big); +void cnull(int cc); +void null(int c); +void ascii(int cc); +void unblock(int cc); +void ebcdic(int cc); +void ibm(int cc); +void block(int cc); +void term(void); +void stats(void); + +#define iskey(s) ((key[0] == '-') && (strcmp(key+1, s) == 0)) + 
+void +main(int argc, char *argv[]) +{ + void (*conv)(int); + char *ip; + char *key; + int a, c; + + conv = null; + for(c=1; c<argc; c++) { + key = argv[c++]; + if(c >= argc){ + fprint(2, "dd: arg %s needs a value\n", key); + exits("arg"); + } + string = argv[c]; + if(iskey("ibs")) { + ibs = number(BIG); + continue; + } + if(iskey("obs")) { + obs = number(BIG); + continue; + } + if(iskey("cbs")) { + cbs = number(BIG); + continue; + } + if(iskey("bs")) { + bs = number(BIG); + continue; + } + if(iskey("if")) { + ifile = string; + continue; + } + if(iskey("of")) { + ofile = string; + continue; + } + if(iskey("trunc")) { + dotrunc = number(BIG); + continue; + } + if(iskey("skip")) { + skip = number(BIG); + continue; + } + if(iskey("seek") || iskey("oseek")) { + oseekn = number(BIG); + continue; + } + if(iskey("iseek")) { + iseekn = number(BIG); + continue; + } + if(iskey("count")) { + count = number(BIG); + continue; + } + if(iskey("files")) { + files = number(BIG); + continue; + } + if(iskey("conv")) { + cloop: + if(match(",")) + goto cloop; + if(*string == '\0') + continue; + if(match("ebcdic")) { + conv = ebcdic; + goto cloop; + } + if(match("ibm")) { + conv = ibm; + goto cloop; + } + if(match("ascii")) { + conv = ascii; + goto cloop; + } + if(match("block")) { + conv = block; + goto cloop; + } + if(match("unblock")) { + conv = unblock; + goto cloop; + } + if(match("lcase")) { + cflag |= LCASE; + goto cloop; + } + if(match("ucase")) { + cflag |= UCASE; + goto cloop; + } + if(match("swab")) { + cflag |= SWAB; + goto cloop; + } + if(match("noerror")) { + cflag |= NERR; + goto cloop; + } + if(match("sync")) { + cflag |= SYNC; + goto cloop; + } + } + fprint(2, "dd: bad arg: %s\n", key); + exits("arg"); + } + if(conv == null && cflag&(LCASE|UCASE)) + conv = cnull; + if(ifile) + ibf = open(ifile, 0); + else + ibf = dup(0, -1); + if(ibf < 0) { + fprint(2, "dd: open %s: %r\n", ifile); + exits("open"); + } + if(ofile){ + if(dotrunc) + obf = create(ofile, 1, 0664); + else + 
obf = open(ofile, 1); + if(obf < 0) { + fprint(2, "dd: create %s: %r\n", ofile); + exits("create"); + } + }else{ + obf = dup(1, -1); + if(obf < 0) { + fprint(2, "dd: can't dup file descriptor: %s: %r\n", ofile); + exits("dup"); + } + } + if(bs) + ibs = obs = bs; + if(ibs == obs && conv == null) + fflag++; + if(ibs == 0 || obs == 0) { + fprint(2, "dd: counts: cannot be zero\n"); + exits("counts"); + } + ibuf = sbrk(ibs); + if(fflag) + obuf = ibuf; + else + obuf = sbrk(obs); + sbrk(64); /* For good measure */ + if(ibuf == (char *)-1 || obuf == (char *)-1) { + fprint(2, "dd: not enough memory: %r\n"); + exits("memory"); + } + ibc = 0; + obc = 0; + cbc = 0; + op = obuf; + +/* + if(signal(SIGINT, SIG_IGN) != SIG_IGN) + signal(SIGINT, term); +*/ + seek(obf, obs*oseekn, 1); + seek(ibf, ibs*iseekn, 1); + while(skip) { + read(ibf, ibuf, ibs); + skip--; + } + + ip = 0; +loop: + if(ibc-- == 0) { + ibc = 0; + if(count==0 || nifr+nipr!=count) { + if(cflag&(NERR|SYNC)) + for(ip=ibuf+ibs; ip>ibuf;) + *--ip = 0; + ibc = read(ibf, ibuf, ibs); + } + if(ibc == -1) { + perror("read"); + if((cflag&NERR) == 0) { + flsh(); + term(); + } + ibc = 0; + for(c=0; c<ibs; c++) + if(ibuf[c] != 0) + ibc = c; + stats(); + } + if(ibc == 0 && --files<=0) { + flsh(); + term(); + } + if(ibc != ibs) { + nipr++; + if(cflag&SYNC) + ibc = ibs; + } else + nifr++; + ip = ibuf; + c = (ibc>>1) & ~1; + if(cflag&SWAB && c) + do { + a = *ip++; + ip[-1] = *ip; + *ip++ = a; + } while(--c); + ip = ibuf; + if(fflag) { + obc = ibc; + flsh(); + ibc = 0; + } + goto loop; + } + c = 0; + c |= *ip++; + c &= 0377; + (*conv)(c); + goto loop; +} + +void +flsh(void) +{ + int c; + + if(obc) { + c = write(obf, obuf, obc); + if(c != obc) { + if(c > 0) + ++nopr; + perror("write"); + term(); + } + if(obc == obs) + nofr++; + else + nopr++; + obc = 0; + } +} + +int +match(char *s) +{ + char *cs; + + cs = string; + while(*cs++ == *s) + if(*s++ == '\0') + goto true; + if(*s != '\0') + return 0; + +true: + cs--; + string = cs; + return 
1; +} + +vlong +number(long big) +{ + char *cs; + vlong n; + + cs = string; + n = 0; + while(*cs >= '0' && *cs <= '9') + n = n*10 + *cs++ - '0'; + for(;;) + switch(*cs++) { + + case 'k': + n *= 1024; + continue; + +/* case 'w': + n *= sizeof(int); + continue; +*/ + + case 'b': + n *= 512; + continue; + +/* case '*':*/ + case 'x': + string = cs; + n *= number(BIG); + + case '\0': + if(n>=big || n<0) { + fprint(2, "dd: argument %lld out of range\n", n); + exits("range"); + } + return n; + } + /* never gets here */ +} + +void +cnull(int cc) +{ + int c; + + c = cc; + if((cflag&UCASE) && c>='a' && c<='z') + c += 'A'-'a'; + if((cflag&LCASE) && c>='A' && c<='Z') + c += 'a'-'A'; + null(c); +} + +void +null(int c) +{ + + *op = c; + op++; + if(++obc >= obs) { + flsh(); + op = obuf; + } +} + +void +ascii(int cc) +{ + int c; + + c = etoa[cc]; + if(cbs == 0) { + cnull(c); + return; + } + if(c == ' ') { + nspace++; + goto out; + } + while(nspace > 0) { + null(' '); + nspace--; + } + cnull(c); + +out: + if(++cbc >= cbs) { + null('\n'); + cbc = 0; + nspace = 0; + } +} + +void +unblock(int cc) +{ + int c; + + c = cc & 0377; + if(cbs == 0) { + cnull(c); + return; + } + if(c == ' ') { + nspace++; + goto out; + } + while(nspace > 0) { + null(' '); + nspace--; + } + cnull(c); + +out: + if(++cbc >= cbs) { + null('\n'); + cbc = 0; + nspace = 0; + } +} + +void +ebcdic(int cc) +{ + int c; + + c = cc; + if(cflag&UCASE && c>='a' && c<='z') + c += 'A'-'a'; + if(cflag&LCASE && c>='A' && c<='Z') + c += 'a'-'A'; + c = atoe[c]; + if(cbs == 0) { + null(c); + return; + } + if(cc == '\n') { + while(cbc < cbs) { + null(atoe[' ']); + cbc++; + } + cbc = 0; + return; + } + if(cbc == cbs) + ntrunc++; + cbc++; + if(cbc <= cbs) + null(c); +} + +void +ibm(int cc) +{ + int c; + + c = cc; + if(cflag&UCASE && c>='a' && c<='z') + c += 'A'-'a'; + if(cflag&LCASE && c>='A' && c<='Z') + c += 'a'-'A'; + c = atoibm[c] & 0377; + if(cbs == 0) { + null(c); + return; + } + if(cc == '\n') { + while(cbc < cbs) { + 
null(atoibm[' ']); + cbc++; + } + cbc = 0; + return; + } + if(cbc == cbs) + ntrunc++; + cbc++; + if(cbc <= cbs) + null(c); +} + +void +block(int cc) +{ + int c; + + c = cc; + if(cflag&UCASE && c>='a' && c<='z') + c += 'A'-'a'; + if(cflag&LCASE && c>='A' && c<='Z') + c += 'a'-'A'; + c &= 0377; + if(cbs == 0) { + null(c); + return; + } + if(cc == '\n') { + while(cbc < cbs) { + null(' '); + cbc++; + } + cbc = 0; + return; + } + if(cbc == cbs) + ntrunc++; + cbc++; + if(cbc <= cbs) + null(c); +} + +void +term(void) +{ + + stats(); + exits(0); +} + +void +stats(void) +{ + + fprint(2, "%lud+%lud records in\n", nifr, nipr); + fprint(2, "%lud+%lud records out\n", nofr, nopr); + if(ntrunc) + fprint(2, "%lud truncated records\n", ntrunc); +} + +uchar etoa[] = +{ + 0000,0001,0002,0003,0234,0011,0206,0177, + 0227,0215,0216,0013,0014,0015,0016,0017, + 0020,0021,0022,0023,0235,0205,0010,0207, + 0030,0031,0222,0217,0034,0035,0036,0037, + 0200,0201,0202,0203,0204,0012,0027,0033, + 0210,0211,0212,0213,0214,0005,0006,0007, + 0220,0221,0026,0223,0224,0225,0226,0004, + 0230,0231,0232,0233,0024,0025,0236,0032, + 0040,0240,0241,0242,0243,0244,0245,0246, + 0247,0250,0133,0056,0074,0050,0053,0041, + 0046,0251,0252,0253,0254,0255,0256,0257, + 0260,0261,0135,0044,0052,0051,0073,0136, + 0055,0057,0262,0263,0264,0265,0266,0267, + 0270,0271,0174,0054,0045,0137,0076,0077, + 0272,0273,0274,0275,0276,0277,0300,0301, + 0302,0140,0072,0043,0100,0047,0075,0042, + 0303,0141,0142,0143,0144,0145,0146,0147, + 0150,0151,0304,0305,0306,0307,0310,0311, + 0312,0152,0153,0154,0155,0156,0157,0160, + 0161,0162,0313,0314,0315,0316,0317,0320, + 0321,0176,0163,0164,0165,0166,0167,0170, + 0171,0172,0322,0323,0324,0325,0326,0327, + 0330,0331,0332,0333,0334,0335,0336,0337, + 0340,0341,0342,0343,0344,0345,0346,0347, + 0173,0101,0102,0103,0104,0105,0106,0107, + 0110,0111,0350,0351,0352,0353,0354,0355, + 0175,0112,0113,0114,0115,0116,0117,0120, + 0121,0122,0356,0357,0360,0361,0362,0363, + 
0134,0237,0123,0124,0125,0126,0127,0130, + 0131,0132,0364,0365,0366,0367,0370,0371, + 0060,0061,0062,0063,0064,0065,0066,0067, + 0070,0071,0372,0373,0374,0375,0376,0377, +}; +uchar atoe[] = +{ + 0000,0001,0002,0003,0067,0055,0056,0057, + 0026,0005,0045,0013,0014,0015,0016,0017, + 0020,0021,0022,0023,0074,0075,0062,0046, + 0030,0031,0077,0047,0034,0035,0036,0037, + 0100,0117,0177,0173,0133,0154,0120,0175, + 0115,0135,0134,0116,0153,0140,0113,0141, + 0360,0361,0362,0363,0364,0365,0366,0367, + 0370,0371,0172,0136,0114,0176,0156,0157, + 0174,0301,0302,0303,0304,0305,0306,0307, + 0310,0311,0321,0322,0323,0324,0325,0326, + 0327,0330,0331,0342,0343,0344,0345,0346, + 0347,0350,0351,0112,0340,0132,0137,0155, + 0171,0201,0202,0203,0204,0205,0206,0207, + 0210,0211,0221,0222,0223,0224,0225,0226, + 0227,0230,0231,0242,0243,0244,0245,0246, + 0247,0250,0251,0300,0152,0320,0241,0007, + 0040,0041,0042,0043,0044,0025,0006,0027, + 0050,0051,0052,0053,0054,0011,0012,0033, + 0060,0061,0032,0063,0064,0065,0066,0010, + 0070,0071,0072,0073,0004,0024,0076,0341, + 0101,0102,0103,0104,0105,0106,0107,0110, + 0111,0121,0122,0123,0124,0125,0126,0127, + 0130,0131,0142,0143,0144,0145,0146,0147, + 0150,0151,0160,0161,0162,0163,0164,0165, + 0166,0167,0170,0200,0212,0213,0214,0215, + 0216,0217,0220,0232,0233,0234,0235,0236, + 0237,0240,0252,0253,0254,0255,0256,0257, + 0260,0261,0262,0263,0264,0265,0266,0267, + 0270,0271,0272,0273,0274,0275,0276,0277, + 0312,0313,0314,0315,0316,0317,0332,0333, + 0334,0335,0336,0337,0352,0353,0354,0355, + 0356,0357,0372,0373,0374,0375,0376,0377, +}; +uchar atoibm[] = +{ + 0000,0001,0002,0003,0067,0055,0056,0057, + 0026,0005,0045,0013,0014,0015,0016,0017, + 0020,0021,0022,0023,0074,0075,0062,0046, + 0030,0031,0077,0047,0034,0035,0036,0037, + 0100,0132,0177,0173,0133,0154,0120,0175, + 0115,0135,0134,0116,0153,0140,0113,0141, + 0360,0361,0362,0363,0364,0365,0366,0367, + 0370,0371,0172,0136,0114,0176,0156,0157, + 0174,0301,0302,0303,0304,0305,0306,0307, + 
0310,0311,0321,0322,0323,0324,0325,0326, + 0327,0330,0331,0342,0343,0344,0345,0346, + 0347,0350,0351,0255,0340,0275,0137,0155, + 0171,0201,0202,0203,0204,0205,0206,0207, + 0210,0211,0221,0222,0223,0224,0225,0226, + 0227,0230,0231,0242,0243,0244,0245,0246, + 0247,0250,0251,0300,0117,0320,0241,0007, + 0040,0041,0042,0043,0044,0025,0006,0027, + 0050,0051,0052,0053,0054,0011,0012,0033, + 0060,0061,0032,0063,0064,0065,0066,0010, + 0070,0071,0072,0073,0004,0024,0076,0341, + 0101,0102,0103,0104,0105,0106,0107,0110, + 0111,0121,0122,0123,0124,0125,0126,0127, + 0130,0131,0142,0143,0144,0145,0146,0147, + 0150,0151,0160,0161,0162,0163,0164,0165, + 0166,0167,0170,0200,0212,0213,0214,0215, + 0216,0217,0220,0232,0233,0234,0235,0236, + 0237,0240,0252,0253,0254,0255,0256,0257, + 0260,0261,0262,0263,0264,0265,0266,0267, + 0270,0271,0272,0273,0274,0275,0276,0277, + 0312,0313,0314,0315,0316,0317,0332,0333, + 0334,0335,0336,0337,0352,0353,0354,0355, + 0356,0357,0372,0373,0374,0375,0376,0377, +}; diff --git a/diff/Makefile b/diff/Makefile @@ -0,0 +1,35 @@ +# diff - diff shell unix port from plan9 +# Depends on ../lib9 + +TARG = diff +OFILES = diffdir.o diffio.o diffreg.o main.o +MANFILES = diff.1 + +include ../config.mk + +all: ${TARG} + @strip ${TARG} + @echo built ${TARG} + +install: ${TARG} + @mkdir -p ${DESTDIR}${PREFIX}/bin + @cp -f ${TARG} ${DESTDIR}${PREFIX}/bin/ + @chmod 755 ${DESTDIR}${PREFIX}/bin/${TARG} + @mkdir -p ${DESTDIR}${MANPREFIX}/man1 + @cp -f ${MANFILES} ${DESTDIR}${MANPREFIX}/man1 + @chmod 444 ${DESTDIR}${MANPREFIX}/man1/${MANFILES} + +uninstall: + rm -f ${DESTDIR}${PREFIX}/bin/${TARG} + rm -f ${DESTDIR}${PREFIX}/man1/${MANFILES} + +.c.o: + @echo CC $*.c + @${CC} ${CFLAGS} -I../lib9 -I${PREFIX}/include -I../lib9 $*.c + +clean: + rm -f ${OFILES} ${TARG} + +${TARG}: ${OFILES} + @echo LD ${TARG} + @${CC} ${LDFLAGS} -o ${TARG} ${OFILES} -lm -L${PREFIX}/lib -L../lib9 -l9 diff --git a/diff/diff.1 b/diff/diff.1 @@ -0,0 +1,163 @@ +.TH DIFF 1 +.SH NAME +diff \- differential 
file comparator +.SH SYNOPSIS +.B diff +[ +.B -acefmnbwr +] file1 ... file2 +.SH DESCRIPTION +.I Diff +tells what lines must be changed in two files to bring them +into agreement. +If one file +is a directory, +then a file in that directory with basename the same as that of +the other file is used. +If both files are directories, similarly named files in the +two directories are compared by the method of +.I diff +for text +files and +.IR cmp (1) +otherwise. +If more than two file names are given, then each argument is compared +to the last argument as above. +The +.B -r +option causes +.I diff +to process similarly named subdirectories recursively. +When processing more than one file, +.I diff +prefixes file differences with a single line +listing the two differing files, in the form of +a +.I diff +command line. +The +.B -m +flag causes this behavior even when processing single files. +.PP +The normal output contains lines of these forms: +.IP "" 5 +.I n1 +.B a +.I n3,n4 +.br +.I n1,n2 +.B d +.I n3 +.br +.I n1,n2 +.B c +.I n3,n4 +.PP +These lines resemble +.I ed +commands to convert +.I file1 +into +.IR file2 . +The numbers after the letters pertain to +.IR file2 . +In fact, by exchanging `a' for `d' and reading backward +one may ascertain equally how to convert +.I file2 +into +.IR file1 . +As in +.IR ed , +identical pairs where +.I n1 += +.I n2 +or +.I n3 += +.I n4 +are abbreviated as a single number. +.PP +Following each of these lines come all the lines that are +affected in the first file flagged by `<', +then all the lines that are affected in the second file +flagged by `>'. +.PP +The +.B -b +option causes +trailing blanks (spaces and tabs) to be ignored +and other strings of blanks to compare equal. +The +.B -w +option causes all white-space to be removed from input lines +before applying the difference algorithm. +.PP +The +.B -n +option prefixes each range with +.IB file : \fR +and inserts a space around the +.BR a , +.BR c , +and +.B d +verbs. 
+The +.B -e +option produces a script of +.I "a, c" +and +.I d +commands for the editor +.IR ed , +which will recreate +.I file2 +from +.IR file1 . +The +.B -f +option produces a similar script, +not useful with +.IR ed , +in the opposite order. It may, however, be +useful as input to a stream-oriented post-processor. +.PP +The +.B -c +option includes three lines of context around each +change, merging changes whose contexts overlap. +The +.B -a +flag displays the entire file as context. +.PP +Except in rare circumstances, +.I diff +finds a smallest sufficient set of file +differences. +.SH FILES +.B /tmp/diff[12] +.SH SOURCE +.B \*9/src/cmd/diff +.SH "SEE ALSO" +.IR cmp (1), +.IR comm (1), +.IR ed (1) +.SH DIAGNOSTICS +Exit status is the empty string +for no differences, +.L some +for some, +and +.L error +for trouble. +.SH BUGS +Editing scripts produced under the +.BR -e " or" +.BR -f " option are naive about" +creating lines consisting of a single `\fB.\fR'. +.PP +When running +.I diff +on directories, the notion of what is a text +file is open to debate. 
diff --git a/diff/diff.h b/diff/diff.h @@ -0,0 +1,27 @@ +#define stdout bstdout + +char mode; /* '\0', 'e', 'f', 'h' */ +char bflag; /* ignore multiple and trailing blanks */ +char rflag; /* recurse down directory trees */ +char mflag; /* pseudo flag: doing multiple files, one dir */ +int anychange; +extern Biobuf stdout; +extern int binary; + +#define MALLOC(t, n) ((t *)emalloc((n)*sizeof(t))) +#define REALLOC(p, t, n) ((t *)erealloc((void *)(p), (n)*sizeof(t))) +#define FREE(p) free((void *)(p)) + +#define MAXPATHLEN 1024 + +int mkpathname(char *, char *, char *); +void *emalloc(unsigned); +void *erealloc(void *, unsigned); +void diff(char *, char *, int); +void diffdir(char *, char *, int); +void diffreg(char *, char *); +Biobuf *prepare(int, char *); +void panic(int, char *, ...); +void check(Biobuf *, Biobuf *); +void change(int, int, int, int); +void flushchanges(void); diff --git a/diff/diffdir.c b/diff/diffdir.c @@ -0,0 +1,113 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include "diff.h" + +static int +itemcmp(const void *v1, const void *v2) +{ + char *const*d1 = v1, *const*d2 = v2; + + return strcmp(*d1, *d2); +} + +static char ** +scandir(char *name) +{ + char **cp; + Dir *db; + int nitems; + int fd, n; + + if ((fd = open(name, OREAD)) < 0){ + panic(mflag ? 0 : 2, "can't open %s\n", name); + return nil; + } + cp = 0; + nitems = 0; + if((n = dirreadall(fd, &db)) > 0){ + while (n--) { + cp = REALLOC(cp, char *, (nitems+1)); + cp[nitems] = MALLOC(char, strlen((db+n)->name)+1); + strcpy(cp[nitems], (db+n)->name); + nitems++; + } + free(db); + } + cp = REALLOC(cp, char*, (nitems+1)); + cp[nitems] = 0; + close(fd); + qsort((char *)cp, nitems, sizeof(char*), itemcmp); + return cp; +} + +static int +isdotordotdot(char *p) +{ + if (*p == '.') { + if (!p[1]) + return 1; + if (p[1] == '.' 
&& !p[2]) + return 1; + } + return 0; +} + +void +diffdir(char *f, char *t, int level) +{ + char **df, **dt, **dirf, **dirt; + char *from, *to; + int res; + char fb[MAXPATHLEN+1], tb[MAXPATHLEN+1]; + + df = scandir(f); + dt = scandir(t); + dirf = df; + dirt = dt; + if(df == nil || dt == nil) + goto Out; + while (*df || *dt) { + from = *df; + to = *dt; + if (from && isdotordotdot(from)) { + df++; + continue; + } + if (to && isdotordotdot(to)) { + dt++; + continue; + } + if (!from) + res = 1; + else if (!to) + res = -1; + else + res = strcmp(from, to); + if (res < 0) { + if (mode == 0 || mode == 'n') + Bprint(&stdout, "Only in %s: %s\n", f, from); + df++; + continue; + } + if (res > 0) { + if (mode == 0 || mode == 'n') + Bprint(&stdout, "Only in %s: %s\n", t, to); + dt++; + continue; + } + if (mkpathname(fb, f, from)) + continue; + if (mkpathname(tb, t, to)) + continue; + diff(fb, tb, level+1); + df++; dt++; + } +Out: + for (df = dirf; df && *df; df++) + FREE(*df); + for (dt = dirt; dt && *dt; dt++) + FREE(*dt); + FREE(dirf); + FREE(dirt); +} diff --git a/diff/diffio.c b/diff/diffio.c @@ -0,0 +1,387 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include <ctype.h> +#include "diff.h" + +struct line { + int serial; + int value; +}; +extern struct line *file[2]; +extern int len[2]; +extern long *ixold, *ixnew; +extern int *J; + +static Biobuf *input[2]; +static char *file1, *file2; +static int firstchange; + +#define MAXLINELEN 4096 +#define MIN(x, y) ((x) < (y) ? 
(x): (y)) + +static int +readline(Biobuf *bp, char *buf) +{ + int c; + char *p, *e; + + p = buf; + e = p + MAXLINELEN-1; + do { + c = Bgetc(bp); + if (c < 0) { + if (p == buf) + return -1; + break; + } + if (c == '\n') + break; + *p++ = c; + } while (p < e); + *p = 0; + if (c != '\n' && c >= 0) { + do c = Bgetc(bp); + while (c >= 0 && c != '\n'); + } + return p - buf; +} + +#define HALFLONG 16 +#define low(x) (x&((1L<<HALFLONG)-1)) +#define high(x) (x>>HALFLONG) + +/* + * hashing has the effect of + * arranging line in 7-bit bytes and then + * summing 1-s complement in 16-bit hunks + */ +static int +readhash(Biobuf *bp, char *buf) +{ + long sum; + unsigned shift; + char *p; + int len, space; + + sum = 1; + shift = 0; + if ((len = readline(bp, buf)) == -1) + return 0; + p = buf; + switch(bflag) /* various types of white space handling */ + { + case 0: + while (len--) { + sum += (long)*p++ << (shift &= (HALFLONG-1)); + shift += 7; + } + break; + case 1: + /* + * coalesce multiple white-space + */ + for (space = 0; len--; p++) { + if (isspace((uchar)*p)) { + space++; + continue; + } + if (space) { + shift += 7; + space = 0; + } + sum += (long)*p << (shift &= (HALFLONG-1)); + shift += 7; + } + break; + default: + /* + * strip all white-space + */ + while (len--) { + if (isspace((uchar)*p)) { + p++; + continue; + } + sum += (long)*p++ << (shift &= (HALFLONG-1)); + shift += 7; + } + break; + } + sum = low(sum) + high(sum); + return ((short)low(sum) + (short)high(sum)); +} + +Biobuf * +prepare(int i, char *arg) +{ + struct line *p; + int j, h; + Biobuf *bp; + char *cp, buf[MAXLINELEN]; + int nbytes; + Rune r; + + bp = Bopen(arg, OREAD); + if (!bp) { + panic(mflag ? 
0: 2, "cannot open %s: %r\n", arg); + return 0; + } + if (binary) + return bp; + nbytes = Bread(bp, buf, MIN(1024, MAXLINELEN)); + if (nbytes > 0) { + cp = buf; + while (cp < buf+nbytes-UTFmax) { + /* + * heuristic for a binary file in the + * brave new UNICODE world + */ + cp += chartorune(&r, cp); + if (r == 0 || (r > 0x7f && r <= 0xa0)) { + binary++; + return bp; + } + } + Bseek(bp, 0, 0); + } + p = MALLOC(struct line, 3); + for (j = 0; h = readhash(bp, buf); p[j].value = h) + p = REALLOC(p, struct line, (++j+3)); + len[i] = j; + file[i] = p; + input[i] = bp; /*fix*/ + if (i == 0) { /*fix*/ + file1 = arg; + firstchange = 0; + } + else + file2 = arg; + return bp; +} + +static int +squishspace(char *buf) +{ + char *p, *q; + int space; + + for (space = 0, q = p = buf; *q; q++) { + if (isspace((uchar)*q)) { + space++; + continue; + } + if (space && bflag == 1) { + *p++ = ' '; + space = 0; + } + *p++ = *q; + } + *p = 0; + return p - buf; +} + +/* + * need to fix up for unexpected EOF's + */ +void +check(Biobuf *bf, Biobuf *bt) +{ + int f, t, flen, tlen; + char fbuf[MAXLINELEN], tbuf[MAXLINELEN]; + + ixold[0] = ixnew[0] = 0; + for (f = t = 1; f < len[0]; f++) { + flen = readline(bf, fbuf); + ixold[f] = ixold[f-1] + flen + 1; /* ftell(bf) */ + if (J[f] == 0) + continue; + do { + tlen = readline(bt, tbuf); + ixnew[t] = ixnew[t-1] + tlen + 1; /* ftell(bt) */ + } while (t++ < J[f]); + if (bflag) { + flen = squishspace(fbuf); + tlen = squishspace(tbuf); + } + if (flen != tlen || strcmp(fbuf, tbuf)) + J[f] = 0; + } + while (t < len[1]) { + tlen = readline(bt, tbuf); + ixnew[t] = ixnew[t-1] + tlen + 1; /* fseek(bt) */ + t++; + } +} + +static void +range(int a, int b, char *separator) +{ + Bprint(&stdout, "%d", a > b ? 
b: a); + if (a < b) + Bprint(&stdout, "%s%d", separator, b); +} + +static void +fetch(long *f, int a, int b, Biobuf *bp, char *s) +{ + char buf[MAXLINELEN]; + int maxb; + + if(a <= 1) + a = 1; + if(bp == input[0]) + maxb = len[0]; + else + maxb = len[1]; + if(b > maxb) + b = maxb; + if(a > maxb) + return; + Bseek(bp, f[a-1], 0); + while (a++ <= b) { + readline(bp, buf); + Bprint(&stdout, "%s%s\n", s, buf); + } +} + +typedef struct Change Change; +struct Change +{ + int a; + int b; + int c; + int d; +}; + +Change *changes; +int nchanges; + +void +change(int a, int b, int c, int d) +{ + char verb; + char buf[4]; + Change *ch; + + if (a > b && c > d) + return; + anychange = 1; + if (mflag && firstchange == 0) { + if(mode) { + buf[0] = '-'; + buf[1] = mode; + buf[2] = ' '; + buf[3] = '\0'; + } else { + buf[0] = '\0'; + } + Bprint(&stdout, "diff %s%s %s\n", buf, file1, file2); + firstchange = 1; + } + verb = a > b ? 'a': c > d ? 'd': 'c'; + switch(mode) { + case 'e': + range(a, b, ","); + Bputc(&stdout, verb); + break; + case 0: + range(a, b, ","); + Bputc(&stdout, verb); + range(c, d, ","); + break; + case 'n': + Bprint(&stdout, "%s:", file1); + range(a, b, ","); + Bprint(&stdout, " %c ", verb); + Bprint(&stdout, "%s:", file2); + range(c, d, ","); + break; + case 'f': + Bputc(&stdout, verb); + range(a, b, " "); + break; + case 'c': + case 'a': + if(nchanges%1024 == 0) + changes = erealloc(changes, (nchanges+1024)*sizeof(changes[0])); + ch = &changes[nchanges++]; + ch->a = a; + ch->b = b; + ch->c = c; + ch->d = d; + return; + } + Bputc(&stdout, '\n'); + if (mode == 0 || mode == 'n') { + fetch(ixold, a, b, input[0], "< "); + if (a <= b && c <= d) + Bprint(&stdout, "---\n"); + } + fetch(ixnew, c, d, input[1], mode == 0 || mode == 'n' ? 
"> ": ""); + if (mode != 0 && mode != 'n' && c <= d) + Bprint(&stdout, ".\n"); +} + +enum +{ + Lines = 3 /* number of lines of context shown */ +}; + +int +changeset(int i) +{ + while(i<nchanges && changes[i].b+1+2*Lines > changes[i+1].a) + i++; + if(i<nchanges) + return i+1; + return nchanges; +} + +void +flushchanges(void) +{ + int a, b, c, d, at; + int i, j; + + if(nchanges == 0) + return; + + for(i=0; i<nchanges; ){ + j = changeset(i); + a = changes[i].a-Lines; + b = changes[j-1].b+Lines; + c = changes[i].c-Lines; + d = changes[j-1].d+Lines; + if(a < 1) + a = 1; + if(c < 1) + c = 1; + if(b > len[0]) + b = len[0]; + if(d > len[1]) + d = len[1]; + if(mode == 'a'){ + a = 1; + b = len[0]; + c = 1; + d = len[1]; + j = nchanges; + } + Bprint(&stdout, "%s:", file1); + range(a, b, ","); + Bprint(&stdout, " - "); + Bprint(&stdout, "%s:", file2); + range(c, d, ","); + Bputc(&stdout, '\n'); + at = a; + for(; i<j; i++){ + fetch(ixold, at, changes[i].a-1, input[0], " "); + fetch(ixold, changes[i].a, changes[i].b, input[0], "- "); + fetch(ixnew, changes[i].c, changes[i].d, input[1], "+ "); + at = changes[i].b+1; + } + fetch(ixold, at, b, input[0], " "); + } + nchanges = 0; +} diff --git a/diff/diffreg.c b/diff/diffreg.c @@ -0,0 +1,420 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include "diff.h" + +/* diff - differential file comparison +* +* Uses an algorithm due to Harold Stone, which finds +* a pair of longest identical subsequences in the two +* files. +* +* The major goal is to generate the match vector J. +* J[i] is the index of the line in file1 corresponding +* to line i file0. J[i] = 0 if there is no +* such line in file1. +* +* Lines are hashed so as to work in core. All potential +* matches are located by sorting the lines of each file +* on the hash (called value). In particular, this +* collects the equivalence classes in file1 together. 
+* Subroutine equiv replaces the value of each line in +* file0 by the index of the first element of its +* matching equivalence in (the reordered) file1. +* To save space equiv squeezes file1 into a single +* array member in which the equivalence classes +* are simply concatenated, except that their first +* members are flagged by changing sign. +* +* Next the indices that point into member are unsorted into +* array class according to the original order of file0. +* +* The cleverness lies in routine stone. This marches +* through the lines of file0, developing a vector klist +* of "k-candidates". At step i a k-candidate is a matched +* pair of lines x,y (x in file0 y in file1) such that +* there is a common subsequence of length k +* between the first i lines of file0 and the first y +* lines of file1, but there is no such subsequence for +* any smaller y. x is the earliest possible mate to y +* that occurs in such a subsequence. +* +* Whenever any of the members of the equivalence class of +* lines in file1 matable to a line in file0 has serial number +* less than the y of some k-candidate, that k-candidate +* with the smallest such y is replaced. The new +* k-candidate is chained (via pred) to the current +* k-1 candidate so that the actual subsequence can +* be recovered. When a member has serial number greater +* than the y of all k-candidates, the klist is extended. +* At the end, the longest subsequence is pulled out +* and placed in the array J by unravel. +* +* With J in hand, the matches there recorded are +* check'ed against reality to assure that no spurious +* matches have crept in due to hashing. If they have, +* they are broken, and "jackpot " is recorded--a harmless +* matter except that a true match for a spuriously +* mated line may now be unnecessarily reported as a change. 
+* +* Much of the complexity of the program comes simply +* from trying to minimize core utilization and +* maximize the range of doable problems by dynamically +* allocating what is needed and reusing what is not. +* The core requirements for problems larger than somewhat +* are (in words) 2*length(file0) + length(file1) + +* 3*(number of k-candidates installed), typically about +* 6n words for files of length n. +*/ +/* TIDY THIS UP */ +struct cand { + int x; + int y; + int pred; +} cand; +struct line { + int serial; + int value; +} *file[2], line; +int len[2]; +int binary; +struct line *sfile[2]; /*shortened by pruning common prefix and suffix*/ +int slen[2]; +int pref, suff; /*length of prefix and suffix*/ +int *class; /*will be overlaid on file[0]*/ +int *member; /*will be overlaid on file[1]*/ +int *klist; /*will be overlaid on file[0] after class*/ +struct cand *clist; /* merely a free storage pot for candidates */ +int clen; +int *J; /*will be overlaid on class*/ +long *ixold; /*will be overlaid on klist*/ +long *ixnew; /*will be overlaid on file[1]*/ +/* END OF SOME TIDYING */ + +static void +sort(struct line *a, int n) /*shellsort CACM #201*/ +{ + int m; + struct line *ai, *aim, *j, *k; + struct line w; + int i; + + m = 0; + for (i = 1; i <= n; i *= 2) + m = 2*i - 1; + for (m /= 2; m != 0; m /= 2) { + k = a+(n-m); + for (j = a+1; j <= k; j++) { + ai = j; + aim = ai+m; + do { + if (aim->value > ai->value || + aim->value == ai->value && + aim->serial > ai->serial) + break; + w = *ai; + *ai = *aim; + *aim = w; + + aim = ai; + ai -= m; + } while (ai > a && aim >= ai); + } + } +} + +static void +unsort(struct line *f, int l, int *b) +{ + int *a; + int i; + + a = MALLOC(int, (l+1)); + for(i=1;i<=l;i++) + a[f[i].serial] = f[i].value; + for(i=1;i<=l;i++) + b[i] = a[i]; + FREE(a); +} + +static void +prune(void) +{ + int i,j; + + for(pref=0;pref<len[0]&&pref<len[1]&& + file[0][pref+1].value==file[1][pref+1].value; + pref++ ) ; + 
for(suff=0;suff<len[0]-pref&&suff<len[1]-pref&& + file[0][len[0]-suff].value==file[1][len[1]-suff].value; + suff++) ; + for(j=0;j<2;j++) { + sfile[j] = file[j]+pref; + slen[j] = len[j]-pref-suff; + for(i=0;i<=slen[j];i++) + sfile[j][i].serial = i; + } +} + +static void +equiv(struct line *a, int n, struct line *b, int m, int *c) +{ + int i, j; + + i = j = 1; + while(i<=n && j<=m) { + if(a[i].value < b[j].value) + a[i++].value = 0; + else if(a[i].value == b[j].value) + a[i++].value = j; + else + j++; + } + while(i <= n) + a[i++].value = 0; + b[m+1].value = 0; + j = 0; + while(++j <= m) { + c[j] = -b[j].serial; + while(b[j+1].value == b[j].value) { + j++; + c[j] = b[j].serial; + } + } + c[j] = -1; +} + +static int +newcand(int x, int y, int pred) +{ + struct cand *q; + + clist = REALLOC(clist, struct cand, (clen+1)); + q = clist + clen; + q->x = x; + q->y = y; + q->pred = pred; + return clen++; +} + +static int +search(int *c, int k, int y) +{ + int i, j, l; + int t; + + if(clist[c[k]].y < y) /*quick look for typical case*/ + return k+1; + i = 0; + j = k+1; + while((l=(i+j)/2) > i) { + t = clist[c[l]].y; + if(t > y) + j = l; + else if(t < y) + i = l; + else + return l; + } + return l+1; +} + +static int +stone(int *a, int n, int *b, int *c) +{ + int i, k,y; + int j, l; + int oldc, tc; + int oldl; + + k = 0; + c[0] = newcand(0,0,0); + for(i=1; i<=n; i++) { + j = a[i]; + if(j==0) + continue; + y = -b[j]; + oldl = 0; + oldc = c[0]; + do { + if(y <= clist[oldc].y) + continue; + l = search(c, k, y); + if(l!=oldl+1) + oldc = c[l-1]; + if(l<=k) { + if(clist[c[l]].y <= y) + continue; + tc = c[l]; + c[l] = newcand(i,y,oldc); + oldc = tc; + oldl = l; + } else { + c[l] = newcand(i,y,oldc); + k++; + break; + } + } while((y=b[++j]) > 0); + } + return k; +} + +static void +unravel(int p) +{ + int i; + struct cand *q; + + for(i=0; i<=len[0]; i++) { + if (i <= pref) + J[i] = i; + else if (i > len[0]-suff) + J[i] = i+len[1]-len[0]; + else + J[i] = 0; + } + 
for(q=clist+p;q->y!=0;q=clist+q->pred) + J[q->x+pref] = q->y+pref; +} + +static void +output(void) +{ + int m, i0, i1, j0, j1; + + m = len[0]; + J[0] = 0; + J[m+1] = len[1]+1; + if (mode != 'e') { + for (i0 = 1; i0 <= m; i0 = i1+1) { + while (i0 <= m && J[i0] == J[i0-1]+1) + i0++; + j0 = J[i0-1]+1; + i1 = i0-1; + while (i1 < m && J[i1+1] == 0) + i1++; + j1 = J[i1+1]-1; + J[i1] = j1; + change(i0, i1, j0, j1); + } + } + else { + for (i0 = m; i0 >= 1; i0 = i1-1) { + while (i0 >= 1 && J[i0] == J[i0+1]-1 && J[i0]) + i0--; + j0 = J[i0+1]-1; + i1 = i0+1; + while (i1 > 1 && J[i1-1] == 0) + i1--; + j1 = J[i1-1]+1; + J[i1] = j1; + change(i1 , i0, j1, j0); + } + } + if (m == 0) + change(1, 0, 1, len[1]); + flushchanges(); +} + +#define BUF 4096 +static int +cmp(Biobuf* b1, Biobuf* b2) +{ + int n; + uchar buf1[BUF], buf2[BUF]; + int f1, f2; + vlong nc = 1; + uchar *b1s, *b1e, *b2s, *b2e; + + f1 = Bfildes(b1); + f2 = Bfildes(b2); + seek(f1, 0, 0); + seek(f2, 0, 0); + b1s = b1e = buf1; + b2s = b2e = buf2; + for(;;){ + if(b1s >= b1e){ + if(b1s >= &buf1[BUF]) + b1s = buf1; + n = read(f1, b1s, &buf1[BUF] - b1s); + b1e = b1s + n; + } + if(b2s >= b2e){ + if(b2s >= &buf2[BUF]) + b2s = buf2; + n = read(f2, b2s, &buf2[BUF] - b2s); + b2e = b2s + n; + } + n = b2e - b2s; + if(n > b1e - b1s) + n = b1e - b1s; + if(n <= 0) + break; + if(memcmp((void *)b1s, (void *)b2s, n) != 0){ + return 1; + } + nc += n; + b1s += n; + b2s += n; + } + if(b1e - b1s == b2e - b2s) + return 0; + return 1; +} + +void +diffreg(char *f, char *t) +{ + Biobuf *b0, *b1; + int k; + + binary = 0; + b0 = prepare(0, f); + if (!b0) + return; + b1 = prepare(1, t); + if (!b1) { + FREE(file[0]); + Bterm(b0); + return; + } + if (binary){ + /* could use b0 and b1 but this is simpler. 
*/ + if (cmp(b0, b1)) + print("binary files %s %s differ\n", f, t); + Bterm(b0); + Bterm(b1); + return; + } + clen = 0; + prune(); + sort(sfile[0], slen[0]); + sort(sfile[1], slen[1]); + + member = (int *)file[1]; + equiv(sfile[0], slen[0], sfile[1], slen[1], member); + member = REALLOC(member, int, slen[1]+2); + + class = (int *)file[0]; + unsort(sfile[0], slen[0], class); + class = REALLOC(class, int, slen[0]+2); + + klist = MALLOC(int, slen[0]+2); + clist = MALLOC(struct cand, 1); + k = stone(class, slen[0], member, klist); + FREE(member); + FREE(class); + + J = MALLOC(int, len[0]+2); + unravel(klist[k]); + FREE(clist); + FREE(klist); + + ixold = MALLOC(long, len[0]+2); + ixnew = MALLOC(long, len[1]+2); + Bseek(b0, 0, 0); Bseek(b1, 0, 0); + check(b0, b1); + output(); + FREE(J); FREE(ixold); FREE(ixnew); + Bterm(b0); Bterm(b1); /* ++++ */ +} diff --git a/diff/main.c b/diff/main.c @@ -0,0 +1,270 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include "diff.h" + +#define DIRECTORY(s) ((s)->qid.type&QTDIR) +#define REGULAR_FILE(s) ((s)->type == 'M' && !DIRECTORY(s)) + +Biobuf stdout; + +static char *tmp[] = {"/tmp/diff1XXXXXXXXXXX", "/tmp/diff2XXXXXXXXXXX"}; +static int whichtmp; +static char *progname; +static char usage[] = "diff [ -acefmnbwr ] file1 ... file2\n"; + +static void +rmtmpfiles(void) +{ + while (whichtmp > 0) { + whichtmp--; + remove(tmp[whichtmp]); + } +} + +void +done(int status) +{ + rmtmpfiles(); + switch(status) + { + case 0: + exits(""); + case 1: + exits("some"); + default: + exits("error"); + } + /*NOTREACHED*/ +} + +void +panic(int status, char *fmt, ...) 
+{ + va_list arg; + + Bflush(&stdout); + + fprint(2, "%s: ", progname); + va_start(arg, fmt); + vfprint(2, fmt, arg); + va_end(arg); + if (status) + done(status); + /*NOTREACHED*/ +} + +static int +catch(void *a, char *msg) +{ + USED(a); + panic(2, msg); + return 1; +} + +int +mkpathname(char *pathname, char *path, char *name) +{ + if (strlen(path) + strlen(name) > MAXPATHLEN) { + panic(0, "pathname %s/%s too long\n", path, name); + return 1; + } + sprint(pathname, "%s/%s", path, name); + return 0; +} + +static char * +mktmpfile(int input, Dir **sb) +{ + int fd, i; + char *p; + char buf[8192]; + + atnotify(catch, 1); +/* + p = mktemp(tmp[whichtmp++]); + fd = create(p, OWRITE, 0600); +*/ + fd = mkstemp(p=tmp[whichtmp++]); + if (fd < 0) { + panic(mflag ? 0: 2, "cannot create %s: %r\n", p); + return 0; + } + while ((i = read(input, buf, sizeof(buf))) > 0) { + if ((i = write(fd, buf, i)) < 0) + break; + } + *sb = dirfstat(fd); + close(fd); + if (i < 0) { + panic(mflag ? 0: 2, "cannot read/write %s: %r\n", p); + return 0; + } + return p; +} + +static char * +statfile(char *file, Dir **sb) +{ + Dir *dir; + int input; + + dir = dirstat(file); + if(dir == nil) { + if (strcmp(file, "-") || (dir = dirfstat(0)) == nil) { + panic(mflag ? 0: 2, "cannot stat %s: %r\n", file); + return 0; + } + free(dir); + return mktmpfile(0, sb); + } + else if (!REGULAR_FILE(dir) && !DIRECTORY(dir)) { + free(dir); + if ((input = open(file, OREAD)) == -1) { + panic(mflag ? 
0: 2, "cannot open %s: %r\n", file); + return 0; + } + file = mktmpfile(input, sb); + close(input); + } + else + *sb = dir; + return file; +} + +void +diff(char *f, char *t, int level) +{ + char *fp, *tp, *p, fb[MAXPATHLEN+1], tb[MAXPATHLEN+1]; + Dir *fsb, *tsb; + + if ((fp = statfile(f, &fsb)) == 0) + goto Return; + if ((tp = statfile(t, &tsb)) == 0){ + free(fsb); + goto Return; + } + if (DIRECTORY(fsb) && DIRECTORY(tsb)) { + if (rflag || level == 0) + diffdir(fp, tp, level); + else + Bprint(&stdout, "Common subdirectories: %s and %s\n", + fp, tp); + } + else if (REGULAR_FILE(fsb) && REGULAR_FILE(tsb)) + diffreg(fp, tp); + else { + if (REGULAR_FILE(fsb)) { + if ((p = utfrrune(f, '/')) == 0) + p = f; + else + p++; + if (mkpathname(tb, tp, p) == 0) + diffreg(fp, tb); + } + else { + if ((p = utfrrune(t, '/')) == 0) + p = t; + else + p++; + if (mkpathname(fb, fp, p) == 0) + diffreg(fb, tp); + } + } + free(fsb); + free(tsb); +Return: + rmtmpfiles(); +} + +void +main(int argc, char *argv[]) +{ + char *p; + int i; + Dir *fsb, *tsb; + extern int _p9usepwlibrary; + + _p9usepwlibrary = 0; + Binit(&stdout, 1, OWRITE); + progname = *argv; + while (--argc && (*++argv)[0] == '-' && (*argv)[1]) { + for (p = *argv+1; *p; p++) { + switch (*p) { + + case 'e': + case 'f': + case 'n': + case 'c': + case 'a': + mode = *p; + break; + + case 'w': + bflag = 2; + break; + + case 'b': + bflag = 1; + break; + + case 'r': + rflag = 1; + mflag = 1; + break; + + case 'm': + mflag = 1; + break; + + case 'h': + default: + progname = "Usage"; + panic(2, usage); + } + } + } + if (argc < 2) + panic(2, usage, progname); + if ((tsb = dirstat(argv[argc-1])) == nil) + panic(2, "can't stat %s\n", argv[argc-1]); + if (argc > 2) { + if (!DIRECTORY(tsb)) + panic(2, usage, progname); + mflag = 1; + } + else { + if ((fsb = dirstat(argv[0])) == nil) + panic(2, "can't stat %s\n", argv[0]); + if (DIRECTORY(fsb) && DIRECTORY(tsb)) + mflag = 1; + free(fsb); + } + free(tsb); + for (i = 0; i < argc-1; i++) + 
diff(argv[i], argv[argc-1], 0); + done(anychange); + /*NOTREACHED*/ +} + +static char noroom[] = "out of memory - try diff -h\n"; + +void * +emalloc(unsigned n) +{ + register void *p; + + if ((p = malloc(n)) == 0) + panic(2, noroom); + return p; +} + +void * +erealloc(void *p, unsigned n) +{ + register void *rp; + + if ((rp = realloc(p, n)) == 0) + panic(2, noroom); + return rp; +} diff --git a/join/Makefile b/join/Makefile @@ -0,0 +1,10 @@ +# join - join unix port from plan9 +# Depends on ../lib9 + +TARG = join + +include ../std.mk + +pre-uninstall: + +post-install: diff --git a/join/join.1 b/join/join.1 @@ -0,0 +1,147 @@ +.TH JOIN 1 +.CT 1 files +.SH NAME +join \- relational database operator +.SH SYNOPSIS +.B join +[ +.I options +] +.I file1 file2 +.SH DESCRIPTION +.I Join +forms, on the standard output, +a join +of the two relations specified by the lines of +.I file1 +and +.IR file2 . +If one of the file names is +.LR - , +the standard input is used. +.PP +.I File1 +and +.I file2 +must be sorted in increasing +.SM ASCII +collating +sequence on the fields +on which they are to be joined, +normally the first in each line. +.PP +There is one line in the output +for each pair of lines in +.I file1 +and +.I file2 +that have identical join fields. +The output line normally consists of the common field, +then the rest of the line from +.IR file1 , +then the rest of the line from +.IR file2 . +.PP +Input fields are normally separated spaces or tabs; +output fields by space. +In this case, multiple separators count as one, and +leading separators are discarded. +.PP +The following options are recognized, with POSIX syntax. +.TP +.BI -a " n +In addition to the normal output, +produce a line for each unpairable line in file +.IR n , +where +.I n +is 1 or 2. +.TP +.BI -v " n +Like +.BR -a , +omitting output for paired lines. +.TP +.BI -e " s +Replace empty output fields by string +.IR s . 
+.TP +.BI -1 " m +.br +.ns +.TP +.BI -2 " m +Join on the +.IR m th +field of +.I file1 +or +.IR file2 . +.TP +.BI -j "n m" +Archaic equivalent for +.BI - n " m"\f1. +.TP +.BI -o fields +Each output line comprises the designated fields. +The comma-separated field designators are either +.BR 0 , +meaning the join field, or have the form +.IR n . m , +where +.I n +is a file number and +.I m +is a field number. +Archaic usage allows separate arguments for field designators. +.PP +.TP +.BI -t c +Use character +.I c +as the only separator (tab character) on input and output. +Every appearance of +.I c +in a line is significant. +.SH EXAMPLES +.TP +.L +sort /etc/passwd | join -t: -1 1 -a 1 -e "" - bdays +Add birthdays to the +.B /etc/passwd +file, leaving unknown +birthdays empty. +The layout of +.B /adm/users +is given in +.IR passwd (5); +.B bdays +contains sorted lines like +.LR "ken:Feb\ 4,\ 1953" . +.TP +.L +tr : ' ' </etc/passwd | sort -k 3 3 >temp +.br +.ns +.TP +.L +join -1 3 -2 3 -o 1.1,2.1 temp temp | awk '$1 < $2' +Print all pairs of users with identical userids. +.SH SOURCE +.B \*9/src/cmd/join.c +.SH "SEE ALSO" +.IR sort (1), +.IR comm (1), +.IR awk (1) +.SH BUGS +With default field separation, +the collating sequence is that of +.BI "sort -b" +.BI -k y , y\f1; +with +.BR -t , +the sequence is that of +.BI "sort -t" x +.BI -k y , y\f1. +.PP +One of the files must be randomly accessible. 
diff --git a/join/join.c b/join/join.c @@ -0,0 +1,369 @@ +/* join F1 F2 on stuff */ +#include <u.h> +#include <libc.h> +#include <stdio.h> +#include <ctype.h> +#define F1 0 +#define F2 1 +#define F0 3 +#define NFLD 100 /* max field per line */ +#define comp() runecmp(ppi[F1][j1],ppi[F2][j2]) +FILE *f[2]; +Rune buf[2][BUFSIZ]; /*input lines */ +Rune *ppi[2][NFLD+1]; /* pointers to fields in lines */ +Rune *s1,*s2; +#define j1 joinj1 +#define j2 joinj2 + +int j1 = 1; /* join of this field of file 1 */ +int j2 = 1; /* join of this field of file 2 */ +int olist[2*NFLD]; /* output these fields */ +int olistf[2*NFLD]; /* from these files */ +int no; /* number of entries in olist */ +Rune sep1 = ' '; /* default field separator */ +Rune sep2 = '\t'; +char *sepstr=" "; +int discard; /* count of truncated lines */ +Rune null[BUFSIZ]/* = L""*/; +int a1; +int a2; + +char *getoptarg(int*, char***); +void output(int, int); +int input(int); +void oparse(char*); +void error(char*, char*); +void seek1(void), seek2(void); +Rune *strtorune(Rune *, char *); + + +void +main(int argc, char **argv) +{ + int i; + + while (argc > 1 && argv[1][0] == '-') { + if (argv[1][1] == '\0') + break; + switch (argv[1][1]) { + case '-': + argc--; + argv++; + goto proceed; + case 'a': + switch(*getoptarg(&argc, &argv)) { + case '1': + a1++; + break; + case '2': + a2++; + break; + default: + error("incomplete option -a",""); + } + break; + case 'e': + strtorune(null, getoptarg(&argc, &argv)); + break; + case 't': + sepstr=getoptarg(&argc, &argv); + chartorune(&sep1, sepstr); + sep2 = sep1; + break; + case 'o': + if(argv[1][2]!=0 || + argc>2 && strchr(argv[2],',')!=0) + oparse(getoptarg(&argc, &argv)); + else for (no = 0; no<2*NFLD && argc>2; no++){ + if (argv[2][0] == '1' && argv[2][1] == '.') { + olistf[no] = F1; + olist[no] = atoi(&argv[2][2]); + } else if (argv[2][0] == '2' && argv[2][1] == '.') { + olist[no] = atoi(&argv[2][2]); + olistf[no] = F2; + } else if (argv[2][0] == '0') + olistf[no] = F0; + 
else + break; + argc--; + argv++; + } + break; + case 'j': + if(argc <= 2) + break; + if (argv[1][2] == '1') + j1 = atoi(argv[2]); + else if (argv[1][2] == '2') + j2 = atoi(argv[2]); + else + j1 = j2 = atoi(argv[2]); + argc--; + argv++; + break; + case '1': + j1 = atoi(getoptarg(&argc, &argv)); + break; + case '2': + j2 = atoi(getoptarg(&argc, &argv)); + break; + } + argc--; + argv++; + } +proceed: + for (i = 0; i < no; i++) + if (olist[i]-- > NFLD) /* 0 origin */ + error("field number too big in -o",""); + if (argc != 3) + error("usage: join [-1 x -2 y] [-o list] file1 file2",""); + j1--; + j2--; /* everyone else believes in 0 origin */ + s1 = ppi[F1][j1]; + s2 = ppi[F2][j2]; + if (strcmp(argv[1], "-") == 0) + f[F1] = stdin; + else if ((f[F1] = fopen(argv[1], "r")) == 0) + error("can't open %s", argv[1]); + if(strcmp(argv[2], "-") == 0) { + f[F2] = stdin; + } else if ((f[F2] = fopen(argv[2], "r")) == 0) + error("can't open %s", argv[2]); + + if(ftell(f[F2]) >= 0) + seek2(); + else if(ftell(f[F1]) >= 0) + seek1(); + else + error("neither file is randomly accessible",""); + if (discard) + error("some input line was truncated", ""); + exits(""); +} +int runecmp(Rune *a, Rune *b){ + while(*a==*b){ + if(*a=='\0') return 0; + a++; + b++; + } + if(*a<*b) return -1; + return 1; +} +char *runetostr(char *buf, Rune *r){ + char *s; + for(s=buf;*r;r++) s+=runetochar(s, r); + *s='\0'; + return buf; +} +Rune *strtorune(Rune *buf, char *s){ + Rune *r; + for(r=buf;*s;r++) s+=chartorune(r, s); + *r='\0'; + return buf; +} +/* lazy. 
there ought to be a clean way to combine seek1 & seek2 */ +#define get1() n1=input(F1) +#define get2() n2=input(F2) +void +seek2(void) +{ + int n1, n2; + int top2=0; + int bot2 = ftell(f[F2]); + get1(); + get2(); + while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) { + if(n1>0 && n2>0 && comp()>0 || n1==0) { + if(a2) output(0, n2); + bot2 = ftell(f[F2]); + get2(); + } else if(n1>0 && n2>0 && comp()<0 || n2==0) { + if(a1) output(n1, 0); + get1(); + } else /*(n1>0 && n2>0 && comp()==0)*/ { + while(n2>0 && comp()==0) { + output(n1, n2); + top2 = ftell(f[F2]); + get2(); + } + fseek(f[F2], bot2, 0); + get2(); + get1(); + for(;;) { + if(n1>0 && n2>0 && comp()==0) { + output(n1, n2); + get2(); + } else if(n1>0 && n2>0 && comp()<0 || n2==0) { + fseek(f[F2], bot2, 0); + get2(); + get1(); + } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{ + fseek(f[F2], top2, 0); + bot2 = top2; + get2(); + break; + } + } + } + } +} +void +seek1(void) +{ + int n1, n2; + int top1=0; + int bot1 = ftell(f[F1]); + get1(); + get2(); + while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) { + if(n1>0 && n2>0 && comp()>0 || n1==0) { + if(a2) output(0, n2); + get2(); + } else if(n1>0 && n2>0 && comp()<0 || n2==0) { + if(a1) output(n1, 0); + bot1 = ftell(f[F1]); + get1(); + } else /*(n1>0 && n2>0 && comp()==0)*/ { + while(n2>0 && comp()==0) { + output(n1, n2); + top1 = ftell(f[F1]); + get1(); + } + fseek(f[F1], bot1, 0); + get2(); + get1(); + for(;;) { + if(n1>0 && n2>0 && comp()==0) { + output(n1, n2); + get1(); + } else if(n1>0 && n2>0 && comp()>0 || n1==0) { + fseek(f[F1], bot1, 0); + get2(); + get1(); + } else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{ + fseek(f[F1], top1, 0); + bot1 = top1; + get1(); + break; + } + } + } + } +} + +int +input(int n) /* get input line and split into fields */ +{ + register int i, c; + Rune *bp; + Rune **pp; + char line[BUFSIZ]; + + bp = buf[n]; + pp = ppi[n]; + if (fgets(line, BUFSIZ, f[n]) == 0) + return(0); + strtorune(bp, line); + i = 0; + do { + i++; + if (sep1 == ' ') /* strip 
multiples */ + while ((c = *bp) == sep1 || c == sep2) + bp++; /* skip blanks */ + *pp++ = bp; /* record beginning */ + while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0') + bp++; + *bp++ = '\0'; /* mark end by overwriting blank */ + } while (c != '\n' && c != '\0' && i < NFLD-1); + if (c != '\n') + discard++; + + *pp = 0; + return(i); +} + +void +output(int on1, int on2) /* print items from olist */ +{ + int i; + Rune *temp; + char buf[BUFSIZ]; + + if (no <= 0) { /* default case */ + printf("%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2])); + for (i = 0; i < on1; i++) + if (i != j1) + printf("%s%s", sepstr, runetostr(buf, ppi[F1][i])); + for (i = 0; i < on2; i++) + if (i != j2) + printf("%s%s", sepstr, runetostr(buf, ppi[F2][i])); + printf("\n"); + } else { + for (i = 0; i < no; i++) { + if (olistf[i]==F0 && on1>j1) + temp = ppi[F1][j1]; + else if (olistf[i]==F0 && on2>j2) + temp = ppi[F2][j2]; + else { + temp = ppi[olistf[i]][olist[i]]; + if(olistf[i]==F1 && on1<=olist[i] || + olistf[i]==F2 && on2<=olist[i] || + *temp==0) + temp = null; + } + printf("%s", runetostr(buf, temp)); + if (i == no - 1) + printf("\n"); + else + printf("%s", sepstr); + } + } +} + +void +error(char *s1, char *s2) +{ + fprintf(stderr, "join: "); + fprintf(stderr, s1, s2); + fprintf(stderr, "\n"); + exits(s1); +} + +char * +getoptarg(int *argcp, char ***argvp) +{ + int argc = *argcp; + char **argv = *argvp; + if(argv[1][2] != 0) + return &argv[1][2]; + if(argc<=2 || argv[2][0]=='-') + error("incomplete option %s", argv[1]); + *argcp = argc-1; + *argvp = ++argv; + return argv[1]; +} + +void +oparse(char *s) +{ + for (no = 0; no<2*NFLD && *s; no++, s++) { + switch(*s) { + case 0: + return; + case '0': + olistf[no] = F0; + break; + case '1': + case '2': + if(s[1] == '.' && isdigit((uchar)s[2])) { + olistf[no] = *s=='1'? 
F1: F2; + olist[no] = atoi(s += 2); + break; + } /* fall thru */ + default: + error("invalid -o list", ""); + } + if(s[1] == ',') + s++; + } +} diff --git a/lib9/utf.h b/lib9/utf.h @@ -11,7 +11,8 @@ enum UTFmax = 3, /* maximum bytes per rune */ Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ Runeself = 0x80, /* rune and UTF sequences are the same (<) */ - Runeerror = 0xFFFD /* decoding error in UTF */ + Runeerror = 0xFFFD, /* decoding error in UTF */ + Runemax = 0x10FFFF /* maximum rune value */ }; /* Edit .+1,/^$/ | cfn $PLAN9/src/lib9/utf/?*.c | grep -v static |grep -v __ */ diff --git a/look/Makefile b/look/Makefile @@ -0,0 +1,10 @@ +# look - look unix port from plan9 +# Depends on ../lib9 + +TARG = look + +include ../std.mk + +pre-uninstall: + +post-install: diff --git a/look/look.1 b/look/look.1 @@ -0,0 +1,85 @@ +.TH LOOK 1 +.SH NAME +look \- find lines in a sorted list +.SH SYNOPSIS +.B look +[ +.BI -dfnixt c +] +[ +.I string +] +[ +.I file +] +.SH DESCRIPTION +.I Look +consults a sorted +.I file +and prints all lines that begin with +.IR string . +It uses binary search. +.PP +The following options are recognized. +Options +.B dfnt +affect comparisons as in +.IR sort (1). +.TP +.B -i +Interactive. +There is no +.I string +argument; instead +.I look +takes lines from the standard input as strings to be looked up. +.TP +.B -x +Exact. +Print only lines of the file whose key matches +.I string +exactly. +.TP +.B -d +`Directory' order: +only letters, digits, +tabs and blanks participate in comparisons. +.TP +.B -f +Fold. +Upper case letters compare equal to lower case. +.TP +.B -n +Numeric comparison with initial string of digits, optional minus sign, +and optional decimal point. +.TP +.BR -t [ \f2c\f1 ] +Character +.I c +terminates the sort key in the +.IR file . +By default, tab terminates the key. If +.I c +is missing the entire line comprises the key. 
+.PP +If no +.I file +is specified, +.B /lib/words +is assumed, with collating sequence +.BR df . +.SH FILES +.B /lib/words +.SH SOURCE +.B \*9/src/cmd/look.c +.SH "SEE ALSO" +.IR sort (1), +.IR grep (1) +.SH DIAGNOSTICS +The exit status is +.RB `` "not found" '' +if no match is found, and +.RB `` "no dictionary" '' +if +.I file +or the default dictionary cannot be opened. diff --git a/look/look.c b/look/look.c @@ -0,0 +1,349 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> + /* Macros for Rune support of ctype.h-like functions */ + +#undef isupper +#undef islower +#undef isalpha +#undef isdigit +#undef isalnum +#undef isspace +#undef tolower +#define isupper(r) ('A' <= (r) && (r) <= 'Z') +#define islower(r) ('a' <= (r) && (r) <= 'z') +#define isalpha(r) (isupper(r) || islower(r)) +#define islatin1(r) (0xC0 <= (r) && (r) <= 0xFF) + +#define isdigit(r) ('0' <= (r) && (r) <= '9') + +#define isalnum(r) (isalpha(r) || isdigit(r)) + +#define isspace(r) ((r) == ' ' || (r) == '\t' \ + || (0x0A <= (r) && (r) <= 0x0D)) + +#define tolower(r) ((r)-'A'+'a') + +#define sgn(v) ((v) < 0 ? -1 : ((v) > 0 ? 
1 : 0)) + +#define WORDSIZ 4000 +char *filename = "#9/lib/words"; +Biobuf *dfile; +Biobuf bout; +Biobuf bin; + +int fold; +int direc; +int exact; +int iflag; +int rev = 1; /*-1 for reverse-ordered file, not implemented*/ +int (*compare)(Rune*, Rune*); +Rune tab = '\t'; +Rune entry[WORDSIZ]; +Rune word[WORDSIZ]; +Rune key[50], orig[50]; +Rune latin_fold_tab[] = +{ +/* Table to fold latin 1 characters to ASCII equivalents + based at Rune value 0xc0 + + À Á Â Ã Ä Å Æ Ç + È É Ê Ë Ì Í Î Ï + Ð Ñ Ò Ó Ô Õ Ö × + Ø Ù Ú Û Ü Ý Þ ß + à á â ã ä å æ ç + è é ê ë ì í î ï + ð ñ ò ó ô õ ö ÷ + ø ù ú û ü ý þ ÿ +*/ + 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', + 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', + 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 , + 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 , + 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', + 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', + 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 , + 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y', +}; + +int locate(void); +int acomp(Rune*, Rune*); +int getword(Biobuf*, Rune *rp, int n); +void torune(char*, Rune*); +void rcanon(Rune*, Rune*); +int ncomp(Rune*, Rune*); + +/* + * Parse the options, take the search string and dictionary name from + * the remaining arguments, open the dictionary and print the matches. + */ +void +main(int argc, char *argv[]) +{ + int n; + char *targ; + + filename = unsharp(filename); + + Binit(&bin, 0, OREAD); + Binit(&bout, 1, OWRITE); + compare = acomp; + ARGBEGIN{ + case 'd': + direc++; + break; + case 'f': + fold++; + break; + case 'i': + iflag++; + break; + case 'n': + compare = ncomp; + break; + case 't': + /* ARGF() is nil when -t ends the command line; a zero tab makes rcanon use the whole line as the key */ + targ = ARGF(); + if(targ != 0) + chartorune(&tab, targ); + else + tab = 0; + break; + case 'x': + exact++; + break; + default: + fprint(2, "%s: bad option %c\n", argv0, ARGC()); + fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0); + exits("usage"); + } ARGEND + if(!iflag){ + if(argc >= 1) { + /* torune has no bound of its own: refuse strings that do not fit orig, terminator included */ + if(utflen(argv[0]) >= sizeof(orig)/sizeof(orig[0])) { + fprint(2, "look: string too long\n"); + exits("usage"); + } + torune(argv[0], orig); + argv++; + argc--; + } else + iflag++; + } + if(argc < 1) { + direc++; + fold++; + } else + filename = argv[0]; + if (!iflag) + rcanon(orig, key); + dfile = Bopen(filename, OREAD); + if(dfile == 0) { + fprint(2, "look: can't open %s\n", filename); + exits("no dictionary"); + } + if(!iflag) + 
if(!locate()) + exits("not found"); + do { + if(iflag) { + Bflush(&bout); + if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0]))) + exits(0); + rcanon(orig, key); + if(!locate()) + continue; + } + if (!exact || !acomp(word, key)) + Bprint(&bout, "%S\n", entry); + while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) { + rcanon(entry, word); + n = compare(key, word); + switch(n) { + case -1: + if(exact) + break; + case 0: + if (!exact || !acomp(word, orig)) + Bprint(&bout, "%S\n", entry); + continue; + } + break; + } + } while(iflag); + exits(0); +} + +int +locate(void) +{ + vlong top, bot, mid; + int c; + int n; + + bot = 0; + top = Bseek(dfile, 0L, 2); + for(;;) { + mid = (top+bot) / 2; + Bseek(dfile, mid, 0); + do + c = Bgetrune(dfile); + while(c>=0 && c!='\n'); + mid = Boffset(dfile); + if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) + break; + rcanon(entry, word); + n = compare(key, word); + switch(n) { + case -2: + case -1: + case 0: + if(top <= mid) + break; + top = mid; + continue; + case 1: + case 2: + bot = mid; + continue; + } + break; + } + Bseek(dfile, bot, 0); + while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) { + rcanon(entry, word); + n = compare(key, word); + switch(n) { + case -2: + return 0; + case -1: + if(exact) + return 0; + case 0: + return 1; + case 1: + case 2: + continue; + } + } + return 0; +} + +/* + * acomp(s, t) returns: + * -2 if s strictly precedes t + * -1 if s is a prefix of t + * 0 if s is the same as t + * 1 if t is a prefix of s + * 2 if t strictly precedes s + */ + +int +acomp(Rune *s, Rune *t) +{ + int cs, ct; + + for(;;) { + cs = *s; + ct = *t; + if(cs != ct) + break; + if(cs == 0) + return 0; + s++; + t++; + } + if(cs == 0) + return -1; + if(ct == 0) + return 1; + if(cs < ct) + return -2; + return 2; +} + +void +torune(char *old, Rune *new) +{ + do old += chartorune(new, old); + while(*new++); +} + +void +rcanon(Rune *old, Rune *new) +{ + Rune r; + + while((r = *old++) && r != tab) { + if 
(islatin1(r) && latin_fold_tab[r-0xc0]) + r = latin_fold_tab[r-0xc0]; + if(direc) + if(!(isalnum(r) || r == ' ' || r == '\t')) + continue; + if(fold) + if(isupper(r)) + r = tolower(r); + *new++ = r; + } + *new = 0; +} + +int +ncomp(Rune *s, Rune *t) +{ + Rune *is, *it, *js, *jt; + int a, b; + int ssgn, tsgn; + + while(isspace(*s)) + s++; + while(isspace(*t)) + t++; + ssgn = tsgn = -2*rev; + if(*s == '-') { + s++; + ssgn = -ssgn; + } + if(*t == '-') { + t++; + tsgn = -tsgn; + } + for(is = s; isdigit(*is); is++) + ; + for(it = t; isdigit(*it); it++) + ; + js = is; + jt = it; + a = 0; + if(ssgn == tsgn) + while(it>t && is>s) + if(b = *--it - *--is) + a = b; + while(is > s) + if(*--is != '0') + return -ssgn; + while(it > t) + if(*--it != '0') + return tsgn; + if(a) + return sgn(a)*ssgn; + if(*(s=js) == '.') + s++; + if(*(t=jt) == '.') + t++; + if(ssgn == tsgn) + while(isdigit(*s) && isdigit(*t)) + if(a = *t++ - *s++) + return sgn(a)*ssgn; + while(isdigit(*s)) + if(*s++ != '0') + return -ssgn; + while(isdigit(*t)) + if(*t++ != '0') + return tsgn; + return 0; +} + +int +getword(Biobuf *f, Rune *rp, int n) +{ + long c; + + while(n-- > 0) { + c = Bgetrune(f); + if(c < 0) + return 0; + if(c == '\n') { + *rp = '\0'; + return 1; + } + *rp++ = c; + } + fprint(2, "Look: word too long. 
Bailing out.\n"); + return 0; +} diff --git a/pbd/Makefile b/pbd/Makefile @@ -0,0 +1,10 @@ +# pbd - pbd unix port from plan9 +# Depends on ../lib9 + +TARG = pbd + +include ../std.mk + +pre-uninstall: + +post-install: diff --git a/pbd/pbd.1 b/pbd/pbd.1 diff --git a/pbd/pbd.c b/pbd/pbd.c @@ -0,0 +1,19 @@ +#include <u.h> +#include <libc.h> + +void +main(void) +{ + char buf[512], *p; + + p = "???"; + if(getwd(buf, sizeof buf)){ + p = strrchr(buf, '/'); + if(p == nil) + p = buf; + else if(p>buf || p[1]!='\0') + p++; + } + write(1, p, strlen(p)); + exits(0); +} diff --git a/rc/Makefile b/rc/Makefile @@ -46,7 +46,7 @@ uninstall: @${CC} ${CFLAGS} -I../lib9 -I${PREFIX}/include -I../lib9 $*.c clean: - rm -f ${OFILES} ${TARG} y.tab.c y.tab.h + rm -f ${OFILES} ${TARG} y.tab.c y.tab.h x.tab.h ${TARG}: ${OFILES} @echo LD ${TARG} diff --git a/split/Makefile b/split/Makefile @@ -0,0 +1,10 @@ +# split - split unix port from plan9 +# Depends on ../lib9 + +TARG = split + +include ../std.mk + +pre-uninstall: + +post-install: diff --git a/split/split.1 b/split/split.1 @@ -0,0 +1,82 @@ +.TH SPLIT 1 +.CT 1 files +.SH NAME +split \- split a file into pieces +.SH SYNOPSIS +.B split +[ +.I option ... +] +[ +.I file +] +.SH DESCRIPTION +.I Split +reads +.I file +(standard input by default) +and writes it in pieces of 1000 +lines per output file. +The names of the +output files are +.BR xaa , +.BR xab , +and so on to +.BR xzz . +The options are +.TP +.BI -n " n" +Split into +.IR n -line +pieces. +.TP +.BI -l " n" +Synonym for +.B -n +.IR n , +a nod to Unix's syntax. +.TP +.BI -e " expression" +File divisions occur at each line +that matches a regular +.IR expression ; +see +.IR regexp (7). +Multiple +.B -e +options may appear. +If a subexpression of +.I expression +is contained in parentheses +.BR ( ... ) , +the output file name is the portion of the +line which matches the subexpression. +.TP +.BI -f " stem +Use +.I stem +instead of +.B x +in output file names. 
+.TP +.BI -s " suffix +Append +.I suffix +to names identified under +.BR -e . +.TP +.B -x +Exclude the matched input line from the output file. +.TP +.B -i +Ignore case in option +.BR -e ; +force output file names (excluding the suffix) +to lower case. +.SH SOURCE +.B \*9/src/cmd/split.c +.SH SEE ALSO +.IR sed (1), +.IR awk (1), +.IR grep (1), +.IR regexp (7) diff --git a/split/split.c b/split/split.c @@ -0,0 +1,189 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include <ctype.h> +#include <regexp.h> + +char digit[] = "0123456789"; +char *suffix = ""; +char *stem = "x"; +char suff[] = "aa"; +char name[200]; +Biobuf bout; +Biobuf *output = &bout; + +extern int nextfile(void); +extern int matchfile(Resub*); +extern void openf(void); +extern char *fold(char*,int); +extern void usage(void); +extern void badexp(void); + +void +main(int argc, char *argv[]) +{ + Reprog *exp; + char *pattern = 0; + int n = 1000; + char *line; + int xflag = 0; + int iflag = 0; + Biobuf bin; + Biobuf *b = &bin; + char buf[256]; + + ARGBEGIN { + case 'l': + case 'n': + n=atoi(EARGF(usage())); + break; + case 'e': + pattern = strdup(EARGF(usage())); + break; + case 'f': + stem = strdup(EARGF(usage())); + break; + case 's': + suffix = strdup(EARGF(usage())); + break; + case 'x': + xflag++; + break; + case 'i': + iflag++; + break; + default: + usage(); + break; + + } ARGEND; + + if(argc < 0 || argc > 1) + usage(); + + if(argc != 0) { + b = Bopen(argv[0], OREAD); + if(b == nil) { + fprint(2, "split: can't open %s: %r\n", argv[0]); + exits("open"); + } + } else + Binit(b, 0, OREAD); + + if(pattern) { + if(!(exp = regcomp(iflag? 
fold(pattern,strlen(pattern)): pattern))) + badexp(); + while((line=Brdline(b,'\n')) != 0) { + Resub match[2]; + memset(match, 0, sizeof match); + line[Blinelen(b)-1] = 0; + if(regexec(exp,iflag?fold(line,Blinelen(b)-1):line,match,2)) { + if(matchfile(match) && xflag) + continue; + } else if(output == 0) + nextfile(); /* at most once */ + Bwrite(output, line, Blinelen(b)-1); + Bputc(output, '\n'); + } + } else { + int linecnt = n; + + while((line=Brdline(b,'\n')) != 0) { + if(++linecnt > n) { + nextfile(); + linecnt = 1; + } + Bwrite(output, line, Blinelen(b)); + } + + /* + * in case we didn't end with a newline, tack whatever's + * left onto the last file + */ + while((n = Bread(b, buf, sizeof(buf))) > 0) + Bwrite(output, buf, n); + } + if(b != nil) + Bterm(b); + exits(0); +} + +/* + * Open the next output file, named stem followed by the two-letter + * suffix, and advance the suffix. Returns 0 once the suffixes have + * run out past "zz", 1 otherwise. + */ +int +nextfile(void) +{ + static int canopen = 1; + if(suff[0] > 'z') { + if(canopen) + fprint(2, "split: file %szz not split\n",stem); + canopen = 0; + } else { + /* stem comes from the command line: make sure stem+suff fits in name */ + if(strlen(stem) + sizeof suff > sizeof name) { + fprint(2, "split: file name %s%s too long\n", stem, suff); + exits("name"); + } + strcpy(name, stem); + strcat(name, suff); + if(++suff[1] > 'z') + suff[1] = 'a', ++suff[0]; + openf(); + } + return canopen; +} + +/* + * Start a new output file for a line that matched the expression. + * When the parenthesized subexpression matched, the file is named by + * the matched text plus suffix; otherwise fall back to nextfile(). + */ +int +matchfile(Resub *match) +{ + if(match[1].s.sp) { + int len = match[1].e.ep - match[1].s.sp; + /* the matched text is input data: refuse names that overflow name[] */ + if(len + strlen(suffix) >= sizeof name) { + fprint(2, "split: matched file name too long\n"); + exits("name"); + } + strncpy(name, match[1].s.sp, len); + strcpy(name+len, suffix); + openf(); + return 1; + } + return nextfile(); +} + +/* + * Flush and close the current output, then create the file called + * name and attach output to it. Exits if the file cannot be created. + */ +void +openf(void) +{ + static int fd = 0; + Bflush(output); + Bterm(output); + if(fd > 0) + close(fd); + fd = create(name,OWRITE,0666); + if(fd < 0) { + fprint(2, "split: can't create %s: %r\n", name); + exits("create"); + } + Binit(output, fd, OWRITE); +} + +char * +fold(char *s, int n) +{ + static char *fline; + static int linesize = 0; + char *t; + + if(linesize < n+1){ + fline = realloc(fline,n+1); + linesize = n+1; + } + for(t=fline; *t++ = tolower((uchar)*s++); ) + continue; + /* we assume the 'A'-'Z' only appear as themselves + * in a utf encoding. 
+ */ + return fline; +} + +void +usage(void) +{ + fprint(2, "usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n"); + exits("usage"); +} + +void +badexp(void) +{ + fprint(2, "split: bad regular expression\n"); + exits("bad regular expression"); +} diff --git a/strings/Makefile b/strings/Makefile @@ -0,0 +1,10 @@ +# strings - strings unix port from plan9 +# Depends on ../lib9 + +TARG = strings + +include ../std.mk + +pre-uninstall: + +post-install: diff --git a/strings/strings.1 b/strings/strings.1 @@ -0,0 +1,28 @@ +.TH STRINGS 1 +.SH NAME +strings \- extract printable strings +.SH SYNOPSIS +.B strings +[ +.I file ... +] +.SH DESCRIPTION +.I Strings +finds and prints strings containing 6 or more +consecutive printable UTF-encoded characters +in a (typically) binary file, default +standard input. +Printable characters are taken to be +.SM ASCII +characters from blank through tilde (hexadecimal 20 through 7E), inclusive, +and +all other characters from value 00A0 to FFFF. +Strings reports +the decimal offset within the file at which the string starts and the text +of the string. If the string is longer than 70 runes the line is +terminated by three dots and the printing is resumed on the next +line with the offset of the continuation line. 
+.SH SOURCE +.B \*9/src/cmd/strings.c +.SH SEE ALSO +.IR nm (1) diff --git a/strings/strings.c b/strings/strings.c @@ -0,0 +1,90 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> + +Biobuf *fin; +Biobuf fout; + +#define MINSPAN 6 /* Min characters in string */ + +#define BUFSIZE 70 + +void stringit(char *); +#undef isprint +#define isprint risprint +int isprint(Rune); + +/* + * Run stringit over each named file, or over /dev/stdin when no file + * is given. With more than one file each is preceded by its name. + */ +void +main(int argc, char **argv) +{ + int i; + + Binit(&fout, 1, OWRITE); + if(argc < 2) { + stringit("/dev/stdin"); + exits(0); + } + + for(i = 1; i < argc; i++) { + /* write the header through fout so it stays in order with the buffered strings */ + if(argc > 2) + Bprint(&fout, "%s:\n", argv[i]); + + stringit(argv[i]); + } + + exits(0); +} + +/* + * Print every run of at least MINSPAN printable runes in file str, + * each prefixed with the offset at which the run starts. Runs that + * reach BUFSIZE-1 runes are cut with " ..." and continued on the next + * output line. + */ +void +stringit(char *str) +{ + long posn, start; + int cnt = 0; + long c; + + Rune buf[BUFSIZE]; + + if ((fin = Bopen(str, OREAD)) == 0) { + perror("open"); + return; + } + + start = 0; + posn = Boffset(fin); + while((c = Bgetrune(fin)) >= 0) { + if(isprint(c)) { + /* an empty buffer marks the start of a run; offset 0 is a valid start, so start itself cannot be the sentinel */ + if(cnt == 0) + start = posn; + buf[cnt++] = c; + if(cnt == BUFSIZE-1) { + buf[cnt] = 0; + Bprint(&fout, "%8ld: %S ...\n", start, buf); + start = 0; + cnt = 0; + } + } else { + if(cnt >= MINSPAN) { + buf[cnt] = 0; + Bprint(&fout, "%8ld: %S\n", start, buf); + } + start = 0; + cnt = 0; + } + posn = Boffset(fin); + } + + if(cnt >= MINSPAN){ + buf[cnt] = 0; + Bprint(&fout, "%8ld: %S\n", start, buf); + } + Bterm(fin); +} + +int +isprint(Rune r) +{ + if ((r >= ' ' && r <0x7f) || r > 0xA0) + return 1; + else + return 0; +} diff --git a/unicode/Makefile b/unicode/Makefile @@ -0,0 +1,10 @@ +# unicode - unicode unix port from plan9 +# Depends on ../lib9 + +TARG = unicode + +include ../std.mk + +pre-uninstall: + +post-install: diff --git a/unicode/unicode.1 b/unicode/unicode.1 diff --git a/unicode/unicode.c b/unicode/unicode.c @@ -0,0 +1,122 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> + +char usage[] = "unicode { [-t] hex hex ... | hexmin-hexmax ... | [-n] char ... 
}"; +char hex[] = "0123456789abcdefABCDEF"; +int numout = 0; +int text = 0; +char *err; +Biobuf bout; + +char *range(char*[]); +char *nums(char*[]); +char *chars(char*[]); + +void +main(int argc, char *argv[]) +{ + ARGBEGIN{ + case 'n': + numout = 1; + break; + case 't': + text = 1; + break; + }ARGEND + Binit(&bout, 1, OWRITE); + if(argc == 0){ + fprint(2, "usage: %s\n", usage); + exits("usage"); + } + if(!numout && utfrune(argv[0], '-')) + exits(range(argv)); + if(numout || strchr(hex, argv[0][0])==0) + exits(nums(argv)); + exits(chars(argv)); +} + +char* +range(char *argv[]) +{ + char *q; + int min, max; + int i; + + while(*argv){ + q = *argv; + if(strchr(hex, q[0]) == 0){ + err: + fprint(2, "unicode: bad range %s\n", *argv); + return "bad range"; + } + min = strtoul(q, &q, 16); + if(min<0 || min>Runemax || *q!='-') + goto err; + q++; + if(strchr(hex, *q) == 0) + goto err; + max = strtoul(q, &q, 16); + if(max<0 || max>Runemax || max<min || *q!=0) + goto err; + i = 0; + do{ + Bprint(&bout, "%.4x %C", min, min); + i++; + if(min==max || (i&7)==0) + Bprint(&bout, "\n"); + else + Bprint(&bout, "\t"); + min++; + }while(min<=max); + argv++; + } + return 0; +} + +/* + * Print the hexadecimal value of every rune in each argument, one per + * line. Returns 0 on success, or an error string after reporting the + * first argument that is not valid UTF. + */ +char* +nums(char *argv[]) +{ + char *q; + Rune r; + int w; + + while(*argv){ + q = *argv; + while(*q){ + w = chartorune(&r, q); + /* a bad sequence decodes to Runeerror from a single byte; a real U+FFFD is longer */ + if(r==Runeerror && w==1){ + fprint(2, "unicode: invalid utf string %s\n", *argv); + return "bad utf"; + } + Bprint(&bout, "%.4x\n", r); + q += w; + } + argv++; + } + return 0; +} + +char* +chars(char *argv[]) +{ + char *q; + int m; + + while(*argv){ + q = *argv; + if(strchr(hex, q[0]) == 0){ + err: + fprint(2, "unicode: bad unicode value %s\n", *argv); + return "bad char"; + } + m = strtoul(q, &q, 16); + if(m<0 || m>Runemax || *q!=0) + goto err; + Bprint(&bout, "%C", m); + if(!text) + Bprint(&bout, "\n"); + argv++; + } + return 0; +} diff --git a/unutf/Makefile b/unutf/Makefile @@ -0,0 +1,10 @@ +# unutf - unutf unix port from plan9 +# Depends on ../lib9 + +TARG = unutf + 
+include ../std.mk + +pre-uninstall: + +post-install: diff --git a/unutf/unutf.1 b/unutf/unutf.1 diff --git a/unutf/unutf.c b/unutf/unutf.c @@ -0,0 +1,20 @@ +/* + * stupid little program to pipe unicode chars through + * when converting to non-utf compilers. + */ +#include <u.h> +#include <libc.h> +#include <bio.h> + +Biobuf bin; + +void +main(void) +{ + int c; + + Binit(&bin, 0, OREAD); + while((c = Bgetrune(&bin)) >= 0) + print("0x%ux\n", c); + exits(0); +}