commit fa62640154da08c5fd229af50efde0d33871a0aa
parent 85bacddf7706d2c89c30c2433fb8c43cd794cdb5
Author: Anselm R Garbe <anselm@garbe.us>
Date: Fri, 28 May 2010 11:30:17 +0100
added commands as discussed with Uriel yesterday
Diffstat:
M | Makefile | | | 53 | ++++++++++++++++++++++++++++++++++++++++++++++++++--- |
D | TODO | | | 11 | ----------- |
A | ascii/Makefile | | | 10 | ++++++++++ |
A | ascii/ascii.1 | | | 160 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | ascii/ascii.c | | | 181 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | cmp/Makefile | | | 10 | ++++++++++ |
A | cmp/cmp.1 | | | 57 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | cmp/cmp.c | | | 112 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | dd/Makefile | | | 10 | ++++++++++ |
A | dd/dd.1 | | | 0 | |
A | dd/dd.c | | | 660 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | diff/Makefile | | | 35 | +++++++++++++++++++++++++++++++++++ |
A | diff/diff.1 | | | 163 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | diff/diff.h | | | 27 | +++++++++++++++++++++++++++ |
A | diff/diffdir.c | | | 113 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | diff/diffio.c | | | 387 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | diff/diffreg.c | | | 420 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | diff/main.c | | | 270 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | join/Makefile | | | 10 | ++++++++++ |
A | join/join.1 | | | 147 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | join/join.c | | | 369 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | lib9/utf.h | | | 3 | ++- |
A | look/Makefile | | | 10 | ++++++++++ |
A | look/look.1 | | | 85 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | look/look.c | | | 349 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | pbd/Makefile | | | 10 | ++++++++++ |
A | pbd/pbd.1 | | | 0 | |
A | pbd/pbd.c | | | 19 | +++++++++++++++++++ |
M | rc/Makefile | | | 2 | +- |
A | split/Makefile | | | 10 | ++++++++++ |
A | split/split.1 | | | 82 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | split/split.c | | | 189 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | strings/Makefile | | | 10 | ++++++++++ |
A | strings/strings.1 | | | 28 | ++++++++++++++++++++++++++++ |
A | strings/strings.c | | | 90 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | unicode/Makefile | | | 10 | ++++++++++ |
A | unicode/unicode.1 | | | 0 | |
A | unicode/unicode.c | | | 122 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | unutf/Makefile | | | 10 | ++++++++++ |
A | unutf/unutf.1 | | | 0 | |
A | unutf/unutf.c | | | 20 | ++++++++++++++++++++ |
41 files changed, 4238 insertions(+), 16 deletions(-)
diff --git a/Makefile b/Makefile
@@ -2,9 +2,56 @@
include config.mk
-SUBDIRS = lib9 yacc awk basename bc cal cat cleanname date dc du dd echo ed \
- factor fortune fmt freq getflags grep hoc ls mk mkdir mtime primes \
- rc read sha1sum sed seq sleep sort tail tee test touch tr troff uniq
+SUBDIRS = lib9\
+ yacc\
+ ascii\
+ awk\
+ basename\
+ bc\
+ cal\
+ cat\
+ cleanname\
+ cmp\
+ date\
+ dc\
+ du\
+ dd\
+ diff\
+ echo\
+ ed\
+ factor\
+ fortune\
+ fmt\
+ freq\
+ getflags\
+ grep\
+ hoc\
+ join\
+ look\
+ ls\
+ mk\
+ mkdir\
+ mtime\
+ pbd\
+ primes\
+ rc\
+ read\
+ sha1sum\
+ sed\
+ seq\
+ sleep\
+ sort\
+ split\
+ strings\
+ tail\
+ tee\
+ test\
+ touch\
+ tr\
+ troff\
+ unicode\
+ uniq\
+ unutf
all:
@echo 9base build options:
diff --git a/TODO b/TODO
@@ -1,11 +0,0 @@
-12:13 < uriel> garbeam: add dd and diff too
-12:13 < uriel> and split
-12:14 < uriel> (and join)
-12:15 < uriel> and unutf (which I just noticed, seems to be undocumented, but seems quite useful too)
-12:15 < uriel> and tcs
-12:16 < uriel> and strings
-12:18 < uriel> oh, oh, I'm finding some great bits:
-12:18 < uriel> look(1), ascii(1) and unicode(1)
-12:19 < uriel> ok, and cmp(1) is missing too
-12:23 < uriel> hah! plan9/src/cmd/index/ is really interesting (but not worth including)
-12:26 < uriel> oh! pbd! what a wonderful discovery, we certainly should add it too
diff --git a/ascii/Makefile b/ascii/Makefile
@@ -0,0 +1,10 @@
+# ascii - ascii unix port from plan9
+# Depends on ../lib9
+
+TARG = ascii
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
diff --git a/ascii/ascii.1 b/ascii/ascii.1
@@ -0,0 +1,160 @@
+.TH ASCII 1
+.SH NAME
+ascii, unicode \- interpret ASCII, Unicode characters
+.SH SYNOPSIS
+.B ascii
+[
+.B -8
+]
+[
+.BI -oxdb n
+]
+[
+.B -nct
+]
+[
+.I text
+]
+.PP
+.B unicode
+[
+.B -nt
+]
+.IB hexmin - hexmax
+.PP
+.B unicode
+[
+.B -t
+]
+.I hex
+[
+\&...
+]
+.PP
+.B unicode
+[
+.B -n
+]
+.I characters
+.PP
+.B look
+.I hex
+.B \*9/lib/unicode
+.SH DESCRIPTION
+.I Ascii
+prints the
+.SM ASCII
+values corresponding to characters and
+.I vice
+.IR versa ;
+under the
+.B -8
+option, the
+.SM ISO
+Latin-1 extensions (codes 0200-0377) are included.
+The values are interpreted in a settable numeric base;
+.B -o
+specifies octal,
+.B -d
+decimal,
+.B -x
+hexadecimal (the default), and
+.BI -b n
+base
+.IR n .
+.PP
+With no arguments,
+.I ascii
+prints a table of the character set in the specified base.
+Characters of
+.I text
+are converted to their
+.SM ASCII
+values, one per line. If, however, the first
+.I text
+argument is a valid number in the specified base, conversion
+goes the opposite way.
+Control characters are printed as two- or three-character mnemonics.
+Other options are:
+.TP
+.B -n
+Force numeric output.
+.TP
+.B -c
+Force character output.
+.TP
+.B -t
+Convert from numbers to running text; do not interpret
+control characters or insert newlines.
+.PP
+.I Unicode
+is similar; it converts between
+.SM UTF
+and character values from the Unicode Standard (see
+.IR utf (7)).
+If given a range of hexadecimal numbers,
+.I unicode
+prints a table of the specified Unicode characters \(em their values and
+.SM UTF
+representations.
+Otherwise it translates from
+.SM UTF
+to numeric value or vice versa,
+depending on the appearance of the supplied text;
+the
+.B -n
+option forces numeric output to avoid ambiguity with numeric characters.
+If converting to
+.SM UTF ,
+the characters are printed one per line unless the
+.B -t
+flag is set, in which case the output is a single string
+containing only the specified characters.
+Unlike
+.IR ascii ,
+.I unicode
+treats no characters specially.
+.PP
+The output of
+.I ascii
+and
+.I unicode
+may be unhelpful if the characters printed are not available in the current font.
+.PP
+The file
+.B \*9/lib/unicode
+contains a
+table of characters and descriptions, sorted in hexadecimal order,
+suitable for
+.IR look (1)
+on the lower case
+.I hex
+values of characters.
+.SH EXAMPLES
+.TP
+.B "ascii -d"
+Print the
+.SM ASCII
+table base 10.
+.TP
+.B "unicode p"
+Print the hex value of `p'.
+.TP
+.B "unicode 2200-22f1"
+Print a table of miscellaneous mathematical symbols.
+.TP
+.B "look 039 \*9/lib/unicode"
+See the start of the Greek alphabet's encoding in the Unicode Standard.
+.SH FILES
+.TP
+.B \*9/lib/unicode
+table of characters and descriptions.
+.SH SOURCE
+.B \*9/src/cmd/ascii.c
+.br
+.B \*9/src/cmd/unicode.c
+.SH "SEE ALSO"
+.IR look (1),
+.IR tcs (1),
+.IR utf (7),
+.IR font (7)
diff --git a/ascii/ascii.c b/ascii/ascii.c
@@ -0,0 +1,181 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+#define MAXBASE 36
+
+void usage(void);
+void put(int);
+void putn(int, int);
+void puttext(char *);
+void putnum(char *);
+int btoi(char *);
+int value(int, int);
+int isnum(char *);
+
+char *str[256]={ /* printable name of every byte value, each three columns wide; indexed by character code */
+ "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
+ "bs ", "ht ", "nl ", "vt ", "np ", "cr ", "so ", "si ",
+ "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
+ "can", "em ", "sub", "esc", "fs ", "gs ", "rs ", "us ",
+ "sp ", " ! ", " \" ", " # ", " $ ", " % ", " & ", " ' ",
+ " ( ", " ) ", " * ", " + ", " , ", " - ", " . ", " / ",
+ " 0 ", " 1 ", " 2 ", " 3 ", " 4 ", " 5 ", " 6 ", " 7 ",
+ " 8 ", " 9 ", " : ", " ; ", " < ", " = ", " > ", " ? ",
+ " @ ", " A ", " B ", " C ", " D ", " E ", " F ", " G ",
+ " H ", " I ", " J ", " K ", " L ", " M ", " N ", " O ",
+ " P ", " Q ", " R ", " S ", " T ", " U ", " V ", " W ",
+ " X ", " Y ", " Z ", " [ ", " \\ ", " ] ", " ^ ", " _ ",
+ " ` ", " a ", " b ", " c ", " d ", " e ", " f ", " g ",
+ " h ", " i ", " j ", " k ", " l ", " m ", " n ", " o ",
+ " p ", " q ", " r ", " s ", " t ", " u ", " v ", " w ",
+ " x ", " y ", " z ", " { ", " | ", " } ", " ~ ", "del",
+ "x80", "x81", "x82", "x83", "x84", "x85", "x86", "x87",
+ "x88", "x89", "x8a", "x8b", "x8c", "x8d", "x8e", "x8f",
+ "x90", "x91", "x92", "x93", "x94", "x95", "x96", "x97",
+ "x98", "x99", "x9a", "x9b", "x9c", "x9d", "x9e", "x9f",
+ "xa0", " ¡ ", " ¢ ", " £ ", " ¤ ", " ¥ ", " ¦ ", " § ",
+ " ¨ ", " © ", " ª ", " « ", " ¬ ", " \xc2\xad ", " ® ", " ¯ ", /* 0xad is the invisible soft hyphen (U+00AD); spelled as UTF-8 escapes so it cannot get lost */
+ " ° ", " ± ", " ² ", " ³ ", " ´ ", " µ ", " ¶ ", " · ",
+ " ¸ ", " ¹ ", " º ", " » ", " ¼ ", " ½ ", " ¾ ", " ¿ ",
+ " À ", " Á ", " Â ", " Ã ", " Ä ", " Å ", " Æ ", " Ç ",
+ " È ", " É ", " Ê ", " Ë ", " Ì ", " Í ", " Î ", " Ï ",
+ " Ð ", " Ñ ", " Ò ", " Ó ", " Ô ", " Õ ", " Ö ", " × ",
+ " Ø ", " Ù ", " Ú ", " Û ", " Ü ", " Ý ", " Þ ", " ß ",
+ " à ", " á ", " â ", " ã ", " ä ", " å ", " æ ", " ç ",
+ " è ", " é ", " ê ", " ë ", " ì ", " í ", " î ", " ï ",
+ " ð ", " ñ ", " ò ", " ó ", " ô ", " õ ", " ö ", " ÷ ",
+ " ø ", " ù ", " ú ", " û ", " ü ", " ý ", " þ ", " ÿ "
+};
+
+char Ncol[]={
+ 0,0,7,5,4,4,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+};
+
+int nchars=128;
+int base=16;
+int ncol;
+int text=1;
+int strip=0;
+Biobuf bin;
+
+void
+main(int argc, char **argv) /* ascii: parse the flags, then print the character table or convert each argument */
+{
+ int i;
+
+ Binit(&bin, 1, OWRITE); /* buffered output on fd 1 */
+ ARGBEGIN{
+ case '8':
+ nchars=256; break;
+ case 'x':
+ base=16; break;
+ case 'o':
+ base=8; break;
+ case 'd':
+ base=10; break;
+ case 'b':
+ base=strtoul(EARGF(usage()), 0, 0);
+ if(base<2||base>MAXBASE)
+ usage();
+ break;
+ case 'n':
+ text=0; break;
+ case 't':
+ strip=1;
+ /* fall through */
+ case 'c':
+ text=2; break;
+ default:
+ usage();
+ }ARGEND
+
+ ncol=Ncol[base]; /* number of digits printed per value in this base */
+ if(argc==0){ /* no text given: dump the table, eight cells per row */
+ for(i=0;i<nchars;i++){
+ put(i);
+ if((i&7)==7)
+ Bprint(&bin, "|\n");
+ }
+ }else{
+ if(text==1) /* direction not forced by -n, -c or -t: decide from the first argument */
+ text=isnum(argv[0]);
+ while(argc--)
+ if(text) /* arguments are numbers: print the characters they name */
+ puttext(*argv++);
+ else /* arguments are text: print the value of each byte */
+ putnum(*argv++);
+ }
+ Bputc(&bin, '\n');
+ exits(0);
+}
+void
+usage(void) /* print the option summary on fd 2 and exit with status "usage" */
+{
+ fprint(2, "usage: %s [-8] [-xod | -b n] [-nct] [--] [text]\n", argv0); /* lists exactly the flags main() accepts; there is no -s */
+ exits("usage");
+}
+void
+put(int i) /* emit one table cell: '|', the code i in the current base, a space, then the name str[i] */
+{
+ Bputc(&bin, '|');
+ putn(i, ncol);
+ Bprint(&bin, " %s", str[i]);
+}
+char dig[]="0123456789abcdefghijklmnopqrstuvwxyz";
+void
+putn(int n, int ndig) /* print the low ndig digits of n in the current base, most significant first */
+{
+ if(ndig==0)
+ return;
+ putn(n/base, ndig-1); /* recurse first so the higher-order digits come out first */
+ Bputc(&bin, dig[n%base]);
+}
+void
+puttext(char *s) /* s is a number in the current base: print the character it names */
+{
+ int n;
+ n=btoi(s)&0377; /* keep the low eight bits so n always indexes str[] */
+ if(strip)
+ Bputc(&bin, n); /* -t: the raw byte, no newline */
+ else
+ Bprint(&bin, "%s\n", str[n]); /* otherwise its table name, one per line */
+}
+void
+putnum(char *s) /* print the numeric value of each byte of s in the current base, one per line */
+{
+ while(*s){
+ putn(*s++&0377, ncol); /* mask so bytes above 0177 are not sign-extended */
+ Bputc(&bin, '\n');
+ }
+}
+int
+btoi(char *s) /* value of the digit string s in the current base; value() exits on a character that is not a digit */
+{
+ int n;
+ n=0;
+ while(*s)
+ n=n*base+value(*s++, 0);
+ return(n);
+}
+int
+value(int c, int f) /* digit value of c in the current base; if c is not a digit, return -1 when f is set, otherwise complain and exit */
+{
+ char *s;
+ for(s=dig; s<dig+base; s++) /* only the first `base' entries of dig are valid digits */
+ if(*s==c)
+ return(s-dig);
+ if(f)
+ return(-1);
+ fprint(2, "%s: bad input char %c\n", argv0, c);
+ exits("bad");
+ return 0; /* to keep ken happy */
+}
+int
+isnum(char *s) /* 1 if every character of s is a digit in the current base (also for the empty string), else 0 */
+{
+ while(*s)
+ if(value(*s++, 1)==-1)
+ return(0);
+ return(1);
+}
diff --git a/cmp/Makefile b/cmp/Makefile
@@ -0,0 +1,10 @@
+# cmp - cmp unix port from plan9
+# Depends on ../lib9
+
+TARG = cmp
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
diff --git a/cmp/cmp.1 b/cmp/cmp.1
@@ -0,0 +1,57 @@
+.TH CMP 1
+.SH NAME
+cmp \- compare two files
+.SH SYNOPSIS
+.B cmp
+[
+.B -lsL
+]
+.I file1 file2
+[
+.I offset1
+[
+.I offset2
+]
+]
+.SH DESCRIPTION
+The two files are
+compared.
+A diagnostic results if the contents differ, otherwise
+there is no output.
+.PP
+The options are:
+.TP
+.B l
+Print the byte number (decimal) and the
+differing bytes (hexadecimal) for each difference.
+.TP
+.B s
+Print nothing for differing files,
+but set the exit status.
+.TP
+.B L
+Print the line number of the first differing byte.
+.PP
+If offsets are given,
+comparison starts at the designated byte position
+of the corresponding file.
+Offsets that begin with
+.B 0x
+are hexadecimal;
+with
+.BR 0 ,
+octal; with anything else, decimal.
+.SH SOURCE
+.B \*9/src/cmd/cmp.c
+.SH "SEE ALSO"
+.IR diff (1)
+.SH DIAGNOSTICS
+If a file is inaccessible or missing, the exit status is
+.LR open .
+If the files are the same, the exit status is empty (true).
+If they are the same except that one is longer than the other, the exit status is
+.LR EOF .
+Otherwise
+.I cmp
+reports the position of the first disagreeing byte and the exit status is
+.LR differ .
diff --git a/cmp/cmp.c b/cmp/cmp.c
@@ -0,0 +1,112 @@
+#include <u.h>
+#include <libc.h>
+
+#define BUF 65536
+
+int sflag = 0;
+int lflag = 0;
+int Lflag = 0;
+
+static void usage(void);
+
+void
+main(int argc, char *argv[]) /* cmp: compare file1 and file2, optionally from given offsets; exit status is empty, "differ", "EOF" or an error string */
+{
+ int n, i, differ = 0; /* differ: set once -l has reported at least one differing byte */
+ uchar *p, *q;
+ uchar buf1[BUF], buf2[BUF];
+ int f1, f2;
+ vlong nc = 1, o, l = 1; /* nc: 1-based number of the next byte to compare; l: current line number */
+ char *name1, *name2;
+ uchar *b1s, *b1e, *b2s, *b2e; /* [b1s,b1e) and [b2s,b2e): bytes read but not yet compared */
+
+ ARGBEGIN{
+ case 's': sflag = 1; break;
+ case 'l': lflag = 1; break;
+ case 'L': Lflag = 1; break;
+ default: usage();
+ }ARGEND
+ if(argc < 2)
+ usage();
+ if((f1 = open(name1 = *argv++, OREAD)) == -1){
+ if(!sflag) perror(name1);
+ exits("open");
+ }
+ if((f2 = open(name2 = *argv++, OREAD)) == -1){
+ if(!sflag) perror(name2);
+ exits("open");
+ }
+ if(*argv){
+ o = strtoll(*argv++, 0, 0); /* base 0: accepts 0x (hex), leading 0 (octal) or decimal */
+ if(seek(f1, o, 0) < 0){
+ if(!sflag) perror("cmp: seek by offset1");
+ exits("seek 1");
+ }
+ }
+ if(*argv){
+ o = strtoll(*argv++, 0, 0);
+ if(seek(f2, o, 0) < 0){
+ if(!sflag) perror("cmp: seek by offset2");
+ exits("seek 2");
+ }
+ }
+ if(*argv)
+ usage();
+ b1s = b1e = buf1;
+ b2s = b2e = buf2;
+ for(;;){
+ if(b1s >= b1e){ /* buffer 1 fully consumed: read more */
+ if(b1s >= &buf1[BUF])
+ b1s = buf1;
+ n = read(f1, b1s, &buf1[BUF] - b1s);
+ b1e = b1s + n;
+ }
+ if(b2s >= b2e){ /* likewise for buffer 2 */
+ if(b2s >= &buf2[BUF])
+ b2s = buf2;
+ n = read(f2, b2s, &buf2[BUF] - b2s);
+ b2e = b2s + n;
+ }
+ n = b2e - b2s; /* compare as many bytes as both buffers currently hold */
+ if(n > b1e - b1s)
+ n = b1e - b1s;
+ if(n <= 0)
+ break;
+ if(memcmp((void *)b1s, (void *)b2s, n) != 0){
+ if(sflag)
+ exits("differ");
+ for(p = b1s, q = b2s, i = 0; i < n; p++, q++, i++) {
+ if(*p == '\n')
+ l++;
+ if(*p != *q){
+ if(!lflag){
+ print("%s %s differ: char %lld",
+ name1, name2, nc+i);
+ print(Lflag?" line %lld\n":"\n", l);
+ exits("differ");
+ }
+ differ = 1; print("%6lld 0x%.2x 0x%.2x\n", nc+i, *p, *q); /* -l: list every difference and remember that one was seen */
+ }
+ }
+ }
+ if(Lflag)
+ for(p = b1s; p < b1s + n;) /* count newlines only in the n bytes consumed now; bytes left in the buffer are counted when they are compared */
+ if(*p++ == '\n')
+ l++;
+ nc += n;
+ b1s += n;
+ b2s += n;
+ }
+ if(b1e - b1s == b2e - b2s) /* both inputs ran out together */
+ exits(differ ? "differ" : (char *)0); /* with -l the differences were only listed, so report them in the status here */
+ if(!sflag)
+ print("EOF on %s\n", (b1e - b1s > b2e - b2s)? name2 : name1);
+ exits("EOF");
+}
+
+static void
+usage(void) /* print the synopsis on fd 2, like the other diagnostics, and exit with status "usage" */
+{
+ fprint(2, "Usage: cmp [-lsL] file1 file2 [offset1 [offset2] ]\n");
+ exits("usage");
+}
diff --git a/dd/Makefile b/dd/Makefile
@@ -0,0 +1,10 @@
+# dd - dd unix port from plan9
+# Depends on ../lib9
+
+TARG = dd
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
diff --git a/dd/dd.1 b/dd/dd.1
diff --git a/dd/dd.c b/dd/dd.c
@@ -0,0 +1,660 @@
+#include <u.h>
+#include <libc.h>
+
+#define BIG 2147483647
+#define LCASE (1<<0)
+#define UCASE (1<<1)
+#define SWAB (1<<2)
+#define NERR (1<<3)
+#define SYNC (1<<4)
+int cflag;
+int fflag;
+char *string;
+char *ifile;
+char *ofile;
+char *ibuf;
+char *obuf;
+vlong skip;
+vlong oseekn;
+vlong iseekn;
+vlong count;
+long files = 1;
+long ibs = 512;
+long obs = 512;
+long bs;
+long cbs;
+long ibc;
+long obc;
+long cbc;
+long nifr;
+long nipr;
+long nofr;
+long nopr;
+long ntrunc;
+int dotrunc = 1;
+int ibf;
+int obf;
+char *op;
+int nspace;
+uchar etoa[256];
+uchar atoe[256];
+uchar atoibm[256];
+
+void flsh(void);
+int match(char *s);
+vlong number(long big);
+void cnull(int cc);
+void null(int c);
+void ascii(int cc);
+void unblock(int cc);
+void ebcdic(int cc);
+void ibm(int cc);
+void block(int cc);
+void term(void);
+void stats(void);
+
+#define iskey(s) ((key[0] == '-') && (strcmp(key+1, s) == 0))
+
+void
+main(int argc, char *argv[]) /* dd: parse "-key value" argument pairs, open input and output, then copy input to output through the selected conversion */
+{
+ void (*conv)(int);
+ char *ip;
+ char *key;
+ int a, c;
+
+ conv = null;
+ for(c=1; c<argc; c++) {
+ key = argv[c++]; /* arguments come in pairs: -key value */
+ if(c >= argc){
+ fprint(2, "dd: arg %s needs a value\n", key);
+ exits("arg");
+ }
+ string = argv[c]; /* number() and match() parse from the global `string' */
+ if(iskey("ibs")) {
+ ibs = number(BIG);
+ continue;
+ }
+ if(iskey("obs")) {
+ obs = number(BIG);
+ continue;
+ }
+ if(iskey("cbs")) {
+ cbs = number(BIG);
+ continue;
+ }
+ if(iskey("bs")) {
+ bs = number(BIG);
+ continue;
+ }
+ if(iskey("if")) {
+ ifile = string;
+ continue;
+ }
+ if(iskey("of")) {
+ ofile = string;
+ continue;
+ }
+ if(iskey("trunc")) {
+ dotrunc = number(BIG);
+ continue;
+ }
+ if(iskey("skip")) {
+ skip = number(BIG);
+ continue;
+ }
+ if(iskey("seek") || iskey("oseek")) {
+ oseekn = number(BIG);
+ continue;
+ }
+ if(iskey("iseek")) {
+ iseekn = number(BIG);
+ continue;
+ }
+ if(iskey("count")) {
+ count = number(BIG);
+ continue;
+ }
+ if(iskey("files")) {
+ files = number(BIG);
+ continue;
+ }
+ if(iskey("conv")) {
+ cloop: /* consume one comma-separated conversion name per pass */
+ if(match(","))
+ goto cloop;
+ if(*string == '\0')
+ continue;
+ if(match("ebcdic")) {
+ conv = ebcdic;
+ goto cloop;
+ }
+ if(match("ibm")) {
+ conv = ibm;
+ goto cloop;
+ }
+ if(match("ascii")) {
+ conv = ascii;
+ goto cloop;
+ }
+ if(match("block")) {
+ conv = block;
+ goto cloop;
+ }
+ if(match("unblock")) {
+ conv = unblock;
+ goto cloop;
+ }
+ if(match("lcase")) {
+ cflag |= LCASE;
+ goto cloop;
+ }
+ if(match("ucase")) {
+ cflag |= UCASE;
+ goto cloop;
+ }
+ if(match("swab")) {
+ cflag |= SWAB;
+ goto cloop;
+ }
+ if(match("noerror")) {
+ cflag |= NERR;
+ goto cloop;
+ }
+ if(match("sync")) {
+ cflag |= SYNC;
+ goto cloop;
+ }
+ }
+ fprint(2, "dd: bad arg: %s\n", key); /* unknown key, or an unknown name inside -conv */
+ exits("arg");
+ }
+ if(conv == null && cflag&(LCASE|UCASE))
+ conv = cnull;
+ if(ifile)
+ ibf = open(ifile, 0);
+ else
+ ibf = dup(0, -1);
+ if(ibf < 0) {
+ fprint(2, "dd: open %s: %r\n", ifile);
+ exits("open");
+ }
+ if(ofile){
+ if(dotrunc)
+ obf = create(ofile, 1, 0664);
+ else
+ obf = open(ofile, 1);
+ if(obf < 0) {
+ fprint(2, "dd: create %s: %r\n", ofile);
+ exits("create");
+ }
+ }else{
+ obf = dup(1, -1);
+ if(obf < 0) {
+ fprint(2, "dd: can't dup file descriptor: %r\n"); /* ofile is nil in this branch, so there is no name to print */
+ exits("dup");
+ }
+ }
+ if(bs)
+ ibs = obs = bs;
+ if(ibs == obs && conv == null) /* same block size and no per-byte conversion: obuf can alias ibuf below */
+ fflag++;
+ if(ibs == 0 || obs == 0) {
+ fprint(2, "dd: counts: cannot be zero\n");
+ exits("counts");
+ }
+ ibuf = sbrk(ibs);
+ if(fflag)
+ obuf = ibuf;
+ else
+ obuf = sbrk(obs);
+ sbrk(64); /* For good measure */
+ if(ibuf == (char *)-1 || obuf == (char *)-1) {
+ fprint(2, "dd: not enough memory: %r\n");
+ exits("memory");
+ }
+ ibc = 0;
+ obc = 0;
+ cbc = 0;
+ op = obuf;
+
+/*
+ if(signal(SIGINT, SIG_IGN) != SIG_IGN)
+ signal(SIGINT, term);
+*/
+ seek(obf, obs*oseekn, 1);
+ seek(ibf, ibs*iseekn, 1);
+ while(skip) { /* -skip: read and discard that many input blocks */
+ read(ibf, ibuf, ibs);
+ skip--;
+ }
+
+ ip = 0;
+loop:
+ if(ibc-- == 0) { /* input buffer used up: refill it */
+ ibc = 0;
+ if(count==0 || nifr+nipr!=count) { /* no -count limit, or limit not reached yet */
+ if(cflag&(NERR|SYNC))
+ for(ip=ibuf+ibs; ip>ibuf;)
+ *--ip = 0;
+ ibc = read(ibf, ibuf, ibs);
+ }
+ if(ibc == -1) {
+ perror("read");
+ if((cflag&NERR) == 0) {
+ flsh();
+ term();
+ }
+ ibc = 0;
+ for(c=0; c<ibs; c++) /* noerror: keep the buffer up to its last non-zero byte */
+ if(ibuf[c] != 0)
+ ibc = c;
+ stats();
+ }
+ if(ibc == 0 && --files<=0) { /* end of the last input file (or -count reached) */
+ flsh();
+ term();
+ }
+ if(ibc != ibs) {
+ nipr++;
+ if(cflag&SYNC)
+ ibc = ibs;
+ } else
+ nifr++;
+ ip = ibuf;
+ c = ibc>>1; /* number of complete byte pairs; an odd trailing byte is left alone */
+ if(cflag&SWAB && c)
+ do {
+ a = *ip++;
+ ip[-1] = *ip;
+ *ip++ = a;
+ } while(--c);
+ ip = ibuf;
+ if(fflag) { /* fast path: write the input block out as it is */
+ obc = ibc;
+ flsh();
+ ibc = 0;
+ }
+ goto loop;
+ }
+ c = 0;
+ c |= *ip++;
+ c &= 0377;
+ (*conv)(c); /* hand one input byte to the selected conversion */
+ goto loop;
+}
+
+void
+flsh(void) /* write the obc pending bytes of obuf to obf and update the output record counts */
+{
+ int c;
+
+ if(obc) {
+ c = write(obf, obuf, obc);
+ if(c != obc) { /* short or failed write: report it and stop */
+ if(c > 0)
+ ++nopr;
+ perror("write");
+ term(); /* NOTE(review): term() calls exits(0), so this failure still exits with an empty (success) status */
+ }
+ if(obc == obs) /* a full output block */
+ nofr++;
+ else
+ nopr++;
+ obc = 0;
+ }
+}
+
+int
+match(char *s) /* if the global `string' begins with s, advance string past that prefix and return 1; otherwise return 0 and leave string alone */
+{
+ char *cs;
+
+ cs = string;
+ while(*cs++ == *s)
+ if(*s++ == '\0') /* both ended together: exact match */
+ goto true;
+ if(*s != '\0') /* mismatch before s was used up */
+ return 0;
+
+true:
+ cs--; /* cs went one past the first character that is not part of the prefix */
+ string = cs;
+ return 1;
+}
+
+vlong
+number(long big) /* parse the decimal number at the global `string' with optional k, b and x multipliers; exits if the result is negative or >= big */
+{
+ char *cs;
+ vlong n;
+
+ cs = string;
+ n = 0;
+ while(*cs >= '0' && *cs <= '9')
+ n = n*10 + *cs++ - '0';
+ for(;;) /* suffix characters; any character without a case below is silently skipped */
+ switch(*cs++) {
+
+ case 'k':
+ n *= 1024;
+ continue;
+
+/* case 'w':
+ n *= sizeof(int);
+ continue;
+*/
+
+ case 'b':
+ n *= 512;
+ continue;
+
+/* case '*':*/
+ case 'x':
+ string = cs;
+ n *= number(BIG); /* multiply by the number after the x, then fall through to the range check */
+
+ case '\0':
+ if(n>=big || n<0) {
+ fprint(2, "dd: argument %lld out of range\n", n);
+ exits("range");
+ }
+ return n;
+ }
+ /* never gets here */
+}
+
+void
+cnull(int cc) /* apply the ucase/lcase conversions selected in cflag to cc, then pass it to null() */
+{
+ int c;
+
+ c = cc;
+ if((cflag&UCASE) && c>='a' && c<='z')
+ c += 'A'-'a';
+ if((cflag&LCASE) && c>='A' && c<='Z')
+ c += 'a'-'A';
+ null(c);
+}
+
+void
+null(int c) /* append byte c to the output buffer, flushing once obs bytes have accumulated */
+{
+
+ *op = c;
+ op++;
+ if(++obc >= obs) {
+ flsh();
+ op = obuf; /* start filling the buffer from the beginning again */
+ }
+}
+
+void
+ascii(int cc) /* translate cc through etoa[]; with cbs set, also turn each cbs-byte record into a line with its trailing blanks dropped */
+{
+ int c;
+
+ c = etoa[cc];
+ if(cbs == 0) {
+ cnull(c);
+ return;
+ }
+ if(c == ' ') { /* hold blanks back: they are only written if something non-blank follows */
+ nspace++;
+ goto out;
+ }
+ while(nspace > 0) {
+ null(' ');
+ nspace--;
+ }
+ cnull(c);
+
+out:
+ if(++cbc >= cbs) { /* end of a record: emit the newline and forget the pending blanks */
+ null('\n');
+ cbc = 0;
+ nspace = 0;
+ }
+}
+
+void
+unblock(int cc) /* like ascii() but without the etoa[] translation: with cbs set, turn each cbs-byte record into a line with its trailing blanks dropped */
+{
+ int c;
+
+ c = cc & 0377;
+ if(cbs == 0) {
+ cnull(c);
+ return;
+ }
+ if(c == ' ') { /* hold blanks back: they are only written if something non-blank follows */
+ nspace++;
+ goto out;
+ }
+ while(nspace > 0) {
+ null(' ');
+ nspace--;
+ }
+ cnull(c);
+
+out:
+ if(++cbc >= cbs) { /* end of a record: emit the newline and forget the pending blanks */
+ null('\n');
+ cbc = 0;
+ nspace = 0;
+ }
+}
+
+void
+ebcdic(int cc) /* case-fold cc and translate it through atoe[]; with cbs set, pad each newline-terminated line with blanks to cbs bytes and drop the excess */
+{
+ int c;
+
+ c = cc;
+ if(cflag&UCASE && c>='a' && c<='z')
+ c += 'A'-'a';
+ if(cflag&LCASE && c>='A' && c<='Z')
+ c += 'a'-'A';
+ c = atoe[c];
+ if(cbs == 0) {
+ null(c);
+ return;
+ }
+ if(cc == '\n') { /* end of line: pad the record with translated blanks */
+ while(cbc < cbs) {
+ null(atoe[' ']);
+ cbc++;
+ }
+ cbc = 0;
+ return;
+ }
+ if(cbc == cbs) /* first byte beyond the record size: count the line as truncated once */
+ ntrunc++;
+ cbc++;
+ if(cbc <= cbs)
+ null(c);
+}
+
+void
+ibm(int cc) /* same as ebcdic() but translating through atoibm[] */
+{
+ int c;
+
+ c = cc;
+ if(cflag&UCASE && c>='a' && c<='z')
+ c += 'A'-'a';
+ if(cflag&LCASE && c>='A' && c<='Z')
+ c += 'a'-'A';
+ c = atoibm[c] & 0377;
+ if(cbs == 0) {
+ null(c);
+ return;
+ }
+ if(cc == '\n') { /* end of line: pad the record with translated blanks */
+ while(cbc < cbs) {
+ null(atoibm[' ']);
+ cbc++;
+ }
+ cbc = 0;
+ return;
+ }
+ if(cbc == cbs) /* first byte beyond the record size: count the line as truncated once */
+ ntrunc++;
+ cbc++;
+ if(cbc <= cbs)
+ null(c);
+}
+
+void
+block(int cc) /* case-fold cc without translating it; with cbs set, pad each newline-terminated line with blanks to cbs bytes and drop the excess */
+{
+ int c;
+
+ c = cc;
+ if(cflag&UCASE && c>='a' && c<='z')
+ c += 'A'-'a';
+ if(cflag&LCASE && c>='A' && c<='Z')
+ c += 'a'-'A';
+ c &= 0377;
+ if(cbs == 0) {
+ null(c);
+ return;
+ }
+ if(cc == '\n') { /* end of line: pad the record with blanks */
+ while(cbc < cbs) {
+ null(' ');
+ cbc++;
+ }
+ cbc = 0;
+ return;
+ }
+ if(cbc == cbs) /* first byte beyond the record size: count the line as truncated once */
+ ntrunc++;
+ cbc++;
+ if(cbc <= cbs)
+ null(c);
+}
+
+void
+term(void) /* print the record counts and exit with an empty status */
+{
+
+ stats();
+ exits(0);
+}
+
+void
+stats(void) /* report full+partial record counts in and out, and the truncated-record count if non-zero, on fd 2 */
+{
+
+ fprint(2, "%lud+%lud records in\n", nifr, nipr);
+ fprint(2, "%lud+%lud records out\n", nofr, nopr);
+ if(ntrunc)
+ fprint(2, "%lud truncated records\n", ntrunc);
+}
+
+uchar etoa[] =
+{
+ 0000,0001,0002,0003,0234,0011,0206,0177,
+ 0227,0215,0216,0013,0014,0015,0016,0017,
+ 0020,0021,0022,0023,0235,0205,0010,0207,
+ 0030,0031,0222,0217,0034,0035,0036,0037,
+ 0200,0201,0202,0203,0204,0012,0027,0033,
+ 0210,0211,0212,0213,0214,0005,0006,0007,
+ 0220,0221,0026,0223,0224,0225,0226,0004,
+ 0230,0231,0232,0233,0024,0025,0236,0032,
+ 0040,0240,0241,0242,0243,0244,0245,0246,
+ 0247,0250,0133,0056,0074,0050,0053,0041,
+ 0046,0251,0252,0253,0254,0255,0256,0257,
+ 0260,0261,0135,0044,0052,0051,0073,0136,
+ 0055,0057,0262,0263,0264,0265,0266,0267,
+ 0270,0271,0174,0054,0045,0137,0076,0077,
+ 0272,0273,0274,0275,0276,0277,0300,0301,
+ 0302,0140,0072,0043,0100,0047,0075,0042,
+ 0303,0141,0142,0143,0144,0145,0146,0147,
+ 0150,0151,0304,0305,0306,0307,0310,0311,
+ 0312,0152,0153,0154,0155,0156,0157,0160,
+ 0161,0162,0313,0314,0315,0316,0317,0320,
+ 0321,0176,0163,0164,0165,0166,0167,0170,
+ 0171,0172,0322,0323,0324,0325,0326,0327,
+ 0330,0331,0332,0333,0334,0335,0336,0337,
+ 0340,0341,0342,0343,0344,0345,0346,0347,
+ 0173,0101,0102,0103,0104,0105,0106,0107,
+ 0110,0111,0350,0351,0352,0353,0354,0355,
+ 0175,0112,0113,0114,0115,0116,0117,0120,
+ 0121,0122,0356,0357,0360,0361,0362,0363,
+ 0134,0237,0123,0124,0125,0126,0127,0130,
+ 0131,0132,0364,0365,0366,0367,0370,0371,
+ 0060,0061,0062,0063,0064,0065,0066,0067,
+ 0070,0071,0372,0373,0374,0375,0376,0377,
+};
+uchar atoe[] =
+{
+ 0000,0001,0002,0003,0067,0055,0056,0057,
+ 0026,0005,0045,0013,0014,0015,0016,0017,
+ 0020,0021,0022,0023,0074,0075,0062,0046,
+ 0030,0031,0077,0047,0034,0035,0036,0037,
+ 0100,0117,0177,0173,0133,0154,0120,0175,
+ 0115,0135,0134,0116,0153,0140,0113,0141,
+ 0360,0361,0362,0363,0364,0365,0366,0367,
+ 0370,0371,0172,0136,0114,0176,0156,0157,
+ 0174,0301,0302,0303,0304,0305,0306,0307,
+ 0310,0311,0321,0322,0323,0324,0325,0326,
+ 0327,0330,0331,0342,0343,0344,0345,0346,
+ 0347,0350,0351,0112,0340,0132,0137,0155,
+ 0171,0201,0202,0203,0204,0205,0206,0207,
+ 0210,0211,0221,0222,0223,0224,0225,0226,
+ 0227,0230,0231,0242,0243,0244,0245,0246,
+ 0247,0250,0251,0300,0152,0320,0241,0007,
+ 0040,0041,0042,0043,0044,0025,0006,0027,
+ 0050,0051,0052,0053,0054,0011,0012,0033,
+ 0060,0061,0032,0063,0064,0065,0066,0010,
+ 0070,0071,0072,0073,0004,0024,0076,0341,
+ 0101,0102,0103,0104,0105,0106,0107,0110,
+ 0111,0121,0122,0123,0124,0125,0126,0127,
+ 0130,0131,0142,0143,0144,0145,0146,0147,
+ 0150,0151,0160,0161,0162,0163,0164,0165,
+ 0166,0167,0170,0200,0212,0213,0214,0215,
+ 0216,0217,0220,0232,0233,0234,0235,0236,
+ 0237,0240,0252,0253,0254,0255,0256,0257,
+ 0260,0261,0262,0263,0264,0265,0266,0267,
+ 0270,0271,0272,0273,0274,0275,0276,0277,
+ 0312,0313,0314,0315,0316,0317,0332,0333,
+ 0334,0335,0336,0337,0352,0353,0354,0355,
+ 0356,0357,0372,0373,0374,0375,0376,0377,
+};
+uchar atoibm[] =
+{
+ 0000,0001,0002,0003,0067,0055,0056,0057,
+ 0026,0005,0045,0013,0014,0015,0016,0017,
+ 0020,0021,0022,0023,0074,0075,0062,0046,
+ 0030,0031,0077,0047,0034,0035,0036,0037,
+ 0100,0132,0177,0173,0133,0154,0120,0175,
+ 0115,0135,0134,0116,0153,0140,0113,0141,
+ 0360,0361,0362,0363,0364,0365,0366,0367,
+ 0370,0371,0172,0136,0114,0176,0156,0157,
+ 0174,0301,0302,0303,0304,0305,0306,0307,
+ 0310,0311,0321,0322,0323,0324,0325,0326,
+ 0327,0330,0331,0342,0343,0344,0345,0346,
+ 0347,0350,0351,0255,0340,0275,0137,0155,
+ 0171,0201,0202,0203,0204,0205,0206,0207,
+ 0210,0211,0221,0222,0223,0224,0225,0226,
+ 0227,0230,0231,0242,0243,0244,0245,0246,
+ 0247,0250,0251,0300,0117,0320,0241,0007,
+ 0040,0041,0042,0043,0044,0025,0006,0027,
+ 0050,0051,0052,0053,0054,0011,0012,0033,
+ 0060,0061,0032,0063,0064,0065,0066,0010,
+ 0070,0071,0072,0073,0004,0024,0076,0341,
+ 0101,0102,0103,0104,0105,0106,0107,0110,
+ 0111,0121,0122,0123,0124,0125,0126,0127,
+ 0130,0131,0142,0143,0144,0145,0146,0147,
+ 0150,0151,0160,0161,0162,0163,0164,0165,
+ 0166,0167,0170,0200,0212,0213,0214,0215,
+ 0216,0217,0220,0232,0233,0234,0235,0236,
+ 0237,0240,0252,0253,0254,0255,0256,0257,
+ 0260,0261,0262,0263,0264,0265,0266,0267,
+ 0270,0271,0272,0273,0274,0275,0276,0277,
+ 0312,0313,0314,0315,0316,0317,0332,0333,
+ 0334,0335,0336,0337,0352,0353,0354,0355,
+ 0356,0357,0372,0373,0374,0375,0376,0377,
+};
diff --git a/diff/Makefile b/diff/Makefile
@@ -0,0 +1,35 @@
+# diff - diff shell unix port from plan9
+# Depends on ../lib9
+
+TARG = diff
+OFILES = diffdir.o diffio.o diffreg.o main.o
+MANFILES = diff.1
+
+include ../config.mk
+
+all: ${TARG}
+ @strip ${TARG}
+ @echo built ${TARG}
+
+install: ${TARG}
+ @mkdir -p ${DESTDIR}${PREFIX}/bin
+ @cp -f ${TARG} ${DESTDIR}${PREFIX}/bin/
+ @chmod 755 ${DESTDIR}${PREFIX}/bin/${TARG}
+ @mkdir -p ${DESTDIR}${MANPREFIX}/man1
+ @cp -f ${MANFILES} ${DESTDIR}${MANPREFIX}/man1
+ @chmod 444 ${DESTDIR}${MANPREFIX}/man1/${MANFILES}
+
+uninstall:
+	rm -f ${DESTDIR}${PREFIX}/bin/${TARG}
+	rm -f ${DESTDIR}${MANPREFIX}/man1/${MANFILES}
+
+.c.o:
+ @echo CC $*.c
+ @${CC} ${CFLAGS} -I../lib9 -I${PREFIX}/include -I../lib9 $*.c
+
+clean:
+ rm -f ${OFILES} ${TARG}
+
+${TARG}: ${OFILES}
+ @echo LD ${TARG}
+ @${CC} ${LDFLAGS} -o ${TARG} ${OFILES} -lm -L${PREFIX}/lib -L../lib9 -l9
diff --git a/diff/diff.1 b/diff/diff.1
@@ -0,0 +1,163 @@
+.TH DIFF 1
+.SH NAME
+diff \- differential file comparator
+.SH SYNOPSIS
+.B diff
+[
+.B -acefmnbwr
+] file1 ... file2
+.SH DESCRIPTION
+.I Diff
+tells what lines must be changed in two files to bring them
+into agreement.
+If one file
+is a directory,
+then a file in that directory with basename the same as that of
+the other file is used.
+If both files are directories, similarly named files in the
+two directories are compared by the method of
+.I diff
+for text
+files and
+.IR cmp (1)
+otherwise.
+If more than two file names are given, then each argument is compared
+to the last argument as above.
+The
+.B -r
+option causes
+.I diff
+to process similarly named subdirectories recursively.
+When processing more than one file,
+.I diff
+prefixes file differences with a single line
+listing the two differing files, in the form of
+a
+.I diff
+command line.
+The
+.B -m
+flag causes this behavior even when processing single files.
+.PP
+The normal output contains lines of these forms:
+.IP "" 5
+.I n1
+.B a
+.I n3,n4
+.br
+.I n1,n2
+.B d
+.I n3
+.br
+.I n1,n2
+.B c
+.I n3,n4
+.PP
+These lines resemble
+.I ed
+commands to convert
+.I file1
+into
+.IR file2 .
+The numbers after the letters pertain to
+.IR file2 .
+In fact, by exchanging `a' for `d' and reading backward
+one may ascertain equally how to convert
+.I file2
+into
+.IR file1 .
+As in
+.IR ed ,
+identical pairs where
+.I n1
+=
+.I n2
+or
+.I n3
+=
+.I n4
+are abbreviated as a single number.
+.PP
+Following each of these lines come all the lines that are
+affected in the first file flagged by `<',
+then all the lines that are affected in the second file
+flagged by `>'.
+.PP
+The
+.B -b
+option causes
+trailing blanks (spaces and tabs) to be ignored
+and other strings of blanks to compare equal.
+The
+.B -w
+option causes all white-space to be removed from input lines
+before applying the difference algorithm.
+.PP
+The
+.B -n
+option prefixes each range with
+.IB file : \fR
+and inserts a space around the
+.BR a ,
+.BR c ,
+and
+.B d
+verbs.
+The
+.B -e
+option produces a script of
+.I "a, c"
+and
+.I d
+commands for the editor
+.IR ed ,
+which will recreate
+.I file2
+from
+.IR file1 .
+The
+.B -f
+option produces a similar script,
+not useful with
+.IR ed ,
+in the opposite order. It may, however, be
+useful as input to a stream-oriented post-processor.
+.PP
+The
+.B -c
+option includes three lines of context around each
+change, merging changes whose contexts overlap.
+The
+.B -a
+flag displays the entire file as context.
+.PP
+Except in rare circumstances,
+.I diff
+finds a smallest sufficient set of file
+differences.
+.SH FILES
+.B /tmp/diff[12]
+.SH SOURCE
+.B \*9/src/cmd/diff
+.SH "SEE ALSO"
+.IR cmp (1),
+.IR comm (1),
+.IR ed (1)
+.SH DIAGNOSTICS
+Exit status is the empty string
+for no differences,
+.L some
+for some,
+and
+.L error
+for trouble.
+.SH BUGS
+Editing scripts produced under the
+.BR -e " or"
+.BR -f " option are naive about"
+creating lines consisting of a single `\fB.\fR'.
+.PP
+When running
+.I diff
+on directories, the notion of what is a text
+file is open to debate.
diff --git a/diff/diff.h b/diff/diff.h
@@ -0,0 +1,27 @@
+/* all diff output goes through the Biobuf defined in main.c; this macro renames it to bstdout */
+#define stdout bstdout
+
+/*
+ * NOTE(review): the five variables below are defined here (no extern),
+ * so every file that includes this header carries its own tentative
+ * definition -- confirm the toolchain still merges such definitions.
+ */
+char mode; /* '\0' (normal), 'e', 'f', 'n', 'c', 'a' -- set from the option letters in main() */
+char bflag; /* 1 (-b): ignore multiple and trailing blanks; 2 (-w): ignore all white space */
+char rflag; /* recurse down directory trees */
+char mflag; /* pseudo flag: doing multiple files, one dir */
+int anychange; /* set to 1 by change() once any difference has been found */
+extern Biobuf stdout;
+extern int binary;
+
+/* typed wrappers around emalloc/erealloc/free; n is an element count, not a byte count */
+#define MALLOC(t, n) ((t *)emalloc((n)*sizeof(t)))
+#define REALLOC(p, t, n) ((t *)erealloc((void *)(p), (n)*sizeof(t)))
+#define FREE(p) free((void *)(p))
+
+/* callers declare path buffers of MAXPATHLEN+1 bytes */
+#define MAXPATHLEN 1024
+
+int mkpathname(char *, char *, char *);
+void *emalloc(unsigned);
+void *erealloc(void *, unsigned);
+void diff(char *, char *, int);
+void diffdir(char *, char *, int);
+void diffreg(char *, char *);
+Biobuf *prepare(int, char *);
+void panic(int, char *, ...);
+void check(Biobuf *, Biobuf *);
+void change(int, int, int, int);
+void flushchanges(void);
diff --git a/diff/diffdir.c b/diff/diffdir.c
@@ -0,0 +1,113 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "diff.h"
+
+/* qsort comparator: each argument points at a char* name; order is that of strcmp */
+static int
+itemcmp(const void *v1, const void *v2)
+{
+	char *const *lhs, *const *rhs;
+
+	lhs = v1;
+	rhs = v2;
+	return strcmp(*lhs, *rhs);
+}
+
+/*
+ * Read directory `name' and return a malloc'ed, nil-terminated vector of
+ * malloc'ed entry names sorted with itemcmp.  Returns nil (after calling
+ * panic) when the directory cannot be opened.
+ */
+static char **
+scandir(char *name)
+{
+	char **names;
+	Dir *entries;
+	int count, dfd, left;
+
+	dfd = open(name, OREAD);
+	if (dfd < 0) {
+		panic(mflag ? 0 : 2, "can't open %s\n", name);
+		return nil;
+	}
+	names = 0;
+	count = 0;
+	left = dirreadall(dfd, &entries);
+	if (left > 0) {
+		/* copy the names, walking the Dir array from its last entry to its first */
+		for (left--; left >= 0; left--) {
+			names = REALLOC(names, char *, (count+1));
+			names[count] = MALLOC(char, strlen(entries[left].name)+1);
+			strcpy(names[count], entries[left].name);
+			count++;
+		}
+		free(entries);
+	}
+	/* one more slot for the terminating nil */
+	names = REALLOC(names, char*, (count+1));
+	names[count] = 0;
+	close(dfd);
+	qsort((char *)names, count, sizeof(char*), itemcmp);
+	return names;
+}
+
+/* Return 1 when p is exactly "." or "..", otherwise 0. */
+static int
+isdotordotdot(char *p)
+{
+	if (p[0] != '.')
+		return 0;
+	if (p[1] == '\0')
+		return 1;
+	return p[1] == '.' && p[2] == '\0';
+}
+
+/*
+ * Compare directories f and t.  Both name lists are sorted, so they are
+ * merged in step: a name present on one side only is reported as
+ * "Only in ..." (normal and -n modes), a name present on both sides is
+ * handed to diff() one level deeper.  "." and ".." are skipped.
+ * If either directory cannot be scanned nothing is compared.
+ */
+void
+diffdir(char *f, char *t, int level)
+{
+	char **df, **dt, **dirf, **dirt;
+	char *from, *to;
+	int res;
+	char fb[MAXPATHLEN+1], tb[MAXPATHLEN+1];
+
+	df = scandir(f);
+	dt = scandir(t);
+	dirf = df;
+	dirt = dt;
+	if(df == nil || dt == nil)
+		goto Out;
+	while (*df || *dt) {
+		from = *df;
+		to = *dt;
+		if (from && isdotordotdot(from)) {
+			df++;
+			continue;
+		}
+		if (to && isdotordotdot(to)) {
+			dt++;
+			continue;
+		}
+		/* an exhausted list sorts after everything that is left in the other */
+		if (!from)
+			res = 1;
+		else if (!to)
+			res = -1;
+		else
+			res = strcmp(from, to);
+		if (res < 0) {
+			if (mode == 0 || mode == 'n')
+				Bprint(&stdout, "Only in %s: %s\n", f, from);
+			df++;
+			continue;
+		}
+		if (res > 0) {
+			if (mode == 0 || mode == 'n')
+				Bprint(&stdout, "Only in %s: %s\n", t, to);
+			dt++;
+			continue;
+		}
+		/*
+		 * mkpathname has already reported an over-long path.  Step past
+		 * the pair before continuing; leaving df/dt untouched would
+		 * retry the same entry forever.
+		 */
+		if (mkpathname(fb, f, from) || mkpathname(tb, t, to)) {
+			df++;
+			dt++;
+			continue;
+		}
+		diff(fb, tb, level+1);
+		df++;
+		dt++;
+	}
+Out:
+	/* the cursors may be nil when a scan failed; the saved heads are freed either way */
+	for (df = dirf; df && *df; df++)
+		FREE(*df);
+	for (dt = dirt; dt && *dt; dt++)
+		FREE(*dt);
+	FREE(dirf);
+	FREE(dirt);
+}
diff --git a/diff/diffio.c b/diff/diffio.c
@@ -0,0 +1,387 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <ctype.h>
+#include "diff.h"
+
+struct line {
+ int serial;
+ int value;
+};
+extern struct line *file[2];
+extern int len[2];
+extern long *ixold, *ixnew;
+extern int *J;
+
+static Biobuf *input[2];
+static char *file1, *file2;
+static int firstchange;
+
+#define MAXLINELEN 4096
+#define MIN(x, y) ((x) < (y) ? (x): (y))
+
+/*
+ * Read one line from bp into buf (capacity MAXLINELEN), dropping the
+ * newline and NUL-terminating the result.  At most MAXLINELEN-1 bytes
+ * are stored; the rest of an over-long line is read and thrown away.
+ * Returns the number of bytes stored, or -1 at end of input when
+ * nothing at all was read.
+ */
+static int
+readline(Biobuf *bp, char *buf)
+{
+ int c;
+ char *p, *e;
+
+ p = buf;
+ e = p + MAXLINELEN-1;
+ do {
+ c = Bgetc(bp);
+ if (c < 0) {
+ /* EOF: only an error if the line is empty, else return the partial last line */
+ if (p == buf)
+ return -1;
+ break;
+ }
+ if (c == '\n')
+ break;
+ *p++ = c;
+ } while (p < e);
+ *p = 0;
+ /* the loop ended because the buffer filled up: skip to the end of the line */
+ if (c != '\n' && c >= 0) {
+ do c = Bgetc(bp);
+ while (c >= 0 && c != '\n');
+ }
+ return p - buf;
+}
+
+#define HALFLONG 16
+#define low(x) (x&((1L<<HALFLONG)-1))
+#define high(x) (x>>HALFLONG)
+
+/*
+ * hashing has the effect of
+ * arranging line in 7-bit bytes and then
+ * summing 1-s complement in 16-bit hunks
+ */
+/*
+ * Read the next line of bp into buf and return its hash, folded to the
+ * range of a short.  White space is hashed according to bflag:
+ * 0 hashes every byte, 1 treats a run of white space as one gap and
+ * ignores trailing white space, anything else ignores white space
+ * altogether.
+ *
+ * Returns 0 only at end of input.  A real line whose hash folds to 0 is
+ * reported as 1 instead; prepare() stops reading at the first 0, so
+ * without the remap such a line silently truncated the file.  The
+ * extra collision is harmless because check() re-compares the text of
+ * every matched pair.
+ */
+static int
+readhash(Biobuf *bp, char *buf)
+{
+	long sum;
+	unsigned shift;
+	char *p;
+	int len, space, h;
+
+	sum = 1;
+	shift = 0;
+	if ((len = readline(bp, buf)) == -1)
+		return 0;
+	p = buf;
+	switch(bflag) /* various types of white space handling */
+	{
+	case 0:
+		while (len--) {
+			sum += (long)*p++ << (shift &= (HALFLONG-1));
+			shift += 7;
+		}
+		break;
+	case 1:
+		/*
+		 * coalesce multiple white-space
+		 */
+		for (space = 0; len--; p++) {
+			if (isspace((uchar)*p)) {
+				space++;
+				continue;
+			}
+			if (space) {
+				/* a gap of any width advances the shift once */
+				shift += 7;
+				space = 0;
+			}
+			sum += (long)*p << (shift &= (HALFLONG-1));
+			shift += 7;
+		}
+		break;
+	default:
+		/*
+		 * strip all white-space
+		 */
+		while (len--) {
+			if (isspace((uchar)*p)) {
+				p++;
+				continue;
+			}
+			sum += (long)*p++ << (shift &= (HALFLONG-1));
+			shift += 7;
+		}
+		break;
+	}
+	sum = low(sum) + high(sum);
+	h = (short)low(sum) + (short)high(sum);
+	if (h == 0)
+		h = 1;	/* 0 is reserved for end of input */
+	return h;
+}
+
+/*
+ * Open file `arg' as input number i (0 or 1) and build its table of line
+ * hashes.  Returns the open Biobuf, or 0 when the file cannot be opened.
+ *
+ * If `binary' is already set, or the first bytes of the file look binary,
+ * the Biobuf is returned at once and file[i], len[i] and input[i] are
+ * NOT assigned.  Otherwise file[i][1..len[i]] receive the hash of each
+ * line (index 0 is unused and two spare entries follow the last line).
+ */
+Biobuf *
+prepare(int i, char *arg)
+{
+ struct line *p;
+ int j, h;
+ Biobuf *bp;
+ char *cp, buf[MAXLINELEN];
+ int nbytes;
+ Rune r;
+
+ bp = Bopen(arg, OREAD);
+ if (!bp) {
+ panic(mflag ? 0: 2, "cannot open %s: %r\n", arg);
+ return 0;
+ }
+ if (binary)
+ return bp;
+ /* sniff at most the first 1024 bytes */
+ nbytes = Bread(bp, buf, MIN(1024, MAXLINELEN));
+ if (nbytes > 0) {
+ cp = buf;
+ /* stop UTFmax bytes short of the end so chartorune never reads past the data */
+ while (cp < buf+nbytes-UTFmax) {
+ /*
+ * heuristic for a binary file in the
+ * brave new UNICODE world
+ */
+ cp += chartorune(&r, cp);
+ if (r == 0 || (r > 0x7f && r <= 0xa0)) {
+ binary++;
+ return bp;
+ }
+ }
+ Bseek(bp, 0, 0);
+ }
+ p = MALLOC(struct line, 3);
+ /* readhash returns 0 at EOF; j is bumped before the store, so entries start at index 1 */
+ for (j = 0; h = readhash(bp, buf); p[j].value = h)
+ p = REALLOC(p, struct line, (++j+3));
+ len[i] = j;
+ file[i] = p;
+ input[i] = bp; /*fix*/
+ if (i == 0) { /*fix*/
+ file1 = arg;
+ firstchange = 0;
+ }
+ else
+ file2 = arg;
+ return bp;
+}
+
+/*
+ * Rewrite buf in place without its white space and return the new
+ * length.  When bflag is 1 a run of white space that is followed by a
+ * non-blank is replaced by a single ' ' (so trailing white space
+ * vanishes); for any other bflag all white space is removed.
+ */
+static int
+squishspace(char *buf)
+{
+	char *dst, *src;
+	int pending;
+
+	pending = 0;
+	dst = buf;
+	src = buf;
+	while (*src != '\0') {
+		if (isspace((uchar)*src))
+			pending = 1;
+		else {
+			if (pending && bflag == 1) {
+				*dst++ = ' ';
+				pending = 0;
+			}
+			*dst++ = *src;
+		}
+		src++;
+	}
+	*dst = 0;
+	return dst - buf;
+}
+
+/*
+ * need to fix up for unexpected EOF's
+ */
+/*
+ * Re-read both files and verify every match recorded in J against the
+ * actual text: J[f] is reset to 0 when line f of the first file and
+ * line J[f] of the second differ (after squishspace when bflag is set).
+ * Along the way ixold[]/ixnew[] are filled with running offsets, each
+ * line being counted as its stored length plus one for the newline.
+ * NOTE(review): that count is smaller than the real file offset for a
+ * line readline() had to truncate -- confirm long lines are acceptable.
+ */
+void
+check(Biobuf *bf, Biobuf *bt)
+{
+ int f, t, flen, tlen;
+ char fbuf[MAXLINELEN], tbuf[MAXLINELEN];
+
+ ixold[0] = ixnew[0] = 0;
+ for (f = t = 1; f < len[0]; f++) {
+ flen = readline(bf, fbuf);
+ ixold[f] = ixold[f-1] + flen + 1; /* ftell(bf) */
+ if (J[f] == 0)
+ continue;
+ /* advance the second file up to and including the matched line J[f] */
+ do {
+ tlen = readline(bt, tbuf);
+ ixnew[t] = ixnew[t-1] + tlen + 1; /* ftell(bt) */
+ } while (t++ < J[f]);
+ if (bflag) {
+ flen = squishspace(fbuf);
+ tlen = squishspace(tbuf);
+ }
+ /* the hashes matched but the text does not: break the match */
+ if (flen != tlen || strcmp(fbuf, tbuf))
+ J[f] = 0;
+ }
+ /* record offsets for whatever is left of the second file */
+ while (t < len[1]) {
+ tlen = readline(bt, tbuf);
+ ixnew[t] = ixnew[t-1] + tlen + 1; /* fseek(bt) */
+ t++;
+ }
+}
+
+/*
+ * Print the line range a..b: the smaller of the two numbers, followed
+ * by separator and b only when a < b.
+ */
+static void
+range(int a, int b, char *separator)
+{
+	int first;
+
+	first = a;
+	if (a > b)
+		first = b;
+	Bprint(&stdout, "%d", first);
+	if (a < b)
+		Bprint(&stdout, "%s%d", separator, b);
+}
+
+/*
+ * Print lines a..b of bp, each prefixed with s.  f[] holds the offset
+ * of the end of each line, so line a starts at f[a-1].  The range is
+ * clamped to 1..len of whichever input bp is; nothing is printed when
+ * it starts beyond the last line.
+ */
+static void
+fetch(long *f, int a, int b, Biobuf *bp, char *s)
+{
+	char text[MAXLINELEN];
+	int limit, n;
+
+	if(a < 1)
+		a = 1;
+	limit = (bp == input[0]) ? len[0] : len[1];
+	if(b > limit)
+		b = limit;
+	if(a > limit)
+		return;
+	Bseek(bp, f[a-1], 0);
+	for(n = a; n <= b; n++) {
+		readline(bp, text);
+		Bprint(&stdout, "%s%s\n", s, text);
+	}
+}
+
+typedef struct Change Change;
+struct Change
+{
+ int a;
+ int b;
+ int c;
+ int d;
+};
+
+Change *changes;
+int nchanges;
+
+/*
+ * Report one difference: lines a..b of the first file against lines
+ * c..d of the second (an empty range has its start greater than its
+ * end).  Does nothing when both ranges are empty.  In -c and -a modes
+ * the change is only queued in changes[] for flushchanges(); in every
+ * other mode it is printed immediately in that mode's format.
+ */
+void
+change(int a, int b, int c, int d)
+{
+ char verb;
+ char buf[4];
+ Change *ch;
+
+ if (a > b && c > d)
+ return;
+ anychange = 1;
+ /* multi-file run: print a "diff file1 file2" banner before the first change of this pair */
+ if (mflag && firstchange == 0) {
+ if(mode) {
+ buf[0] = '-';
+ buf[1] = mode;
+ buf[2] = ' ';
+ buf[3] = '\0';
+ } else {
+ buf[0] = '\0';
+ }
+ Bprint(&stdout, "diff %s%s %s\n", buf, file1, file2);
+ firstchange = 1;
+ }
+ /* empty old range: append; empty new range: delete; otherwise change */
+ verb = a > b ? 'a': c > d ? 'd': 'c';
+ switch(mode) {
+ case 'e':
+ range(a, b, ",");
+ Bputc(&stdout, verb);
+ break;
+ case 0:
+ range(a, b, ",");
+ Bputc(&stdout, verb);
+ range(c, d, ",");
+ break;
+ case 'n':
+ Bprint(&stdout, "%s:", file1);
+ range(a, b, ",");
+ Bprint(&stdout, " %c ", verb);
+ Bprint(&stdout, "%s:", file2);
+ range(c, d, ",");
+ break;
+ case 'f':
+ Bputc(&stdout, verb);
+ range(a, b, " ");
+ break;
+ case 'c':
+ case 'a':
+ /* the queue grows 1024 entries at a time */
+ if(nchanges%1024 == 0)
+ changes = erealloc(changes, (nchanges+1024)*sizeof(changes[0]));
+ ch = &changes[nchanges++];
+ ch->a = a;
+ ch->b = b;
+ ch->c = c;
+ ch->d = d;
+ return;
+ }
+ Bputc(&stdout, '\n');
+ /* normal and -n modes show the old lines, and a separator when both sides have lines */
+ if (mode == 0 || mode == 'n') {
+ fetch(ixold, a, b, input[0], "< ");
+ if (a <= b && c <= d)
+ Bprint(&stdout, "---\n");
+ }
+ fetch(ixnew, c, d, input[1], mode == 0 || mode == 'n' ? "> ": "");
+ /* -e and -f scripts end inserted text with a lone "." */
+ if (mode != 0 && mode != 'n' && c <= d)
+ Bprint(&stdout, ".\n");
+}
+
+enum
+{
+ Lines = 3 /* number of lines of context shown */
+};
+
+/*
+ * Starting at queued change i, return the index one past the last
+ * change that belongs to the same context group: consecutive changes
+ * are grouped while the next one starts within 2*Lines lines of the end
+ * of the current one.  Returns nchanges when i is already out of range.
+ *
+ * The loop only looks at changes[i+1] while i+1 is a valid index; the
+ * last queued change has no successor, and reading one ran past the
+ * array whenever nchanges was a multiple of the 1024-entry growth step.
+ */
+int
+changeset(int i)
+{
+	while(i+1 < nchanges && changes[i].b+1+2*Lines > changes[i+1].a)
+		i++;
+	if(i < nchanges)
+		return i+1;
+	return nchanges;
+}
+
+/*
+ * Print the changes queued by change() in -c or -a mode and empty the
+ * queue.  Changes are emitted in groups (see changeset); each group
+ * gets a "file1:a,b - file2:c,d" header, Lines lines of context on
+ * either side, old lines marked "- " and new lines marked "+ ".
+ * In -a mode there is a single group covering both files entirely.
+ */
+void
+flushchanges(void)
+{
+ int a, b, c, d, at;
+ int i, j;
+
+ if(nchanges == 0)
+ return;
+
+ for(i=0; i<nchanges; ){
+ j = changeset(i);
+ /* widen the group by the context and clamp to the file bounds */
+ a = changes[i].a-Lines;
+ b = changes[j-1].b+Lines;
+ c = changes[i].c-Lines;
+ d = changes[j-1].d+Lines;
+ if(a < 1)
+ a = 1;
+ if(c < 1)
+ c = 1;
+ if(b > len[0])
+ b = len[0];
+ if(d > len[1])
+ d = len[1];
+ if(mode == 'a'){
+ a = 1;
+ b = len[0];
+ c = 1;
+ d = len[1];
+ j = nchanges;
+ }
+ Bprint(&stdout, "%s:", file1);
+ range(a, b, ",");
+ Bprint(&stdout, " - ");
+ Bprint(&stdout, "%s:", file2);
+ range(c, d, ",");
+ Bputc(&stdout, '\n');
+ /* `at' is the next old-file line that has not been printed yet */
+ at = a;
+ for(; i<j; i++){
+ fetch(ixold, at, changes[i].a-1, input[0], " ");
+ fetch(ixold, changes[i].a, changes[i].b, input[0], "- ");
+ fetch(ixnew, changes[i].c, changes[i].d, input[1], "+ ");
+ at = changes[i].b+1;
+ }
+ /* trailing context after the last change of the group */
+ fetch(ixold, at, b, input[0], " ");
+ }
+ nchanges = 0;
+}
diff --git a/diff/diffreg.c b/diff/diffreg.c
@@ -0,0 +1,420 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "diff.h"
+
+/* diff - differential file comparison
+*
+* Uses an algorithm due to Harold Stone, which finds
+* a pair of longest identical subsequences in the two
+* files.
+*
+* The major goal is to generate the match vector J.
+* J[i] is the index of the line in file1 corresponding
+* to line i of file0. J[i] = 0 if there is no
+* such line in file1.
+*
+* Lines are hashed so as to work in core. All potential
+* matches are located by sorting the lines of each file
+* on the hash (called value). In particular, this
+* collects the equivalence classes in file1 together.
+* Subroutine equiv replaces the value of each line in
+* file0 by the index of the first element of its
+* matching equivalence in (the reordered) file1.
+* To save space equiv squeezes file1 into a single
+* array member in which the equivalence classes
+* are simply concatenated, except that their first
+* members are flagged by changing sign.
+*
+* Next the indices that point into member are unsorted into
+* array class according to the original order of file0.
+*
+* The cleverness lies in routine stone. This marches
+* through the lines of file0, developing a vector klist
+* of "k-candidates". At step i a k-candidate is a matched
+* pair of lines x,y (x in file0 y in file1) such that
+* there is a common subsequence of length k
+* between the first i lines of file0 and the first y
+* lines of file1, but there is no such subsequence for
+* any smaller y. x is the earliest possible mate to y
+* that occurs in such a subsequence.
+*
+* Whenever any of the members of the equivalence class of
+* lines in file1 matable to a line in file0 has serial number
+* less than the y of some k-candidate, that k-candidate
+* with the smallest such y is replaced. The new
+* k-candidate is chained (via pred) to the current
+* k-1 candidate so that the actual subsequence can
+* be recovered. When a member has serial number greater
+* than the y of all k-candidates, the klist is extended.
+* At the end, the longest subsequence is pulled out
+* and placed in the array J by unravel.
+*
+* With J in hand, the matches there recorded are
+* check'ed against reality to assure that no spurious
+* matches have crept in due to hashing. If they have,
+* they are broken, and "jackpot " is recorded--a harmless
+* matter except that a true match for a spuriously
+* mated line may now be unnecessarily reported as a change.
+*
+* Much of the complexity of the program comes simply
+* from trying to minimize core utilization and
+* maximize the range of doable problems by dynamically
+* allocating what is needed and reusing what is not.
+* The core requirements for problems larger than somewhat
+* are (in words) 2*length(file0) + length(file1) +
+* 3*(number of k-candidates installed), typically about
+* 6n words for files of length n.
+*/
+/* TIDY THIS UP */
+struct cand {
+ int x;
+ int y;
+ int pred;
+} cand;
+struct line {
+ int serial;
+ int value;
+} *file[2], line;
+int len[2];
+int binary;
+struct line *sfile[2]; /*shortened by pruning common prefix and suffix*/
+int slen[2];
+int pref, suff; /*length of prefix and suffix*/
+int *class; /*will be overlaid on file[0]*/
+int *member; /*will be overlaid on file[1]*/
+int *klist; /*will be overlaid on file[0] after class*/
+struct cand *clist; /* merely a free storage pot for candidates */
+int clen;
+int *J; /*will be overlaid on class*/
+long *ixold; /*will be overlaid on klist*/
+long *ixnew; /*will be overlaid on file[1]*/
+/* END OF SOME TIDYING */
+
+/*
+ * Shell sort of a[1..n] (a[0] is not sorted) into increasing order of
+ * value, ties broken by increasing serial.
+ */
+static void
+sort(struct line *a, int n) /*shellsort CACM #201*/
+{
+ int m;
+ struct line *ai, *aim, *j, *k;
+ struct line w;
+ int i;
+
+ /* after the loop m is 2*i-1 for the largest power of two i <= n (0 when n < 1) */
+ m = 0;
+ for (i = 1; i <= n; i *= 2)
+ m = 2*i - 1;
+ /* m is the gap; it is halved for each pass */
+ for (m /= 2; m != 0; m /= 2) {
+ k = a+(n-m);
+ for (j = a+1; j <= k; j++) {
+ ai = j;
+ aim = ai+m;
+ do {
+ /* the pair (ai, aim) is already in order: this chain is done */
+ if (aim->value > ai->value ||
+ aim->value == ai->value &&
+ aim->serial > ai->serial)
+ break;
+ w = *ai;
+ *ai = *aim;
+ *aim = w;
+
+ aim = ai;
+ ai -= m;
+ } while (ai > a && aim >= ai);
+ }
+ }
+}
+
+/*
+ * Undo the sort: store into b[1..l] the value of each element of
+ * f[1..l], placed at the index given by that element's serial number.
+ * The values are collected in a scratch array first and copied to b
+ * afterwards, so b may overlay f (diffreg passes the same storage).
+ */
+static void
+unsort(struct line *f, int l, int *b)
+{
+	int *byserial;
+	int k;
+
+	byserial = MALLOC(int, (l+1));
+	for(k = 1; k <= l; k++)
+		byserial[f[k].serial] = f[k].value;
+	for(k = 1; k <= l; k++)
+		b[k] = byserial[k];
+	FREE(byserial);
+}
+
+/*
+ * Count the lines at the start (pref) and at the end (suff) whose hash
+ * values agree in both files, then set sfile[]/slen[] to describe the
+ * middle part that remains and renumber its serials from 0.
+ */
+static void
+prune(void)
+{
+ int i,j;
+
+ /* common prefix: line tables are indexed from 1 */
+ for(pref=0;pref<len[0]&&pref<len[1]&&
+ file[0][pref+1].value==file[1][pref+1].value;
+ pref++ ) ;
+ /* common suffix, never overlapping the prefix */
+ for(suff=0;suff<len[0]-pref&&suff<len[1]-pref&&
+ file[0][len[0]-suff].value==file[1][len[1]-suff].value;
+ suff++) ;
+ for(j=0;j<2;j++) {
+ /* sfile[j][1] is the first line after the common prefix */
+ sfile[j] = file[j]+pref;
+ slen[j] = len[j]-pref-suff;
+ for(i=0;i<=slen[j];i++)
+ sfile[j][i].serial = i;
+ }
+}
+
+/*
+ * a[1..n] and b[1..m] are both sorted by value.  Replace the value of
+ * each a[i] by the index j of the first b[j] with the same value, or by
+ * 0 when b has none.  Then write into c[1..m] the serial numbers of b in
+ * sorted order, negating the first serial of each run of equal values;
+ * the entry after the last is set to -1.
+ * NOTE(review): b[m+1].value = 0 is used as the end sentinel, which
+ * presumes no real line carries the value 0 -- confirm against readhash.
+ */
+static void
+equiv(struct line *a, int n, struct line *b, int m, int *c)
+{
+ int i, j;
+
+ i = j = 1;
+ /* merge-style walk over the two sorted arrays */
+ while(i<=n && j<=m) {
+ if(a[i].value < b[j].value)
+ a[i++].value = 0;
+ else if(a[i].value == b[j].value)
+ a[i++].value = j;
+ else
+ j++;
+ }
+ while(i <= n)
+ a[i++].value = 0;
+ b[m+1].value = 0;
+ j = 0;
+ while(++j <= m) {
+ /* first member of a class is flagged by a negative serial */
+ c[j] = -b[j].serial;
+ while(b[j+1].value == b[j].value) {
+ j++;
+ c[j] = b[j].serial;
+ }
+ }
+ c[j] = -1;
+}
+
+/*
+ * Append the candidate (x, y, pred) to clist, growing the array by one
+ * element, and return the index it was stored at.
+ */
+static int
+newcand(int x, int y, int pred)
+{
+	int slot;
+
+	slot = clen;
+	clist = REALLOC(clist, struct cand, (slot+1));
+	clist[slot].x = x;
+	clist[slot].y = y;
+	clist[slot].pred = pred;
+	clen = slot+1;
+	return slot;
+}
+
+/*
+ * c[0..k] holds indexes into clist.  Binary-search those candidates by
+ * their y field for the value y.  Returns the position l whose
+ * candidate has exactly that y if there is one, otherwise one more than
+ * the position the search stopped at; returns k+1 at once when y is
+ * larger than the y of c[k].
+ * NOTE(review): the search presumes the y values of c[0..k] are
+ * increasing -- confirm with stone().
+ */
+static int
+search(int *c, int k, int y)
+{
+ int i, j, l;
+ int t;
+
+ if(clist[c[k]].y < y) /*quick look for typical case*/
+ return k+1;
+ i = 0;
+ j = k+1;
+ /* the loop stops once the midpoint is i itself, i.e. j == i+1 */
+ while((l=(i+j)/2) > i) {
+ t = clist[c[l]].y;
+ if(t > y)
+ j = l;
+ else if(t < y)
+ i = l;
+ else
+ return l;
+ }
+ return l+1;
+}
+
+/*
+ * Build the k-candidate list described in the comment at the top of
+ * this file.  a[1..n] gives, for each line of file0, the index into b
+ * of its equivalence class (0 for none); b lists the file1 serials of
+ * each class, the first one negated; c receives, for each length l,
+ * the clist index of the current l-candidate.  Returns the largest l
+ * reached, so c[k] heads the chain that unravel() follows.
+ */
+static int
+stone(int *a, int n, int *b, int *c)
+{
+ int i, k,y;
+ int j, l;
+ int oldc, tc;
+ int oldl;
+
+ k = 0;
+ /* candidate 0 is the empty match (x = y = 0) that ends every chain */
+ c[0] = newcand(0,0,0);
+ for(i=1; i<=n; i++) {
+ j = a[i];
+ if(j==0)
+ continue;
+ /* the first member of a class is stored negated */
+ y = -b[j];
+ oldl = 0;
+ oldc = c[0];
+ do {
+ if(y <= clist[oldc].y)
+ continue;
+ l = search(c, k, y);
+ if(l!=oldl+1)
+ oldc = c[l-1];
+ if(l<=k) {
+ if(clist[c[l]].y <= y)
+ continue;
+ /* replace the l-candidate; remember the old one as a possible predecessor */
+ tc = c[l];
+ c[l] = newcand(i,y,oldc);
+ oldc = tc;
+ oldl = l;
+ } else {
+ /* y lies beyond every candidate: extend the list */
+ c[l] = newcand(i,y,oldc);
+ k++;
+ break;
+ }
+ /* the remaining members of the class are positive; a value <= 0 ends it */
+ } while((y=b[++j]) > 0);
+ }
+ return k;
+}
+
+/*
+ * Fill J[0..len[0]]: lines of the common prefix match themselves, lines
+ * of the common suffix match the corresponding line counted from the
+ * end of the second file, everything else starts as 0 (no match).
+ * Then follow the candidate chain that starts at clist[p] and record
+ * each matched pair, shifted by pref back to whole-file line numbers.
+ */
+static void
+unravel(int p)
+{
+ int i;
+ struct cand *q;
+
+ for(i=0; i<=len[0]; i++) {
+ if (i <= pref)
+ J[i] = i;
+ else if (i > len[0]-suff)
+ J[i] = i+len[1]-len[0];
+ else
+ J[i] = 0;
+ }
+ /* the chain ends at the candidate whose y is 0 */
+ for(q=clist+p;q->y!=0;q=clist+q->pred)
+ J[q->x+pref] = q->y+pref;
+}
+
+/*
+ * Turn the match vector J into calls to change().  J[0] and J[m+1] are
+ * set up as sentinels.  Every mode but -e walks the first file
+ * forwards; -e walks it backwards.  Each step skips a run of matched
+ * lines, then reports the following run of unmatched lines i0..i1
+ * together with the lines j0..j1 of the second file that lie between
+ * the surrounding matches.  Ends by flushing changes queued for -c/-a.
+ */
+static void
+output(void)
+{
+ int m, i0, i1, j0, j1;
+
+ m = len[0];
+ J[0] = 0;
+ J[m+1] = len[1]+1;
+ if (mode != 'e') {
+ for (i0 = 1; i0 <= m; i0 = i1+1) {
+ /* skip lines matched to consecutive lines of the second file */
+ while (i0 <= m && J[i0] == J[i0-1]+1)
+ i0++;
+ j0 = J[i0-1]+1;
+ i1 = i0-1;
+ while (i1 < m && J[i1+1] == 0)
+ i1++;
+ j1 = J[i1+1]-1;
+ J[i1] = j1;
+ change(i0, i1, j0, j1);
+ }
+ }
+ else {
+ for (i0 = m; i0 >= 1; i0 = i1-1) {
+ while (i0 >= 1 && J[i0] == J[i0+1]-1 && J[i0])
+ i0--;
+ j0 = J[i0+1]-1;
+ i1 = i0+1;
+ while (i1 > 1 && J[i1-1] == 0)
+ i1--;
+ j1 = J[i1-1]+1;
+ J[i1] = j1;
+ change(i1 , i0, j1, j0);
+ }
+ }
+ /* empty first file: neither loop ran, so report the whole second file as added */
+ if (m == 0)
+ change(1, 0, 1, len[1]);
+ flushchanges();
+}
+
+#define BUF 4096
+/*
+ * Byte-compare the files behind b1 and b2 from their beginning, reading
+ * the descriptors directly.  Returns 0 when the contents are identical
+ * and 1 when they differ or when either file cannot be read.
+ *
+ * A failed read() is reported as a difference straight away: using its
+ * negative count as a length would put the end pointer before the
+ * buffer, and two failing reads would have compared as "equal".
+ */
+static int
+cmp(Biobuf* b1, Biobuf* b2)
+{
+	int n;
+	uchar buf1[BUF], buf2[BUF];
+	int f1, f2;
+	uchar *b1s, *b1e, *b2s, *b2e;
+
+	f1 = Bfildes(b1);
+	f2 = Bfildes(b2);
+	seek(f1, 0, 0);
+	seek(f2, 0, 0);
+	b1s = b1e = buf1;
+	b2s = b2e = buf2;
+	for(;;){
+		/* refill a buffer once everything in it has been compared */
+		if(b1s >= b1e){
+			if(b1s >= &buf1[BUF])
+				b1s = buf1;
+			n = read(f1, b1s, &buf1[BUF] - b1s);
+			if(n < 0)
+				return 1;
+			b1e = b1s + n;
+		}
+		if(b2s >= b2e){
+			if(b2s >= &buf2[BUF])
+				b2s = buf2;
+			n = read(f2, b2s, &buf2[BUF] - b2s);
+			if(n < 0)
+				return 1;
+			b2e = b2s + n;
+		}
+		/* compare as many bytes as both buffers currently hold */
+		n = b2e - b2s;
+		if(n > b1e - b1s)
+			n = b1e - b1s;
+		if(n <= 0)
+			break;
+		if(memcmp((void *)b1s, (void *)b2s, n) != 0)
+			return 1;
+		b1s += n;
+		b2s += n;
+	}
+	/* one side hit EOF: equal only if the other has nothing left either */
+	if(b1e - b1s == b2e - b2s)
+		return 0;
+	return 1;
+}
+
+/*
+ * Compare the two regular files f and t and print their differences in
+ * the current mode.  Files that look binary are only compared byte for
+ * byte.  Returns silently when either file cannot be opened (prepare()
+ * has already reported it).
+ *
+ * file[0] and file[1] are cleared on entry.  prepare() leaves them
+ * untouched for binary input, and after an earlier text comparison they
+ * still point at storage that was freed under the names class and
+ * member; freeing such a leftover on the error path was a double free.
+ * The binary path now also releases a line table that was built for
+ * the first file before the second one turned out to be binary.
+ */
+void
+diffreg(char *f, char *t)
+{
+	Biobuf *b0, *b1;
+	int k;
+
+	binary = 0;
+	file[0] = nil;
+	file[1] = nil;
+	b0 = prepare(0, f);
+	if (!b0)
+		return;
+	b1 = prepare(1, t);
+	if (!b1) {
+		FREE(file[0]);
+		file[0] = nil;
+		Bterm(b0);
+		return;
+	}
+	if (binary){
+		/* could use b0 and b1 but this is simpler. */
+		if (cmp(b0, b1))
+			print("binary files %s %s differ\n", f, t);
+		FREE(file[0]);
+		FREE(file[1]);
+		file[0] = nil;
+		file[1] = nil;
+		Bterm(b0);
+		Bterm(b1);
+		return;
+	}
+	clen = 0;
+	prune();
+	sort(sfile[0], slen[0]);
+	sort(sfile[1], slen[1]);
+
+	/* member and class reuse the storage of file[1] and file[0] */
+	member = (int *)file[1];
+	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
+	member = REALLOC(member, int, slen[1]+2);
+
+	class = (int *)file[0];
+	unsort(sfile[0], slen[0], class);
+	class = REALLOC(class, int, slen[0]+2);
+
+	klist = MALLOC(int, slen[0]+2);
+	clist = MALLOC(struct cand, 1);
+	k = stone(class, slen[0], member, klist);
+	/* this releases what file[1] and file[0] pointed to */
+	FREE(member);
+	FREE(class);
+	file[0] = nil;
+	file[1] = nil;
+
+	J = MALLOC(int, len[0]+2);
+	unravel(klist[k]);
+	FREE(clist);
+	FREE(klist);
+
+	ixold = MALLOC(long, len[0]+2);
+	ixnew = MALLOC(long, len[1]+2);
+	Bseek(b0, 0, 0);
+	Bseek(b1, 0, 0);
+	check(b0, b1);
+	output();
+	FREE(J);
+	FREE(ixold);
+	FREE(ixnew);
+	Bterm(b0);
+	Bterm(b1);
+}
diff --git a/diff/main.c b/diff/main.c
@@ -0,0 +1,270 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "diff.h"
+
+#define DIRECTORY(s) ((s)->qid.type&QTDIR)
+#define REGULAR_FILE(s) ((s)->type == 'M' && !DIRECTORY(s))
+
+Biobuf stdout;
+
+/*
+ * Templates for the temporary copies made by mktmpfile().  mkstemp
+ * overwrites the X's of its template in place, so these must be
+ * writable arrays; pointers to string literals are not writable.
+ * NOTE(review): once a template has been used its X's are gone, so a
+ * second mkstemp on the same slot presumably fails -- confirm whether
+ * more than one temporary per slot is ever needed in a single run.
+ */
+static char tmp[][sizeof "/tmp/diff1XXXXXXXXXXX"] = {
+	"/tmp/diff1XXXXXXXXXXX",
+	"/tmp/diff2XXXXXXXXXXX"
+};
+static int whichtmp;	/* number of templates handed out and not yet removed */
+static char *progname;
+static char usage[] = "diff [ -acefmnbwr ] file1 ... file2\n";
+
+/* Remove every temporary file handed out so far, most recent first, leaving whichtmp at 0. */
+static void
+rmtmpfiles(void)
+{
+	while (whichtmp > 0)
+		remove(tmp[--whichtmp]);
+}
+
+/*
+ * Remove the temporary files and exit: status 0 exits with "", status 1
+ * with "some", any other status with "error".
+ */
+void
+done(int status)
+{
+	rmtmpfiles();
+	if (status == 0)
+		exits("");
+	if (status == 1)
+		exits("some");
+	exits("error");
+	/*NOTREACHED*/
+}
+
+/*
+ * Flush pending diff output, then print "progname: " and the formatted
+ * message on file descriptor 2.  A non-zero status ends the program
+ * through done(status); with status 0 the function returns so the
+ * caller can carry on with the next file.
+ */
+void
+panic(int status, char *fmt, ...)
+{
+ va_list arg;
+
+ /* write out buffered diff output before the message goes to fd 2 */
+ Bflush(&stdout);
+
+ fprint(2, "%s: ", progname);
+ va_start(arg, fmt);
+ vfprint(2, fmt, arg);
+ va_end(arg);
+ if (status)
+ done(status);
+ /*NOTREACHED*/
+}
+
+/*
+ * Note handler registered by mktmpfile(): report the note and exit with
+ * status 2 (panic removes the temporary files on the way out).  The
+ * note text is passed as an argument, never as the format, so a '%' in
+ * it cannot be taken for a conversion.
+ */
+static int
+catch(void *a, char *msg)
+{
+	USED(a);
+	panic(2, "%s", msg);
+	return 1;
+}
+
+/*
+ * Store "path/name" in pathname, a buffer of MAXPATHLEN+1 bytes.
+ * Returns 0 on success; reports the problem and returns 1 when the
+ * result would not fit.  The check counts the '/' separator as well:
+ * the two parts, the slash and the terminating NUL must all fit in the
+ * buffer.
+ */
+int
+mkpathname(char *pathname, char *path, char *name)
+{
+	if (strlen(path) + strlen(name) + 1 > MAXPATHLEN) {
+		panic(0, "pathname %s/%s too long\n", path, name);
+		return 1;
+	}
+	sprint(pathname, "%s/%s", path, name);
+	return 0;
+}
+
+/*
+ * Copy everything readable from descriptor `input' into a fresh
+ * temporary file and return that file's name, with *sb set to its
+ * directory entry (the caller frees it).  Returns 0, with *sb left
+ * untouched, when the file cannot be created, copied or stat'ed.
+ *
+ * The directory entry is fetched only after the copy has succeeded, so
+ * nothing is leaked on a copy error, and a failed dirfstat is reported
+ * here instead of handing a nil entry to callers that dereference it.
+ */
+static char *
+mktmpfile(int input, Dir **sb)
+{
+	int fd, i;
+	char *p;
+	char buf[8192];
+	Dir *d;
+
+	/* from now on a note makes catch() remove the temporaries and exit */
+	atnotify(catch, 1);
+	fd = mkstemp(p=tmp[whichtmp++]);
+	if (fd < 0) {
+		panic(mflag ? 0: 2, "cannot create %s: %r\n", p);
+		return 0;
+	}
+	while ((i = read(input, buf, sizeof(buf))) > 0) {
+		if ((i = write(fd, buf, i)) < 0)
+			break;
+	}
+	if (i < 0) {
+		panic(mflag ? 0: 2, "cannot read/write %s: %r\n", p);
+		close(fd);
+		return 0;
+	}
+	d = dirfstat(fd);
+	if (d == nil) {
+		panic(mflag ? 0: 2, "cannot stat %s: %r\n", p);
+		close(fd);
+		return 0;
+	}
+	close(fd);
+	*sb = d;
+	return p;
+}
+
+/*
+ * Work out what `file' is and return the name diff should actually
+ * read, with *sb set to its directory entry.  Regular files and
+ * directories are returned unchanged.  "-" (when no file of that name
+ * exists) means standard input, and any other kind of file is opened;
+ * both are copied to a temporary file whose name is returned instead.
+ * Returns 0 after reporting the problem when the file cannot be used.
+ */
+static char *
+statfile(char *file, Dir **sb)
+{
+ Dir *dir;
+ int input;
+
+ dir = dirstat(file);
+ if(dir == nil) {
+ /* only "-" may be missing, and then descriptor 0 must be usable */
+ if (strcmp(file, "-") || (dir = dirfstat(0)) == nil) {
+ panic(mflag ? 0: 2, "cannot stat %s: %r\n", file);
+ return 0;
+ }
+ free(dir);
+ return mktmpfile(0, sb);
+ }
+ else if (!REGULAR_FILE(dir) && !DIRECTORY(dir)) {
+ free(dir);
+ if ((input = open(file, OREAD)) == -1) {
+ panic(mflag ? 0: 2, "cannot open %s: %r\n", file);
+ return 0;
+ }
+ file = mktmpfile(input, sb);
+ close(input);
+ }
+ else
+ *sb = dir;
+ return file;
+}
+
+/*
+ * Compare f with t.  Two directories are compared entry by entry (at
+ * level 0 or with -r; otherwise they are only listed as common
+ * subdirectories), two regular files are compared line by line, and a
+ * file paired with a directory is compared with the entry of the same
+ * base name inside that directory.  Temporary files created for this
+ * pair are removed before returning.
+ */
+void
+diff(char *f, char *t, int level)
+{
+ char *fp, *tp, *p, fb[MAXPATHLEN+1], tb[MAXPATHLEN+1];
+ Dir *fsb, *tsb;
+
+ if ((fp = statfile(f, &fsb)) == 0)
+ goto Return;
+ if ((tp = statfile(t, &tsb)) == 0){
+ free(fsb);
+ goto Return;
+ }
+ if (DIRECTORY(fsb) && DIRECTORY(tsb)) {
+ if (rflag || level == 0)
+ diffdir(fp, tp, level);
+ else
+ Bprint(&stdout, "Common subdirectories: %s and %s\n",
+ fp, tp);
+ }
+ else if (REGULAR_FILE(fsb) && REGULAR_FILE(tsb))
+ diffreg(fp, tp);
+ else {
+ if (REGULAR_FILE(fsb)) {
+ /* file against directory: use the part of f after its last '/' */
+ if ((p = utfrrune(f, '/')) == 0)
+ p = f;
+ else
+ p++;
+ if (mkpathname(tb, tp, p) == 0)
+ diffreg(fp, tb);
+ }
+ else {
+ /* directory against file: use the part of t after its last '/' */
+ if ((p = utfrrune(t, '/')) == 0)
+ p = t;
+ else
+ p++;
+ if (mkpathname(fb, fp, p) == 0)
+ diffreg(fb, tp);
+ }
+ }
+ free(fsb);
+ free(tsb);
+Return:
+ rmtmpfiles();
+}
+
+/*
+ * Parse the option letters, check the operands and compare every
+ * leading operand with the last one.  With more than two operands the
+ * last must be a directory.  Exits through done() with anychange as
+ * the status.
+ */
+void
+main(int argc, char *argv[])
+{
+ char *p;
+ int i;
+ Dir *fsb, *tsb;
+ extern int _p9usepwlibrary;
+
+ _p9usepwlibrary = 0;
+ Binit(&stdout, 1, OWRITE);
+ progname = *argv;
+ /* a lone "-" is an operand, not an option; afterwards argv[0] is the first operand */
+ while (--argc && (*++argv)[0] == '-' && (*argv)[1]) {
+ for (p = *argv+1; *p; p++) {
+ switch (*p) {
+
+ case 'e':
+ case 'f':
+ case 'n':
+ case 'c':
+ case 'a':
+ /* output formats exclude one another: the last one given wins */
+ mode = *p;
+ break;
+
+ case 'w':
+ bflag = 2;
+ break;
+
+ case 'b':
+ bflag = 1;
+ break;
+
+ case 'r':
+ rflag = 1;
+ mflag = 1;
+ break;
+
+ case 'm':
+ mflag = 1;
+ break;
+
+ case 'h':
+ default:
+ /* makes panic print "Usage: diff ..." */
+ progname = "Usage";
+ panic(2, usage);
+ }
+ }
+ }
+ if (argc < 2)
+ panic(2, usage, progname);
+ if ((tsb = dirstat(argv[argc-1])) == nil)
+ panic(2, "can't stat %s\n", argv[argc-1]);
+ if (argc > 2) {
+ if (!DIRECTORY(tsb))
+ panic(2, usage, progname);
+ mflag = 1;
+ }
+ else {
+ if ((fsb = dirstat(argv[0])) == nil)
+ panic(2, "can't stat %s\n", argv[0]);
+ /* comparing two directories also counts as a multi-file run */
+ if (DIRECTORY(fsb) && DIRECTORY(tsb))
+ mflag = 1;
+ free(fsb);
+ }
+ free(tsb);
+ for (i = 0; i < argc-1; i++)
+ diff(argv[i], argv[argc-1], 0);
+ done(anychange);
+ /*NOTREACHED*/
+}
+
+static char noroom[] = "out of memory - try diff -h\n";
+
+/* malloc that never returns 0: reports "out of memory" and exits with status 2 instead. */
+void *
+emalloc(unsigned n)
+{
+	void *mem;
+
+	mem = malloc(n);
+	if (mem == 0)
+		panic(2, noroom);
+	return mem;
+}
+
+/* realloc that never returns 0: reports "out of memory" and exits with status 2 instead. */
+void *
+erealloc(void *p, unsigned n)
+{
+	void *mem;
+
+	mem = realloc(p, n);
+	if (mem == 0)
+		panic(2, noroom);
+	return mem;
+}
diff --git a/join/Makefile b/join/Makefile
@@ -0,0 +1,10 @@
+# join - join unix port from plan9
+# Depends on ../lib9
+
+TARG = join
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
diff --git a/join/join.1 b/join/join.1
@@ -0,0 +1,147 @@
+.TH JOIN 1
+.CT 1 files
+.SH NAME
+join \- relational database operator
+.SH SYNOPSIS
+.B join
+[
+.I options
+]
+.I file1 file2
+.SH DESCRIPTION
+.I Join
+forms, on the standard output,
+a join
+of the two relations specified by the lines of
+.I file1
+and
+.IR file2 .
+If one of the file names is
+.LR - ,
+the standard input is used.
+.PP
+.I File1
+and
+.I file2
+must be sorted in increasing
+.SM ASCII
+collating
+sequence on the fields
+on which they are to be joined,
+normally the first in each line.
+.PP
+There is one line in the output
+for each pair of lines in
+.I file1
+and
+.I file2
+that have identical join fields.
+The output line normally consists of the common field,
+then the rest of the line from
+.IR file1 ,
+then the rest of the line from
+.IR file2 .
+.PP
+Input fields are normally separated by spaces or tabs;
+output fields by space.
+In this case, multiple separators count as one, and
+leading separators are discarded.
+.PP
+The following options are recognized, with POSIX syntax.
+.TP
+.BI -a " n
+In addition to the normal output,
+produce a line for each unpairable line in file
+.IR n ,
+where
+.I n
+is 1 or 2.
+.TP
+.BI -v " n
+Like
+.BR -a ,
+omitting output for paired lines.
+.TP
+.BI -e " s
+Replace empty output fields by string
+.IR s .
+.TP
+.BI -1 " m
+.br
+.ns
+.TP
+.BI -2 " m
+Join on the
+.IR m th
+field of
+.I file1
+or
+.IR file2 .
+.TP
+.BI -j "n m"
+Archaic equivalent for
+.BI - n " m"\f1.
+.TP
+.BI -o fields
+Each output line comprises the designated fields.
+The comma-separated field designators are either
+.BR 0 ,
+meaning the join field, or have the form
+.IR n . m ,
+where
+.I n
+is a file number and
+.I m
+is a field number.
+Archaic usage allows separate arguments for field designators.
+.PP
+.TP
+.BI -t c
+Use character
+.I c
+as the only separator (tab character) on input and output.
+Every appearance of
+.I c
+in a line is significant.
+.SH EXAMPLES
+.TP
+.L
+sort /etc/passwd | join -t: -1 1 -a 1 -e "" - bdays
+Add birthdays to the
+.B /etc/passwd
+file, leaving unknown
+birthdays empty.
+The layout of
+.B /etc/passwd
+is given in
+.IR passwd (5);
+.B bdays
+contains sorted lines like
+.LR "ken:Feb\ 4,\ 1953" .
+.TP
+.L
+tr : ' ' </etc/passwd | sort -k 3 3 >temp
+.br
+.ns
+.TP
+.L
+join -1 3 -2 3 -o 1.1,2.1 temp temp | awk '$1 < $2'
+Print all pairs of users with identical userids.
+.SH SOURCE
+.B \*9/src/cmd/join.c
+.SH "SEE ALSO"
+.IR sort (1),
+.IR comm (1),
+.IR awk (1)
+.SH BUGS
+With default field separation,
+the collating sequence is that of
+.BI "sort -b"
+.BI -k y , y\f1;
+with
+.BR -t ,
+the sequence is that of
+.BI "sort -t" x
+.BI -k y , y\f1.
+.PP
+One of the files must be randomly accessible.
diff --git a/join/join.c b/join/join.c
@@ -0,0 +1,369 @@
+/* join F1 F2 on stuff */
+#include <u.h>
+#include <libc.h>
+#include <stdio.h>
+#include <ctype.h>
+#define F1 0
+#define F2 1
+#define F0 3
+#define NFLD 100 /* max field per line */
+#define comp() runecmp(ppi[F1][j1],ppi[F2][j2])
+FILE *f[2];
+Rune buf[2][BUFSIZ]; /*input lines */
+Rune *ppi[2][NFLD+1]; /* pointers to fields in lines */
+Rune *s1,*s2;
+#define j1 joinj1
+#define j2 joinj2
+
+int j1 = 1; /* join of this field of file 1 */
+int j2 = 1; /* join of this field of file 2 */
+int olist[2*NFLD]; /* output these fields */
+int olistf[2*NFLD]; /* from these files */
+int no; /* number of entries in olist */
+Rune sep1 = ' '; /* default field separator */
+Rune sep2 = '\t';
+char *sepstr=" ";
+int discard; /* count of truncated lines */
+Rune null[BUFSIZ]/* = L""*/;
+int a1;
+int a2;
+
+char *getoptarg(int*, char***);
+void output(int, int);
+int input(int);
+void oparse(char*);
+void error(char*, char*);
+void seek1(void), seek2(void);
+Rune *strtorune(Rune *, char *);
+
+
+/*
+ * Parse the options, open the two inputs ("-" is standard input) and
+ * run the join, rewinding whichever input ftell works on (the second
+ * file is tried first).
+ *
+ * Field numbers given by the user are used as array indexes later on,
+ * so they are range-checked here: join fields must lie in 1..NFLD and
+ * -o field numbers must be at least 1.  The -e string is rejected when
+ * it would not fit in null[].
+ */
+void
+main(int argc, char **argv)
+{
+	int i;
+	char *earg;
+
+	while (argc > 1 && argv[1][0] == '-') {
+		if (argv[1][1] == '\0')
+			break;
+		switch (argv[1][1]) {
+		case '-':
+			argc--;
+			argv++;
+			goto proceed;
+		case 'a':
+			switch(*getoptarg(&argc, &argv)) {
+			case '1':
+				a1++;
+				break;
+			case '2':
+				a2++;
+				break;
+			default:
+				error("incomplete option -a","");
+			}
+			break;
+		case 'e':
+			earg = getoptarg(&argc, &argv);
+			/* null[] holds BUFSIZ runes, terminator included */
+			if (strlen(earg) >= BUFSIZ)
+				error("-e string too long","");
+			strtorune(null, earg);
+			break;
+		case 't':
+			sepstr=getoptarg(&argc, &argv);
+			chartorune(&sep1, sepstr);
+			sep2 = sep1;
+			break;
+		case 'o':
+			if(argv[1][2]!=0 ||
+			   argc>2 && strchr(argv[2],',')!=0)
+				oparse(getoptarg(&argc, &argv));
+			/* archaic form: each designator is an argument of its own */
+			else for (no = 0; no<2*NFLD && argc>2; no++){
+				if (argv[2][0] == '1' && argv[2][1] == '.') {
+					olistf[no] = F1;
+					olist[no] = atoi(&argv[2][2]);
+				} else if (argv[2][0] == '2' && argv[2][1] == '.') {
+					olist[no] = atoi(&argv[2][2]);
+					olistf[no] = F2;
+				} else if (argv[2][0] == '0')
+					olistf[no] = F0;
+				else
+					break;
+				argc--;
+				argv++;
+			}
+			break;
+		case 'j':
+			if(argc <= 2)
+				break;
+			if (argv[1][2] == '1')
+				j1 = atoi(argv[2]);
+			else if (argv[1][2] == '2')
+				j2 = atoi(argv[2]);
+			else
+				j1 = j2 = atoi(argv[2]);
+			argc--;
+			argv++;
+			break;
+		case '1':
+			j1 = atoi(getoptarg(&argc, &argv));
+			break;
+		case '2':
+			j2 = atoi(getoptarg(&argc, &argv));
+			break;
+		}
+		argc--;
+		argv++;
+	}
+proceed:
+	for (i = 0; i < no; i++) {
+		/* F0 entries carry no field number of their own */
+		if (olistf[i] != F0 && olist[i] < 1)
+			error("field number too small in -o","");
+		if (olist[i]-- > NFLD) /* 0 origin */
+			error("field number too big in -o","");
+	}
+	if (argc != 3)
+		error("usage: join [-1 x -2 y] [-o list] file1 file2","");
+	if (j1 < 1 || j1 > NFLD || j2 < 1 || j2 > NFLD)
+		error("join field number out of range","");
+	j1--;
+	j2--; /* everyone else believes in 0 origin */
+	s1 = ppi[F1][j1];
+	s2 = ppi[F2][j2];
+	if (strcmp(argv[1], "-") == 0)
+		f[F1] = stdin;
+	else if ((f[F1] = fopen(argv[1], "r")) == 0)
+		error("can't open %s", argv[1]);
+	if(strcmp(argv[2], "-") == 0) {
+		f[F2] = stdin;
+	} else if ((f[F2] = fopen(argv[2], "r")) == 0)
+		error("can't open %s", argv[2]);
+
+	if(ftell(f[F2]) >= 0)
+		seek2();
+	else if(ftell(f[F1]) >= 0)
+		seek1();
+	else
+		error("neither file is randomly accessible","");
+	if (discard)
+		error("some input line was truncated", "");
+	exits("");
+}
+/* Compare two NUL-terminated rune strings: 0 when equal, else -1 or 1 by the first differing rune. */
+int runecmp(Rune *a, Rune *b){
+	for(; *a == *b; a++, b++)
+		if(*a == '\0')
+			return 0;
+	return *a < *b ? -1 : 1;
+}
+/* Convert the NUL-terminated rune string r to a NUL-terminated byte string in buf; returns buf. */
+char *runetostr(char *buf, Rune *r){
+	char *out;
+
+	out = buf;
+	while(*r){
+		out += runetochar(out, r);
+		r++;
+	}
+	*out = '\0';
+	return buf;
+}
+/* Convert the NUL-terminated byte string s to a NUL-terminated rune string in buf; returns buf. */
+Rune *strtorune(Rune *buf, char *s){
+	Rune *out;
+
+	out = buf;
+	while(*s){
+		s += chartorune(out, s);
+		out++;
+	}
+	*out = '\0';
+	return buf;
+}
+/* lazy. there ought to be a clean way to combine seek1 & seek2 */
+#define get1() n1=input(F1)
+#define get2() n2=input(F2)
+/*
+ * Join the two inputs when the second file supports ftell/fseek.
+ * n1 and n2 are the field counts of the current lines (0 at end of
+ * file).  Unmatched lines are printed only when -a asked for them.
+ * For a run of equal keys, bot2 remembers where the run starts in
+ * file 2 and top2 where it ends, so the run can be re-read for every
+ * line of file 1 that carries the same key.
+ */
+void
+seek2(void)
+{
+ int n1, n2;
+ int top2=0;
+ int bot2 = ftell(f[F2]);
+ get1();
+ get2();
+ while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
+ if(n1>0 && n2>0 && comp()>0 || n1==0) {
+ /* line of file 2 has no partner */
+ if(a2) output(0, n2);
+ bot2 = ftell(f[F2]);
+ get2();
+ } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
+ /* line of file 1 has no partner */
+ if(a1) output(n1, 0);
+ get1();
+ } else /*(n1>0 && n2>0 && comp()==0)*/ {
+ /* pair the current file 1 line with the whole run in file 2 */
+ while(n2>0 && comp()==0) {
+ output(n1, n2);
+ top2 = ftell(f[F2]);
+ get2();
+ }
+ /* rewind to the start of the run and move on in file 1 */
+ fseek(f[F2], bot2, 0);
+ get2();
+ get1();
+ for(;;) {
+ if(n1>0 && n2>0 && comp()==0) {
+ output(n1, n2);
+ get2();
+ } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
+ /* run exhausted for this file 1 line: rewind for the next one */
+ fseek(f[F2], bot2, 0);
+ get2();
+ get1();
+ } else /*(n1>0 && n2>0 && comp()>0 || n1==0)*/{
+ /* file 1 has left the key: resume after the run */
+ fseek(f[F2], top2, 0);
+ bot2 = top2;
+ get2();
+ break;
+ }
+ }
+ }
+ }
+}
+/*
+ * Join the two inputs when only the first file supports ftell/fseek:
+ * the counterpart of seek2 with the roles of the files exchanged, so
+ * runs of equal keys are re-read from file 1 (bot1 marks the start of
+ * a run, top1 its end).
+ */
+void
+seek1(void)
+{
+ int n1, n2;
+ int top1=0;
+ int bot1 = ftell(f[F1]);
+ get1();
+ get2();
+ while(n1>0 && n2>0 || (a1||a2) && n1+n2>0) {
+ if(n1>0 && n2>0 && comp()>0 || n1==0) {
+ /* line of file 2 has no partner */
+ if(a2) output(0, n2);
+ get2();
+ } else if(n1>0 && n2>0 && comp()<0 || n2==0) {
+ /* line of file 1 has no partner */
+ if(a1) output(n1, 0);
+ bot1 = ftell(f[F1]);
+ get1();
+ } else /*(n1>0 && n2>0 && comp()==0)*/ {
+ /* NOTE(review): the test is on n2 although this loop reads file 1 (seek2 tests the file it reads) -- confirm */
+ while(n2>0 && comp()==0) {
+ output(n1, n2);
+ top1 = ftell(f[F1]);
+ get1();
+ }
+ /* rewind to the start of the run and move on in file 2 */
+ fseek(f[F1], bot1, 0);
+ get2();
+ get1();
+ for(;;) {
+ if(n1>0 && n2>0 && comp()==0) {
+ output(n1, n2);
+ get1();
+ } else if(n1>0 && n2>0 && comp()>0 || n1==0) {
+ /* run exhausted for this file 2 line: rewind for the next one */
+ fseek(f[F1], bot1, 0);
+ get2();
+ get1();
+ } else /*(n1>0 && n2>0 && comp()<0 || n2==0)*/{
+ /* file 2 has left the key: resume after the run */
+ fseek(f[F1], top1, 0);
+ bot1 = top1;
+ get1();
+ break;
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Read the next line of file n into buf[n] as runes and split it in
+ * place: each separator is overwritten with NUL and ppi[n][] receives a
+ * pointer to the start of every field, followed by a 0 pointer.
+ * Returns the number of fields, or 0 at end of file.  `discard' is
+ * incremented whenever splitting stops on anything but a newline.
+ */
+int
+input(int n) /* get input line and split into fields */
+{
+ register int i, c;
+ Rune *bp;
+ Rune **pp;
+ char line[BUFSIZ];
+
+ bp = buf[n];
+ pp = ppi[n];
+ if (fgets(line, BUFSIZ, f[n]) == 0)
+ return(0);
+ strtorune(bp, line);
+ i = 0;
+ do {
+ i++;
+ if (sep1 == ' ') /* strip multiples */
+ while ((c = *bp) == sep1 || c == sep2)
+ bp++; /* skip blanks */
+ *pp++ = bp; /* record beginning */
+ while ((c = *bp) != sep1 && c != '\n' && c != sep2 && c != '\0')
+ bp++;
+ *bp++ = '\0'; /* mark end by overwriting blank */
+ /* c still holds the character that ended the field */
+ } while (c != '\n' && c != '\0' && i < NFLD-1);
+ /* no newline seen: too many fields, an over-long line, or a last line without one */
+ if (c != '\n')
+ discard++;
+
+ *pp = 0;
+ return(i);
+}
+
+/*
+ * Print one output line.  on1 and on2 are the field counts of the
+ * current lines of file 1 and file 2; 0 means that file contributes
+ * nothing to this line.  Without -o the join field comes first,
+ * followed by the other fields of file 1 and then of file 2.  With -o
+ * exactly the listed fields are printed, a missing or empty one being
+ * replaced by the -e string.
+ *
+ * Fields are fetched only after checking that the line really has
+ * them: a join-field entry in the -o list on a line lacking that field
+ * used to index ppi[] with the F0 marker, and a line lacking the join
+ * field had a missing field pointer dereferenced.
+ */
+void
+output(int on1, int on2) /* print items from olist */
+{
+	int i, fld;
+	Rune *temp;
+	char buf[BUFSIZ];
+	static Rune nofield[1];	/* empty rune string */
+
+	if (no <= 0) { /* default case */
+		if (on1)
+			temp = on1 > j1 ? ppi[F1][j1] : nofield;
+		else
+			temp = on2 > j2 ? ppi[F2][j2] : nofield;
+		printf("%s", runetostr(buf, temp));
+		for (i = 0; i < on1; i++)
+			if (i != j1)
+				printf("%s%s", sepstr, runetostr(buf, ppi[F1][i]));
+		for (i = 0; i < on2; i++)
+			if (i != j2)
+				printf("%s%s", sepstr, runetostr(buf, ppi[F2][i]));
+		printf("\n");
+	} else {
+		for (i = 0; i < no; i++) {
+			fld = olist[i];
+			if (olistf[i] == F0) {
+				/* the join field, from whichever line has it */
+				if (on1 > j1)
+					temp = ppi[F1][j1];
+				else if (on2 > j2)
+					temp = ppi[F2][j2];
+				else
+					temp = null;
+			} else if (fld < 0 ||
+			    olistf[i]==F1 && on1<=fld ||
+			    olistf[i]==F2 && on2<=fld)
+				temp = null;
+			else {
+				temp = ppi[olistf[i]][fld];
+				if (*temp == 0)
+					temp = null;
+			}
+			printf("%s", runetostr(buf, temp));
+			if (i == no - 1)
+				printf("\n");
+			else
+				printf("%s", sepstr);
+		}
+	}
+}
+
+/*
+ * Print "join: ", then s1 used as a printf format with s2 as its single
+ * argument, then a newline, all on stderr; exit with s1 as the status.
+ */
+void
+error(char *s1, char *s2)
+{
+	fputs("join: ", stderr);
+	fprintf(stderr, s1, s2);
+	fputc('\n', stderr);
+	exits(s1);
+}
+
+/*
+ * Return the argument of the option in (*argvp)[1]: the text glued to
+ * the option letter if there is any, otherwise the following word, in
+ * which case *argcp and *argvp are stepped past one word.  Exits with
+ * "incomplete option" when the following word is missing or starts
+ * with '-'.
+ */
+char *
+getoptarg(int *argcp, char ***argvp)
+{
+	char **av;
+	char *opt;
+
+	av = *argvp;
+	opt = av[1];
+	if(opt[2] != 0)
+		return opt + 2;
+	if(*argcp <= 2 || av[2][0] == '-')
+		error("incomplete option %s", opt);
+	*argcp -= 1;
+	*argvp = av + 1;
+	return av[2];
+}
+
+/*
+ * Parse a comma-separated -o list into olistf[] (which file) and
+ * olist[] (which field, still 1-origin), setting `no' to the number of
+ * entries.  An entry is "0" for the join field or "n.m" with n being 1
+ * or 2.  Exits with "invalid -o list" on anything else.
+ *
+ * After the field number has been converted, s is moved to its last
+ * digit.  It used to stay on the first digit, so the second digit of a
+ * number such as the 12 in "1.12" was taken for the start of the next
+ * entry and the whole list was rejected.
+ */
+void
+oparse(char *s)
+{
+	for (no = 0; no<2*NFLD && *s; no++, s++) {
+		switch(*s) {
+		case '0':
+			olistf[no] = F0;
+			break;
+		case '1':
+		case '2':
+			if(s[1] == '.' && isdigit((uchar)s[2])) {
+				olistf[no] = *s=='1'? F1: F2;
+				olist[no] = atoi(s += 2);
+				while(isdigit((uchar)s[1]))
+					s++;
+				break;
+			} /* fall thru */
+		default:
+			error("invalid -o list", "");
+		}
+		/* step over the comma; the loop increment then reaches the next entry */
+		if(s[1] == ',')
+			s++;
+	}
+}
diff --git a/lib9/utf.h b/lib9/utf.h
@@ -11,7 +11,8 @@ enum
UTFmax = 3, /* maximum bytes per rune */
Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */
Runeself = 0x80, /* rune and UTF sequences are the same (<) */
- Runeerror = 0xFFFD /* decoding error in UTF */
+ Runeerror = 0xFFFD, /* decoding error in UTF */
+ Runemax = 0x10FFFF /* maximum rune value */
};
/* Edit .+1,/^$/ | cfn $PLAN9/src/lib9/utf/?*.c | grep -v static |grep -v __ */
diff --git a/look/Makefile b/look/Makefile
@@ -0,0 +1,10 @@
+# look - look unix port from plan9
+# Depends on ../lib9
+
+TARG = look
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
diff --git a/look/look.1 b/look/look.1
@@ -0,0 +1,85 @@
+.TH LOOK 1
+.SH NAME
+look \- find lines in a sorted list
+.SH SYNOPSIS
+.B look
+[
+.BI -dfnixt c
+]
+[
+.I string
+]
+[
+.I file
+]
+.SH DESCRIPTION
+.I Look
+consults a sorted
+.I file
+and prints all lines that begin with
+.IR string .
+It uses binary search.
+.PP
+The following options are recognized.
+Options
+.B dfnt
+affect comparisons as in
+.IR sort (1).
+.TP
+.B -i
+Interactive.
+There is no
+.I string
+argument; instead
+.I look
+takes lines from the standard input as strings to be looked up.
+.TP
+.B -x
+Exact.
+Print only lines of the file whose key matches
+.I string
+exactly.
+.TP
+.B -d
+`Directory' order:
+only letters, digits,
+tabs and blanks participate in comparisons.
+.TP
+.B -f
+Fold.
+Upper case letters compare equal to lower case.
+.TP
+.B -n
+Numeric comparison with initial string of digits, optional minus sign,
+and optional decimal point.
+.TP
+.BR -t [ \f2c\f1 ]
+Character
+.I c
+terminates the sort key in the
+.IR file .
+By default, tab terminates the key. If
+.I c
+is missing the entire line comprises the key.
+.PP
+If no
+.I file
+is specified,
+.B /lib/words
+is assumed, with collating sequence
+.BR df .
+.SH FILES
+.B /lib/words
+.SH SOURCE
+.B \*9/src/cmd/look.c
+.SH "SEE ALSO"
+.IR sort (1),
+.IR grep (1)
+.SH DIAGNOSTICS
+The exit status is
+.RB `` "not found" ''
+if no match is found, and
+.RB `` "no dictionary" ''
+if
+.I file
+or the default dictionary cannot be opened.
diff --git a/look/look.c b/look/look.c
@@ -0,0 +1,349 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+ /*
+ * Macros for Rune support of ctype.h-like functions.
+ * Any existing definitions of these names are removed first; the
+ * replacements are plain ASCII range tests on their argument.
+ */
+
+#undef isupper
+#undef islower
+#undef isalpha
+#undef isdigit
+#undef isalnum
+#undef isspace
+#undef tolower
+#define isupper(r) ('A' <= (r) && (r) <= 'Z')
+#define islower(r) ('a' <= (r) && (r) <= 'z')
+#define isalpha(r) (isupper(r) || islower(r))
+#define islatin1(r) (0xC0 <= (r) && (r) <= 0xFF) /* the range indexed (minus 0xC0) into latin_fold_tab */
+
+#define isdigit(r) ('0' <= (r) && (r) <= '9')
+
+#define isalnum(r) (isalpha(r) || isdigit(r))
+
+#define isspace(r) ((r) == ' ' || (r) == '\t' \
+ || (0x0A <= (r) && (r) <= 0x0D))
+
+#define tolower(r) ((r)-'A'+'a') /* no range check: only meaningful when isupper(r) */
+
+#define sgn(v) ((v) < 0 ? -1 : ((v) > 0 ? 1 : 0)) /* -1, 0 or 1 */
+
+#define WORDSIZ 4000 /* capacity, in runes, of the entry and word buffers */
+char *filename = "#9/lib/words"; /* default dictionary; main() passes it through unsharp() */
+Biobuf *dfile; /* the file being searched */
+Biobuf bout; /* buffered output on fd 1 */
+Biobuf bin; /* buffered input on fd 0; search strings are read from it when iflag is set */
+
+int fold; /* -f: rcanon() maps 'A'-'Z' to lower case */
+int direc; /* -d: rcanon() keeps only letters, digits, blank and tab */
+int exact; /* -x: print only exact matches */
+int iflag; /* -i, or no string argument: take search strings from bin */
+int rev = 1; /*-1 for reverse-ordered file, not implemented*/
+int (*compare)(Rune*, Rune*); /* key comparison: acomp by default, ncomp with -n */
+Rune tab = '\t'; /* rune that ends the key in rcanon(); set by -t */
+Rune entry[WORDSIZ]; /* line most recently read from dfile, verbatim */
+Rune word[WORDSIZ]; /* canonical key computed from entry by rcanon() */
+Rune key[50], orig[50]; /* search string: canonical form, and as supplied */
+Rune latin_fold_tab[] =
+{
+/* Table to fold latin 1 characters to ASCII equivalents
+ based at Rune value 0xc0
+ A 0 entry means no fold: rcanon() leaves that character unchanged.
+
+ À Á Â Ã Ä Å Æ Ç
+ È É Ê Ë Ì Í Î Ï
+ Ð Ñ Ò Ó Ô Õ Ö ×
+ Ø Ù Ú Û Ü Ý Þ ß
+ à á â ã ä å æ ç
+ è é ê ë ì í î ï
+ ð ñ ò ó ô õ ö ÷
+ ø ù ú û ü ý þ ÿ
+*/
+ 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
+ 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
+ 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
+ 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 ,
+ 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
+ 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
+ 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
+ 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y',
+};
+
+int locate(void);
+int acomp(Rune*, Rune*);
+int getword(Biobuf*, Rune *rp, int n);
+void torune(char*, Rune*);
+void rcanon(Rune*, Rune*);
+int ncomp(Rune*, Rune*);
+
+/*
+ * look [-dfinx] [-t c] [string] [file]
+ *
+ * Parse the options, canonicalize the search string into key, and use
+ * locate() to find the first candidate line of the file; then print
+ * that line and every following line that still matches.  Without a
+ * string argument, or with -i, strings are read one per line from
+ * standard input and the lookup is repeated for each.  Without a file
+ * argument the default dictionary is searched with -d and -f implied.
+ * Exit strings: "usage", "no dictionary", "not found", or none.
+ */
+void
+main(int argc, char *argv[])
+{
+	int n;
+	char *sep;
+
+	filename = unsharp(filename);
+
+	Binit(&bin, 0, OREAD);
+	Binit(&bout, 1, OWRITE);
+	compare = acomp;
+	ARGBEGIN{
+	case 'd':
+		direc++;
+		break;
+	case 'f':
+		fold++;
+		break;
+	case 'i':
+		iflag++;
+		break;
+	case 'n':
+		compare = ncomp;
+		break;
+	case 't':
+		/*
+		 * ARGF() yields nil when -t is the last word.  A key
+		 * terminator of 0 makes rcanon() take the whole line as
+		 * the key instead of passing nil to chartorune().
+		 */
+		sep = ARGF();
+		if(sep != nil)
+			chartorune(&tab, sep);
+		else
+			tab = 0;
+		break;
+	case 'x':
+		exact++;
+		break;
+	default:
+		fprint(2, "%s: bad option %c\n", argv0, ARGC());
+		fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0);
+		exits("usage");
+	} ARGEND
+	if(!iflag){
+		if(argc >= 1) {
+			/* torune() is unbounded; refuse a string that cannot fit in orig */
+			if(utflen(argv[0]) >= (int)(sizeof(orig)/sizeof(orig[0]))) {
+				fprint(2, "look: string too long\n");
+				exits("usage");
+			}
+			torune(argv[0], orig);
+			argv++;
+			argc--;
+		} else
+			iflag++;
+	}
+	if(argc < 1) {
+		/* default dictionary: compare in folded directory order */
+		direc++;
+		fold++;
+	} else
+		filename = argv[0];
+	if (!iflag)
+		rcanon(orig, key);
+	dfile = Bopen(filename, OREAD);
+	if(dfile == 0) {
+		fprint(2, "look: can't open %s\n", filename);
+		exits("no dictionary");
+	}
+	if(!iflag)
+		if(!locate())
+			exits("not found");
+	do {
+		if(iflag) {
+			Bflush(&bout);
+			if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0])))
+				exits(0);
+			rcanon(orig, key);
+			if(!locate())
+				continue;
+		}
+		/* entry/word hold the line found by locate() */
+		if (!exact || !acomp(word, key))
+			Bprint(&bout, "%S\n", entry);
+		while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
+			rcanon(entry, word);
+			n = compare(key, word);
+			switch(n) {
+			case -1:
+				if(exact)
+					break;
+				/* fall through: a prefix match counts */
+			case 0:
+				if (!exact || !acomp(word, orig))
+					Bprint(&bout, "%S\n", entry);
+				continue;
+			}
+			break;	/* first non-matching line ends the run */
+		}
+	} while(iflag);
+	exits(0);
+}
+/*
+ * locate: find the first line of dfile that matches key.
+ * The first loop is a binary search on byte offsets: it seeks to the
+ * midpoint, skips to the end of the line it landed in, and compares
+ * key with the next whole line to move top or bot.  The second loop
+ * reads lines forward from bot until one matches or sorts after key.
+ * Returns 1 with the matching line in entry (canonical form in word),
+ * or 0 if there is none.
+ */
+int
+locate(void)
+{
+ vlong top, bot, mid;
+ int c;
+ int n;
+
+ bot = 0;
+ top = Bseek(dfile, 0L, 2); /* whence 2: offset is taken from the end of the file */
+ for(;;) {
+ mid = (top+bot) / 2;
+ Bseek(dfile, mid, 0);
+ do
+ c = Bgetrune(dfile);
+ while(c>=0 && c!='\n'); /* discard the remainder of the line mid fell in */
+ mid = Boffset(dfile); /* offset just past that newline */
+ if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0])))
+ break; /* nothing more to read: stop narrowing */
+ rcanon(entry, word);
+ n = compare(key, word);
+ switch(n) {
+ case -2:
+ case -1:
+ case 0:
+ if(top <= mid)
+ break; /* interval cannot shrink further */
+ top = mid;
+ continue;
+ case 1:
+ case 2:
+ bot = mid;
+ continue;
+ }
+ break;
+ }
+ Bseek(dfile, bot, 0);
+ while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
+ rcanon(entry, word);
+ n = compare(key, word);
+ switch(n) {
+ case -2:
+ return 0;
+ case -1:
+ if(exact)
+ return 0; /* otherwise falls through: key is a prefix of this line */
+ case 0:
+ return 1;
+ case 1:
+ case 2:
+ continue;
+ }
+ }
+ return 0;
+}
+
+/*
+ * acomp(s, t) compares two NUL-terminated rune strings and returns:
+ *	-2 if s sorts strictly before t
+ *	-1 if s is a proper prefix of t
+ *	 0 if s and t are identical
+ *	 1 if t is a proper prefix of s
+ *	 2 if t sorts strictly before s
+ */
+
+int
+acomp(Rune *s, Rune *t)
+{
+	int cs, ct;
+
+	/* advance over the common prefix */
+	while((cs = *s++) == (ct = *t++))
+		if(cs == 0)
+			return 0;
+
+	if(cs == 0)
+		return -1;
+	if(ct == 0)
+		return 1;
+	return cs < ct ? -2 : 2;
+}
+
+/*
+ * torune: convert the UTF string old into runes stored at new,
+ * including the terminating zero rune.  No bound is applied to new.
+ */
+void
+torune(char *old, Rune *new)
+{
+	for(;;) {
+		old += chartorune(new, old);
+		if(*new++ == 0)
+			break;
+	}
+}
+
+/*
+ * rcanon: copy the key part of old (up to, not including, the first
+ * tab rune or the terminator) into new in canonical form:
+ * Latin-1 letters with a table entry become their ASCII equivalent;
+ * with direc set, runes other than letters, digits, blank and tab are
+ * dropped; with fold set, upper case becomes lower case.
+ * new is always zero-terminated.
+ */
+void
+rcanon(Rune *old, Rune *new)
+{
+	Rune r, ascii;
+
+	for(; (r = *old) != 0 && r != tab; old++) {
+		if(islatin1(r)) {
+			ascii = latin_fold_tab[r-0xc0];
+			if(ascii)
+				r = ascii;
+		}
+		if(direc && !isalnum(r) && r != ' ' && r != '\t')
+			continue;
+		if(fold && isupper(r))
+			r = tolower(r);
+		*new++ = r;
+	}
+	*new = 0;
+}
+/*
+ * ncomp: compare s and t as decimal numbers: leading white space,
+ * an optional '-', integer digits, an optional '.' and fraction digits.
+ * Returns 0 when the two values are equal, otherwise a result of
+ * magnitude 2; with rev == 1 it is negative when s is the smaller
+ * number, the same sign convention as acomp.
+ */
+int
+ncomp(Rune *s, Rune *t)
+{
+ Rune *is, *it, *js, *jt;
+ int a, b;
+ int ssgn, tsgn;
+
+ while(isspace(*s))
+ s++;
+ while(isspace(*t))
+ t++;
+ ssgn = tsgn = -2*rev;
+ if(*s == '-') {
+ s++;
+ ssgn = -ssgn;
+ }
+ if(*t == '-') {
+ t++;
+ tsgn = -tsgn;
+ }
+ for(is = s; isdigit(*is); is++) /* is: one past the integer digits of s */
+ ;
+ for(it = t; isdigit(*it); it++) /* it: one past the integer digits of t */
+ ;
+ js = is;
+ jt = it;
+ a = 0;
+ if(ssgn == tsgn) /* same sign: walk the integer digits right to left */
+ while(it>t && is>s)
+ if(b = *--it - *--is)
+ a = b; /* ends up holding the most significant difference */
+ while(is > s) /* digits of s not paired with a digit of t */
+ if(*--is != '0')
+ return -ssgn;
+ while(it > t) /* digits of t not paired with a digit of s */
+ if(*--it != '0')
+ return tsgn;
+ if(a)
+ return sgn(a)*ssgn;
+ if(*(s=js) == '.') /* integer parts equal: move on to the fractions */
+ s++;
+ if(*(t=jt) == '.')
+ t++;
+ if(ssgn == tsgn)
+ while(isdigit(*s) && isdigit(*t))
+ if(a = *t++ - *s++)
+ return sgn(a)*ssgn;
+ while(isdigit(*s))
+ if(*s++ != '0')
+ return -ssgn;
+ while(isdigit(*t))
+ if(*t++ != '0')
+ return tsgn;
+ return 0;
+}
+
+/*
+ * getword: read one line from f into rp, which has room for n runes.
+ * The newline is replaced by a zero rune.  Returns 1 on success and 0
+ * at end of input or read error; a line that does not fit in n runes
+ * also returns 0, after a diagnostic on fd 2.
+ */
+int
+getword(Biobuf *f, Rune *rp, int n)
+{
+	long c;
+	int i;
+
+	for(i = 0; i < n; i++) {
+		c = Bgetrune(f);
+		if(c < 0)
+			return 0;
+		if(c == '\n') {
+			rp[i] = '\0';
+			return 1;
+		}
+		rp[i] = c;
+	}
+	fprint(2, "Look: word too long. Bailing out.\n");
+	return 0;
+}
diff --git a/pbd/Makefile b/pbd/Makefile
@@ -0,0 +1,10 @@
+# pbd - pbd unix port from plan9
+# Depends on ../lib9
+
+TARG = pbd
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
diff --git a/pbd/pbd.1 b/pbd/pbd.1
diff --git a/pbd/pbd.c b/pbd/pbd.c
@@ -0,0 +1,19 @@
+#include <u.h>
+#include <libc.h>
+
+/*
+ * pbd: write the last component of the working directory to fd 1,
+ * with no trailing newline.  "/" is printed as is; "???" is printed
+ * when the working directory cannot be obtained.
+ */
+void
+main(void)
+{
+	char buf[512], *base, *slash;
+
+	base = "???";
+	if(getwd(buf, sizeof buf)){
+		slash = strrchr(buf, '/');
+		if(slash == nil)
+			base = buf;
+		else if(slash == buf && slash[1] == '\0')
+			base = slash;	/* the path is exactly "/" */
+		else
+			base = slash + 1;
+	}
+	write(1, base, strlen(base));
+	exits(0);
+}
diff --git a/rc/Makefile b/rc/Makefile
@@ -46,7 +46,7 @@ uninstall:
@${CC} ${CFLAGS} -I../lib9 -I${PREFIX}/include -I../lib9 $*.c
clean:
- rm -f ${OFILES} ${TARG} y.tab.c y.tab.h
+ rm -f ${OFILES} ${TARG} y.tab.c y.tab.h x.tab.h
${TARG}: ${OFILES}
@echo LD ${TARG}
diff --git a/split/Makefile b/split/Makefile
@@ -0,0 +1,10 @@
+# split - split unix port from plan9
+# Depends on ../lib9
+
+TARG = split
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
diff --git a/split/split.1 b/split/split.1
@@ -0,0 +1,82 @@
+.TH SPLIT 1
+.CT 1 files
+.SH NAME
+split \- split a file into pieces
+.SH SYNOPSIS
+.B split
+[
+.I option ...
+]
+[
+.I file
+]
+.SH DESCRIPTION
+.I Split
+reads
+.I file
+(standard input by default)
+and writes it in pieces of 1000
+lines per output file.
+The names of the
+output files are
+.BR xaa ,
+.BR xab ,
+and so on to
+.BR xzz .
+The options are
+.TP
+.BI -n " n"
+Split into
+.IR n -line
+pieces.
+.TP
+.BI -l " n"
+Synonym for
+.B -n
+.IR n ,
+a nod to Unix's syntax.
+.TP
+.BI -e " expression"
+File divisions occur at each line
+that matches a regular
+.IR expression ;
+see
+.IR regexp (7).
+Multiple
+.B -e
+options may appear.
+If a subexpression of
+.I expression
+is contained in parentheses
+.BR ( ... ) ,
+the output file name is the portion of the
+line which matches the subexpression.
+.TP
+.BI -f " stem"
+Use
+.I stem
+instead of
+.B x
+in output file names.
+.TP
+.BI -s " suffix"
+Append
+.I suffix
+to names identified under
+.BR -e .
+.TP
+.B -x
+Exclude the matched input line from the output file.
+.TP
+.B -i
+Ignore case in option
+.BR -e ;
+force output file names (excluding the suffix)
+to lower case.
+.SH SOURCE
+.B \*9/src/cmd/split.c
+.SH SEE ALSO
+.IR sed (1),
+.IR awk (1),
+.IR grep (1),
+.IR regexp (7)
diff --git a/split/split.c b/split/split.c
@@ -0,0 +1,189 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <ctype.h>
+#include <regexp.h>
+
+char digit[] = "0123456789"; /* NOTE(review): not referenced by the code visible in this file */
+char *suffix = ""; /* -s: appended to file names taken from a matched subexpression */
+char *stem = "x"; /* -f: prefix of generated file names */
+char suff[] = "aa"; /* two-letter counter appended to stem; advanced by nextfile() */
+char name[200]; /* name of the file openf() creates next */
+Biobuf bout;
+Biobuf *output = &bout; /* current output; openf() re-initializes it on each new file */
+
+extern int nextfile(void);
+extern int matchfile(Resub*);
+extern void openf(void);
+extern char *fold(char*,int);
+extern void usage(void);
+extern void badexp(void);
+/*
+ * split [-n num] [-l num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]
+ *
+ * Copy file (standard input when no file is given) into a series of
+ * output files.  With -e a new file is started at each line matching
+ * the regular expression; otherwise a new file is started every n lines.
+ */
+void
+main(int argc, char *argv[])
+{
+ Reprog *exp;
+ char *pattern = 0;
+ int n = 1000;
+ char *line;
+ int xflag = 0;
+ int iflag = 0;
+ Biobuf bin;
+ Biobuf *b = &bin;
+ char buf[256];
+
+ ARGBEGIN {
+ case 'l':
+ case 'n':
+ n=atoi(EARGF(usage()));
+ break;
+ case 'e':
+ pattern = strdup(EARGF(usage())); /* a later -e replaces an earlier one */
+ break;
+ case 'f':
+ stem = strdup(EARGF(usage()));
+ break;
+ case 's':
+ suffix = strdup(EARGF(usage()));
+ break;
+ case 'x':
+ xflag++;
+ break;
+ case 'i':
+ iflag++;
+ break;
+ default:
+ usage();
+ break;
+
+ } ARGEND;
+
+ if(argc < 0 || argc > 1)
+ usage();
+
+ if(argc != 0) {
+ b = Bopen(argv[0], OREAD);
+ if(b == nil) {
+ fprint(2, "split: can't open %s: %r\n", argv[0]);
+ exits("open");
+ }
+ } else
+ Binit(b, 0, OREAD);
+
+ if(pattern) {
+ if(!(exp = regcomp(iflag? fold(pattern,strlen(pattern)): pattern)))
+ badexp();
+ while((line=Brdline(b,'\n')) != 0) {
+ Resub match[2];
+ memset(match, 0, sizeof match);
+ line[Blinelen(b)-1] = 0; /* overwrite the newline so line is a C string */
+ if(regexec(exp,iflag?fold(line,Blinelen(b)-1):line,match,2)) {
+ if(matchfile(match) && xflag)
+ continue; /* -x: the matching line itself is not written */
+ } else if(output == 0) /* NOTE(review): output starts as &bout, so this looks unreachable - confirm */
+ nextfile(); /* at most once */
+ Bwrite(output, line, Blinelen(b)-1);
+ Bputc(output, '\n');
+ }
+ } else {
+ int linecnt = n; /* makes the first line open the first file */
+
+ while((line=Brdline(b,'\n')) != 0) {
+ if(++linecnt > n) {
+ nextfile();
+ linecnt = 1;
+ }
+ Bwrite(output, line, Blinelen(b));
+ }
+
+ /*
+ * in case we didn't end with a newline, tack whatever's
+ * left onto the last file
+ */
+ while((n = Bread(b, buf, sizeof(buf))) > 0)
+ Bwrite(output, buf, n);
+ }
+ if(b != nil)
+ Bterm(b);
+ exits(0);
+}
+
+/*
+ * nextfile: open the next output file in the sequence stem"aa",
+ * stem"ab", ... stem"zz" and advance the two-letter counter.
+ * Returns 1 while files can still be opened.  Once the sequence is
+ * exhausted it prints a diagnostic (first time only), opens nothing
+ * and returns 0.  A stem too long for the name buffer is fatal.
+ */
+int
+nextfile(void)
+{
+	static int canopen = 1;
+
+	if(suff[0] > 'z') {
+		if(canopen)
+			fprint(2, "split: file %szz not split\n",stem);
+		canopen = 0;
+		return canopen;
+	}
+	/* stem comes from the command line; never overrun name[] */
+	if(strlen(stem) + strlen(suff) >= sizeof name) {
+		fprint(2, "split: file name %s%s too long\n", stem, suff);
+		exits("name");
+	}
+	strcpy(name, stem);
+	strcat(name, suff);
+	if(++suff[1] > 'z')
+		suff[1] = 'a', ++suff[0];
+	openf();
+	return canopen;
+}
+
+/*
+ * matchfile: start a new output file for a line that matched -e.
+ * If the expression had a parenthesized subexpression (match[1] is
+ * set), the file name is the matched text followed by suffix, cut
+ * short if necessary to fit in name[]; returns 1.  Otherwise falls
+ * back to nextfile() and returns its result.
+ */
+int
+matchfile(Resub *match)
+{
+	int len, room;
+
+	if(match[1].s.sp == nil)
+		return nextfile();
+
+	len = match[1].e.ep - match[1].s.sp;
+	/* the matched text comes from the input; keep it inside name[] */
+	room = (int)sizeof name - 1 - (int)strlen(suffix);
+	if(room < 0)
+		room = 0;
+	if(len > room)
+		len = room;
+	memmove(name, match[1].s.sp, len);
+	strncpy(name+len, suffix, sizeof name - 1 - len);
+	name[sizeof name - 1] = '\0';
+	openf();
+	return 1;
+}
+
+/*
+ * openf: flush and terminate the current output buffer, close the
+ * previously created file if any, create the file called name and
+ * attach output to it.  Failure to create the file is fatal.
+ */
+void
+openf(void)
+{
+	static int fd = 0;
+
+	Bflush(output);
+	Bterm(output);
+	if(fd > 0)
+		close(fd);
+	fd = create(name,OWRITE,0666);
+	if(fd < 0) {
+		/* the diagnostic used to name "grep" instead of this program */
+		fprint(2, "split: can't create %s: %r\n", name);
+		exits("create");
+	}
+	Binit(output, fd, OWRITE);
+}
+
+/*
+ * fold: return a lower-cased copy of the NUL-terminated string s.
+ * n is the length of s without its terminator and is used only to
+ * size the buffer.  The copy lives in a static buffer that is reused,
+ * and possibly moved, by the next call.  Running out of memory is fatal.
+ */
+char *
+fold(char *s, int n)
+{
+	static char *fline;
+	static int linesize = 0;
+	char *t;
+
+	if(linesize < n+1){
+		t = realloc(fline,n+1);
+		if(t == nil){
+			fprint(2, "split: out of memory\n");
+			exits("memory");
+		}
+		fline = t;
+		linesize = n+1;
+	}
+	for(t=fline; *t++ = tolower((uchar)*s++); )
+		continue;
+	/* we assume the 'A'-'Z' only appear as themselves
+	 * in a utf encoding.
+	 */
+	return fline;
+}
+
+/* usage: print the synopsis on fd 2 and exit with status "usage" */
+void
+usage(void)
+{
+	fprint(2, "usage: %s\n",
+		"split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]");
+	exits("usage");
+}
+
+/* badexp: report that the -e expression did not compile, and exit */
+void
+badexp(void)
+{
+	char *why = "bad regular expression";
+
+	fprint(2, "split: %s\n", why);
+	exits(why);
+}
diff --git a/strings/Makefile b/strings/Makefile
@@ -0,0 +1,10 @@
+# strings - strings unix port from plan9
+# Depends on ../lib9
+
+TARG = strings
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
diff --git a/strings/strings.1 b/strings/strings.1
@@ -0,0 +1,28 @@
+.TH STRINGS 1
+.SH NAME
+strings \- extract printable strings
+.SH SYNOPSIS
+.B strings
+[
+.I file ...
+]
+.SH DESCRIPTION
+.I Strings
+finds and prints strings containing 6 or more
+consecutive printable UTF-encoded characters
+in a (typically) binary file, default
+standard input.
+Printable characters are taken to be
+.SM ASCII
+characters from blank through tilde (hexadecimal 20 through 7E), inclusive,
+and
+all other characters from value 00A0 to FFFF.
+Strings reports
+the decimal offset within the file at which the string starts and the text
+of the string. If the string is longer than 70 runes the line is
+terminated by three dots and the printing is resumed on the next
+line with the offset of the continuation line.
+.SH SOURCE
+.B \*9/src/cmd/strings.c
+.SH SEE ALSO
+.IR nm (1)
diff --git a/strings/strings.c b/strings/strings.c
@@ -0,0 +1,90 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+Biobuf *fin;
+Biobuf fout;
+
+#define MINSPAN 6 /* Min characters in string */
+
+#define BUFSIZE 70
+
+void stringit(char *);
+#undef isprint
+#define isprint risprint
+int isprint(Rune);
+
+/*
+ * strings [file ...]
+ * Run stringit() on each named file, or on /dev/stdin when none is
+ * given.  With more than one file each is introduced by a "name:"
+ * line.  That line goes through the same buffered stream as the
+ * strings, so it cannot overtake or trail the output it labels, and
+ * the stream is flushed before exit.
+ */
+void
+main(int argc, char **argv)
+{
+	int i;
+
+	Binit(&fout, 1, OWRITE);
+	if(argc < 2) {
+		stringit("/dev/stdin");
+		Bflush(&fout);
+		exits(0);
+	}
+
+	for(i = 1; i < argc; i++) {
+		if(argc > 2)
+			Bprint(&fout, "%s:\n", argv[i]);
+
+		stringit(argv[i]);
+	}
+
+	Bflush(&fout);
+	exits(0);
+}
+
+/*
+ * stringit: scan the file str and print each run of at least MINSPAN
+ * printable runes, prefixed by the byte offset at which the run starts.
+ * A run reaching BUFSIZE-1 runes is printed followed by " ..." and the
+ * scan carries on with an empty buffer.  A file that cannot be opened
+ * is reported with perror and skipped.
+ */
+void
+stringit(char *str)
+{
+	long posn, start;
+	int cnt = 0;
+	long c;
+
+	Rune buf[BUFSIZE];
+
+	if ((fin = Bopen(str, OREAD)) == 0) {
+		perror("open");
+		return;
+	}
+
+	start = 0;
+	posn = Boffset(fin);
+	while((c = Bgetrune(fin)) >= 0) {
+		if(isprint(c)) {
+			/*
+			 * An empty buffer marks the start of a run.  Testing
+			 * start == 0 instead misreported a run beginning at
+			 * offset 0 as beginning at its second rune.
+			 */
+			if(cnt == 0)
+				start = posn;
+			buf[cnt++] = c;
+			if(cnt == BUFSIZE-1) {
+				buf[cnt] = 0;
+				Bprint(&fout, "%8ld: %S ...\n", start, buf);
+				cnt = 0;
+			}
+		} else {
+			if(cnt >= MINSPAN) {
+				buf[cnt] = 0;
+				Bprint(&fout, "%8ld: %S\n", start, buf);
+			}
+			cnt = 0;
+		}
+		posn = Boffset(fin);	/* offset of the next rune to be read */
+	}
+
+	if(cnt >= MINSPAN){
+		buf[cnt] = 0;
+		Bprint(&fout, "%8ld: %S\n", start, buf);
+	}
+	Bterm(fin);
+}
+
+/*
+ * isprint (compiled as risprint through the #define above): returns 1
+ * for runes from blank up to but not including 0x7f and for runes
+ * above 0xA0, and 0 for everything else.
+ */
+int
+isprint(Rune r)
+{
+	return (r >= ' ' && r < 0x7f) || r > 0xA0;
+}
diff --git a/unicode/Makefile b/unicode/Makefile
@@ -0,0 +1,10 @@
+# unicode - unicode unix port from plan9
+# Depends on ../lib9
+
+TARG = unicode
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
diff --git a/unicode/unicode.1 b/unicode/unicode.1
diff --git a/unicode/unicode.c b/unicode/unicode.c
@@ -0,0 +1,122 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+char usage[] = "unicode { [-t] hex hex ... | hexmin-hexmax ... | [-n] char ... }";
+char hex[] = "0123456789abcdefABCDEF";
+int numout = 0;
+int text = 0;
+char *err;
+Biobuf bout;
+
+char *range(char*[]);
+char *nums(char*[]);
+char *chars(char*[]);
+
+/*
+ * unicode: pick a mode from the flags and the shape of the first
+ * argument, then exit with the string that mode returns:
+ *	a '-' in the first argument (and no -n) -> range()
+ *	-n, or a first argument not starting with a hex digit -> nums()
+ *	otherwise -> chars()
+ */
+void
+main(int argc, char *argv[])
+{
+	char *status;
+
+	ARGBEGIN{
+	case 'n':
+		numout = 1;
+		break;
+	case 't':
+		text = 1;
+		break;
+	}ARGEND
+	Binit(&bout, 1, OWRITE);
+	if(argc == 0){
+		fprint(2, "usage: %s\n", usage);
+		exits("usage");
+	}
+	if(!numout && utfrune(argv[0], '-'))
+		status = range(argv);
+	else if(numout || strchr(hex, argv[0][0])==0)
+		status = nums(argv);
+	else
+		status = chars(argv);
+	exits(status);
+}
+
+/*
+ * range: each argument is "hexmin-hexmax".  Print every code point in
+ * the range as "hex char", eight to a line separated by tabs.
+ * Returns 0, or "bad range" after a diagnostic for the first argument
+ * that is malformed, out of range, or has max below min.
+ */
+char*
+range(char *argv[])
+{
+	char *p;
+	int lo, hi, col;
+
+	for(; *argv; argv++){
+		p = *argv;
+		if(strchr(hex, p[0]) == 0)
+			goto bad;
+		lo = strtoul(p, &p, 16);
+		if(lo<0 || lo>Runemax || *p!='-')
+			goto bad;
+		p++;
+		if(strchr(hex, *p) == 0)
+			goto bad;
+		hi = strtoul(p, &p, 16);
+		if(hi<0 || hi>Runemax || hi<lo || *p!=0)
+			goto bad;
+		/* lo <= hi holds here, so at least one entry is printed */
+		for(col = 1; lo <= hi; lo++, col++){
+			Bprint(&bout, "%.4x %C", lo, lo);
+			if(lo==hi || (col&7)==0)
+				Bprint(&bout, "\n");
+			else
+				Bprint(&bout, "\t");
+		}
+	}
+	return 0;
+
+bad:
+	fprint(2, "unicode: bad range %s\n", *argv);
+	return "bad range";
+}
+
+/*
+ * nums: print the hexadecimal value of every character of every
+ * argument, one per line.  Returns 0, or "bad utf" after a diagnostic
+ * for the first argument containing a malformed UTF sequence.
+ */
+char*
+nums(char *argv[])
+{
+	char *q;
+	Rune r;
+	int w;
+
+	while(*argv){
+		q = *argv;
+		while(*q){
+			w = chartorune(&r, q);
+			/*
+			 * chartorune reports malformed input as Runeerror with
+			 * a width of 1; a real U+FFFD takes more than one byte.
+			 * Comparing r with 0x80 never caught bad input and
+			 * rejected a valid U+0080.
+			 */
+			if(r==Runeerror && w==1){
+				fprint(2, "unicode: invalid utf string %s\n", *argv);
+				return "bad utf";
+			}
+			Bprint(&bout, "%.4x\n", r);
+			q += w;
+		}
+		argv++;
+	}
+	return 0;
+}
+
+/*
+ * chars: each argument is a hexadecimal code point; print the
+ * corresponding character, followed by a newline unless -t was given.
+ * Returns 0, or "bad char" after a diagnostic for the first argument
+ * that is not a hex number in the range 0..Runemax.
+ */
+char*
+chars(char *argv[])
+{
+	char *p;
+	int r;
+
+	for(; *argv; argv++){
+		p = *argv;
+		if(strchr(hex, p[0]) == 0)
+			goto bad;
+		r = strtoul(p, &p, 16);
+		if(r<0 || r>Runemax || *p!=0)
+			goto bad;
+		Bprint(&bout, "%C", r);
+		if(!text)
+			Bprint(&bout, "\n");
+	}
+	return 0;
+
+bad:
+	fprint(2, "unicode: bad unicode value %s\n", *argv);
+	return "bad char";
+}
diff --git a/unutf/Makefile b/unutf/Makefile
@@ -0,0 +1,10 @@
+# unutf - unutf unix port from plan9
+# Depends on ../lib9
+
+TARG = unutf
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
diff --git a/unutf/unutf.1 b/unutf/unutf.1
diff --git a/unutf/unutf.c b/unutf/unutf.c
@@ -0,0 +1,20 @@
+/*
+ * stupid little program to pipe unicode chars through
+ * when converting to non-utf compilers.
+ */
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+Biobuf bin;
+
+/*
+ * unutf: read runes from standard input and print each one's value
+ * on its own line, using the format "0x%ux".
+ */
+void
+main(void)
+{
+	int c;
+
+	Binit(&bin, 0, OREAD);
+	for(;;){
+		c = Bgetrune(&bin);
+		if(c < 0)
+			break;
+		print("0x%ux\n", c);
+	}
+	exits(0);
+}