commit 78a6c092e93295e60ed4cba344117a34b586d7da
parent e70375948b9e9afebde07fb294abf5f039f19d44
Author: anselm@garbe.us <unknown>
Date: Thu, 27 May 2010 13:02:29 +0100
added fmt as well
Diffstat:
M | Makefile | | | 4 | ++-- |
A | fmt/Makefile | | | 10 | ++++++++++ |
A | fmt/fmt.1 | | | 90 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | fmt/fmt.c | | | 241 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
4 files changed, 343 insertions(+), 2 deletions(-)
diff --git a/Makefile b/Makefile
@@ -3,8 +3,8 @@
include config.mk
SUBDIRS = lib9 yacc awk basename bc cal cat cleanname date dc du echo ed \
- factor fortune freq getflags grep hoc ls mk mkdir mtime primes rc read \
- sha1sum sed seq sleep sort tail tee test touch tr troff uniq
+ factor fortune fmt freq getflags grep hoc ls mk mkdir mtime primes \
+ rc read sha1sum sed seq sleep sort tail tee test touch tr troff uniq
all:
@echo 9base build options:
diff --git a/fmt/Makefile b/fmt/Makefile
@@ -0,0 +1,10 @@
+# fmt - fmt unix port from plan9
+# Depends on ../lib9
+
+TARG = fmt
+
+include ../std.mk
+
+pre-uninstall:
+
+post-install:
diff --git a/fmt/fmt.1 b/fmt/fmt.1
@@ -0,0 +1,90 @@
+.TH FMT 1
+.SH NAME
+fmt, htmlfmt \- simple text formatters
+.SH SYNOPSIS
+.B fmt
+[
+.I option ...
+]
+[
+.I file ...
+]
+.PP
+.B htmlfmt
+[
+.B -a
+] [
+.B -c
+.I charset
+] [
+.B -u
+.I url
+] [
+.I file ...
+]
+.SH DESCRIPTION
+.I Fmt
+copies the given
+.I files
+(standard input by default)
+to its standard output, filling and indenting lines.
+The options are
+.TP
+.BI -l " n
+Output line length is
+.IR n ,
+including indent (default 70).
+.TP
+.BI -w " n
+A synonym for
+.BR -l .
+.TP
+.BI -i " n
+Indent
+.I n
+spaces (default 0).
+.TP
+.BI -j
+Do not join short lines: only fold long lines.
+.PP
+Empty lines and initial white space in input lines are preserved.
+Empty lines are inserted between input files.
+.PP
+.I Fmt
+is idempotent: it leaves already formatted text unchanged.
+.PP
+.I Htmlfmt
+performs a similar service, but accepts as input text formatted with
+HTML tags.
+It accepts
+.IR fmt 's
+.B -l
+and
+.B -w
+flags and also:
+.TP
+.BI -a
+Normally
+.I htmlfmt
+suppresses the contents of form fields and anchors (URLs and image files); this flag
+causes it to print them, in square brackets.
+.TP
+.BI -c " charset
+Change the default character set from iso-8859-1 to
+.IR charset .
+This is the character set assumed if there isn't one
+specified by the html itself in a <meta> directive.
+.TP
+.BI -u " url
+Use
+.I url
+as the base URL for the document when displaying anchors; sets
+.BI -a .
+.SH SOURCE
+.B \*9/src/cmd/fmt.c
+.PP
+.B \*9/src/cmd/htmlfmt
+.SH BUGS
+.I Htmlfmt
+makes no attempt to render the two-dimensional geometry of tables;
+it just treats the table entries as plain, to-be-formatted text.
diff --git a/fmt/fmt.c b/fmt/fmt.c
@@ -0,0 +1,241 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <ctype.h>
+
+/*
+ * block up paragraphs, possibly with indentation
+ */
+
+int extraindent = 0; /* extra spaces added in front of every output line (-i) */
+int indent = 0; /* indent of the latest input line with non-blank text, before extra indent */
+int length = 70; /* how many columns per output line (-l, -w) */
+int join = 1; /* can lines be joined? cleared by -j */
+int maxtab = 8; /* tab stop width; main replaces it with $tabstop when that is positive */
+Biobuf bin; /* input buffer, initialized by main for standard input or each named file */
+Biobuf bout; /* output buffer, initialized by main on file descriptor 1 */
+
+/*
+ * One word of input text.  addword allocates each Word with extra room
+ * after the header so that text can hold the whole string.
+ */
+typedef struct Word Word;
+struct Word{
+ int bol; /* nonzero if this was the first word found on its input line */
+ int indent; /* indent of the line the word came from; -1 marks a blank line */
+ char text[1]; /* NUL-terminated word; storage continues past the struct */
+};
+
+void fmt(void);
+
+/*
+ * Print the command synopsis on standard error and exit with status
+ * "usage".  -w is listed because main accepts it as a synonym for -l.
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: %s [-j] [-i indent] [-l length] [-w length] [file...]\n", argv0);
+	exits("usage");
+}
+
+/*
+ * Parse the options, then format standard input (when no files are
+ * named) or each named file in turn onto standard output.  An empty
+ * line separates the output of consecutive files.  A file that cannot
+ * be opened is reported and skipped; the exit status is then "open".
+ */
+void
+main(int argc, char **argv)
+{
+	int i, f;
+	char *s, *err;
+
+	ARGBEGIN{
+	case 'i':
+		extraindent = atoi(EARGF(usage()));
+		break;
+	case 'j':
+		join = 0;
+		break;
+	case 'w':
+	case 'l':
+		length = atoi(EARGF(usage()));
+		break;
+	default:
+		usage();
+	}ARGEND
+
+	/* indent still holds its initial 0 here, so this rejects a non-positive length */
+	if(length <= indent){
+		fprint(2, "%s: line length<=indentation\n", argv0);
+		exits("length");
+	}
+
+	/* a positive $tabstop overrides the default tab width */
+	s=getenv("tabstop");
+	if(s!=nil && atoi(s)>0)
+		maxtab=atoi(s);
+	err = nil;
+	Binit(&bout, 1, OWRITE);
+	if(argc <= 0){
+		Binit(&bin, 0, OREAD);
+		fmt();
+	}else{
+		for(i=0; i<argc; i++){
+			f = open(argv[i], OREAD);
+			if(f < 0){
+				fprint(2, "%s: can't open %s: %r\n", argv0, argv[i]);
+				err = "open";
+			}else{
+				Binit(&bin, f, OREAD);
+				fmt();
+				Bterm(&bin);
+				/*
+				 * Bterm only closes descriptors of buffers made by
+				 * Bopen; bin was set up with Binit, so release f
+				 * here to avoid leaking one descriptor per file.
+				 */
+				close(f);
+				if(i != argc-1)
+					Bputc(&bout, '\n');
+			}
+		}
+	}
+	exits(err);
+}
+
+/*
+ * Measure the leading white space of *linep in columns: a space counts
+ * one column and a tab advances to the next multiple of maxtab.
+ * Returns the column of the first other character and leaves *linep
+ * alone.  If the line holds nothing but white space, *linep is pointed
+ * at an empty string and the current global indent is returned.
+ */
+int
+indentof(char **linep)
+{
+	char *p;
+	int col;
+
+	col = 0;
+	for(p = *linep; *p != '\0'; p++){
+		if(*p == ' ')
+			col++;
+		else if(*p == '\t')
+			col = (col/maxtab + 1)*maxtab;	/* jump to the next tab stop */
+		else
+			return col;
+	}
+
+	/* only white space: keep whatever indent was in force */
+	*linep = "";
+	return indent;
+}
+
+/*
+ * Append a new Word holding the first l bytes of s to the array words,
+ * which currently has *nwordp entries.  The word records the given
+ * indent and beginning-of-line flag.  Returns the (possibly moved)
+ * array and increments *nwordp.  Running out of memory is fatal.
+ */
+Word**
+addword(Word **words, int *nwordp, char *s, int l, int indent, int bol)
+{
+	Word *w;
+	Word **grown;
+
+	w = malloc(sizeof(Word)+l+1);
+	if(w == nil)
+		sysfatal("out of memory: %r");
+	memmove(w->text, s, l);
+	w->text[l] = '\0';
+	w->indent = indent;
+	w->bol = bol;
+
+	/* grow through a temporary so a failed realloc is noticed before use */
+	grown = realloc(words, (*nwordp+1)*sizeof(Word*));
+	if(grown == nil)
+		sysfatal("out of memory: %r");
+	grown[(*nwordp)++] = w;
+	return grown;
+}
+
+/*
+ * Split one input line into blank- or tab-separated words and append
+ * them to words.  The global indent is updated from the line's leading
+ * white space and stored in every word; the first word is flagged as
+ * beginning-of-line.  A line without any word contributes a single
+ * empty word with indent -1, which marks a blank line.
+ * Returns the updated array.
+ */
+Word**
+parseline(char *line, Word **words, int *nwordp)
+{
+	char *p, *end;
+	int first;
+
+	indent = indentof(&line);
+	first = 1;
+	p = line;
+	for(;;){
+		/* skip the separators in front of the next word */
+		while(*p==' ' || *p=='\t')
+			p++;
+		if(*p == '\0')
+			break;
+		/* find where the word stops */
+		end = p;
+		while(*end!='\0' && *end!=' ' && *end!='\t')
+			end++;
+		words = addword(words, nwordp, p, end-p, indent, first);
+		first = 0;
+		p = end;
+	}
+	/* no word at all: record the blank line */
+	if(first)
+		words = addword(words, nwordp, "", 0, -1, first);
+	return words;
+}
+
+/*
+ * Write w columns of indentation to bout, using a tab for every full
+ * maxtab columns and spaces for the remainder.
+ */
+void
+printindent(int w)
+{
+	for(; w >= maxtab; w -= maxtab)
+		Bputc(&bout, '\t');
+	for(; w > 0; w--)
+		Bputc(&bout, ' ');
+}
+
+/*
+ * Number of spaces to put after the word s: 2 when it ends in '.',
+ * '!' or '?', otherwise 1.  Words shorter than two bytes, and words
+ * shorter than four bytes that start with an upper-case letter, always
+ * get a single space.
+ */
+int
+nspaceafter(char *s)
+{
+	int len;
+	char last;
+
+	len = strlen(s);
+	if(len >= 2 && !(len < 4 && isupper((uchar)s[0]))){
+		last = s[len-1];
+		if(last == '.' || last == '!' || last == '?')
+			return 2;
+	}
+	return 1;
+}
+
+
+/*
+ * Write the nw words in w to bout as filled lines.  Each output line
+ * starts with extraindent plus the indent of its first word and takes
+ * as many following words as fit within extraindent+length columns;
+ * at least one word is always emitted.  A line also ends when the
+ * indent changes or, if joining is disabled, when the next word began
+ * an input line.  A word with indent -1 produces one empty output line.
+ */
+void
+printwords(Word **w, int nw)
+{
+	int i, j, col, nsp;
+
+	/* one output line per loop */
+	for(i=0; i<nw; ){
+		/*
+		 * A blank-line marker is printed as a bare newline and
+		 * nothing else.  Restarting the loop keeps consecutive
+		 * markers from being indented or joined like text words.
+		 */
+		if(w[i]->indent == -1){
+			Bputc(&bout, '\n');
+			i++;
+			continue;
+		}
+		/* emit leading indent */
+		col = extraindent+w[i]->indent;
+		printindent(col);
+		/* emit words until overflow; always emit at least one word */
+		for(;;){
+			Bprint(&bout, "%s", w[i]->text);
+			col += utflen(w[i]->text);
+			if(++i == nw)
+				break;	/* out of words */
+			if(w[i]->indent != w[i-1]->indent)
+				break;	/* indent change (includes blank lines) */
+			nsp = nspaceafter(w[i-1]->text);
+			if(col+nsp+utflen(w[i]->text) > extraindent+length)
+				break;	/* fold line */
+			if(!join && w[i]->bol)
+				break;	/* -j: keep input line breaks */
+			for(j=0; j<nsp; j++)
+				Bputc(&bout, ' ');	/* emit space; another word will follow */
+			col += nsp;
+		}
+		/* emit newline */
+		Bputc(&bout, '\n');
+	}
+}
+
+/*
+ * Format everything readable from bin: collect the words of every
+ * input line, print them as filled text, then release the words.
+ */
+void
+fmt(void)
+{
+	Word **words;
+	char *line;
+	int nwords;
+
+	words = nil;
+	nwords = 0;
+	for(;;){
+		line = Brdstr(&bin, '\n', 1);
+		if(line == nil)
+			break;
+		words = parseline(line, words, &nwords);
+		free(line);
+	}
+	printwords(words, nwords);
+	while(nwords > 0)
+		free(words[--nwords]);
+	free(words);
+}