swerc

anselm's simpler werc fork
git clone git://git.suckless.org/swerc
Log | Files | Refs | README

commit 43b7ac0606570b80f16420ea5ab217f2dc9c309a
parent 26dcb1bde3d31a9608487766925c8452a2ea0a4c
Author: anselm@garbe.us <unknown>
Date:   Sun, 12 Feb 2012 17:37:20 +0100

removed useless bin/aux, bin/contrib -- Uriel do you add every crap that someone contributes?
hell, werc needs a serious clean, and I think it can only survive as a fork now
Diffstat:
Dbin/aux/addwuser.rc | 33---------------------------------
Dbin/aux/bpst.rc | 64----------------------------------------------------------------
Dbin/aux/gensitemaptxt.rc | 14--------------
Dbin/aux/runtsts.rc | 16----------------
Dbin/contrib/hgweb.config | 12------------
Dbin/contrib/hgwebdir.cgi | 47-----------------------------------------------
Dbin/contrib/markdown | 0
Dbin/contrib/markdown.pl | 1447-------------------------------------------------------------------------------
Dbin/contrib/md2html.awk | 427-------------------------------------------------------------------------------
Dbin/contrib/urldecode.awk | 39---------------------------------------
Dbin/contrib/urlencode.awk | 126-------------------------------------------------------------------------------
Dbin/contrib/webserver.rc | 30------------------------------
Metc/initrc | 1-
13 files changed, 0 insertions(+), 2256 deletions(-)

diff --git a/bin/aux/addwuser.rc b/bin/aux/addwuser.rc @@ -1,33 +0,0 @@ -#!/usr/bin/env rc - -if(! ~ $#werc_root 0) - cd $werc_root - -fn usage { - if(! ~ $#* 0) - echo $0: $* >[1=2] - echo 'Usage:' $0 'user_name user_password [groups ...]' >[1=2] - exit usage -} - -if(! test -d etc/users/) - usage 'Run for root of werc installation or set $werc_root' - -user_name=$1 -shift -user_pass=$1 -shift -user_groups=$* - -if(~ $"user_name '' || ~ $"user_pass '') - usage - -mkdir etc/users/$user_name -echo $user_pass > etc/users/$user_name/password - -if(! ~ $#user_groups 0) - for(g in $user_groups) { - mkdir -p etc/users/$g - echo $user_name >> etc/users/$g/members - } - diff --git a/bin/aux/bpst.rc b/bin/aux/bpst.rc @@ -1,64 +0,0 @@ -#!/usr/bin/env rc - -path=( $PLAN9/bin $path ) -base=. - -if(~ $#user 0) - user=`{whoami} - -file=(); title=(); -bloguser=$user -while(! ~ $#* 0) { - switch($1) { - case -u - base=/gsoc/www/people/$user/blog/ - case -b - shift - base=$1 - case -f - shift - file=$1 - } - shift -} - -if(~ $"EDITOR '') - EDITOR=vi - -if(~ $#file 0 || ! test -f $file) { - file=/tmp/blogtmp.$pid - rm $file >[2]/dev/null - touch $file -} - -$EDITOR $file -ispell $file -rm $file.bak >[2]/dev/null - -fn mkbpost { - umask 002 # Let group write - bptext=$1 - if(! ~ $#2 0) - bpid=`{echo -n '-'^$"bpid | sed 's/'$forbidden_uri_chars'+/_/g; 1q'} - d=`{/bin/date +%F|sed 's,-,/,g'} - - ddir=$blagh_root^$d^'/' - n=`{ls $ddir >[2]/dev/null |wc -l} - - mkdir -p $ddir/$"n^$"bpid/ - { - # TODO: Enable metadata - #echo '* Posted:' `{date} - #if(! ~ $#logged_user 0) - # echo '* Author: '$logged_user - cat $bptext - }> $ddir/$"n^$"bpid/index.md -} - -forbidden_uri_chars='[^a-zA-Z0-9_+\-\/\.]' -blagh_root=$base - -if(test -s $file) - mkbpost $file -if not - echo Empty file! diff --git a/bin/aux/gensitemaptxt.rc b/bin/aux/gensitemaptxt.rc @@ -1,14 +0,0 @@ -#!/usr/bin/env rc -# DEPRECATED: sitemap.tpl now generates and updates a sitemap.txt when requested, and is also more smart than this simplistic script. - -for(d in sites/*/) { -echo $d -9 du -a $d | awk '/\.(md|html)$/ { print $2 }; {}' | 9 sed -e 's/\.(md|html)$//' -e 's,/index$,/,' -e 's,^sites/,http://,' > $d/sitemap.txt - -if(! test -f $d/robots.txt) { - echo generating missing robots.txt for $d - echo $d|sed 's,sites/,Sitemap: http://,; s/$/sitemap.txt/;' > $d/robots.txt - cat $d/robots.txt -} - -} diff --git a/bin/aux/runtsts.rc b/bin/aux/runtsts.rc @@ -1,16 +0,0 @@ -#!/usr/bin/env rc - -tstdom='http://test.cat-v.org' - -cd sites/tst.cat-v.org - -tstfiles=`{du -a |awk '/\.tst$/ { print $2 }; {} ' | sed 's/^\.//; s/\.tst$//'} - -for(f in $tstfiles) { - ifs=' -' { tsts=`{cat ./$f.tst} } - - for(t in $tsts) { - echo tst $t - } -} diff --git a/bin/contrib/hgweb.config b/bin/contrib/hgweb.config @@ -1,12 +0,0 @@ -[web] -style = gitweb -allow_archive = bz2 - -#[paths] -#w9 = /gsoc/hg/w9/ - -[collections] -#allow_archive = bz2 zip -/gsoc/hg = /gsoc/hg/ -#/var/hg = /var/hg/ - diff --git a/bin/contrib/hgwebdir.cgi b/bin/contrib/hgwebdir.cgi @@ -1,47 +0,0 @@ -#!/usr/bin/env python -# -# An example CGI script to export multiple hgweb repos, edit as necessary - -# send python tracebacks to the browser if an error occurs: -import cgitb -cgitb.enable() - -# adjust python path if not a system-wide install: -#import sys -#sys.path.insert(0, "/path/to/python/lib") - -# If you'd like to serve pages with UTF-8 instead of your default -# locale charset, you can do so by uncommenting the following lines. -# Note that this will cause your .hgrc files to be interpreted in -# UTF-8 and all your repo files to be displayed using UTF-8. -# -#import os -#os.environ["HGENCODING"] = "UTF-8" - -from mercurial.hgweb.hgwebdir_mod import hgwebdir -from mercurial.hgweb.request import wsgiapplication -import mercurial.hgweb.wsgicgi as wsgicgi - -# The config file looks like this. You can have paths to individual -# repos, collections of repos in a directory tree, or both. -# -# [paths] -# virtual/path = /real/path -# virtual/path = /real/path -# -# [collections] -# /prefix/to/strip/off = /root/of/tree/full/of/repos -# -# collections example: say directory tree /foo contains repos /foo/bar, -# /foo/quux/baz. Give this config section: -# [collections] -# /foo = /foo -# Then repos will list as bar and quux/baz. -# -# Alternatively you can pass a list of ('virtual/path', '/real/path') tuples -# or use a dictionary with entries like 'virtual/path': '/real/path' - -def make_web_app(): - return hgwebdir("hgweb.config") - -wsgicgi.launch(wsgiapplication(make_web_app)) diff --git a/bin/contrib/markdown b/bin/contrib/markdown Binary files differ. diff --git a/bin/contrib/markdown.pl b/bin/contrib/markdown.pl @@ -1,1447 +0,0 @@ -#!/usr/bin/env perl -# -# Markdown -- A text-to-HTML conversion tool for web writers -# -# Copyright (c) 2004 John Gruber -# <http://daringfireball.net/projects/markdown/> -# -package Markdown; -require 5.006_000; -use strict; -use warnings; - -use Digest::MD5 qw(md5_hex); -use vars qw($VERSION); -$VERSION = '1.0.1'; -# Tue 14 Dec 2004 - -## Disabled; causes problems under Perl 5.6.1: -# use utf8; -# binmode( STDOUT, ":utf8" ); # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html - - -# -# Global default settings: -# -my $g_empty_element_suffix = " />"; # Change to ">" for HTML output -my $g_tab_width = 4; - - -# -# Globals: -# - -# Regex to match balanced [brackets]. See Friedl's -# "Mastering Regular Expressions", 2nd Ed., pp. 328-331. -my $g_nested_brackets; -$g_nested_brackets = qr{ - (?> # Atomic matching - [^\[\]]+ # Anything other than brackets - | - \[ - (??{ $g_nested_brackets }) # Recursive set of nested brackets - \] - )* -}x; - - -# Table of hash values for escaped characters: -my %g_escape_table; -foreach my $char (split //, '\\`*_{}[]()>#+-.!') { - $g_escape_table{$char} = md5_hex($char); -} - - -# Global hashes, used by various utility routines -my %g_urls; -my %g_titles; -my %g_html_blocks; - -# Used to track when we're inside an ordered or unordered list -# (see _ProcessListItems() for details): -my $g_list_level = 0; - - -#### Blosxom plug-in interface ########################################## - -# Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine -# which posts Markdown should process, using a "meta-markup: markdown" -# header. If it's set to 0 (the default), Markdown will process all -# entries. -my $g_blosxom_use_meta = 0; - -sub start { 1; } -sub story { - my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_; - - if ( (! $g_blosxom_use_meta) or - (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i)) - ){ - $$body_ref = Markdown($$body_ref); - } - 1; -} - - -#### Movable Type plug-in interface ##################################### -eval {require MT}; # Test to see if we're running in MT. -unless ($@) { - require MT; - import MT; - require MT::Template::Context; - import MT::Template::Context; - - eval {require MT::Plugin}; # Test to see if we're running >= MT 3.0. - unless ($@) { - require MT::Plugin; - import MT::Plugin; - my $plugin = new MT::Plugin({ - name => "Markdown", - description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)", - doc_link => 'http://daringfireball.net/projects/markdown/' - }); - MT->add_plugin( $plugin ); - } - - MT::Template::Context->add_container_tag(MarkdownOptions => sub { - my $ctx = shift; - my $args = shift; - my $builder = $ctx->stash('builder'); - my $tokens = $ctx->stash('tokens'); - - if (defined ($args->{'output'}) ) { - $ctx->stash('markdown_output', lc $args->{'output'}); - } - - defined (my $str = $builder->build($ctx, $tokens) ) - or return $ctx->error($builder->errstr); - $str; # return value - }); - - MT->add_text_filter('markdown' => { - label => 'Markdown', - docs => 'http://daringfireball.net/projects/markdown/', - on_format => sub { - my $text = shift; - my $ctx = shift; - my $raw = 0; - if (defined $ctx) { - my $output = $ctx->stash('markdown_output'); - if (defined $output && $output =~ m/^html/i) { - $g_empty_element_suffix = ">"; - $ctx->stash('markdown_output', ''); - } - elsif (defined $output && $output eq 'raw') { - $raw = 1; - $ctx->stash('markdown_output', ''); - } - else { - $raw = 0; - $g_empty_element_suffix = " />"; - } - } - $text = $raw ? $text : Markdown($text); - $text; - }, - }); - - # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter: - my $smartypants; - - { - no warnings "once"; - $smartypants = $MT::Template::Context::Global_filters{'smarty_pants'}; - } - - if ($smartypants) { - MT->add_text_filter('markdown_with_smartypants' => { - label => 'Markdown With SmartyPants', - docs => 'http://daringfireball.net/projects/markdown/', - on_format => sub { - my $text = shift; - my $ctx = shift; - if (defined $ctx) { - my $output = $ctx->stash('markdown_output'); - if (defined $output && $output eq 'html') { - $g_empty_element_suffix = ">"; - } - else { - $g_empty_element_suffix = " />"; - } - } - $text = Markdown($text); - $text = $smartypants->($text, '1'); - }, - }); - } -} -else { -#### BBEdit/command-line text filter interface ########################## -# Needs to be hidden from MT (and Blosxom when running in static mode). - - # We're only using $blosxom::version once; tell Perl not to warn us: - no warnings 'once'; - unless ( defined($blosxom::version) ) { - use warnings; - - #### Check for command-line switches: ################# - my %cli_opts; - use Getopt::Long; - Getopt::Long::Configure('pass_through'); - GetOptions(\%cli_opts, - 'version', - 'shortversion', - 'html4tags', - ); - if ($cli_opts{'version'}) { # Version info - print "\nThis is Markdown, version $VERSION.\n"; - print "Copyright 2004 John Gruber\n"; - print "http://daringfireball.net/projects/markdown/\n\n"; - exit 0; - } - if ($cli_opts{'shortversion'}) { # Just the version number string. - print $VERSION; - exit 0; - } - if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML - $g_empty_element_suffix = ">"; - } - - - #### Process incoming text: ########################### - my $text; - { - local $/; # Slurp the whole file - $text = <>; - } - print Markdown($text); - } -} - - - -sub Markdown { -# -# Main function. The order in which other subs are called here is -# essential. Link and image substitutions need to happen before -# _EscapeSpecialChars(), so that any *'s or _'s in the <a> -# and <img> tags get encoded. -# - my $text = shift; - - # Clear the global hashes. If we don't clear these, you get conflicts - # from other articles when generating a page which contains more than - # one article (e.g. an index page that shows the N most recent - # articles): - %g_urls = (); - %g_titles = (); - %g_html_blocks = (); - - - # Standardize line endings: - $text =~ s{\r\n}{\n}g; # DOS to Unix - $text =~ s{\r}{\n}g; # Mac to Unix - - # Make sure $text ends with a couple of newlines: - $text .= "\n\n"; - - # Convert all tabs to spaces. - $text = _Detab($text); - - # Strip any lines consisting only of spaces and tabs. - # This makes subsequent regexen easier to write, because we can - # match consecutive blank lines with /\n+/ instead of something - # contorted like /[ \t]*\n+/ . - $text =~ s/^[ \t]+$//mg; - - # Turn block-level HTML blocks into hash entries - $text = _HashHTMLBlocks($text); - - # Strip link definitions, store in hashes. - $text = _StripLinkDefinitions($text); - - $text = _RunBlockGamut($text); - - $text = _UnescapeSpecialChars($text); - - return $text . "\n"; -} - - -sub _StripLinkDefinitions { -# -# Strips link definitions from text, stores the URLs and titles in -# hash references. -# - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - # Link defs are in the form: ^[id]: url "optional title" - while ($text =~ s{ - ^[ ]{0,$less_than_tab}\[(.+)\]: # id = $1 - [ \t]* - \n? # maybe *one* newline - [ \t]* - <?(\S+?)>? # url = $2 - [ \t]* - \n? # maybe one newline - [ \t]* - (?: - (?<=\s) # lookbehind for whitespace - ["(] - (.+?) # title = $3 - [")] - [ \t]* - )? # title is optional - (?:\n+|\Z) - } - {}mx) { - $g_urls{lc $1} = _EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive - if ($3) { - $g_titles{lc $1} = $3; - $g_titles{lc $1} =~ s/"/&quot;/g; - } - } - - return $text; -} - - -sub _HashHTMLBlocks { - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - # Hashify HTML blocks: - # We only want to do this for block-level HTML tags, such as headers, - # lists, and tables. That's because we still want to wrap <p>s around - # "paragraphs" that are wrapped in non-block-level tags, such as anchors, - # phrase emphasis, and spans. The list of tags we're looking for is - # hard-coded: - my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/; - my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/; - - # First, look for nested blocks, e.g.: - # <div> - # <div> - # tags for inner block must be indented. - # </div> - # </div> - # - # The outermost tags must start at the left margin for this to match, and - # the inner nested divs must be indented. - # We need to do this before the next, more liberal match, because the next - # match will start at the first `<div>` and stop at the first `</div>`. - $text =~ s{ - ( # save in $1 - ^ # start of line (with /m) - <($block_tags_a) # start tag = $2 - \b # word break - (.*\n)*? # any number of lines, minimally matching - </\2> # the matching end tag - [ \t]* # trailing spaces/tabs - (?=\n+|\Z) # followed by a newline or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egmx; - - - # - # Now match more liberally, simply from `\n<tag>` to `</tag>\n` - # - $text =~ s{ - ( # save in $1 - ^ # start of line (with /m) - <($block_tags_b) # start tag = $2 - \b # word break - (.*\n)*? # any number of lines, minimally matching - .*</\2> # the matching end tag - [ \t]* # trailing spaces/tabs - (?=\n+|\Z) # followed by a newline or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egmx; - # Special case just for <hr />. It was easier to make a special case than - # to make the other regex more complicated. - $text =~ s{ - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 - [ ]{0,$less_than_tab} - <(hr) # start tag = $2 - \b # word break - ([^<>])*? # - /?> # the matching end tag - [ \t]* - (?=\n{2,}|\Z) # followed by a blank line or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egx; - - # Special case for standalone HTML comments: - $text =~ s{ - (?: - (?<=\n\n) # Starting after a blank line - | # or - \A\n? # the beginning of the doc - ) - ( # save in $1 - [ ]{0,$less_than_tab} - (?s: - <! - (--.*?--\s*)+ - > - ) - [ \t]* - (?=\n{2,}|\Z) # followed by a blank line or end of document - ) - }{ - my $key = md5_hex($1); - $g_html_blocks{$key} = $1; - "\n\n" . $key . "\n\n"; - }egx; - - - return $text; -} - - -sub _RunBlockGamut { -# -# These are all the transformations that form block-level -# tags like paragraphs, headers, and list items. -# - my $text = shift; - - $text = _DoHeaders($text); - - # Do Horizontal Rules: - $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx; - $text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx; - $text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx; - - $text = _DoLists($text); - - $text = _DoCodeBlocks($text); - - $text = _DoBlockQuotes($text); - - # We already ran _HashHTMLBlocks() before, in Markdown(), but that - # was to escape raw HTML in the original Markdown source. This time, - # we're escaping the markup we've just created, so that we don't wrap - # <p> tags around block-level tags. - $text = _HashHTMLBlocks($text); - - $text = _FormParagraphs($text); - - return $text; -} - - -sub _RunSpanGamut { -# -# These are all the transformations that occur *within* block-level -# tags like paragraphs, headers, and list items. -# - my $text = shift; - - $text = _DoCodeSpans($text); - - $text = _EscapeSpecialChars($text); - - # Process anchor and image tags. Images must come first, - # because ![foo][f] looks like an anchor. - $text = _DoImages($text); - $text = _DoAnchors($text); - - # Make links out of things like `<http://example.com/>` - # Must come after _DoAnchors(), because you can use < and > - # delimiters in inline links like [this](<url>). - $text = _DoAutoLinks($text); - - $text = _EncodeAmpsAndAngles($text); - - $text = _DoItalicsAndBold($text); - - # Do hard breaks: - $text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g; - - return $text; -} - - -sub _EscapeSpecialChars { - my $text = shift; - my $tokens ||= _TokenizeHTML($text); - - $text = ''; # rebuild $text from the tokens -# my $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags. -# my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!; - - foreach my $cur_token (@$tokens) { - if ($cur_token->[0] eq "tag") { - # Within tags, encode * and _ so they don't conflict - # with their use in Markdown for italics and strong. - # We're replacing each such character with its - # corresponding MD5 checksum value; this is likely - # overkill, but it should prevent us from colliding - # with the escape values by accident. - $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gx; - $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gx; - $text .= $cur_token->[1]; - } else { - my $t = $cur_token->[1]; - $t = _EncodeBackslashEscapes($t); - $text .= $t; - } - } - return $text; -} - - -sub _DoAnchors { -# -# Turn Markdown link shortcuts into XHTML <a> tags. -# - my $text = shift; - - # - # First, handle reference-style links: [link text] [id] - # - $text =~ s{ - ( # wrap whole match in $1 - \[ - ($g_nested_brackets) # link text = $2 - \] - - [ ]? # one optional space - (?:\n[ ]*)? # one optional newline followed by spaces - - \[ - (.*?) # id = $3 - \] - ) - }{ - my $result; - my $whole_match = $1; - my $link_text = $2; - my $link_id = lc $3; - - if ($link_id eq "") { - $link_id = lc $link_text; # for shortcut links like [this][]. - } - - if (defined $g_urls{$link_id}) { - my $url = $g_urls{$link_id}; - $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid - $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. - $result = "<a href=\"$url\""; - if ( defined $g_titles{$link_id} ) { - my $title = $g_titles{$link_id}; - $title =~ s! \* !$g_escape_table{'*'}!gx; - $title =~ s! _ !$g_escape_table{'_'}!gx; - $result .= " title=\"$title\""; - } - $result .= ">$link_text</a>"; - } - else { - $result = $whole_match; - } - $result; - }xsge; - - # - # Next, inline-style links: [link text](url "optional title") - # - $text =~ s{ - ( # wrap whole match in $1 - \[ - ($g_nested_brackets) # link text = $2 - \] - \( # literal paren - [ \t]* - <?(.*?)>? # href = $3 - [ \t]* - ( # $4 - (['"]) # quote char = $5 - (.*?) # Title = $6 - \5 # matching quote - )? # title is optional - \) - ) - }{ - my $result; - my $whole_match = $1; - my $link_text = $2; - my $url = $3; - my $title = $6; - - $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid - $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. - $result = "<a href=\"$url\""; - - if (defined $title) { - $title =~ s/"/&quot;/g; - $title =~ s! \* !$g_escape_table{'*'}!gx; - $title =~ s! _ !$g_escape_table{'_'}!gx; - $result .= " title=\"$title\""; - } - - $result .= ">$link_text</a>"; - - $result; - }xsge; - - return $text; -} - - -sub _DoImages { -# -# Turn Markdown image shortcuts into <img> tags. -# - my $text = shift; - - # - # First, handle reference-style labeled images: ![alt text][id] - # - $text =~ s{ - ( # wrap whole match in $1 - !\[ - (.*?) # alt text = $2 - \] - - [ ]? # one optional space - (?:\n[ ]*)? # one optional newline followed by spaces - - \[ - (.*?) # id = $3 - \] - - ) - }{ - my $result; - my $whole_match = $1; - my $alt_text = $2; - my $link_id = lc $3; - - if ($link_id eq "") { - $link_id = lc $alt_text; # for shortcut links like ![this][]. - } - - $alt_text =~ s/"/&quot;/g; - if (defined $g_urls{$link_id}) { - my $url = $g_urls{$link_id}; - $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid - $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. - $result = "<img src=\"$url\" alt=\"$alt_text\""; - if (defined $g_titles{$link_id}) { - my $title = $g_titles{$link_id}; - $title =~ s! \* !$g_escape_table{'*'}!gx; - $title =~ s! _ !$g_escape_table{'_'}!gx; - $result .= " title=\"$title\""; - } - $result .= $g_empty_element_suffix; - } - else { - # If there's no such link ID, leave intact: - $result = $whole_match; - } - - $result; - }xsge; - - # - # Next, handle inline images: ![alt text](url "optional title") - # Don't forget: encode * and _ - - $text =~ s{ - ( # wrap whole match in $1 - !\[ - (.*?) # alt text = $2 - \] - \( # literal paren - [ \t]* - <?(\S+?)>? # src url = $3 - [ \t]* - ( # $4 - (['"]) # quote char = $5 - (.*?) # title = $6 - \5 # matching quote - [ \t]* - )? # title is optional - \) - ) - }{ - my $result; - my $whole_match = $1; - my $alt_text = $2; - my $url = $3; - my $title = ''; - if (defined($6)) { - $title = $6; - } - - $alt_text =~ s/"/&quot;/g; - $title =~ s/"/&quot;/g; - $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid - $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold. - $result = "<img src=\"$url\" alt=\"$alt_text\""; - if (defined $title) { - $title =~ s! \* !$g_escape_table{'*'}!gx; - $title =~ s! _ !$g_escape_table{'_'}!gx; - $result .= " title=\"$title\""; - } - $result .= $g_empty_element_suffix; - - $result; - }xsge; - - return $text; -} - - -sub _DoHeaders { - my $text = shift; - - # Setext-style headers: - # Header 1 - # ======== - # - # Header 2 - # -------- - # - $text =~ s{ ^(.+)[ \t]*\n=+[ \t]*\n+ }{ - "<h1>" . _RunSpanGamut($1) . "</h1>\n\n"; - }egmx; - - $text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{ - "<h2>" . _RunSpanGamut($1) . "</h2>\n\n"; - }egmx; - - - # atx-style headers: - # # Header 1 - # ## Header 2 - # ## Header 2 with closing hashes ## - # ... - # ###### Header 6 - # - $text =~ s{ - ^(\#{1,6}) # $1 = string of #'s - [ \t]* - (.+?) # $2 = Header text - [ \t]* - \#* # optional closing #'s (not counted) - \n+ - }{ - my $h_level = length($1); - "<h$h_level>" . _RunSpanGamut($2) . "</h$h_level>\n\n"; - }egmx; - - return $text; -} - - -sub _DoLists { -# -# Form HTML ordered (numbered) and unordered (bulleted) lists. -# - my $text = shift; - my $less_than_tab = $g_tab_width - 1; - - # Re-usable patterns to match list item bullets and number markers: - my $marker_ul = qr/[*+-]/; - my $marker_ol = qr/\d+[.]/; - my $marker_any = qr/(?:$marker_ul|$marker_ol)/; - - # Re-usable pattern to match any entirel ul or ol list: - my $whole_list = qr{ - ( # $1 = whole list - ( # $2 - [ ]{0,$less_than_tab} - (${marker_any}) # $3 = first list item marker - [ \t]+ - ) - (?s:.+?) - ( # $4 - \z - | - \n{2,} - (?=\S) - (?! # Negative lookahead for another list item marker - [ \t]* - ${marker_any}[ \t]+ - ) - ) - ) - }mx; - - # We use a different prefix before nested lists than top-level lists. - # See extended comment in _ProcessListItems(). - # - # Note: There's a bit of duplication here. My original implementation - # created a scalar regex pattern as the conditional result of the test on - # $g_list_level, and then only ran the $text =~ s{...}{...}egmx - # substitution once, using the scalar as the pattern. This worked, - # everywhere except when running under MT on my hosting account at Pair - # Networks. There, this caused all rebuilds to be killed by the reaper (or - # perhaps they crashed, but that seems incredibly unlikely given that the - # same script on the same server ran fine *except* under MT. I've spent - # more time trying to figure out why this is happening than I'd like to - # admit. My only guess, backed up by the fact that this workaround works, - # is that Perl optimizes the substition when it can figure out that the - # pattern will never change, and when this optimization isn't on, we run - # afoul of the reaper. Thus, the slightly redundant code to that uses two - # static s/// patterns rather than one conditional pattern. - - if ($g_list_level) { - $text =~ s{ - ^ - $whole_list - }{ - my $list = $1; - my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; - # Turn double returns into triple returns, so that we can make a - # paragraph for the last item in a list, if necessary: - $list =~ s/\n{2,}/\n\n\n/g; - my $result = _ProcessListItems($list, $marker_any); - $result = "<$list_type>\n" . $result . "</$list_type>\n"; - $result; - }egmx; - } - else { - $text =~ s{ - (?:(?<=\n\n)|\A\n?) - $whole_list - }{ - my $list = $1; - my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol"; - # Turn double returns into triple returns, so that we can make a - # paragraph for the last item in a list, if necessary: - $list =~ s/\n{2,}/\n\n\n/g; - my $result = _ProcessListItems($list, $marker_any); - $result = "<$list_type>\n" . $result . "</$list_type>\n"; - $result; - }egmx; - } - - - return $text; -} - - -sub _ProcessListItems { -# -# Process the contents of a single ordered or unordered list, splitting it -# into individual list items. -# - - my $list_str = shift; - my $marker_any = shift; - - - # The $g_list_level global keeps track of when we're inside a list. - # Each time we enter a list, we increment it; when we leave a list, - # we decrement. If it's zero, we're not in a list anymore. - # - # We do this because when we're not inside a list, we want to treat - # something like this: - # - # I recommend upgrading to version - # 8. Oops, now this line is treated - # as a sub-list. - # - # As a single paragraph, despite the fact that the second line starts - # with a digit-period-space sequence. - # - # Whereas when we're inside a list (or sub-list), that line will be - # treated as the start of a sub-list. What a kludge, huh? This is - # an aspect of Markdown's syntax that's hard to parse perfectly - # without resorting to mind-reading. Perhaps the solution is to - # change the syntax rules such that sub-lists must start with a - # starting cardinal number; e.g. "1." or "a.". - - $g_list_level++; - - # trim trailing blank lines: - $list_str =~ s/\n{2,}\z/\n/; - - - $list_str =~ s{ - (\n)? # leading line = $1 - (^[ \t]*) # leading whitespace = $2 - ($marker_any) [ \t]+ # list marker = $3 - ((?s:.+?) # list item text = $4 - (\n{1,2})) - (?= \n* (\z | \2 ($marker_any) [ \t]+)) - }{ - my $item = $4; - my $leading_line = $1; - my $leading_space = $2; - - if ($leading_line or ($item =~ m/\n{2,}/)) { - $item = _RunBlockGamut(_Outdent($item)); - } - else { - # Recursion for sub-lists: - $item = _DoLists(_Outdent($item)); - chomp $item; - $item = _RunSpanGamut($item); - } - - "<li>" . $item . "</li>\n"; - }egmx; - - $g_list_level--; - return $list_str; -} - - - -sub _DoCodeBlocks { -# -# Process Markdown `<pre><code>` blocks. -# - - my $text = shift; - - $text =~ s{ - (?:\n\n|\A) - ( # $1 = the code block -- one or more lines, starting with a space/tab - (?: - (?:[ ]{$g_tab_width} | \t) # Lines must start with a tab or a tab-width of spaces - .*\n+ - )+ - ) - ((?=^[ ]{0,$g_tab_width}\S)|\Z) # Lookahead for non-space at line-start, or end of doc - }{ - my $codeblock = $1; - my $result; # return value - - $codeblock = _EncodeCode(_Outdent($codeblock)); - $codeblock = _Detab($codeblock); - $codeblock =~ s/\A\n+//; # trim leading newlines - $codeblock =~ s/\s+\z//; # trim trailing whitespace - - $result = "\n\n<pre><code>" . $codeblock . "\n</code></pre>\n\n"; - - $result; - }egmx; - - return $text; -} - - -sub _DoCodeSpans { -# -# * Backtick quotes are used for <code></code> spans. -# -# * You can use multiple backticks as the delimiters if you want to -# include literal backticks in the code span. So, this input: -# -# Just type ``foo `bar` baz`` at the prompt. -# -# Will translate to: -# -# <p>Just type <code>foo `bar` baz</code> at the prompt.</p> -# -# There's no arbitrary limit to the number of backticks you -# can use as delimters. If you need three consecutive backticks -# in your code, use four for delimiters, etc. -# -# * You can use spaces to get literal backticks at the edges: -# -# ... type `` `bar` `` ... -# -# Turns to: -# -# ... type <code>`bar`</code> ... -# - - my $text = shift; - - $text =~ s@ - (`+) # $1 = Opening run of ` - (.+?) # $2 = The code block - (?<!`) - \1 # Matching closer - (?!`) - @ - my $c = "$2"; - $c =~ s/^[ \t]*//g; # leading whitespace - $c =~ s/[ \t]*$//g; # trailing whitespace - $c = _EncodeCode($c); - "<code>$c</code>"; - @egsx; - - return $text; -} - - -sub _EncodeCode { -# -# Encode/escape certain characters inside Markdown code runs. -# The point is that in code, these characters are literals, -# and lose their special Markdown meanings. -# - local $_ = shift; - - # Encode all ampersands; HTML entities are not - # entities within a Markdown code span. - s/&/&amp;/g; - - # Encode $'s, but only if we're running under Blosxom. - # (Blosxom interpolates Perl variables in article bodies.) - { - no warnings 'once'; - if (defined($blosxom::version)) { - s/\$/&#036;/g; - } - } - - - # Do the angle bracket song and dance: - s! < !&lt;!gx; - s! > !&gt;!gx; - - # Now, escape characters that are magic in Markdown: - s! \* !$g_escape_table{'*'}!gx; - s! _ !$g_escape_table{'_'}!gx; - s! { !$g_escape_table{'{'}!gx; - s! } !$g_escape_table{'}'}!gx; - s! \[ !$g_escape_table{'['}!gx; - s! \] !$g_escape_table{']'}!gx; - s! \\ !$g_escape_table{'\\'}!gx; - - return $_; -} - - -sub _DoItalicsAndBold { - my $text = shift; - - # <strong> must go first: - $text =~ s{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 } - {<strong>$2</strong>}gsx; - - $text =~ s{ (\*|_) (?=\S) (.+?) (?<=\S) \1 } - {<em>$2</em>}gsx; - - return $text; -} - - -sub _DoBlockQuotes { - my $text = shift; - - $text =~ s{ - ( # Wrap whole match in $1 - ( - ^[ \t]*>[ \t]? # '>' at the start of a line - .+\n # rest of the first line - (.+\n)* # subsequent consecutive lines - \n* # blanks - )+ - ) - }{ - my $bq = $1; - $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting - $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines - $bq = _RunBlockGamut($bq); # recurse - - $bq =~ s/^/ /g; - # These leading spaces screw with <pre> content, so we need to fix that: - $bq =~ s{ - (\s*<pre>.+?</pre>) - }{ - my $pre = $1; - $pre =~ s/^ //mg; - $pre; - }egsx; - - "<blockquote>\n$bq\n</blockquote>\n\n"; - }egmx; - - - return $text; -} - - -sub _FormParagraphs { -# -# Params: -# $text - string to process with html <p> tags -# - my $text = shift; - - # Strip leading and trailing lines: - $text =~ s/\A\n+//; - $text =~ s/\n+\z//; - - my @grafs = split(/\n{2,}/, $text); - - # - # Wrap <p> tags. - # - foreach (@grafs) { - unless (defined( $g_html_blocks{$_} )) { - $_ = _RunSpanGamut($_); - s/^([ \t]*)/<p>/; - $_ .= "</p>"; - } - } - - # - # Unhashify HTML blocks - # - foreach (@grafs) { - if (defined( $g_html_blocks{$_} )) { - $_ = $g_html_blocks{$_}; - } - } - - return join "\n\n", @grafs; -} - - -sub _EncodeAmpsAndAngles { -# Smart processing for ampersands and angle brackets that need to be encoded. - - my $text = shift; - - # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: - # http://bumppo.net/projects/amputator/ - $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&amp;/g; - - # Encode naked <'s - $text =~ s{<(?![a-z/?\$!])}{&lt;}gi; - - return $text; -} - - -sub _EncodeBackslashEscapes { -# -# Parameter: String. -# Returns: The string, with after processing the following backslash -# escape sequences. -# - local $_ = shift; - - s! \\\\ !$g_escape_table{'\\'}!gx; # Must process escaped backslashes first. - s! \\` !$g_escape_table{'`'}!gx; - s! \\\* !$g_escape_table{'*'}!gx; - s! \\_ !$g_escape_table{'_'}!gx; - s! \\\{ !$g_escape_table{'{'}!gx; - s! \\\} !$g_escape_table{'}'}!gx; - s! \\\[ !$g_escape_table{'['}!gx; - s! \\\] !$g_escape_table{']'}!gx; - s! \\\( !$g_escape_table{'('}!gx; - s! \\\) !$g_escape_table{')'}!gx; - s! \\> !$g_escape_table{'>'}!gx; - s! \\\# !$g_escape_table{'#'}!gx; - s! \\\+ !$g_escape_table{'+'}!gx; - s! \\\- !$g_escape_table{'-'}!gx; - s! \\\. !$g_escape_table{'.'}!gx; - s{ \\! }{$g_escape_table{'!'}}gx; - - return $_; -} - - -sub _DoAutoLinks { - my $text = shift; - - $text =~ s{<((https?|ftp):[^'">\s]+)>}{<a href="$1">$1</a>}gi; - - # Email addresses: <address@domain.foo> - $text =~ s{ - < - (?:mailto:)? - ( - [-.\w]+ - \@ - [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ - ) - > - }{ - _EncodeEmailAddress( _UnescapeSpecialChars($1) ); - }egix; - - return $text; -} - - -sub _EncodeEmailAddress { -# -# Input: an email address, e.g. "foo@example.com" -# -# Output: the email address as a mailto link, with each character -# of the address encoded as either a decimal or hex entity, in -# the hopes of foiling most address harvesting spam bots. E.g.: -# -# <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101; -# x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111; -# &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a> -# -# Based on a filter by Matthew Wickline, posted to the BBEdit-Talk -# mailing list: <http://tinyurl.com/yu7ue> -# - - my $addr = shift; - - srand; - my @encode = ( - sub { '&#' . ord(shift) . ';' }, - sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' }, - sub { shift }, - ); - - $addr = "mailto:" . $addr; - - $addr =~ s{(.)}{ - my $char = $1; - if ( $char eq '@' ) { - # this *must* be encoded. I insist. - $char = $encode[int rand 1]->($char); - } elsif ( $char ne ':' ) { - # leave ':' alone (to spot mailto: later) - my $r = rand; - # roughly 10% raw, 45% hex, 45% dec - $char = ( - $r > .9 ? $encode[2]->($char) : - $r < .45 ? $encode[1]->($char) : - $encode[0]->($char) - ); - } - $char; - }gex; - - $addr = qq{<a href="$addr">$addr</a>}; - $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part - - return $addr; -} - - -sub _UnescapeSpecialChars { -# -# Swap back in all the special characters we've hidden. -# - my $text = shift; - - while( my($char, $hash) = each(%g_escape_table) ) { - $text =~ s/$hash/$char/g; - } - return $text; -} - - -sub _TokenizeHTML { -# -# Parameter: String containing HTML markup. -# Returns: Reference to an array of the tokens comprising the input -# string. Each token is either a tag (possibly with nested, -# tags contained therein, such as <a href="<MTFoo>">, or a -# run of text between tags. Each element of the array is a -# two-element array; the first is either 'tag' or 'text'; -# the second is the actual value. -# -# -# Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin. -# <http://www.bradchoate.com/past/mtregex.php> -# - - my $str = shift; - my $pos = 0; - my $len = length $str; - my @tokens; - - my $depth = 6; - my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x $depth); - my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) | # comment - (?s: <\? .*? \?> ) | # processing instruction - $nested_tags/ix; # nested tags - - while ($str =~ m/($match)/g) { - my $whole_tag = $1; - my $sec_start = pos $str; - my $tag_start = $sec_start - length $whole_tag; - if ($pos < $tag_start) { - push @tokens, ['text', substr($str, $pos, $tag_start - $pos)]; - } - push @tokens, ['tag', $whole_tag]; - $pos = pos $str; - } - push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len; - \@tokens; -} - - -sub _Outdent { -# -# Remove one level of line-leading tabs or spaces -# - my $text = shift; - - $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm; - return $text; -} - - -sub _Detab { -# -# Cribbed from a post by Bart Lateur: -# <http://www.nntp.perl.org/group/perl.macperl.anyperl/154> -# - my $text = shift; - - $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge; - return $text; -} - - -1; - -__END__ - - -=pod - -=head1 NAME - -B<Markdown> - - -=head1 SYNOPSIS - -B<Markdown.pl> [ B<--html4tags> ] [ B<--version> ] [ B<-shortversion> ] - [ I<file> ... ] - - -=head1 DESCRIPTION - -Markdown is a text-to-HTML filter; it translates an easy-to-read / -easy-to-write structured text format into HTML. Markdown's text format -is most similar to that of plain text email, and supports features such -as headers, *emphasis*, code blocks, blockquotes, and links. - -Markdown's syntax is designed not as a generic markup language, but -specifically to serve as a front-end to (X)HTML. You can use span-level -HTML tags anywhere in a Markdown document, and you can use block level -HTML tags (like <div> and <table> as well). - -For more information about Markdown's syntax, see: - - http://daringfireball.net/projects/markdown/ - - -=head1 OPTIONS - -Use "--" to end switch parsing. For example, to open a file named "-z", use: - - Markdown.pl -- -z - -=over 4 - - -=item B<--html4tags> - -Use HTML 4 style for empty element tags, e.g.: - - <br> - -instead of Markdown's default XHTML style tags, e.g.: - - <br /> - - -=item B<-v>, B<--version> - -Display Markdown's version number and copyright information. - - -=item B<-s>, B<--shortversion> - -Display the short-form version number. - - -=back - - - -=head1 BUGS - -To file bug reports or feature requests (other than topics listed in the -Caveats section above) please send email to: - - support@daringfireball.net - -Please include with your report: (1) the example input; (2) the output -you expected; (3) the output Markdown actually produced. - - -=head1 VERSION HISTORY - -See the readme file for detailed release notes for this version. - -1.0.1 - 14 Dec 2004 - -1.0 - 28 Aug 2004 - - -=head1 AUTHOR - - John Gruber - http://daringfireball.net - - PHP port and other contributions by Michel Fortin - http://michelf.com - - -=head1 COPYRIGHT AND LICENSE - -Copyright (c) 2003-2004 John Gruber -<http://daringfireball.net/> -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -* Neither the name "Markdown" nor the names of its contributors may - be used to endorse or promote products derived from this software - without specific prior written permission. - -This software is provided by the copyright holders and contributors "as -is" and any express or implied warranties, including, but not limited -to, the implied warranties of merchantability and fitness for a -particular purpose are disclaimed. In no event shall the copyright owner -or contributors be liable for any direct, indirect, incidental, special, -exemplary, or consequential damages (including, but not limited to, -procurement of substitute goods or services; loss of use, data, or -profits; or business interruption) however caused and on any theory of -liability, whether in contract, strict liability, or tort (including -negligence or otherwise) arising in any way out of the use of this -software, even if advised of the possibility of such damage. - -=cut diff --git a/bin/contrib/md2html.awk b/bin/contrib/md2html.awk @@ -1,427 +0,0 @@ -#!/bin/awk -f -# -# by: Jesus Galan (yiyus) 2009 -# -# Usage: md2html.awk file.md > file.html -# See: http://4l77.com/src/md2html.awk - -function eschtml(t) { - gsub("&", "\\&amp;", t); - gsub("<", "\\&lt;", t); - return t; -} - -function oprint(t){ - if(nr == 0) - print t; - else - otext = otext "\n" t; -} - -function subref(id){ - for(; nr > 0 && sub("<<" id, ref[id], otext); nr--); - if(nr == 0 && otext) { - print otext; - otext = ""; - } -} - -function nextil(t) { - if(!match(t, /[`<&\[*_\\-]|(\!\[)/)) - return t; - t1 = substr(t, 1, RSTART - 1); - tag = substr(t, RSTART, RLENGTH); - t2 = substr(t, RSTART + RLENGTH); - if(ilcode && tag != "`") - return eschtml(t1 tag) nextil(t2); - # Backslash escaping - if(tag == "\\"){ - if(match(t2, /^[\\`*_{}\[\]()#+\-\.!]/)){ - tag = substr(t2, 1, 1); - t2 = substr(t2, 2); - } - return t1 tag nextil(t2); - } - # Dashes - if(tag == "-"){ - if(sub(/^-/, "", t2)) - tag = "&#8212;"; - return t1 tag nextil(t2); - } - # Inline Code - if(tag == "`"){ - if(sub(/^`/, "", t2)){ - if(!match(t2, /``/)) - return t1 "&#8221;" nextil(t2); - ilcode2 = !ilcode2; - } - else if(ilcode2) - return t1 tag nextil(t2); - tag = "<code>"; - if(ilcode){ - t1 = eschtml(t1); - tag = "</code>"; - } - ilcode = !ilcode; - return t1 tag nextil(t2); - } - if(tag == "<"){ - # Autolinks - if(match(t2, /^[^ ]+[\.@][^ ]+>/)){ - url = eschtml(substr(t2, 1, RLENGTH - 1)); - t2 = substr(t2, RLENGTH + 1); - linktext = url; - if(match(url, /@/) && !match(url, /^mailto:/)) - url = "mailto:" url; - return t1 "<a href=\"" url "\">" linktext "</a>" nextil(t2); - } - # Html tags - if(match(t2, /^[A-Za-z\/!][^>]*>/)){ - tag = tag substr(t2, RSTART, RLENGTH); - t2 = substr(t2, RLENGTH + 1); - return t1 tag nextil(t2); - } - return t1 "&lt;" nextil(t2); - } - # Html special entities - if(tag == "&"){ - if(match(t2, /^#?[A-Za-z0-9]+;/)){ - tag = tag substr(t2, RSTART, RLENGTH); - t2 = substr(t2, RLENGTH + 1); - return t1 tag nextil(t2); - } - return t1 "&amp;" nextil(t2); - } - # Images - if(tag == "!["){ - if(!match(t2, /(\[.*\])|(\(.*\))/)) - return t1 tag nextil(t2); - match(t2, /^[^\]]*/); - alt = substr(t2, 1, RLENGTH); - t2 = substr(t2, RLENGTH + 2); - if(match(t2, /^\(/)){ - # Inline - sub(/^\(/, "", t2); - match(t2, /^[^\)]+/); - url = eschtml(substr(t2, 1, RLENGTH)); - t2 = substr(t2, RLENGTH + 2); - title = ""; - if(match(url, /[ ]+\".*\"[ ]*$/)) { - title = substr(url, RSTART, RLENGTH); - url = substr(url, 1, RSTART - 1); - match(title, /\".*\"/); - title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\""; - } - if(match(url, /^<.*>$/)) - url = substr(url, 2, RLENGTH - 2); - return t1 "<img src=\"" url "\" alt=\"" alt "\"" title " />" nextil(t2); - } - else{ - # Referenced - sub(/^ ?\[/, "", t2); - id = alt; - if(match(t2, /^[^\]]+/)) - id = substr(t2, 1, RLENGTH); - t2 = substr(t2, RLENGTH + 2); - if(ref[id]) - r = ref[id]; - else{ - r = "<<" id; - nr++; - } - return t1 "<img src=\"" r "\" alt=\"" alt "\" />" nextil(t2); - } - } - # Links - if(tag == "["){ - if(!match(t2, /(\[.*\])|(\(.*\))/)) - return t1 tag nextil(t2); - match(t2, /^[^\]]*(\[[^\]]*\][^\]]*)*/); - linktext = substr(t2, 1, RLENGTH); - t2 = substr(t2, RLENGTH + 2); - if(match(t2, /^\(/)){ - # Inline - match(t2, /^[^\)]+(\([^\)]+\)[^\)]*)*/); - url = substr(t2, 2, RLENGTH - 1); - pt2 = substr(t2, RLENGTH + 2); - title = ""; - if(match(url, /[ ]+\".*\"[ ]*$/)) { - title = substr(url, RSTART, RLENGTH); - url = substr(url, 1, RSTART - 1); - match(title, /\".*\"/); - title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\""; - } - if(match(url, /^<.*>$/)) - url = substr(url, 2, RLENGTH - 2); - url = eschtml(url); - return t1 "<a href=\"" url "\"" title ">" nextil(linktext) "</a>" nextil(pt2); - } - else{ - # Referenced - sub(/^ ?\[/, "", t2); - id = linktext; - if(match(t2, /^[^\]]+/)) - id = substr(t2, 1, RLENGTH); - t2 = substr(t2, RLENGTH + 2); - if(ref[id]) - r = ref[id]; - else{ - r = "<<" id; - nr++; - } - pt2 = t2; - return t1 "<a href=\"" r "\" />" nextil(linktext) "</a>" nextil(pt2); - } - } - # Emphasis - if(match(tag, /[*_]/)){ - ntag = tag; - if(sub("^" tag, "", t2)){ - if(stag[ns] == tag && match(t2, "^" tag)) - t2 = tag t2; - else - ntag = tag tag - } - n = length(ntag); - tag = (n == 2) ? "strong" : "em"; - if(match(t1, / $/) && match(t2, /^ /)) - return t1 tag nextil(t2); - if(stag[ns] == ntag){ - tag = "/" tag; - ns--; - } - else - stag[++ns] = ntag; - tag = "<" tag ">"; - return t1 tag nextil(t2); - } -} - -function inline(t) { - ilcode = 0; - ilcode2 = 0; - ns = 0; - - return nextil(t); -} - -function printp(tag) { - if(!match(text, /^[ ]*$/)){ - text = inline(text); - if(tag != "") - oprint("<" tag ">" text "</" tag ">"); - else - oprint(text); - } - text = ""; -} - -BEGIN { - blank = 0; - code = 0; - hr = 0; - html = 0; - nl = 0; - nr = 0; - otext = ""; - text = ""; - par = "p"; -} - -# References -!code && /^ *\[[^\]]*\]:[ ]+/ { - sub(/^ *\[/, ""); - match($0, /\]/); - id = substr($0, 1, RSTART - 1); - sub(id "\\]:[ ]+", ""); - title = ""; - if(match($0, /\".*\"$/)) - title = "\" title=\"" substr($0, RSTART + 1, RLENGTH - 2); - sub(/[ ]+\".*\"$/, ""); - url = eschtml($0); - ref[id] = url title; - - subref(id); - next; -} - -# html -!html && /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\ -isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/ { - if(code) - oprint("</pre></code>"); - for(; !text && block[nl] == "blockquote"; nl--) - oprint("</blockquote>"); - match($0, /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\ - isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/); - htag = substr($0, 2, RLENGTH - 1); - if(!match($0, "(<\\/" htag ">)|((^<hr ?\\/?)|(--)>$)")) - html = 1; - if(html && match($0, /^<hr/)) - hr = 1; - oprint($0); - next; -} - -html && (/(^<\/(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\ -isindex|menu|noframes|noscript|ol|p|pre|table|ul).*)|(--)>$/ || -(hr && />$/)) { - html = 0; - hr = 0; - oprint($0); - next; -} - -html { - oprint($0); - next; -} - -# List and quote blocks - -# Remove indentation -{ - for(nnl = 0; nnl < nl; nnl++) - if((match(block[nnl + 1], /[ou]l/) && !sub(/^( | )/, "")) || \ - (block[nnl + 1] == "blockquote" && !sub(/^> ?/, ""))) - break; -} -nnl < nl && !blank && text && ! /^ ? ? ?([*+-]|([0-9]+\.)+)( +| )/ { nnl = nl; } -# Quote blocks -{ - while(sub(/^> /, "")) - nblock[++nnl] = "blockquote"; -} -# Horizontal rules -{ hr = 0; } -(blank || (!text && !code)) && /^ ? ? ?([-*_][ ]*)([-*_][ ]*)([-*_][ ]*)+$/ { - if(code){ - oprint("</pre></code>"); - code = 0; - } - blank = 0; - nnl = 0; - hr = 1; -} -# List items -block[nl] ~ /[ou]l/ && /^$/ { - blank = 1; - next; -} -{ newli = 0; } -!hr && (nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?[*+-]( +| )/ { - sub(/^ ? ? ?[*+-]( +| )/, ""); - nnl++; - nblock[nnl] = "ul"; - newli = 1; -} -(nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?([0-9]+\.)+( +| )/ { - sub(/^ ? ? ?([0-9]+\.)+( +| )/, ""); - nnl++; - nblock[nnl] = "ol"; - newli = 1; -} -newli { - if(blank && nnl == nl && !par) - par = "p"; - blank = 0; - printp(par); - if(nnl == nl && block[nl] == nblock[nl]) - oprint("</li><li>"); -} -blank && ! /^$/ { - if(match(block[nnl], /[ou]l/) && !par) - par = "p"; - printp(par); - par = "p"; - blank = 0; -} - -# Close old blocks and open new ones -nnl != nl || nblock[nl] != block[nl] { - if(code){ - oprint("</pre></code>"); - code = 0; - } - printp(par); - b = (nnl > nl) ? nblock[nnl] : block[nnl]; - par = (match(b, /[ou]l/)) ? "" : "p"; -} -nnl < nl || (nnl == nl && nblock[nl] != block[nl]) { - for(; nl > nnl || (nnl == nl && pblock[nl] != block[nl]); nl--){ - if(match(block[nl], /[ou]l/)) - oprint("</li>"); - oprint("</" block[nl] ">"); - } -} -nnl > nl { - for(; nl < nnl; nl++){ - block[nl + 1] = nblock[nl + 1]; - oprint("<" block[nl + 1] ">"); - if(match(block[nl + 1], /[ou]l/)) - oprint("<li>"); - } -} -hr { - oprint("<hr>"); - next; -} - -# Code blocks -code && /^$/ { - if(blanK) - oprint(""); - blank = 1; - next; -} -!text && sub(/^( | )/, "") { - if(blanK) - oprint(""); - blank = 0; - if(!code) - oprint("<code><pre>"); - code = 1; - $0 = eschtml($0); - oprint($0); - next; -} -code { - oprint("</pre></code>"); - code = 0; -} - -# Setex-style Headers -text && /^=+$/ {printp("h1"); next;} -text && /^-+$/ {printp("h2"); next;} - -# Atx-Style headers -/^#+/ && (!newli || par=="p" || /^##/) { - for(n = 0; n < 6 && sub(/^# */, ""); n++) - sub(/#$/, ""); - par = "h" n; -} - -# Paragraph -/^$/ { - printp(par); - par = "p"; - next; -} - -# Add text -{ text = (text ? text " " : "") $0; } - -END { - if(code){ - oprint("</pre></code>"); - code = 0; - } - printp(par); - for(; nl > 0; nl--){ - if(match(block[nl], /[ou]l/)) - oprint("</li>"); - oprint("</" block[nl] ">"); - } - gsub(/<<[^\"]*/, "", otext); - print(otext); -} diff --git a/bin/contrib/urldecode.awk b/bin/contrib/urldecode.awk @@ -1,39 +0,0 @@ -#!/usr/bin/awk -f -BEGIN { - hextab ["0"] = 0; hextab ["8"] = 8; - hextab ["1"] = 1; hextab ["9"] = 9; - hextab ["2"] = 2; hextab ["A"] = hextab ["a"] = 10 - hextab ["3"] = 3; hextab ["B"] = hextab ["b"] = 11; - hextab ["4"] = 4; hextab ["C"] = hextab ["c"] = 12; - hextab ["5"] = 5; hextab ["D"] = hextab ["d"] = 13; - hextab ["6"] = 6; hextab ["E"] = hextab ["e"] = 14; - hextab ["7"] = 7; hextab ["F"] = hextab ["f"] = 15; -} -{ - decoded = "" - i = 1 - len = length ($0) - while ( i <= len ) { - c = substr ($0, i, 1) - if ( c == "%" ) { - if ( i+2 <= len ) { - c1 = substr ($0, i+1, 1) - c2 = substr ($0, i+2, 1) - if ( hextab [c1] == "" || hextab [c2] == "" ) { - print "WARNING: invalid hex encoding: %" c1 c2 | "cat >&2" - } else { - code = 0 + hextab [c1] * 16 + hextab [c2] + 0 - c = sprintf ("%c", code) - i = i + 2 - } - } else { - print "WARNING: invalid % encoding: " substr ($0, i, len - i) - } - } else if ( c == "+" ) { - c = " " - } - decoded = decoded c - ++i - } - print decoded -} diff --git a/bin/contrib/urlencode.awk b/bin/contrib/urlencode.awk @@ -1,126 +0,0 @@ -# Taken from http://www.shelldorado.com/scripts/cmds/urlencode -########################################################################## -# Title : urlencode - encode URL data -# Author : Heiner Steven (heiner.steven@odn.de) -# Date : 2000-03-15 -# Requires : awk -# Categories : File Conversion, WWW, CGI -# SCCS-Id. : @(#) urlencode 1.4 06/10/29 -########################################################################## -# Description -# Encode data according to -# RFC 1738: "Uniform Resource Locators (URL)" and -# RFC 1866: "Hypertext Markup Language - 2.0" (HTML) -# -# This encoding is used i.e. for the MIME type -# "application/x-www-form-urlencoded" -# -# Notes -# o The default behaviour is not to encode the line endings. This -# may not be what was intended, because the result will be -# multiple lines of output (which cannot be used in an URL or a -# HTTP "POST" request). If the desired output should be one -# line, use the "-l" option. -# -# o The "-l" option assumes, that the end-of-line is denoted by -# the character LF (ASCII 10). This is not true for Windows or -# Mac systems, where the end of a line is denoted by the two -# characters CR LF (ASCII 13 10). -# We use this for symmetry; data processed in the following way: -# cat | urlencode -l | urldecode -l -# should (and will) result in the original data -# -# o Large lines (or binary files) will break many AWK -# implementations. If you get the message -# awk: record `...' too long -# record number xxx -# consider using GNU AWK (gawk). -# -# o urlencode will always terminate it's output with an EOL -# character -# -# Thanks to Stefan Brozinski for pointing out a bug related to non-standard -# locales. -# -# See also -# urldecode -########################################################################## - -PN=`basename "$0"` # Program name -VER='1.4' - -: ${AWK=awk} - -Usage () { - echo >&2 "$PN - encode URL data, $VER -usage: $PN [-l] [file ...] - -l: encode line endings (result will be one line of output) - -The default is to encode each input line on its own." - exit 1 -} - -Msg () { - for MsgLine - do echo "$PN: $MsgLine" >&2 - done -} - -Fatal () { Msg "$@"; exit 1; } - -set -- `getopt hl "$@" 2>/dev/null` || Usage -[ $# -lt 1 ] && Usage # "getopt" detected an error - -EncodeEOL=no -while [ $# -gt 0 ] -do - case "$1" in - -l) EncodeEOL=yes;; - --) shift; break;; - -h) Usage;; - -*) Usage;; - *) break;; # First file name - esac - shift -done - -LANG=C export LANG -$AWK ' - BEGIN { - # We assume an awk implementation that is just plain dumb. - # We will convert an character to its ASCII value with the - # table ord[], and produce two-digit hexadecimal output - # without the printf("%02X") feature. - - EOL = "%0A" # "end of line" string (encoded) - split ("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ") - hextab [0] = 0 - for ( i=1; i<=255; ++i ) ord [ sprintf ("%c", i) "" ] = i + 0 - if ("'"$EncodeEOL"'" == "yes") EncodeEOL = 1; else EncodeEOL = 0 - } - { - encoded = "" - for ( i=1; i<=length ($0); ++i ) { - c = substr ($0, i, 1) - if ( c ~ /[a-zA-Z0-9.-]/ ) { - encoded = encoded c # safe character - } else if ( c == " " ) { - encoded = encoded "+" # special handling - } else { - # unsafe character, encode it as a two-digit hex-number - lo = ord [c] % 16 - hi = int (ord [c] / 16); - encoded = encoded "%" hextab [hi] hextab [lo] - } - } - if ( EncodeEOL ) { - printf ("%s", encoded EOL) - } else { - print encoded - } - } - END { - #if ( EncodeEOL ) print "" - } -' "$@" - diff --git a/bin/contrib/webserver.rc b/bin/contrib/webserver.rc @@ -1,30 +0,0 @@ -#!/usr/local/plan9/bin/rc - -# A web server in rc by maht -# Originally from http://www.proweb.co.uk/~matt/rc/webserver.rc - -ifs=' ' -request=`{sed 1q} - -url=$request(2) -file=`{echo $url | sed 's/http:\/\/[^\/]*//' | tr -d \012} - -if(test -d $file){ - file=$file ^'/index.html' -} -if(test -e $file) { - response='200' -} -if not { - response='404' - file='404.html' -} - -echo 'HTTP/1.1 ' ^$response -echo 'Date: ' `{date} -echo 'Server: rc shell' -echo 'Content-Length: ' `{cat $file | wc -c | tr -d ' '} -echo 'Content-Type: ' `{file -i $file | awk '{ print $2 }'} -echo 'Connection: close' -echo -cat $file diff --git a/etc/initrc b/etc/initrc @@ -25,7 +25,6 @@ path=($plan9port/bin . ./bin ./bin/contrib /bin /usr/bin) # takes as an argument a filter, in the default configuration markdown.pl, that # caches output) Note that some werc components assume a markdown-like # formatter, but all major functionality should should be formatter agnostic. -#formatter=(md2html.awk) formatter=(fltr_cache markdown.pl) man_formatter=(troff -N -man)