commit 78e32eb2e123a7cf00b3f2241f96822800640b73 parent 55d7075ba764df6038931c25e18d4908bd6f73d7 Author: uriel@vm41.cat-v.org <unknown> Date: Thu, 30 Jul 2009 02:12:06 +0200 Much better (I hope) get_html_title implementation: first try to find <title>; if that fails, get the first non-tag string in the file. Diffstat:
M | bin/werclib.rc | | | 13 | +++++++------ |
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/bin/werclib.rc b/bin/werclib.rc @@ -98,12 +98,13 @@ fn get_md_title { } fn get_html_title { - # H1 is not reliable because htmlroff doesn't use it :( - #desc=`{cat $1 | sed 32q | grep '<[Hh]1>' | sed 's/<[Hh]1>(.*)(<\/[Hh]1>|$)/\1/;s/<[^>]*>//g;1q'} - # Pick the first line of body instead - desc=`{sed -n '/<[Bb][Oo][Dd][Yy]/,/./s/(<[^>]*>|$)//gp' < $1} - if(~ $#desc 0) - desc=`{sed 's/<[^>]*>//g; 1q' < $1} + t=`{sed -n '32q; s/^.*<[Tt][Ii][Tt][Ll][Ee]> *([^<]+) *(<\/[Tt][Ii][Tt][Ll][Ee]>.*)?$/\1/p' < $1} + + # As a backup we might want to pick the first 'non-tag' text in the file with: + if(~ $"t '') + t=`{sed -n -e 's/^(<[^>]+>)*([^<]+).*/\2/p; 32q' < $1 | sed 1q} + + echo $t } fn get_file_title {