sites

public wiki contents of suckless.org
git clone git://git.suckless.org/sites
Log | Files | Refs

md-printlinks.c (9757B)


      1 /* process Markdown (based on smu code), but only output links */
      2 #include <sys/types.h>
      3 
      4 #include <ctype.h>
      5 #include <errno.h>
      6 #include <stdarg.h>
      7 #include <stdio.h>
      8 #include <stdint.h>
      9 #include <stdlib.h>
     10 #include <string.h>
     11 
     12 #ifdef __OpenBSD__
     13 #include <unistd.h>
     14 #else
     15 #define pledge(a,b) 0
     16 #endif
     17 
     18 #define READ_BUF_SIZ 16384
     19 #define LEN(x)  sizeof(x)/sizeof(x[0])
     20 #define ADDC(b,i)  if (i % READ_BUF_SIZ == 0) { b = realloc(b, (i + READ_BUF_SIZ)); if (!b) eprint("realloc:"); } b[i]
     21 
     22 typedef int (*Parser)(const char *, const char *, int);
     23 typedef struct {
     24 	char *search;
     25 	int process;
     26 	char *before, *after;
     27 } Tag;
     28 
     29 static int dolineprefix(const char *begin, const char *end, int newblock);/* Parser for line prefix tags */
     30 static int dolink(const char *begin, const char *end, int newblock);      /* Parser for links and images */
     31 static int dolist(const char *begin, const char *end, int newblock);      /* Parser for lists */
     32 static int doparagraph(const char *begin, const char *end, int newblock); /* Parser for paragraphs */
     33 static int doshortlink(const char *begin, const char *end, int newblock); /* Parser for links and images */
     34 static int dosurround(const char *begin, const char *end, int newblock);  /* Parser for surrounding tags */
     35 static int dounderline(const char *begin, const char *end, int newblock); /* Parser for underline tags */
     36 static void *ereallocz(void *p, size_t size);
     37 static void hprint(const char *begin, const char *end);                   /* escapes HTML and prints it to output */
     38 static void process(const char *begin, const char *end, int newblock);    /* Processes range between begin and end. */
     39 
     40 /* list of parsers */
     41 static Parser parsers[] = {
     42 	dounderline, dolineprefix, dolist, doparagraph, dosurround, dolink, doshortlink,
     43 };
     44  
     45 static Tag lineprefix[] = {
     46 	{ "   ",	0,	"<pre><code>", "</code></pre>" },
     47 	{ "\t",		0,	"<pre><code>", "</code></pre>" },
     48 	{ "> ",		2,	"<blockquote>",	"</blockquote>" },
     49 	{ "###### ",	1,	"<h6>",		"</h6>" },
     50 	{ "##### ",	1,	"<h5>",		"</h5>" },
     51 	{ "#### ",	1,	"<h4>",		"</h4>" },
     52 	{ "### ",	1,	"<h3>",		"</h3>" },
     53 	{ "## ",	1,	"<h2>",		"</h2>" },
     54 	{ "# ",		1,	"<h1>",		"</h1>" },
     55 	{ "- - -\n",	1,	"<hr/>",	""},
     56 };
     57 
     58 static Tag underline[] = {
     59 	{ "=",		1,	"<h1>",		"</h1>\n" },
     60 	{ "-",		1,	"<h2>",		"</h2>\n" },
     61 };
     62 
     63 static Tag surround[] = {
     64 	{ "``",		0,	"<code>",	"</code>" },
     65 	{ "`",		0,	"<code>",	"</code>" },
     66 	{ "___",	1,	"<b><i>",	"</i></b>" },
     67 	{ "***",	1,	"<b><i>",	"</i></b>" },
     68 	{ "__",		1,	"<b>",		"</b>" },
     69 	{ "**",		1,	"<b>",		"</b>" },
     70 	{ "_",		1,	"<i>",		"</i>" },
     71 	{ "*",		1,	"<i>",		"</i>" },
     72 };
     73 
     74 void
     75 eprint(const char *format, ...)
     76 {
     77 	va_list ap;
     78 
     79 	va_start(ap, format);
     80 	vfprintf(stderr, format, ap);
     81 	va_end(ap);
     82 	if (format[0] && format[strlen(format) - 1] == ':')
     83 		fputs(strerror(errno), stderr);
     84 	fputc('\n', stderr);
     85 	exit(1);
     86 }
     87 
     88 int
     89 dolineprefix(const char *begin, const char *end, int newblock)
     90 {
     91 	unsigned int i, j, l;
     92 	char *buffer;
     93 	const char *p;
     94 
     95 	if (newblock)
     96 		p = begin;
     97 	else if (*begin == '\n')
     98 		p = begin + 1;
     99 	else
    100 		return 0;
    101 	for (i = 0; i < LEN(lineprefix); i++) {
    102 		l = strlen(lineprefix[i].search);
    103 		if (end - p < l)
    104 			continue;
    105 		if (strncmp(lineprefix[i].search, p, l))
    106 			continue;
    107 		if (lineprefix[i].search[l-1] == '\n') {
    108 			return l;
    109 		}
    110 		if (!(buffer = malloc(BUFSIZ)))
    111 			eprint("malloc:");
    112 		buffer[0] = '\0';
    113 
    114 		for (j = 0, p += l; p < end; p++, j++) {
    115 			ADDC(buffer, j) = *p;
    116 			if (*p == '\n' && p + l < end) {
    117 				if (strncmp(lineprefix[i].search, p + 1, l) != 0)
    118 					break;
    119 				p += l;
    120 			}
    121 		}
    122 
    123 		ADDC(buffer, j) = '\0';
    124 		if (lineprefix[i].process)
    125 			process(buffer, buffer + strlen(buffer), lineprefix[i].process >= 2);
    126 		free(buffer);
    127 		return -(p - begin);
    128 	}
    129 	return 0;
    130 }
    131 
    132 int
    133 dolink(const char *begin, const char *end, int newblock)
    134 {
    135 	int img, len, sep;
    136 	const char *desc, *link, *p, *q, *descend, *linkend;
    137 	const char *title = NULL, *titleend = NULL;
    138 
    139 	if (*begin == '[')
    140 		img = 0;
    141 	else if (strncmp(begin, "![", 2) == 0)
    142 		img = 1;
    143 	else
    144 		return 0;
    145 	p = desc = begin + 1 + img;
    146 	if (!(p = strstr(desc, "](")) || p > end)
    147 		return 0;
    148 	for (q = strstr(desc, "!["); q && q < end && q < p; q = strstr(q + 1, "!["))
    149 		if (!(p = strstr(p + 1, "](")) || p > end)
    150 			return 0;
    151 	descend = p;
    152 	link = p + 2;
    153 	if (!(q = strchr(link, ')')) || q > end)
    154 		return 0;
    155 	if ((p = strpbrk(link, "\"'")) && p < end && q > p) {
    156 		sep = p[0]; /* separator: can be " or ' */
    157 		title = p + 1;
    158 		/* strip trailing whitespace */
    159 		for (linkend = p; linkend > link && isspace(*(linkend - 1)); linkend--)
    160 			;
    161 		if (!(p = strchr(title, sep)) || q > end || p > q)
    162 			return 0;
    163 		titleend = p;
    164 		len = p + 2 - begin;
    165 	}
    166 	else {
    167 		linkend = q;
    168 		len = q + 1 - begin;
    169 	}
    170 	if (img) {
    171 		fwrite(link, 1, linkend - link, stdout);
    172 		fputs("\n", stdout);
    173 	}
    174 	else {
    175 		fwrite(link, 1, linkend - link, stdout);
    176 		fputs("\n", stdout);
    177 
    178 		process(desc, descend, 0);
    179 	}
    180 	return len;
    181 }
    182 
    183 int
    184 dolist(const char *begin, const char *end, int newblock)
    185 {
    186 	unsigned int i, j, indent, run, ul, isblock;
    187 	const char *p, *q;
    188 	char *buffer = NULL;
    189 
    190 	isblock = 0;
    191 	if (newblock)
    192 		p = begin;
    193 	else if (*begin == '\n')
    194 		p = begin + 1;
    195 	else
    196 		return 0;
    197 	q = p;
    198 	if (*p == '-' || *p == '*' || *p == '+')
    199 		ul = 1;
    200 	else {
    201 		ul = 0;
    202 		for (; p < end && *p >= '0' && *p <= '9'; p++)
    203 			;
    204 		if (p >= end || *p != '.')
    205 			return 0;
    206 	}
    207 	p++;
    208 	if (p >= end || !(*p == ' ' || *p == '\t'))
    209 		return 0;
    210 	for (p++; p != end && (*p == ' ' || *p == '\t'); p++)
    211 		;
    212 	indent = p - q;
    213 	buffer = ereallocz(buffer, BUFSIZ);
    214 	run = 1;
    215 	for (; p < end && run; p++) {
    216 		for (i = 0; p < end && run; p++, i++) {
    217 			if (*p == '\n') {
    218 				if (p + 1 == end)
    219 					break;
    220 				else if (p[1] == '\n') {
    221 					p++;
    222 					ADDC(buffer, i) = '\n';
    223 					i++;
    224 					run = 0;
    225 					isblock++;
    226 				}
    227 				q = p + 1;
    228 				j = 0;
    229 				if (ul && (*q == '-' || *q == '*' || *q == '+'))
    230 					j = 1;
    231 				else if (!ul) {
    232 					for (; q + j != end && q[j] >= '0' && q[j] <= '9' && j < indent; j++)
    233 						;
    234 					if (q + j == end)
    235 						break;
    236 					if (j > 0 && q[j] == '.')
    237 						j++;
    238 					else
    239 						j = 0;
    240 				}
    241 				if (q + indent < end)
    242 					for (; (q[j] == ' ' || q[j] == '\t') && j < indent; j++)
    243 						;
    244 				if (j == indent) {
    245 					ADDC(buffer, i) = '\n';
    246 					i++;
    247 					p += indent;
    248 					run = 1;
    249 					if (*q == ' ' || *q == '\t')
    250 						p++;
    251 					else
    252 						break;
    253 				}
    254 			}
    255 			ADDC(buffer, i) = *p;
    256 		}
    257 		ADDC(buffer, i) = '\0';
    258 		process(buffer, buffer + i, isblock > 1 || (isblock == 1 && run));
    259 	}
    260 	free(buffer);
    261 	p--;
    262 	while (*(--p) == '\n')
    263 		;
    264 	return -(p - begin + 1);
    265 }
    266 
    267 int
    268 doparagraph(const char *begin, const char *end, int newblock)
    269 {
    270 	const char *p;
    271 
    272 	if (!newblock)
    273 		return 0;
    274 	p = strstr(begin, "\n\n");
    275 	if (!p || p > end)
    276 		p = end;
    277 	if (p == begin)
    278 		return 0;
    279 	process(begin, p, 0);
    280 	return -(p - begin);
    281 }
    282 
    283 int
    284 doshortlink(const char *begin, const char *end, int newblock)
    285 {
    286 	const char *p;
    287 	int ismail = 0;
    288 
    289 	if (*begin != '<')
    290 		return 0;
    291 	for (p = begin + 1; p != end; p++) {
    292 		switch (*p) {
    293 		case ' ':
    294 		case '\t':
    295 		case '\n':
    296 			return 0;
    297 		case '#':
    298 		case ':':
    299 			ismail = -1;
    300 			break;
    301 		case '@':
    302 			if (ismail == 0)
    303 				ismail = 1;
    304 			break;
    305 		case '>':
    306 			if (ismail == 0)
    307 				return 0;
    308 			if (ismail != 1) {
    309 				fwrite(begin + 1, 1, p - begin + 1, stdout);
    310 				fputs("\n", stdout);
    311 			}
    312 			return p - begin + 1;
    313 		}
    314 	}
    315 	return 0;
    316 }
    317 
    318 int
    319 dosurround(const char *begin, const char *end, int newblock)
    320 {
    321 	unsigned int i, l;
    322 	const char *p, *start, *stop;
    323 
    324 	for (i = 0; i < LEN(surround); i++) {
    325 		l = strlen(surround[i].search);
    326 		if (end - begin < 2*l || strncmp(begin, surround[i].search, l) != 0)
    327 			continue;
    328 		start = begin + l;
    329 		p = start - 1;
    330 		do {
    331 			stop = p;
    332 			p = strstr(p + 1, surround[i].search);
    333 		} while (p && p[-1] == '\\');
    334 		if (p && p[-1] != '\\')
    335 			stop = p;
    336 		if (!stop || stop < start || stop >= end)
    337 			continue;
    338 		if (surround[i].process)
    339 			process(start, stop, 0);
    340 		else
    341 			hprint(start, stop);
    342 		return stop - begin + l;
    343 	}
    344 	return 0;
    345 }
    346 
    347 int
    348 dounderline(const char *begin, const char *end, int newblock)
    349 {
    350 	unsigned int i, j, l;
    351 	const char *p;
    352 
    353 	if (!newblock)
    354 		return 0;
    355 	p = begin;
    356 	for (l = 0; p + l != end && p[l] != '\n'; l++)
    357 		;
    358 	p += l + 1;
    359 	if (l == 0)
    360 		return 0;
    361 	for (i = 0; i < LEN(underline); i++) {
    362 		for (j = 0; p + j != end && p[j] != '\n' && p[j] == underline[i].search[0]; j++)
    363 			;
    364 		if (j >= l) {
    365 			if (underline[i].process)
    366 				process(begin, begin + l, 0);
    367 			else
    368 				hprint(begin, begin + l);
    369 			return -(j + p - begin);
    370 		}
    371 	}
    372 	return 0;
    373 }
    374 
    375 void *
    376 ereallocz(void *p, size_t size)
    377 {
    378 	if (!(p = realloc(p, size)))
    379 		eprint("realloc: could not allocate %u bytes:", size);
    380 	return p;
    381 }
    382 
    383 void
    384 hprint(const char *begin, const char *end)
    385 {
    386 }
    387 
    388 void
    389 process(const char *begin, const char *end, int newblock)
    390 {
    391 	const char *p, *q;
    392 	int affected;
    393 	unsigned int i;
    394 
    395 	for (p = begin; p < end;) {
    396 		if (newblock)
    397 			while (*p == '\n')
    398 				if (++p == end)
    399 					return;
    400 		affected = 0;
    401 		for (i = 0; i < LEN(parsers) && !affected; i++)
    402 			affected = parsers[i](p, end, newblock);
    403 		p += abs(affected);
    404 		if (!affected) {
    405 			p++;
    406 		}
    407 		for (q = p; q != end && *q == '\n'; q++)
    408 			;
    409 		if (q == end)
    410 			return;
    411 		else if (p[0] == '\n' && p + 1 != end && p[1] == '\n')
    412 			newblock = 1;
    413 		else
    414 			newblock = (affected < 0);
    415 	}
    416 }
    417 
    418 int
    419 main(int argc, char **argv)
    420 {
    421 	char *buffer;
    422 	int s;
    423 	size_t len, bsize;
    424 
    425 	if (pledge("stdio", NULL) < 0)
    426 		eprint("pledge:");
    427 
    428 	bsize = 2 * READ_BUF_SIZ;
    429 	buffer = ereallocz(NULL, bsize);
    430 	len = 0;
    431 	while ((s = fread(buffer + len, 1, READ_BUF_SIZ, stdin))) {
    432 		len += s;
    433 		if (READ_BUF_SIZ + len + 1 > bsize) {
    434 			bsize += READ_BUF_SIZ;
    435 			if (!(buffer = realloc(buffer, bsize)))
    436 				eprint("realloc:");
    437 		}
    438 	}
    439 	buffer[len] = '\0';
    440 	process(buffer, buffer + len, 1);
    441 	free(buffer);
    442 
    443 	return 0;
    444 }