md-printlinks.c (9757B)
/* process Markdown (based on smu code), but only output links */
#include <sys/types.h>

#include <ctype.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#ifdef __OpenBSD__
#include <unistd.h>
#else
#define pledge(a,b) 0
#endif

#define READ_BUF_SIZ 16384
/* number of elements of a true array (not a pointer) */
#define LEN(x) (sizeof(x) / sizeof((x)[0]))
/*
 * Yields the lvalue b[i], growing b by READ_BUF_SIZ bytes whenever i reaches
 * a multiple of READ_BUF_SIZ. Expands to an if-statement followed by an
 * expression, so it must only be used as a full statement inside braces.
 */
#define ADDC(b,i) if ((i) % READ_BUF_SIZ == 0) { (b) = realloc((b), ((i) + READ_BUF_SIZ)); if (!(b)) eprint("realloc:"); } (b)[(i)]

/*
 * A parser inspects the text at `begin` (bounded by `end`) and returns:
 *   0   if it does not apply,
 *   > 0 the number of bytes consumed by an inline construct,
 *   < 0 minus the number of bytes consumed by a block construct
 *       (process() then treats what follows as a new block).
 */
typedef int (*Parser)(const char *, const char *, int);
typedef struct {
	char *search;         /* marker text to match */
	int process;          /* non-zero: recursively process the enclosed text */
	char *before, *after; /* HTML tags; not referenced by the code in this file */
} Tag;

static int dolineprefix(const char *begin, const char *end, int newblock);/* Parser for line prefix tags */
static int dolink(const char *begin, const char *end, int newblock);      /* Parser for links and images */
static int dolist(const char *begin, const char *end, int newblock);      /* Parser for lists */
static int doparagraph(const char *begin, const char *end, int newblock); /* Parser for paragraphs */
static int doshortlink(const char *begin, const char *end, int newblock); /* Parser for links and images */
static int dosurround(const char *begin, const char *end, int newblock);  /* Parser for surrounding tags */
static int dounderline(const char *begin, const char *end, int newblock); /* Parser for underline tags */
static void *ereallocz(void *p, size_t size);
static void hprint(const char *begin, const char *end);                   /* escapes HTML and prints it to output */
static void process(const char *begin, const char *end, int newblock);    /* Processes range between begin and end. */

/* list of parsers, tried in this order by process() */
static Parser parsers[] = {
	dounderline, dolineprefix, dolist, doparagraph, dosurround, dolink, doshortlink,
};

static Tag lineprefix[] = {
	/*
	 * NOTE(review): upstream smu uses four spaces for this code-block
	 * prefix; the single space here may be a whitespace-collapsing
	 * artefact of how this listing was captured -- confirm.
	 */
	{ " ",		0,	"<pre><code>",	"</code></pre>" },
	{ "\t",		0,	"<pre><code>",	"</code></pre>" },
	{ "> ",		2,	"<blockquote>",	"</blockquote>" },
	{ "###### ",	1,	"<h6>",		"</h6>" },
	{ "##### ",	1,	"<h5>",		"</h5>" },
	{ "#### ",	1,	"<h4>",		"</h4>" },
	{ "### ",	1,	"<h3>",		"</h3>" },
	{ "## ",	1,	"<h2>",		"</h2>" },
	{ "# ",		1,	"<h1>",		"</h1>" },
	{ "- - -\n",	1,	"<hr/>",	""},
};

static Tag underline[] = {
	{ "=",		1,	"<h1>",		"</h1>\n" },
	{ "-",		1,	"<h2>",		"</h2>\n" },
};

static Tag surround[] = {
	{ "``",		0,	"<code>",	"</code>" },
	{ "`",		0,	"<code>",	"</code>" },
	{ "___",	1,	"<b><i>",	"</i></b>" },
	{ "***",	1,	"<b><i>",	"</i></b>" },
	{ "__",		1,	"<b>",		"</b>" },
	{ "**",		1,	"<b>",		"</b>" },
	{ "_",		1,	"<i>",		"</i>" },
	{ "*",		1,	"<i>",		"</i>" },
};

/*
 * Print a printf-style message to stderr and exit with status 1.
 * If the format ends in ':', the strerror() text for errno (as it was on
 * entry) is appended.
 */
void
eprint(const char *format, ...)
{
	va_list ap;
	int saved_errno = errno; /* vfprintf() may overwrite errno */

	va_start(ap, format);
	vfprintf(stderr, format, ap);
	va_end(ap);
	if (format[0] && format[strlen(format) - 1] == ':')
		fputs(strerror(saved_errno), stderr);
	fputc('\n', stderr);
	exit(1);
}

/*
 * Parser for line-prefix constructs (see the lineprefix table).
 * Only applies at the start of a block or right after a newline. Consecutive
 * lines carrying the same prefix are collected, prefix stripped, into a
 * temporary buffer which is recursively processed when the tag asks for it.
 * Returns the prefix length for a tag that ends in '\n', minus the number of
 * consumed bytes for any other matching tag, or 0 when nothing matches.
 */
int
dolineprefix(const char *begin, const char *end, int newblock)
{
	unsigned int i, j, l;
	char *buffer;
	const char *p;

	if (newblock)
		p = begin;
	else if (*begin == '\n')
		p = begin + 1;
	else
		return 0;
	for (i = 0; i < LEN(lineprefix); i++) {
		l = strlen(lineprefix[i].search);
		if (end - p < l)
			continue;
		if (strncmp(lineprefix[i].search, p, l))
			continue;
		/* a tag that ends with a newline has no content to collect */
		if (lineprefix[i].search[l-1] == '\n') {
			return l;
		}
		if (!(buffer = malloc(BUFSIZ)))
			eprint("malloc:");
		buffer[0] = '\0';

		/* copy the content, skipping the prefix on every continued line */
		for (j = 0, p += l; p < end; p++, j++) {
			ADDC(buffer, j) = *p;
			if (*p == '\n' && p + l < end) {
				if (strncmp(lineprefix[i].search, p + 1, l) != 0)
					break;
				p += l;
			}
		}

		ADDC(buffer, j) = '\0';
		if (lineprefix[i].process)
			process(buffer, buffer + strlen(buffer), lineprefix[i].process >= 2);
		free(buffer);
		return -(p - begin);
	}
	return 0;
}

/*
 * Parser for "[desc](url)" links and "![desc](url)" images, with an optional
 * quoted title after the URL. Writes the URL followed by a newline to stdout;
 * for non-image links the description is processed as well, so links nested
 * inside it are printed too.
 * Returns the number of bytes consumed, or 0 if the text is not a link.
 */
int
dolink(const char *begin, const char *end, int newblock)
{
	int img, len, sep;
	const char *desc, *link, *p, *q, *descend, *linkend;
	const char *title = NULL;

	if (*begin == '[')
		img = 0;
	else if (strncmp(begin, "![", 2) == 0)
		img = 1;
	else
		return 0;
	p = desc = begin + 1 + img;
	if (!(p = strstr(desc, "](")) || p > end)
		return 0;
	/* for every "![" inside the description, skip one more "](" */
	for (q = strstr(desc, "!["); q && q < end && q < p; q = strstr(q + 1, "!["))
		if (!(p = strstr(p + 1, "](")) || p > end)
			return 0;
	descend = p;
	link = p + 2;
	if (!(q = strchr(link, ')')) || q > end)
		return 0;
	if ((p = strpbrk(link, "\"'")) && p < end && q > p) {
		sep = p[0]; /* separator: can be " or ' */
		title = p + 1;
		/*
		 * strip trailing whitespace; the cast keeps bytes >= 0x80 from
		 * being passed to isspace() as negative values
		 */
		for (linkend = p; linkend > link && isspace((unsigned char)linkend[-1]); linkend--)
			;
		if (!(p = strchr(title, sep)) || q > end || p > q)
			return 0;
		len = p + 2 - begin;
	}
	else {
		linkend = q;
		len = q + 1 - begin;
	}

	fwrite(link, 1, linkend - link, stdout);
	fputs("\n", stdout);
	if (!img)
		process(desc, descend, 0);

	return len;
}

/*
 * Parser for unordered ('-', '*', '+') and ordered ("<digits>.") lists.
 * Each item's text is gathered into a buffer (continuation lines have their
 * indentation removed) and recursively processed.
 * Returns minus the number of bytes consumed, or 0 if no list starts here.
 */
int
dolist(const char *begin, const char *end, int newblock)
{
	unsigned int i, j, indent, run, ul, isblock;
	const char *p, *q;
	char *buffer = NULL;

	isblock = 0;
	if (newblock)
		p = begin;
	else if (*begin == '\n')
		p = begin + 1;
	else
		return 0;
	q = p;
	if (*p == '-' || *p == '*' || *p == '+')
		ul = 1;
	else {
		ul = 0;
		for (; p < end && *p >= '0' && *p <= '9'; p++)
			;
		if (p >= end || *p != '.')
			return 0;
	}
	p++;
	if (p >= end || !(*p == ' ' || *p == '\t'))
		return 0;
	for (p++; p != end && (*p == ' ' || *p == '\t'); p++)
		;
	/* width of the marker plus the whitespace that follows it */
	indent = p - q;
	buffer = ereallocz(buffer, BUFSIZ);
	run = 1;
	/* one iteration of the outer loop per list item */
	for (; p < end && run; p++) {
		for (i = 0; p < end && run; p++, i++) {
			if (*p == '\n') {
				if (p + 1 == end)
					break;
				else if (p[1] == '\n') {
					/* blank line: the list stops unless the next line continues it */
					p++;
					ADDC(buffer, i) = '\n';
					i++;
					run = 0;
					isblock++;
				}
				q = p + 1;
				j = 0;
				if (ul && (*q == '-' || *q == '*' || *q == '+'))
					j = 1;
				else if (!ul) {
					for (; q + j != end && q[j] >= '0' && q[j] <= '9' && j < indent; j++)
						;
					if (q + j == end)
						break;
					if (j > 0 && q[j] == '.')
						j++;
					else
						j = 0;
				}
				if (q + indent < end)
					for (; (q[j] == ' ' || q[j] == '\t') && j < indent; j++)
						;
				if (j == indent) {
					ADDC(buffer, i) = '\n';
					i++;
					p += indent;
					run = 1;
					/* indented line continues this item; a marker starts the next one */
					if (*q == ' ' || *q == '\t')
						p++;
					else
						break;
				}
			}
			ADDC(buffer, i) = *p;
		}
		ADDC(buffer, i) = '\0';
		process(buffer, buffer + i, isblock > 1 || (isblock == 1 && run));
	}
	free(buffer);
	/* step back over trailing newlines so they are not counted as consumed */
	p--;
	while (*(--p) == '\n')
		;
	return -(p - begin + 1);
}

/*
 * Parser for paragraphs: at the start of a block, processes the text up to
 * the next blank line (or `end`) as inline content.
 * Returns minus the paragraph length, or 0 if not at a block start or the
 * paragraph is empty.
 */
int
doparagraph(const char *begin, const char *end, int newblock)
{
	const char *p;

	if (!newblock)
		return 0;
	p = strstr(begin, "\n\n");
	if (!p || p > end)
		p = end;
	if (p == begin)
		return 0;
	process(begin, p, 0);
	return -(p - begin);
}

/*
 * Parser for "<...>" short links. The content must not contain whitespace.
 * Content containing '#' or ':' is written to stdout followed by a newline;
 * content whose first special character is '@' is consumed without output.
 * Returns the number of bytes consumed, or 0 if this is not a short link.
 */
int
doshortlink(const char *begin, const char *end, int newblock)
{
	const char *p;
	int ismail = 0; /* 0: undecided, 1: '@' seen first, -1: '#' or ':' seen */

	if (*begin != '<')
		return 0;
	for (p = begin + 1; p != end; p++) {
		switch (*p) {
		case ' ':
		case '\t':
		case '\n':
			return 0;
		case '#':
		case ':':
			ismail = -1;
			break;
		case '@':
			if (ismail == 0)
				ismail = 1;
			break;
		case '>':
			if (ismail == 0)
				return 0;
			if (ismail != 1) {
				fwrite(begin + 1, 1, p - begin - 1, stdout);
				fputs("\n", stdout);
			}
			return p - begin + 1;
		}
	}
	return 0;
}

/*
 * Parser for surrounding markers (see the surround table), e.g. `code`,
 * *emphasis*. Looks for the closing marker that is not preceded by a
 * backslash, then either recursively processes the enclosed text or hands it
 * to hprint(), depending on the tag.
 * Returns the number of bytes consumed including both markers, or 0.
 */
int
dosurround(const char *begin, const char *end, int newblock)
{
	unsigned int i, l;
	const char *p, *start, *stop;

	for (i = 0; i < LEN(surround); i++) {
		l = strlen(surround[i].search);
		if (end - begin < 2*l || strncmp(begin, surround[i].search, l) != 0)
			continue;
		start = begin + l;
		p = start - 1;
		/* skip closing markers that are escaped with a backslash */
		do {
			stop = p;
			p = strstr(p + 1, surround[i].search);
		} while (p && p[-1] == '\\');
		if (p && p[-1] != '\\')
			stop = p;
		if (!stop || stop < start || stop >= end)
			continue;
		if (surround[i].process)
			process(start, stop, 0);
		else
			hprint(start, stop);
		return stop - begin + l;
	}
	return 0;
}

/*
 * Parser for underlined headings: a line followed by a line of '=' or '-'
 * characters at least as long as the first line.
 * Returns minus the number of bytes consumed, or 0 if there is no match.
 */
int
dounderline(const char *begin, const char *end, int newblock)
{
	unsigned int i, j, l;
	const char *p;

	if (!newblock)
		return 0;
	for (l = 0; begin + l != end && begin[l] != '\n'; l++)
		;
	/*
	 * An empty first line cannot be a heading. If the first line runs up
	 * to `end` there is no second line at all; bail out instead of
	 * scanning memory past the range.
	 */
	if (l == 0 || begin + l == end)
		return 0;
	p = begin + l + 1;
	for (i = 0; i < LEN(underline); i++) {
		for (j = 0; p + j != end && p[j] != '\n' && p[j] == underline[i].search[0]; j++)
			;
		if (j >= l) {
			if (underline[i].process)
				process(begin, begin + l, 0);
			else
				hprint(begin, begin + l);
			return -(j + p - begin);
		}
	}
	return 0;
}

/*
 * realloc() wrapper that terminates the program through eprint() when the
 * allocation fails. Returns the (possibly moved) allocation.
 */
void *
ereallocz(void *p, size_t size)
{
	if (!(p = realloc(p, size)))
		eprint("realloc: could not allocate %zu bytes:", size);
	return p;
}

/*
 * Intentionally empty: this tool only outputs links, so text that would be
 * printed as escaped HTML is dropped.
 */
void
hprint(const char *begin, const char *end)
{
}

/*
 * Run the parsers over [begin, end). At each position the parsers are tried
 * in table order until one reports a match; the position then advances by
 * the magnitude of its result, or by one byte when none matched. A negative
 * result, or a blank line at the new position, marks the start of a new
 * block for the next round.
 */
void
process(const char *begin, const char *end, int newblock)
{
	const char *p, *q;
	int affected;
	unsigned int i;

	for (p = begin; p < end;) {
		if (newblock)
			while (*p == '\n')
				if (++p == end)
					return;
		affected = 0;
		for (i = 0; i < LEN(parsers) && !affected; i++)
			affected = parsers[i](p, end, newblock);
		p += abs(affected);
		if (!affected) {
			p++;
		}
		/* stop when only newlines remain */
		for (q = p; q != end && *q == '\n'; q++)
			;
		if (q == end)
			return;
		else if (p[0] == '\n' && p + 1 != end && p[1] == '\n')
			newblock = 1;
		else
			newblock = (affected < 0);
	}
}

/*
 * Read all of stdin into a NUL-terminated buffer and process it as Markdown,
 * printing the links it contains to stdout.
 */
int
main(int argc, char **argv)
{
	char *buffer;
	size_t s, len, bsize;

	if (pledge("stdio", NULL) < 0)
		eprint("pledge:");

	bsize = 2 * READ_BUF_SIZ;
	buffer = ereallocz(NULL, bsize);
	len = 0;
	while ((s = fread(buffer + len, 1, READ_BUF_SIZ, stdin))) {
		len += s;
		/* keep room for the next full read plus the terminating NUL */
		if (READ_BUF_SIZ + len + 1 > bsize) {
			bsize += READ_BUF_SIZ;
			if (!(buffer = realloc(buffer, bsize)))
				eprint("realloc:");
		}
	}
	buffer[len] = '\0';
	process(buffer, buffer + len, 1);
	free(buffer);

	return 0;
}