libgrapheme

grapheme cluster utility library
git clone git://git.suckless.org/libgrapheme
Log | Files | Refs | LICENSE

grapheme_boundary_test.c (3170B)


      1 /* See LICENSE file for copyright and license details. */
      2 #include <stddef.h>
      3 #include <stdio.h>
      4 #include <stdlib.h>
      5 #include <string.h>
      6 #include <errno.h>
      7 
      8 #include "datautil.h"
      9 
     10 struct break_test { /* one test case parsed from a single input line by process_line() */
     11 	uint32_t *cp;   /* heap array of the code points of the line, in input order */
     12 	size_t cplen;   /* number of entries in cp */
     13 	size_t *len;    /* heap array of segment lengths (in code points); a new entry is started at each '÷' */
     14 	size_t lenlen;  /* number of entries in len */
     15 	char *descr;    /* strdup()'d copy of the comment passed along with the line */
     16 };
     17 
     18 static struct break_test *test = NULL; /* array of parsed test cases, grown by one per process_line() call */
     19 static size_t ntests = 0;              /* number of entries in test; main() iterates up to this count */
     20 
     21 int
     22 process_line(char **field, size_t nfields, char *comment)
     23 {
     24 	struct break_test *t;
     25 	size_t i;
     26 	char *token;
     27 
     28 	if (nfields < 1) {
     29 		return 1;
     30 	}
     31 
     32 	/* append new testcase and initialize with zeroes */
     33 	if ((test = realloc(test, ++ntests * sizeof(*test))) == NULL) {
     34 		fprintf(stderr, "realloc: %s\n", strerror(errno));
     35 		return 1;
     36 	}
     37 	t = &test[ntests - 1];
     38 	memset(t, 0, sizeof(*t));
     39 
     40 	/* parse testcase "<÷|×> <cp> <÷|×> ... <cp> <÷|×>" */
     41 	for (token = strtok(field[0], " "), i = 0; token != NULL; i++,
     42 	     token = strtok(NULL, " ")) {
     43 		if (i % 2 == 0) {
     44 			/* delimiter */
     45 			if (!strncmp(token, "\xC3\xB7", 2)) { /* UTF-8 */
     46 				/*
     47 				 * '÷' indicates a breakpoint,
     48 				 * the current length is done; allocate
     49 				 * a new length field and set it to 0
     50 				 */
     51 				if ((t->len = realloc(t->len,
     52 				     ++t->lenlen * sizeof(*t->len))) == NULL) {
     53 					fprintf(stderr, "realloc: %s\n",
     54 					        strerror(errno));
     55 					return 1;
     56 				}
     57 				t->len[t->lenlen - 1] = 0;
     58 			} else if (!strncmp(token, "\xC3\x97", 2)) { /* UTF-8 */
     59 				/*
     60 				 * '×' indicates a non-breakpoint, do nothing
     61 				 */
     62 			} else {
     63 				fprintf(stderr, "malformed delimiter '%s'\n",
     64 				        token);
     65 				return 1;
     66 			}
     67 		} else {
     68 			/* add code point to cp-array */
     69 			if ((t->cp = realloc(t->cp, ++t->cplen *
     70 			                     sizeof(*t->cp))) == NULL) {
     71 				fprintf(stderr, "realloc: %s\n", strerror(errno));
     72 				return 1;
     73 			}
     74 			if (cp_parse(token, &t->cp[t->cplen - 1])) {
     75 				return 1;
     76 			}
     77 			if (t->lenlen > 0) {
     78 				t->len[t->lenlen - 1]++;
     79 			}
     80 		}
     81 	}
     82 	if (t->len[t->lenlen - 1] == 0) {
     83 		/* we allocated one more length than we needed */
     84 		t->lenlen--;
     85 	}
     86 
     87 	/* store comment */
     88 	if ((test[ntests - 1].descr = strdup(comment)) == NULL) {
     89 		fprintf(stderr, "strdup: %s\n", strerror(errno));
     90 		return 1;
     91 	}
     92 
     93 	return 0;
     94 }
     95 
     96 int
     97 main(void)
     98 {
     99 	size_t i, j;
    100 
    101 	printf("/* Automatically generated by data/gbt */\n"
    102 	       "#include <stdint.h>\n#include <stddef.h>\n\n");
    103 
    104 	parse_input(process_line);
    105 
    106 	printf("static const struct break_test {\n\tuint32_t *cp;\n"
    107 	       "\tsize_t cplen;\n\tsize_t *len;\n\tsize_t lenlen;\n"
    108 	       "\tchar *descr;\n} t[] = {\n");
    109 	for (i = 0; i < ntests; i++) {
    110 		printf("\t{\n");
    111 
    112 		printf("\t\t.cp     = (uint32_t[]){");
    113 		for (j = 0; j < test[i].cplen; j++) {
    114 			printf(" UINT32_C(0x%06X)", test[i].cp[j]);
    115 			if (j + 1 < test[i].cplen) {
    116 				putchar(',');
    117 			}
    118 		}
    119 		printf(" },\n");
    120 		printf("\t\t.cplen  = %zu,\n", test[i].cplen);
    121 
    122 		printf("\t\t.len    = (size_t[]){");
    123 		for (j = 0; j < test[i].lenlen; j++) {
    124 			printf(" %zu", test[i].len[j]);
    125 			if (j + 1 < test[i].lenlen) {
    126 				putchar(',');
    127 			}
    128 		}
    129 		printf(" },\n");
    130 		printf("\t\t.lenlen = %zu,\n", test[i].lenlen);
    131 
    132 		printf("\t\t.descr  = \"%s\",\n", test[i].descr);
    133 
    134 		printf("\t},\n");
    135 	}
    136 	printf("};\n");
    137 
    138 	return 0;
    139 }