libgrapheme

grapheme cluster utility library
git clone git://git.suckless.org/libgrapheme
Log | Files | Refs | LICENSE

gbt.awk (1332B)


      1 # See LICENSE file for copyright and license details.
      2 
      3 # https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
      4 BEGIN {
      5 	FS = " "
      6 
      7 	printf("struct test {\n\tuint32_t *cp;\n\tsize_t cplen;\n");
      8 	printf("\tsize_t *len;\n\tsize_t lenlen;\n\tchar *descr;\n};\n\n");
      9 	printf("static const struct test t[] = {\n");
     10 }
     11 
     12 $0 ~ /^#/ || $0 ~ /^\s*$/ { next }
     13 
     14 {
     15 	ncps = 0;
     16 	nlens = 0;
     17 
     18 	curlen = 1;
     19 	for (i = 2; i <= NF; i++) {
     20 		if ($(i + 1) == "#") {
     21 			break;
     22 		}
     23 		if (i % 2 == 0) {
     24 			# code point
     25 			cp[ncps++] = tolower($i);
     26 		} else {
     27 			# break information
     28 			if ($i == "÷") {
     29 				# break
     30 				len[nlens++] = curlen;
     31 				curlen = 1;
     32 			} else { # $i == "×"
     33 				# no break
     34 				curlen++;
     35 			}
     36 		}
     37 	}
     38 	len[nlens++] = curlen;
     39 
     40 	# print code points
     41 	printf("\t{\n\t\t.cp     = (uint32_t[]){ ");
     42 	for (i = 0; i < ncps; i++) {
     43 		printf("UINT32_C(0x%s)", cp[i]);
     44 		if (i + 1 < ncps) {
     45 			printf(", ");
     46 		}
     47 	}
     48 	printf(" },\n\t\t.cplen  = %d,\n", ncps);
     49 
     50 	# print grapheme cluster lengths
     51 	printf("\t\t.len    = (size_t[]){ ");
     52 	for (i = 0; i < nlens; i++) {
     53 		printf("%s", len[i]);
     54 		if (i + 1 < nlens) {
     55 			printf(", ");
     56 		}
     57 	}
     58 	printf(" },\n\t\t.lenlen = %d,\n", nlens);
     59 
     60 	# print testcase description
     61 	printf("\t\t.descr  = \"%s\",\n", substr($0, index($0, "#") + 3));
     62 
     63 	printf("\t},\n");
     64 }
     65 
     66 END {
     67 	printf("};\n");
     68 }