libgrapheme

grapheme cluster utility library
git clone git://git.suckless.org/libgrapheme
Log | Files | Refs | LICENSE

gbp.awk (2695B)


      1 # See LICENSE file for copyright and license details.
      2 
      3 # http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt
      # Split records on any run of spaces and/or semicolons, so that for a
      # data line of the form "<code points> ; <property> # ..." the code
      # point (or range) ends up in $1 and the property name in $2.
      BEGIN { FS = "[ ;]+" }
      7 
      # Skip comment lines and lines that are empty or consist only of
      # spaces/tabs. A bracket expression is used rather than "\s", which
      # is a gawk extension and not part of the POSIX extended regular
      # expressions, so other awk implementations may misinterpret it.
      /^#/ || /^[ \t]*$/         { next }

      # Append the code point or "lower..upper" range in $1 to the list
      # that belongs to the property named in $2. Every list is indexed
      # from 0 and has its own element counter.
      $2 == "CR"                 { crs[ncrs++] = $1 }
      $2 == "LF"                 { lfs[nlfs++] = $1 }
      $2 == "Control"            { controls[ncontrols++] = $1 }
      $2 == "Extend"             { extends[nextends++] = $1 }
      $2 == "ZWJ"                { zwj[nzwj++] = $1 }
      $2 == "Regional_Indicator" { ris[nris++] = $1 }
      $2 == "Prepend"            { prepends[nprepends++] = $1 }
      $2 == "SpacingMark"        { spacingmarks[nspacingmarks++] = $1 }
      $2 == "L"                  { ls[nls++] = $1 }
      $2 == "V"                  { vs[nvs++] = $1 }
      $2 == "T"                  { ts[nts++] = $1 }
      $2 == "LV"                 { lvs[nlvs++] = $1 }
      $2 == "LVT"                { lvts[nlvts++] = $1 }
     22 
     # After the last input record, print one table per property. The
     # order of the calls is the order of the tables in the output.
     END {
         mktable("cr",          crs,          ncrs)
         mktable("lf",          lfs,          nlfs)
         mktable("control",     controls,     ncontrols)
         mktable("extend",      extends,      nextends)
         mktable("zwj",         zwj,          nzwj)
         mktable("ri",          ris,          nris)
         mktable("prepend",     prepends,     nprepends)
         mktable("spacingmark", spacingmarks, nspacingmarks)
         mktable("l",           ls,           nls)
         mktable("v",           vs,           nvs)
         mktable("t",           ts,           nts)
         mktable("lv",          lvs,          nlvs)
         mktable("lvt",         lvts,         nlvts)
     }
     38 
     # Convert a hexadecimal string carrying a mandatory "0x" prefix to
     # its numeric value.
     #
     # str: the string to convert, e.g. "0x1f"; letter case is ignored,
     #      for the prefix as well as for the digits.
     #
     # Returns the value as a number, or -1 if the prefix is missing, if
     # no digits follow the prefix, or if a character that is not a
     # hexadecimal digit is encountered.
     #
     # The parameters after "str" are not meant to be supplied by callers.
     # awk has no other way of declaring local variables, and listing them
     # here keeps the function from overwriting globals of the same name.
     function hextonum(str,    len, pos, dig, val) {
         str = tolower(str)
         if (substr(str, 1, 2) != "0x") {
             return -1
         }
         str = substr(str, 3)

         # a bare "0x" does not denote any value
         len = length(str)
         if (len == 0) {
             return -1
         }

         val = 0
         for (pos = 1; pos <= len; pos++) {
             # index() counts from 1 and yields 0 when the character is
             # not in the digit string, so a digit's value is its
             # position minus one
             dig = index("0123456789abcdef", substr(str, pos, 1))
             if (!dig) {
                 return -1
             }

             val = (16 * val) + (dig - 1)
         }

         return val
     }
     59 
     # Return the lower bound of a code-point entry as a lowercase,
     # "0x"-prefixed string. An entry is either a single code point
     # ("XXXX") or a range ("XXXX..YYYY"); "sep" is a local variable.
     function rangelower(entry,    sep) {
         sep = index(entry, "..")
         return "0x" tolower(sep ? substr(entry, 1, sep - 1) : entry)
     }

     # Return the upper bound of a code-point entry as a lowercase,
     # "0x"-prefixed string; for a single code point this is the same
     # value as the lower bound. "sep" is a local variable.
     function rangeupper(entry,    sep) {
         sep = index(entry, "..")
         return "0x" tolower(sep ? substr(entry, sep + 2) : entry)
     }

     # Print the definition of a C array "<name>_table" of
     # { lower, upper } code-point pairs to standard output.
     #
     # name:   prefix of the generated table identifier
     # array:  entries indexed from 0, each a hexadecimal code point
     #         ("XXXX") or range ("XXXX..YYYY") without "0x" prefix
     # arrlen: number of entries in array
     #
     # An entry whose lower bound is exactly one above the upper bound of
     # the pair being built is merged into that pair. Only the directly
     # following entry is examined, so this presumably relies on the
     # entries being in ascending order -- verify against the input data.
     #
     # The parameters after "arrlen" are local variables and are not meant
     # to be supplied by callers.
     function mktable(name, array, arrlen,    j, upper) {
         # name is passed as an argument rather than spliced into the
         # format string, so a "%" in it cannot be taken as a conversion
         printf("static const uint32_t %s_table[][2] = {\n", name)

         for (j = 0; j < arrlen; j++) {
             # print lower bound
             printf("\t{ UINT32_C(%s), ", rangelower(array[j]))
             upper = rangeupper(array[j])

             # absorb the following entries for as long as each one
             # begins right after the current upper bound; j is left on
             # the last absorbed entry so the outer loop resumes after it
             for (; j < arrlen - 1; j++) {
                 if (hextonum(rangelower(array[j + 1])) != hextonum(upper) + 1) {
                     break
                 }
                 upper = rangeupper(array[j + 1])
             }

             # print upper bound
             printf("UINT32_C(%s) },\n", upper)
         }

         printf("};\n")
     }