gbp.awk (2695B)
# See LICENSE file for copyright and license details.

# Reads the Unicode grapheme break property data file and prints, for each
# property handled below, a C table of the form
#     static const uint32_t <name>_table[][2] = { { lower, upper }, ... };
# Input format reference:
# http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt
BEGIN {
	# Fields are separated by runs of spaces and/or semicolons, so that
	# $1 is the code point (or "lower..upper" range) and $2 the property.
	FS = "[ ;]+"
}

# Skip comment lines and blank lines. A bracket expression is used instead
# of the GNU-only "\s" escape so the pattern works in any POSIX awk.
$0 ~ /^#/ || $0 ~ /^[ \t]*$/ { next }

# Collect the first field of every record, grouped by its property name.
$2 == "CR"                 { crs[ncrs++] = $1 }
$2 == "LF"                 { lfs[nlfs++] = $1 }
$2 == "Control"            { controls[ncontrols++] = $1 }
$2 == "Extend"             { extends[nextends++] = $1 }
$2 == "ZWJ"                { zwj[nzwj++] = $1 }
$2 == "Regional_Indicator" { ris[nris++] = $1 }
$2 == "Prepend"            { prepends[nprepends++] = $1 }
$2 == "SpacingMark"        { spacingmarks[nspacingmarks++] = $1 }
$2 == "L"                  { ls[nls++] = $1 }
$2 == "V"                  { vs[nvs++] = $1 }
$2 == "T"                  { ts[nts++] = $1 }
$2 == "LV"                 { lvs[nlvs++] = $1 }
$2 == "LVT"                { lvts[nlvts++] = $1 }

# Emit one table per property once all input has been read.
END {
	mktable("cr", crs, ncrs);
	mktable("lf", lfs, nlfs);
	mktable("control", controls, ncontrols);
	mktable("extend", extends, nextends);
	mktable("zwj", zwj, nzwj);
	mktable("ri", ris, nris);
	mktable("prepend", prepends, nprepends);
	mktable("spacingmark", spacingmarks, nspacingmarks);
	mktable("l", ls, nls);
	mktable("v", vs, nvs);
	mktable("t", ts, nts);
	mktable("lv", lvs, nlvs);
	mktable("lvt", lvts, nlvts);
}

# hextonum(str): convert a "0x"-prefixed hexadecimal string (case-insensitive)
# to its numeric value.
# Returns -1 if the prefix is missing or a non-hexadecimal character occurs.
# The names after the gap in the parameter list are local variables; callers
# pass only `str`.
function hextonum(str,    len, pos, dig, val) {
	str = tolower(str);
	if (substr(str, 1, 2) != "0x") {
		return -1;
	}
	str = substr(str, 3);
	len = length(str);

	val = 0;
	for (pos = 1; pos <= len; pos++) {
		# index() is 1-based and yields 0 when the character is not
		# a hexadecimal digit
		dig = index("0123456789abcdef", substr(str, pos, 1));

		if (!dig) {
			return -1;
		}

		val = (16 * val) + (dig - 1);
	}

	return val;
}

# mktable(name, array, arrlen): print the C table "<name>_table" built from
# array[0 .. arrlen-1], whose entries are either a single hexadecimal code
# point ("XXXX") or a range ("XXXX..YYYY").
# Consecutive entries are merged into one row when the next entry's lower
# bound is exactly one above the current upper bound; only neighbours in
# array order are compared.
# The names after the gap in the parameter list are local variables; callers
# pass only the first three arguments.
function mktable(name, array, arrlen,    j, ind, lower, upper, nextlower, nextupper) {
	# The table name is passed as an argument rather than spliced into
	# the format string, so it is never interpreted as a format.
	printf("static const uint32_t %s_table[][2] = {\n", name);

	for (j = 0; j < arrlen; j++) {
		if (ind = index(array[j], "..")) {
			lower = tolower(substr(array[j], 1, ind - 1));
			upper = tolower(substr(array[j], ind + 2));
		} else {
			lower = upper = tolower(array[j]);
		}
		lower = sprintf("0x%s", lower);
		upper = sprintf("0x%s", upper);

		# print lower bound
		printf("\t{ UINT32_C(%s), ", lower);

		# The inner loop advances the shared index j, so entries that
		# were merged here are not visited again by the outer loop.
		for (; j < arrlen - 1; j++) {
			# look ahead and check if the next entry is adjacent
			if (ind = index(array[j + 1], "..")) {
				nextlower = tolower(substr(array[j + 1], 1, ind - 1));
				nextupper = tolower(substr(array[j + 1], ind + 2));
			} else {
				nextlower = nextupper = tolower(array[j + 1]);
			}
			nextlower = sprintf("0x%s", nextlower);
			nextupper = sprintf("0x%s", nextupper);

			if (hextonum(nextlower) != hextonum(upper) + 1) {
				break;
			}
			upper = nextupper;
		}

		# print upper bound
		printf("UINT32_C(%s) },\n", upper);
	}

	printf("};\n");
}