emo.awk (1702B)
# See LICENSE file for copyright and license details.

# https://www.unicode.org/Public/emoji/latest/emoji-data.txt
#
# Reads the Unicode emoji data file and prints a C table of inclusive
# [lower, upper] code point ranges for the Extended_Pictographic
# property. Directly adjacent ranges are merged into one table row.

BEGIN {
	# Fields are separated by any run of spaces and/or semicolons.
	FS = "[ ;]+"
}

# Skip comment lines and lines made up of whitespace only. The bracket
# expression is spelled out because "\s" is a GNU extension and is not
# understood by every awk implementation.
$0 ~ /^#/ || $0 ~ /^[ \t\r\f\v]*$/ { next }

# Collect the code point (or "lower..upper" range) of every matching
# record. The property name is compared including its trailing "#";
# presumably the data file puts the comment marker directly after the
# property name -- verify against emoji-data.txt.
$2 == "Extended_Pictographic#" { extpicts[nextpicts++] = $1 }

END {
	mktable("extpict", extpicts, nextpicts);
}

# Convert a hexadecimal string with a mandatory "0x" prefix (matched
# case-insensitively) to a number.
#
#   str  string to convert, e.g. "0x1f600"
#
# Returns the numeric value, or -1 if the prefix is missing or a
# character after the prefix is not a hexadecimal digit. A bare "0x"
# yields 0.
#
# The names after the gap in the parameter list are local variables;
# callers pass str only.
function hextonum(str,    val, i, n, dig) {
	str = tolower(str);
	if (substr(str, 1, 2) != "0x") {
		return -1;
	}
	str = substr(str, 3);

	val = 0;
	n = length(str);
	for (i = 1; i <= n; i++) {
		# index() is 1-based and returns 0 when the character is
		# not found, so a valid digit has the value dig - 1.
		dig = index("0123456789abcdef", substr(str, i, 1));

		if (!dig) {
			return -1;
		}

		val = (16 * val) + (dig - 1);
	}

	return val;
}

# Return the lower bound of spec as a lowercase "0x"-prefixed string.
# spec is either a single code point ("00A9") or a range ("A..B").
function rangelower(spec,    sep) {
	sep = index(spec, "..");
	return "0x" tolower(sep ? substr(spec, 1, sep - 1) : spec);
}

# Return the upper bound of spec as a lowercase "0x"-prefixed string.
# For a single code point the upper bound equals the lower bound.
function rangeupper(spec,    sep) {
	sep = index(spec, "..");
	return "0x" tolower(sep ? substr(spec, sep + 2) : spec);
}

# Print a C definition "static const uint32_t <name>_table[][2]" with
# one { lower, upper } row per run of adjacent entries.
#
#   name    prefix of the generated table identifier
#   array   entries indexed 0 .. arrlen - 1, each a code point or range
#   arrlen  number of entries in array
#
# Entries are merged only when the next entry starts exactly one past
# the current upper bound; they are processed in array order.
function mktable(name, array, arrlen,    j, lower, upper) {
	printf("\nstatic const uint32_t %s_table[][2] = {\n", name);

	for (j = 0; j < arrlen; j++) {
		lower = rangelower(array[j]);
		upper = rangeupper(array[j]);

		# look ahead and absorb every entry that directly follows
		# the current upper bound
		while (j < arrlen - 1 &&
		       hextonum(rangelower(array[j + 1])) == hextonum(upper) + 1) {
			j++;
			upper = rangeupper(array[j]);
		}

		printf("\t{ UINT32_C(%s), UINT32_C(%s) },\n", lower, upper);
	}

	printf("};\n");
}