libgrapheme

grapheme cluster utility library
git clone git://git.suckless.org/libgrapheme
Log | Files | Refs | LICENSE

emo.awk (1702B)


      1 # See LICENSE file for copyright and license details.
      2 
      3 # https://www.unicode.org/Public/emoji/latest/emoji-data.txt
      4 BEGIN {
      5 	FS = "[ ;]+"
      6 }
      7 
      8 $0 ~ /^#/ || $0 ~ /^\s*$/      { next }
      9 $2 == "Extended_Pictographic#" { extpicts[nextpicts++] = $1 }
     10 
     11 END {
     12 	mktable("extpict", extpicts, nextpicts);
     13 }
     14 
     15 function hextonum(str) {
     16 	str = tolower(str);
     17 	if (substr(str, 1, 2) != "0x") {
     18 		return -1;
     19 	}
     20 	str = substr(str, 3);
     21 
     22 	val = 0;
     23 	for (i = 0; i < length(str); i++) {
     24 		dig = index("0123456789abcdef", substr(str, i + 1, 1));
     25 
     26 		if (!dig) {
     27 			return -1;
     28 		}
     29 
     30 		val = (16 * val) + (dig - 1);
     31 	}
     32 
     33 	return val;
     34 }
     35 
     36 function mktable(name, array, arrlen) {
     37 	printf("\nstatic const uint32_t "name"_table[][2] = {\n");
     38 
     39 	for (j = 0; j < arrlen; j++) {
     40 		if (ind = index(array[j], "..")) {
     41 			lower = tolower(substr(array[j], 1, ind - 1));
     42 			upper = tolower(substr(array[j], ind + 2));
     43 		} else {
     44 			lower = upper = tolower(array[j]);
     45 		}
     46 		lower = sprintf("0x%s", lower);
     47 		upper = sprintf("0x%s", upper);
     48 
     49 		# print lower bound
     50 		printf("\t{ UINT32_C(%s), ", lower);
     51 
     52 		for (; j < arrlen - 1; j++) {
     53 			# look ahead and check if we have adjacent arrays
     54 			if (ind = index(array[j + 1], "..")) {
     55 				nextlower = tolower(substr(array[j + 1],
     56 				                    1, ind - 1));
     57 				nextupper = tolower(substr(array[j + 1],
     58 				                    ind + 2));
     59 			} else {
     60 				nextlower = nextupper = tolower(array[j + 1]);
     61 			}
     62 			nextlower = sprintf("0x%s", nextlower);
     63 			nextupper = sprintf("0x%s", nextupper);
     64 
     65 			if ((hextonum(nextlower) * 1) != (hextonum(upper) + 1)) {
     66 				break;
     67 			} else {
     68 				upper = nextupper;
     69 			}
     70 		}
     71 
     72 		# print upper bound
     73 		printf("UINT32_C(%s) },\n", upper);
     74 	}
     75 
     76 	printf("};\n");
     77 }