mkrunetype.awk (7613B)
# See LICENSE file for copyright and license details.
#
# Generates the C sources for the is*rune()/to*rune() family from a
# ';'-separated Unicode character table read on input (presumably
# UnicodeData.txt -- confirm against the build rule).
#
# Fields used from each input record:
#   $1   code point, hexadecimal without prefix
#   $3   general category (L*, Z*, Cc, Lu, Ll, Lt, Nd)
#   $5   bidirectional class (WS, S, B are treated as space)
#   $13  uppercase mapping (used for Ll records)
#   $14  lowercase mapping (used for Lu records)

BEGIN {
	FS = ";"
	# set up hexadecimal lookup table (upper-case digits only; code()
	# upper-cases its input before consulting it)
	for (i = 0; i < 16; i++)
		hex[sprintf("%X", i)] = i;
	HEADER = "/* Automatically generated by mkrunetype.awk */\n#include <stdlib.h>\n\n#include \"../utf.h\"\n#include \"runetype.h\"\n"
	HEADER_OTHER = "/* Automatically generated by mkrunetype.awk */\n#include \"../utf.h\"\n#include \"runetype.h\"\n"
}

# Collect the code points of every class.  For upper/lower the case
# mapping is stored in a parallel array (same index as the rune); a
# record without a mapping maps to itself.
$3 ~ /^L/ { alphav[alphac++] = $1; }
($3 ~ /^Z/) || ($5 == "WS") || ($5 == "S") || ($5 == "B") { spacev[spacec++] = $1; }
$3 == "Cc" { cntrlv[cntrlc++] = $1; }
$3 == "Lu" { upperv[upperc++] = $1; tolowerv[uppercc++] = ($14 == "") ? $1 : $14; }
$3 == "Ll" { lowerv[lowerc++] = $1; toupperv[lowercc++] = ($13 == "") ? $1 : $13; }
$3 == "Lt" { titlev[titlec++] = $1; }
$3 == "Nd" { digitv[digitc++] = $1; }

END {
	system("rm -f isalpharune.c isspacerune.c iscntrlrune.c upperrune.c lowerrune.c istitlerune.c isdigitrune.c");

	# 'q' is deliberately never assigned: it stands for "no case table".
	mkis("alpha", alphav, alphac, "isalpharune.c", q, "");
	mkis("space", spacev, spacec, "isspacerune.c", q, "");
	mkis("cntrl", cntrlv, cntrlc, "iscntrlrune.c", q, "");
	mkis("upper", upperv, upperc, "upperrune.c", tolowerv, "lower");
	mkis("lower", lowerv, lowerc, "lowerrune.c", toupperv, "upper");
	mkis("title", titlev, titlec, "istitlerune.c", q, "");
	mkis("digit", digitv, digitc, "isdigitrune.c", q, "");

	system("rm -f isalnumrune.c isblankrune.c isprintrune.c isgraphrune.c ispunctrune.c isxdigitrune.c");

	otheris();
}

# parse hexadecimal rune index to int
#   s        hexadecimal digits, no prefix; "" yields 0
# The names after the gap are awk locals, not arguments; keeping them
# local stops this function from clobbering a caller's loop counter.
function code(s,    x, i, c) {
	x = 0;
	for (i = 1; i <= length(s); i++) {
		# the lookup table only holds upper-case digits
		c = toupper(substr(s, i, 1));
		x = (x * 16) + hex[c];
	}
	return x;
}

# generate 'is<name>rune' unicode lookup function
#   name      class name spliced into the generated identifiers
#   runev     code points (hex strings), indexed 0..runec-1, ascending
#   runec     number of entries in runev
#   file      output C file
#   casev     case mappings parallel to runev, or an empty/unset
#             variable when the class has no case conversion
#   casename  "lower"/"upper": names the generated to<casename>rune()
#
# The runes are partitioned into four tables:
#   <name>1  singletons
#   <name>2  ranges: consecutive runes (and consecutive mappings)
#   <name>3  laces 1: every second rune, each mapping to rune + 1
#   <name>4  laces 2: every second rune, each mapping to rune - 1
# 'mode' records which kind of group is currently open (1 = none).
# Everything after the gap in the parameter list is a local.
function mkis(name, runev, runec, file, casev, casename,
              rune1c, rune2c, rune3c, rune4c, mode, j, more, hascase,
              rune1v, case1v, rune2v0, rune2v1, case2v,
              rune3v0, rune3v1, rune4v0, rune4v1) {
	rune1c = 0;
	rune2c = 0;
	rune3c = 0;
	rune4c = 0;
	mode = 1;
	hascase = (length(casev) > 0);

	#sort rune groups into singletons, ranges and laces
	for (j = 0; j < runec; j++) {
		# only look at runev[j+1] when it exists
		more = (j + 1 < runec);

		# range
		if (more && code(runev[j+1]) == code(runev[j]) + 1 &&
		    (!hascase || code(casev[j+1]) == code(casev[j]) + 1)) {
			if (mode == 2)
				continue;
			# close whichever lace was open at this rune
			if (mode == 3)
				rune3v1[rune3c++] = runev[j];
			else if (mode == 4)
				rune4v1[rune4c++] = runev[j];
			mode = 2;
			rune2v0[rune2c] = runev[j];
			if (hascase)
				case2v[rune2c] = casev[j];
			continue;
		}
		# lace 1
		if (more && code(runev[j+1]) == code(runev[j]) + 2 &&
		    (!hascase ||
		     (code(casev[j+1]) == code(runev[j+1]) + 1 &&
		      code(casev[j]) == code(runev[j]) + 1))) {
			if (mode == 3)
				continue;
			if (mode == 2)
				rune2v1[rune2c++] = runev[j];
			else if (mode == 4)
				# index with the lace 2 counter, not rune2c
				rune4v1[rune4c++] = runev[j];
			mode = 3;
			rune3v0[rune3c] = runev[j];
			continue;
		}
		# lace 2
		if (more && code(runev[j+1]) == code(runev[j]) + 2 &&
		    (!hascase ||
		     (code(casev[j+1]) == code(runev[j+1]) - 1 &&
		      code(casev[j]) == code(runev[j]) - 1))) {
			if (mode == 4)
				continue;
			if (mode == 2)
				rune2v1[rune2c++] = runev[j];
			else if (mode == 3)
				# index with the lace 1 counter, not rune2c
				rune3v1[rune3c++] = runev[j];
			mode = 4;
			rune4v0[rune4c] = runev[j];
			continue;
		}
		# terminating case: this rune ends the open group, or is a
		# singleton when no group is open
		if (mode == 1) {
			rune1v[rune1c] = runev[j];
			if (hascase)
				case1v[rune1c] = casev[j];
			rune1c++;
		} else if (mode == 2) {
			rune2v1[rune2c++] = runev[j];
		} else if (mode == 3) {
			rune3v1[rune3c++] = runev[j];
		} else { #lace 2
			rune4v1[rune4c++] = runev[j];
		}
		mode = 1;
	}
	print HEADER > file;

	#generate list of laces 1
	if (rune3c > 0) {
		print "static const Rune "name"3[][2] = {" > file;
		for (j = 0; j < rune3c; j++)
			print "\t{ 0x"rune3v0[j]", 0x"rune3v1[j]" }," > file;
		print "};\n" > file;
	}

	#generate list of laces 2
	if (rune4c > 0) {
		print "static const Rune "name"4[][2] = {" > file;
		for (j = 0; j < rune4c; j++)
			print "\t{ 0x"rune4v0[j]", 0x"rune4v1[j]" }," > file;
		print "};\n" > file;
	}

	# generate list of ranges; with a case table each row carries the
	# mapping of the first rune as a third column
	if (rune2c > 0) {
		if (hascase) {
			print "static const Rune "name"2[][3] = {" > file;
			for (j = 0; j < rune2c; j++)
				print "\t{ 0x"rune2v0[j]", 0x"rune2v1[j]", 0x"case2v[j]" }," > file;
		} else {
			print "static const Rune "name"2[][2] = {" > file;
			for (j = 0; j < rune2c; j++)
				print "\t{ 0x"rune2v0[j]", 0x"rune2v1[j]" }," > file;
		}
		print "};\n" > file;
	}

	# generate list of singletons
	if (rune1c > 0) {
		if (hascase) {
			print "static const Rune "name"1[][2] = {" > file;
			for (j = 0; j < rune1c; j++)
				print "\t{ 0x"rune1v[j]", 0x"case1v[j]" }," > file;
		} else {
			print "static const Rune "name"1[] = {" > file;
			for (j = 0; j < rune1c; j++)
				print "\t0x"rune1v[j]"," > file;
		}
		print "};\n" > file;
	}

	# generate lookup function
	print "int\nis"name"rune(Rune r)\n{" > file;
	if (rune4c > 0 || rune3c > 0)
		print "\tconst Rune *match;\n" > file;
	if (rune4c > 0) {
		print "\tif((match = bsearch(&r, "name"4, nelem("name"4), sizeof *"name"4, &rune2cmp)))" > file;
		print "\t\treturn !((r - match[0]) % 2);" > file;
	}
	if (rune3c > 0) {
		print "\tif((match = bsearch(&r, "name"3, nelem("name"3), sizeof *"name"3, &rune2cmp)))" > file;
		print "\t\treturn !((r - match[0]) % 2);" > file;
	}
	if (rune2c > 0)
		print "\tif(bsearch(&r, "name"2, nelem("name"2), sizeof *"name"2, &rune2cmp))\n\t\treturn 1;" > file;
	if (rune1c > 0)
		print "\tif(bsearch(&r, "name"1, nelem("name"1), sizeof *"name"1, &rune1cmp))\n\t\treturn 1;" > file;
	print "\treturn 0;\n}" > file;

	# generate case conversion function
	if (hascase) {
		print "\nint\nto"casename"rune(Rune r)\n{\n\tRune *match;\n" > file;
		if (rune4c > 0) {
			print "\tmatch = bsearch(&r, "name"4, nelem("name"4), sizeof *"name"4, &rune2cmp);" > file;
			print "\tif (match)" > file;
			print "\t\treturn ((r - match[0]) % 2) ? r : r - 1;" > file;
		}
		if (rune3c > 0) {
			print "\tmatch = bsearch(&r, "name"3, nelem("name"3), sizeof *"name"3, &rune2cmp);" > file;
			print "\tif (match)" > file;
			print "\t\treturn ((r - match[0]) % 2) ? r : r + 1;" > file;
		}
		if (rune2c > 0) {
			print "\tmatch = bsearch(&r, "name"2, nelem("name"2), sizeof *"name"2, &rune2cmp);" > file;
			print "\tif (match)" > file;
			print "\t\treturn match[2] + (r - match[0]);" > file;
		}
		if (rune1c > 0) {
			print "\tmatch = bsearch(&r, "name"1, nelem("name"1), sizeof *"name"1, &rune1cmp);" > file;
			print "\tif (match)" > file;
			print "\t\treturn match[1];" > file;
		}
		print "\treturn r;\n}" > file;
	}

	# flush and release the output stream
	close(file);
}

# generate the classification functions that are defined in terms of
# the table-driven ones (or of plain ASCII comparisons); each output
# file is closed once it has been written
function otheris() {
	print HEADER_OTHER > "isalnumrune.c";
	print "int\nisalnumrune(Rune r)\n{\n\treturn isalpharune(r) || isdigitrune(r);\n}" > "isalnumrune.c";
	close("isalnumrune.c");

	print HEADER_OTHER > "isblankrune.c";
	print "int\nisblankrune(Rune r)\n{\n\treturn r == ' ' || r == '\\t';\n}" > "isblankrune.c";
	close("isblankrune.c");

	print HEADER_OTHER > "isprintrune.c";
	print "int\nisprintrune(Rune r)\n{\n\treturn !iscntrlrune(r) && (r != 0x2028) && (r != 0x2029) &&" > "isprintrune.c";
	print "\t       ((r < 0xFFF9) || (r > 0xFFFB));\n}" > "isprintrune.c";
	close("isprintrune.c");

	print HEADER_OTHER > "isgraphrune.c";
	print "int\nisgraphrune(Rune r)\n{\n\treturn !isspacerune(r) && isprintrune(r);\n}" > "isgraphrune.c";
	close("isgraphrune.c");

	print HEADER_OTHER > "ispunctrune.c";
	print "int\nispunctrune(Rune r)\n{\n\treturn isgraphrune(r) && !isalnumrune(r);\n}" > "ispunctrune.c";
	close("ispunctrune.c");

	# hexadecimal digits include the upper-case letters A-F
	print HEADER_OTHER > "isxdigitrune.c";
	print "int\nisxdigitrune(Rune r)\n{\n\treturn (r >= '0' && (r - '0') < 10) || (r >= 'a' && (r - 'a') < 6) ||\n\t       (r >= 'A' && (r - 'A') < 6);\n}" > "isxdigitrune.c";
	close("isxdigitrune.c");
}