bidirectional-test.c (15968B)
1 /* See LICENSE file for copyright and license details. */ 2 #include <errno.h> 3 #include <inttypes.h> 4 #include <stddef.h> 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <string.h> 8 9 #include "../grapheme.h" 10 #include "util.h" 11 12 struct bidirectional_test { 13 uint_least32_t *cp; 14 size_t cplen; 15 enum grapheme_bidirectional_direction mode[3]; 16 size_t modelen; 17 enum grapheme_bidirectional_direction resolved; 18 int_least8_t *level; 19 int_least16_t *reorder; 20 size_t reorderlen; 21 }; 22 23 static const struct { 24 const char *class; 25 const uint_least32_t cp; 26 } classcpmap[] = { 27 { .class = "L", .cp = UINT32_C(0x0041) }, 28 { .class = "AL", .cp = UINT32_C(0x0608) }, 29 { .class = "AN", .cp = UINT32_C(0x0600) }, 30 { .class = "B", .cp = UINT32_C(0x000A) }, 31 { .class = "BN", .cp = UINT32_C(0x0000) }, 32 { .class = "CS", .cp = UINT32_C(0x002C) }, 33 { .class = "EN", .cp = UINT32_C(0x0030) }, 34 { .class = "ES", .cp = UINT32_C(0x002B) }, 35 { .class = "ET", .cp = UINT32_C(0x0023) }, 36 { .class = "FSI", .cp = UINT32_C(0x2068) }, 37 { .class = "LRE", .cp = UINT32_C(0x202A) }, 38 { .class = "LRI", .cp = UINT32_C(0x2066) }, 39 { .class = "LRO", .cp = UINT32_C(0x202D) }, 40 { .class = "NSM", .cp = UINT32_C(0x0300) }, 41 { .class = "ON", .cp = UINT32_C(0x0021) }, 42 { .class = "PDF", .cp = UINT32_C(0x202C) }, 43 { .class = "PDI", .cp = UINT32_C(0x2069) }, 44 { .class = "R", .cp = UINT32_C(0x05BE) }, 45 { .class = "RLE", .cp = UINT32_C(0x202B) }, 46 { .class = "RLI", .cp = UINT32_C(0x2067) }, 47 { .class = "RLO", .cp = UINT32_C(0x202E) }, 48 { .class = "S", .cp = UINT32_C(0x0009) }, 49 { .class = "WS", .cp = UINT32_C(0x000C) }, 50 }; 51 52 static int 53 classtocp(const char *str, size_t len, uint_least32_t *cp) 54 { 55 size_t i; 56 57 for (i = 0; i < LEN(classcpmap); i++) { 58 if (!strncmp(str, classcpmap[i].class, len)) { 59 *cp = classcpmap[i].cp; 60 return 0; 61 } 62 } 63 fprintf(stderr, "classtocp: unknown class string '%.*s'.\n", (int)len, 64 str); 65 66 return 1; 67 } 68 69 static int 70 parse_class_list(const char *str, uint_least32_t **cp, size_t *cplen) 71 { 72 size_t count, i; 73 const char *tmp1 = NULL, *tmp2 = NULL; 74 75 if (strlen(str) == 0) { 76 *cp = NULL; 77 *cplen = 0; 78 return 0; 79 } 80 81 /* count the number of spaces in the string and infer list length */ 82 for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; 83 count++, tmp1 = tmp2 + 1) { 84 ; 85 } 86 87 /* allocate resources */ 88 if (!(*cp = calloc((*cplen = count), sizeof(**cp)))) { 89 fprintf(stderr, "calloc: %s\n", strerror(errno)); 90 exit(1); 91 } 92 93 /* go through the string again, parsing the classes */ 94 for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) { 95 tmp2 = strchr(tmp1, ' '); 96 if (classtocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1), 97 &((*cp)[i]))) { 98 return 1; 99 } 100 if (tmp2 != NULL) { 101 tmp1 = tmp2 + 1; 102 } 103 } 104 105 return 0; 106 } 107 108 static int 109 strtolevel(const char *str, size_t len, int_least8_t *level) 110 { 111 size_t i; 112 113 if (len == 1 && str[0] == 'x') { 114 /* 115 * 'x' indicates those characters that are ignored. 116 * We indicate this with a level of -1 117 */ 118 *level = -1; 119 return 0; 120 } 121 122 if (len > 3) { 123 /* 124 * given we can only express (positive) numbers from 125 * 0..127, more than 3 digits means an excess 126 */ 127 goto toolarge; 128 } 129 130 /* check if the string is completely numerical */ 131 for (i = 0; i < len; i++) { 132 if (str[i] < '0' && str[i] > '9') { 133 fprintf(stderr, 134 "strtolevel: '%.*s' is not an integer.\n", 135 (int)len, str); 136 return 1; 137 } 138 } 139 140 if (len == 3) { 141 if (str[0] != '1' || str[1] > '2' || 142 (str[1] == '2' && str[2] > '7')) { 143 goto toolarge; 144 } 145 *level = (str[0] - '0') * 100 + (str[1] - '0') * 10 + 146 (str[2] - '0'); 147 } else if (len == 2) { 148 *level = (str[0] - '0') * 10 + (str[1] - '0'); 149 } else if (len == 1) { 150 *level = (str[0] - '0'); 151 } else { /* len == 0 */ 152 *level = 0; 153 } 154 155 return 0; 156 toolarge: 157 fprintf(stderr, "strtolevel: '%.*s' is too large.\n", (int)len, str); 158 return 1; 159 } 160 161 static int 162 strtoreorder(const char *str, size_t len, int_least16_t *reorder) 163 { 164 size_t i; 165 166 if (len == 1 && str[0] == 'x') { 167 /* 168 * 'x' indicates those characters that are ignored. 169 * We indicate this with a reorder of -1 170 */ 171 *reorder = -1; 172 return 0; 173 } 174 175 if (len > 3) { 176 /* 177 * given we want to only express (positive) numbers from 178 * 0..999 (at most!), more than 3 digits means an excess 179 */ 180 goto toolarge; 181 } 182 183 /* check if the string is completely numerical */ 184 for (i = 0; i < len; i++) { 185 if (str[i] < '0' && str[i] > '9') { 186 fprintf(stderr, 187 "strtoreorder: '%.*s' is not an integer.\n", 188 (int)len, str); 189 return 1; 190 } 191 } 192 193 if (len == 3) { 194 *reorder = (str[0] - '0') * 100 + (str[1] - '0') * 10 + 195 (str[2] - '0'); 196 } else if (len == 2) { 197 *reorder = (str[0] - '0') * 10 + (str[1] - '0'); 198 } else if (len == 1) { 199 *reorder = (str[0] - '0'); 200 } else { /* len == 0 */ 201 *reorder = 0; 202 } 203 204 return 0; 205 toolarge: 206 fprintf(stderr, "strtoreorder: '%.*s' is too large.\n", (int)len, str); 207 return 1; 208 } 209 210 static int 211 parse_level_list(const char *str, int_least8_t **level, size_t *levellen) 212 { 213 size_t count, i; 214 const char *tmp1 = NULL, *tmp2 = NULL; 215 216 if (strlen(str) == 0) { 217 *level = NULL; 218 *levellen = 0; 219 return 0; 220 } 221 222 /* count the number of spaces in the string and infer list length */ 223 for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; 224 count++, tmp1 = tmp2 + 1) { 225 ; 226 } 227 228 /* allocate resources */ 229 if (!(*level = calloc((*levellen = count), sizeof(**level)))) { 230 fprintf(stderr, "calloc: %s\n", strerror(errno)); 231 exit(1); 232 } 233 234 /* go through the string again, parsing the levels */ 235 for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) { 236 tmp2 = strchr(tmp1, ' '); 237 if (strtolevel(tmp1, 238 tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1), 239 &((*level)[i]))) { 240 return 1; 241 } 242 if (tmp2 != NULL) { 243 tmp1 = tmp2 + 1; 244 } 245 } 246 247 return 0; 248 } 249 250 static int 251 parse_reorder_list(const char *str, int_least16_t **reorder, size_t *reorderlen) 252 { 253 size_t count, i; 254 const char *tmp1 = NULL, *tmp2 = NULL; 255 256 if (strlen(str) == 0) { 257 *reorder = NULL; 258 *reorderlen = 0; 259 return 0; 260 } 261 262 /* count the number of spaces in the string and infer list length */ 263 for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; 264 count++, tmp1 = tmp2 + 1) { 265 ; 266 } 267 268 /* allocate resources */ 269 if (!(*reorder = calloc((*reorderlen = count), sizeof(**reorder)))) { 270 fprintf(stderr, "calloc: %s\n", strerror(errno)); 271 exit(1); 272 } 273 274 /* go through the string again, parsing the reorders */ 275 for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) { 276 tmp2 = strchr(tmp1, ' '); 277 if (strtoreorder(tmp1, 278 tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1), 279 &((*reorder)[i]))) { 280 return 1; 281 } 282 if (tmp2 != NULL) { 283 tmp1 = tmp2 + 1; 284 } 285 } 286 287 return 0; 288 } 289 290 static void 291 bidirectional_test_list_print(const struct bidirectional_test *test, 292 size_t testlen, const char *identifier, 293 const char *progname) 294 { 295 size_t i, j; 296 297 printf("/* Automatically generated by %s */\n" 298 "#include <stdint.h>\n#include <stddef.h>\n\n" 299 "#include \"../grapheme.h\"\n\n", 300 progname); 301 302 printf("static const struct {\n" 303 "\tuint_least32_t *cp;\n" 304 "\tsize_t cplen;\n" 305 "\tenum grapheme_bidirectional_direction *mode;\n" 306 "\tsize_t modelen;\n" 307 "\tenum grapheme_bidirectional_direction resolved;\n" 308 "\tint_least8_t *level;\n" 309 "\tint_least16_t *reorder;\n" 310 "\tsize_t reorderlen;\n} %s[] = {\n", 311 identifier); 312 for (i = 0; i < testlen; i++) { 313 printf("\t{\n"); 314 315 printf("\t\t.cp = (uint_least32_t[]){"); 316 for (j = 0; j < test[i].cplen; j++) { 317 printf(" UINT32_C(0x%06X)", test[i].cp[j]); 318 if (j + 1 < test[i].cplen) { 319 putchar(','); 320 } 321 } 322 printf(" },\n"); 323 printf("\t\t.cplen = %zu,\n", test[i].cplen); 324 325 printf("\t\t.mode = (enum " 326 "grapheme_bidirectional_direction[]){"); 327 for (j = 0; j < test[i].modelen; j++) { 328 if (test[i].mode[j] == 329 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL) { 330 printf(" GRAPHEME_BIDIRECTIONAL_DIRECTION_" 331 "NEUTRAL"); 332 } else if (test[i].mode[j] == 333 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR) { 334 printf(" GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR"); 335 } else if (test[i].mode[j] == 336 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL) { 337 printf(" GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL"); 338 } 339 if (j + 1 < test[i].modelen) { 340 putchar(','); 341 } 342 } 343 printf(" },\n"); 344 printf("\t\t.modelen = %zu,\n", test[i].modelen); 345 346 printf("\t\t.resolved = "); 347 if (test[i].resolved == 348 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL) { 349 printf("GRAPHEME_BIDIRECTIONAL_DIRECTION_" 350 "NEUTRAL"); 351 } else if (test[i].resolved == 352 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR) { 353 printf("GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR"); 354 } else if (test[i].resolved == 355 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL) { 356 printf("GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL"); 357 } 358 printf(",\n"); 359 360 printf("\t\t.level = (int_least8_t[]){"); 361 for (j = 0; j < test[i].cplen; j++) { 362 printf(" %" PRIdLEAST8, test[i].level[j]); 363 if (j + 1 < test[i].cplen) { 364 putchar(','); 365 } 366 } 367 printf(" },\n"); 368 369 printf("\t\t.reorder = "); 370 if (test[i].reorderlen > 0) { 371 printf("(int_least16_t[]){"); 372 for (j = 0; j < test[i].reorderlen; j++) { 373 printf(" %" PRIdLEAST16, test[i].reorder[j]); 374 if (j + 1 < test[i].reorderlen) { 375 putchar(','); 376 } 377 } 378 printf(" },\n"); 379 } else { 380 printf("NULL,\n"); 381 } 382 printf("\t\t.reorderlen = %zu,\n", test[i].reorderlen); 383 384 printf("\t},\n"); 385 } 386 printf("};\n"); 387 } 388 389 static struct bidirectional_test *test; 390 static size_t testlen; 391 392 static int_least8_t *current_level; 393 static size_t current_level_len; 394 static int_least16_t *current_reorder; 395 static size_t current_reorder_len; 396 397 static int 398 test_callback(const char *file, char **field, size_t nfields, char *comment, 399 void *payload) 400 { 401 char *tmp; 402 403 (void)file; 404 (void)comment; 405 (void)payload; 406 407 /* we either get a line beginning with an '@', or an input line */ 408 if (nfields > 0 && field[0][0] == '@') { 409 if (!strncmp(field[0], "@Levels:", sizeof("@Levels:") - 1)) { 410 tmp = field[0] + sizeof("@Levels:") - 1; 411 for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t'); 412 tmp++) { 413 ; 414 } 415 free(current_level); 416 parse_level_list(tmp, ¤t_level, 417 ¤t_level_len); 418 } else if (!strncmp(field[0], 419 "@Reorder:", sizeof("@Reorder:") - 1)) { 420 tmp = field[0] + sizeof("@Reorder:") - 1; 421 for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t'); 422 tmp++) { 423 ; 424 } 425 free(current_reorder); 426 parse_reorder_list(tmp, ¤t_reorder, 427 ¤t_reorder_len); 428 } else { 429 fprintf(stderr, "Unknown @-input-line.\n"); 430 exit(1); 431 } 432 } else { 433 if (nfields < 2) { 434 /* discard any line that does not have at least 2 fields 435 */ 436 return 0; 437 } 438 439 /* extend test array */ 440 if (!(test = realloc(test, (++testlen) * sizeof(*test)))) { 441 fprintf(stderr, "realloc: %s\n", strerror(errno)); 442 exit(1); 443 } 444 445 /* parse field data */ 446 parse_class_list(field[0], &(test[testlen - 1].cp), 447 &(test[testlen - 1].cplen)); 448 449 /* copy current level- and reorder-arrays */ 450 if (!(test[testlen - 1].level = 451 calloc(current_level_len, 452 sizeof(*(test[testlen - 1].level))))) { 453 fprintf(stderr, "calloc: %s\n", strerror(errno)); 454 exit(1); 455 } 456 memcpy(test[testlen - 1].level, current_level, 457 current_level_len * sizeof(*(test[testlen - 1].level))); 458 459 if (!(test[testlen - 1].reorder = 460 calloc(current_reorder_len, 461 sizeof(*(test[testlen - 1].reorder))))) { 462 fprintf(stderr, "calloc: %s\n", strerror(errno)); 463 exit(1); 464 } 465 if (current_reorder != NULL) { 466 memcpy(test[testlen - 1].reorder, current_reorder, 467 current_reorder_len * 468 sizeof(*(test[testlen - 1].reorder))); 469 } 470 test[testlen - 1].reorderlen = current_reorder_len; 471 472 if (current_level_len != test[testlen - 1].cplen) { 473 fprintf(stderr, 474 "mismatch between string and level lengths.\n"); 475 exit(1); 476 } 477 478 /* parse paragraph-level-bitset */ 479 if (strlen(field[1]) != 1) { 480 fprintf(stderr, "malformed paragraph-level-bitset.\n"); 481 exit(1); 482 } else if (field[1][0] == '2') { 483 test[testlen - 1].mode[0] = 484 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR; 485 test[testlen - 1].modelen = 1; 486 } else if (field[1][0] == '3') { 487 /* auto=0 and LTR=1 */ 488 test[testlen - 1].mode[0] = 489 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL; 490 test[testlen - 1].mode[1] = 491 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR; 492 test[testlen - 1].modelen = 2; 493 } else if (field[1][0] == '4') { 494 test[testlen - 1].mode[0] = 495 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL; 496 test[testlen - 1].modelen = 1; 497 } else if (field[1][0] == '5') { 498 test[testlen - 1].mode[0] = 499 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL; 500 test[testlen - 1].mode[1] = 501 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL; 502 test[testlen - 1].modelen = 2; 503 } else if (field[1][0] == '7') { 504 test[testlen - 1].mode[0] = 505 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL; 506 test[testlen - 1].mode[1] = 507 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR; 508 test[testlen - 1].mode[2] = 509 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL; 510 test[testlen - 1].modelen = 3; 511 } else { 512 fprintf(stderr, 513 "unhandled paragraph-level-bitset %s.\n", 514 field[1]); 515 exit(1); 516 } 517 518 /* the resolved paragraph level is always neutral as the test 519 * file does not specify it */ 520 test[testlen - 1].resolved = 521 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL; 522 } 523 524 return 0; 525 } 526 527 static int 528 character_test_callback(const char *file, char **field, size_t nfields, 529 char *comment, void *payload) 530 { 531 size_t tmp; 532 533 (void)file; 534 (void)comment; 535 (void)payload; 536 537 if (nfields < 5) { 538 /* discard any line that does not have at least 5 fields */ 539 return 0; 540 } 541 542 /* extend test array */ 543 if (!(test = realloc(test, (++testlen) * sizeof(*test)))) { 544 fprintf(stderr, "realloc: %s\n", strerror(errno)); 545 exit(1); 546 } 547 548 /* parse field data */ 549 parse_cp_list(field[0], &(test[testlen - 1].cp), 550 &(test[testlen - 1].cplen)); 551 parse_level_list(field[3], &(test[testlen - 1].level), &tmp); 552 parse_reorder_list(field[4], &(test[testlen - 1].reorder), 553 &(test[testlen - 1].reorderlen)); 554 555 /* parse paragraph-level-mode */ 556 if (strlen(field[1]) != 1) { 557 fprintf(stderr, "malformed paragraph-level-setting.\n"); 558 exit(1); 559 } else if (field[1][0] == '0') { 560 test[testlen - 1].mode[0] = 561 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR; 562 } else if (field[1][0] == '1') { 563 test[testlen - 1].mode[0] = 564 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL; 565 } else if (field[1][0] == '2') { 566 test[testlen - 1].mode[0] = 567 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL; 568 } else { 569 fprintf(stderr, "unhandled paragraph-level-setting.\n"); 570 exit(1); 571 } 572 test[testlen - 1].modelen = 1; 573 574 /* parse resolved paragraph level */ 575 if (strlen(field[2]) != 1) { 576 fprintf(stderr, "malformed resolved paragraph level.\n"); 577 exit(1); 578 } else if (field[2][0] == '0') { 579 test[testlen - 1].resolved = 580 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR; 581 } else if (field[2][0] == '1') { 582 test[testlen - 1].resolved = 583 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL; 584 } else { 585 fprintf(stderr, "unhandled resolved paragraph level.\n"); 586 exit(1); 587 } 588 589 if (tmp != test[testlen - 1].cplen) { 590 fprintf(stderr, "mismatch between string and level lengths.\n"); 591 exit(1); 592 } 593 594 return 0; 595 } 596 597 int 598 main(int argc, char *argv[]) 599 { 600 (void)argc; 601 602 parse_file_with_callback("data/BidiTest.txt", test_callback, NULL); 603 parse_file_with_callback("data/BidiCharacterTest.txt", 604 character_test_callback, NULL); 605 bidirectional_test_list_print(test, testlen, "bidirectional_test", 606 argv[0]); 607 608 return 0; 609 }