libgrapheme

unicode string library
git clone git://git.suckless.org/libgrapheme
Log | Files | Refs | README | LICENSE

case.c (16477B)


      1 /* See LICENSE file for copyright and license details. */
      2 #include <stdbool.h>
      3 #include <stdint.h>
      4 #include <stdio.h>
      5 #include <string.h>
      6 
      7 #include "../grapheme.h"
      8 #include "util.h"
      9 
     10 struct unit_test_is_case_utf8 {
     11 	const char *description;
     12 
     13 	struct {
     14 		const char *src;
     15 		size_t srclen;
     16 	} input;
     17 
     18 	struct {
     19 		bool ret;
     20 		size_t caselen;
     21 	} output;
     22 };
     23 
     24 struct unit_test_to_case_utf8 {
     25 	const char *description;
     26 
     27 	struct {
     28 		const char *src;
     29 		size_t srclen;
     30 		size_t destlen;
     31 	} input;
     32 
     33 	struct {
     34 		const char *dest;
     35 		size_t ret;
     36 	} output;
     37 };
     38 
     39 static const struct unit_test_is_case_utf8 is_lowercase_utf8[] = {
     40 	{
     41 		.description = "empty input",
     42 		.input = { "", 0 },
     43 		.output = { true, 0 },
     44 	},
     45 	{
     46 		.description = "one character, violation",
     47 		.input = { "A", 1 },
     48 		.output = { false, 0 },
     49 	},
     50 	{
     51 		.description = "one character, confirmation",
     52 		.input = { "\xC3\x9F", 2 },
     53 		.output = { true, 2 },
     54 	},
     55 	{
     56 		.description = "one character, violation, NUL-terminated",
     57 		.input = { "A", SIZE_MAX },
     58 		.output = { false, 0 },
     59 	},
     60 	{
     61 		.description = "one character, confirmation, NUL-terminated",
     62 		.input = { "\xC3\x9F", SIZE_MAX },
     63 		.output = { true, 2 },
     64 	},
     65 	{
     66 		.description = "one word, violation",
     67 		.input = { "Hello", 5 },
     68 		.output = { false, 0 },
     69 	},
     70 	{
     71 		.description = "one word, partial confirmation",
     72 		.input = { "gru"
     73 	                   "\xC3\x9F"
     74 	                   "fOrmel",
     75 	                   11 },
     76 		.output = { false, 6 },
     77 	},
     78 	{
     79 		.description = "one word, full confirmation",
     80 		.input = { "gru"
     81 	                   "\xC3\x9F"
     82 	                   "formel",
     83 	                   11 },
     84 		.output = { true, 11 },
     85 	},
     86 	{
     87 		.description = "one word, violation, NUL-terminated",
     88 		.input = { "Hello", SIZE_MAX },
     89 		.output = { false, 0 },
     90 	},
     91 	{
     92 		.description = "one word, partial confirmation, NUL-terminated",
     93 		.input = { "gru"
     94 	                   "\xC3\x9F"
     95 	                   "fOrmel",
     96 	                   SIZE_MAX },
     97 		.output = { false, 6 },
     98 	},
     99 	{
    100 		.description = "one word, full confirmation, NUL-terminated",
    101 		.input = { "gru"
    102 	                   "\xC3\x9F"
    103 	                   "formel",
    104 	                   SIZE_MAX },
    105 		.output = { true, 11 },
    106 	},
    107 };
    108 
    109 static const struct unit_test_is_case_utf8 is_uppercase_utf8[] = {
    110 	{
    111 		.description = "empty input",
    112 		.input = { "", 0 },
    113 		.output = { true, 0 },
    114 	},
    115 	{
    116 		.description = "one character, violation",
    117 		.input = { "\xC3\x9F", 2 },
    118 		.output = { false, 0 },
    119 	},
    120 	{
    121 		.description = "one character, confirmation",
    122 		.input = { "A", 1 },
    123 		.output = { true, 1 },
    124 	},
    125 	{
    126 		.description = "one character, violation, NUL-terminated",
    127 		.input = { "\xC3\x9F", SIZE_MAX },
    128 		.output = { false, 0 },
    129 	},
    130 	{
    131 		.description = "one character, confirmation, NUL-terminated",
    132 		.input = { "A", SIZE_MAX },
    133 		.output = { true, 1 },
    134 	},
    135 	{
    136 		.description = "one word, violation",
    137 		.input = { "hello", 5 },
    138 		.output = { false, 0 },
    139 	},
    140 	{
    141 		.description = "one word, partial confirmation",
    142 		.input = { "GRU"
    143 	                   "\xC3\x9F"
    144 	                   "formel",
    145 	                   11 },
    146 		.output = { false, 3 },
    147 	},
    148 	{
    149 		.description = "one word, full confirmation",
    150 		.input = { "HELLO", 5 },
    151 		.output = { true, 5 },
    152 	},
    153 	{
    154 		.description = "one word, violation, NUL-terminated",
    155 		.input = { "hello", SIZE_MAX },
    156 		.output = { false, 0 },
    157 	},
    158 	{
    159 		.description = "one word, partial confirmation, NUL-terminated",
    160 		.input = { "GRU"
    161 	                   "\xC3\x9F"
    162 	                   "formel",
    163 	                   SIZE_MAX },
    164 		.output = { false, 3 },
    165 	},
    166 	{
    167 		.description = "one word, full confirmation, NUL-terminated",
    168 		.input = { "HELLO", SIZE_MAX },
    169 		.output = { true, 5 },
    170 	},
    171 };
    172 
    173 static const struct unit_test_is_case_utf8 is_titlecase_utf8[] = {
    174 	{
    175 		.description = "empty input",
    176 		.input = { "", 0 },
    177 		.output = { true, 0 },
    178 	},
    179 	{
    180 		.description = "one character, violation",
    181 		.input = { "\xC3\x9F", 2 },
    182 		.output = { false, 0 },
    183 	},
    184 	{
    185 		.description = "one character, confirmation",
    186 		.input = { "A", 1 },
    187 		.output = { true, 1 },
    188 	},
    189 	{
    190 		.description = "one character, violation, NUL-terminated",
    191 		.input = { "\xC3\x9F", SIZE_MAX },
    192 		.output = { false, 0 },
    193 	},
    194 	{
    195 		.description = "one character, confirmation, NUL-terminated",
    196 		.input = { "A", SIZE_MAX },
    197 		.output = { true, 1 },
    198 	},
    199 	{
    200 		.description = "one word, violation",
    201 		.input = { "hello", 5 },
    202 		.output = { false, 0 },
    203 	},
    204 	{
    205 		.description = "one word, partial confirmation",
    206 		.input = { "Gru"
    207 	                   "\xC3\x9F"
    208 	                   "fOrmel",
    209 	                   11 },
    210 		.output = { false, 6 },
    211 	},
    212 	{
    213 		.description = "one word, full confirmation",
    214 		.input = { "Gru"
    215 	                   "\xC3\x9F"
    216 	                   "formel",
    217 	                   11 },
    218 		.output = { true, 11 },
    219 	},
    220 	{
    221 		.description = "one word, violation, NUL-terminated",
    222 		.input = { "hello", SIZE_MAX },
    223 		.output = { false, 0 },
    224 	},
    225 	{
    226 		.description = "one word, partial confirmation, NUL-terminated",
    227 		.input = { "Gru"
    228 	                   "\xC3\x9F"
    229 	                   "fOrmel",
    230 	                   SIZE_MAX },
    231 		.output = { false, 6 },
    232 	},
    233 	{
    234 		.description = "one word, full confirmation, NUL-terminated",
    235 		.input = { "Gru"
    236 	                   "\xC3\x9F"
    237 	                   "formel",
    238 	                   SIZE_MAX },
    239 		.output = { true, 11 },
    240 	},
    241 	{
    242 		.description = "multiple words, partial confirmation",
    243 		.input = { "Hello Gru"
    244 	                   "\xC3\x9F"
    245 	                   "fOrmel!",
    246 	                   18 },
    247 		.output = { false, 12 },
    248 	},
    249 	{
    250 		.description = "multiple words, full confirmation",
    251 		.input = { "Hello Gru"
    252 	                   "\xC3\x9F"
    253 	                   "formel!",
    254 	                   18 },
    255 		.output = { true, 18 },
    256 	},
    257 	{
    258 		.description =
    259 			"multiple words, partial confirmation, NUL-terminated",
    260 		.input = { "Hello Gru"
    261 	                   "\xC3\x9F"
    262 	                   "fOrmel!",
    263 	                   SIZE_MAX },
    264 		.output = { false, 12 },
    265 	},
    266 	{
    267 		.description =
    268 			"multiple words, full confirmation, NUL-terminated",
    269 		.input = { "Hello Gru"
    270 	                   "\xC3\x9F"
    271 	                   "formel!",
    272 	                   SIZE_MAX },
    273 		.output = { true, 18 },
    274 	},
    275 };
    276 
    277 static const struct unit_test_to_case_utf8 to_lowercase_utf8[] = {
    278 	{
    279 		.description = "empty input",
    280 		.input = { "", 0, 10 },
    281 		.output = { "", 0 },
    282 	},
    283 	{
    284 		.description = "empty output",
    285 		.input = { "hello", 5, 0 },
    286 		.output = { "", 5 },
    287 	},
    288 	{
    289 		.description = "one character, conversion",
    290 		.input = { "A", 1, 10 },
    291 		.output = { "a", 1 },
    292 	},
    293 	{
    294 		.description = "one character, no conversion",
    295 		.input = { "\xC3\x9F", 2, 10 },
    296 		.output = { "\xC3\x9F", 2 },
    297 	},
    298 	{
    299 		.description = "one character, conversion, truncation",
    300 		.input = { "A", 1, 0 },
    301 		.output = { "", 1 },
    302 	},
    303 	{
    304 		.description = "one character, conversion, NUL-terminated",
    305 		.input = { "A", SIZE_MAX, 10 },
    306 		.output = { "a", 1 },
    307 	},
    308 	{
    309 		.description = "one character, no conversion, NUL-terminated",
    310 		.input = { "\xC3\x9F", SIZE_MAX, 10 },
    311 		.output = { "\xC3\x9F", 2 },
    312 	},
    313 	{
    314 		.description =
    315 			"one character, conversion, NUL-terminated, truncation",
    316 		.input = { "A", SIZE_MAX, 0 },
    317 		.output = { "", 1 },
    318 	},
    319 	{
    320 		.description = "one word, conversion",
    321 		.input = { "wOrD", 4, 10 },
    322 		.output = { "word", 4 },
    323 	},
    324 	{
    325 		.description = "one word, no conversion",
    326 		.input = { "word", 4, 10 },
    327 		.output = { "word", 4 },
    328 	},
    329 	{
    330 		.description = "one word, conversion, truncation",
    331 		.input = { "wOrD", 4, 3 },
    332 		.output = { "wo", 4 },
    333 	},
    334 	{
    335 		.description = "one word, conversion, NUL-terminated",
    336 		.input = { "wOrD", SIZE_MAX, 10 },
    337 		.output = { "word", 4 },
    338 	},
    339 	{
    340 		.description = "one word, no conversion, NUL-terminated",
    341 		.input = { "word", SIZE_MAX, 10 },
    342 		.output = { "word", 4 },
    343 	},
    344 	{
    345 		.description =
    346 			"one word, conversion, NUL-terminated, truncation",
    347 		.input = { "wOrD", SIZE_MAX, 3 },
    348 		.output = { "wo", 4 },
    349 	},
    350 };
    351 
    352 static const struct unit_test_to_case_utf8 to_uppercase_utf8[] = {
    353 	{
    354 		.description = "empty input",
    355 		.input = { "", 0, 10 },
    356 		.output = { "", 0 },
    357 	},
    358 	{
    359 		.description = "empty output",
    360 		.input = { "hello", 5, 0 },
    361 		.output = { "", 5 },
    362 	},
    363 	{
    364 		.description = "one character, conversion",
    365 		.input = { "\xC3\x9F", 2, 10 },
    366 		.output = { "SS", 2 },
    367 	},
    368 	{
    369 		.description = "one character, no conversion",
    370 		.input = { "A", 1, 10 },
    371 		.output = { "A", 1 },
    372 	},
    373 	{
    374 		.description = "one character, conversion, truncation",
    375 		.input = { "\xC3\x9F", 2, 0 },
    376 		.output = { "", 2 },
    377 	},
    378 	{
    379 		.description = "one character, conversion, NUL-terminated",
    380 		.input = { "\xC3\x9F", SIZE_MAX, 10 },
    381 		.output = { "SS", 2 },
    382 	},
    383 	{
    384 		.description = "one character, no conversion, NUL-terminated",
    385 		.input = { "A", SIZE_MAX, 10 },
    386 		.output = { "A", 1 },
    387 	},
    388 	{
    389 		.description =
    390 			"one character, conversion, NUL-terminated, truncation",
    391 		.input = { "\xC3\x9F", SIZE_MAX, 0 },
    392 		.output = { "", 2 },
    393 	},
    394 	{
    395 		.description = "one word, conversion",
    396 		.input = { "gRu"
    397 	                   "\xC3\x9F"
    398 	                   "fOrMel",
    399 	                   11, 15 },
    400 		.output = { "GRUSSFORMEL", 11 },
    401 	},
    402 	{
    403 		.description = "one word, no conversion",
    404 		.input = { "WORD", 4, 10 },
    405 		.output = { "WORD", 4 },
    406 	},
    407 	{
    408 		.description = "one word, conversion, truncation",
    409 		.input = { "gRu"
    410 	                   "\xC3\x9F"
    411 	                   "formel",
    412 	                   11, 5 },
    413 		.output = { "GRUS", 11 },
    414 	},
    415 	{
    416 		.description = "one word, conversion, NUL-terminated",
    417 		.input = { "gRu"
    418 	                   "\xC3\x9F"
    419 	                   "formel",
    420 	                   SIZE_MAX, 15 },
    421 		.output = { "GRUSSFORMEL", 11 },
    422 	},
    423 	{
    424 		.description = "one word, no conversion, NUL-terminated",
    425 		.input = { "WORD", SIZE_MAX, 10 },
    426 		.output = { "WORD", 4 },
    427 	},
    428 	{
    429 		.description =
    430 			"one word, conversion, NUL-terminated, truncation",
    431 		.input = { "gRu"
    432 	                   "\xC3\x9F"
    433 	                   "formel",
    434 	                   SIZE_MAX, 5 },
    435 		.output = { "GRUS", 11 },
    436 	},
    437 };
    438 
    439 static const struct unit_test_to_case_utf8 to_titlecase_utf8[] = {
    440 	{
    441 		.description = "empty input",
    442 		.input = { "", 0, 10 },
    443 		.output = { "", 0 },
    444 	},
    445 	{
    446 		.description = "empty output",
    447 		.input = { "hello", 5, 0 },
    448 		.output = { "", 5 },
    449 	},
    450 	{
    451 		.description = "one character, conversion",
    452 		.input = { "a", 1, 10 },
    453 		.output = { "A", 1 },
    454 	},
    455 	{
    456 		.description = "one character, no conversion",
    457 		.input = { "A", 1, 10 },
    458 		.output = { "A", 1 },
    459 	},
    460 	{
    461 		.description = "one character, conversion, truncation",
    462 		.input = { "a", 1, 0 },
    463 		.output = { "", 1 },
    464 	},
    465 	{
    466 		.description = "one character, conversion, NUL-terminated",
    467 		.input = { "a", SIZE_MAX, 10 },
    468 		.output = { "A", 1 },
    469 	},
    470 	{
    471 		.description = "one character, no conversion, NUL-terminated",
    472 		.input = { "A", SIZE_MAX, 10 },
    473 		.output = { "A", 1 },
    474 	},
    475 	{
    476 		.description =
    477 			"one character, conversion, NUL-terminated, truncation",
    478 		.input = { "a", SIZE_MAX, 0 },
    479 		.output = { "", 1 },
    480 	},
    481 	{
    482 		.description = "one word, conversion",
    483 		.input = { "heLlo", 5, 10 },
    484 		.output = { "Hello", 5 },
    485 	},
    486 	{
    487 		.description = "one word, no conversion",
    488 		.input = { "Hello", 5, 10 },
    489 		.output = { "Hello", 5 },
    490 	},
    491 	{
    492 		.description = "one word, conversion, truncation",
    493 		.input = { "heLlo", 5, 2 },
    494 		.output = { "H", 5 },
    495 	},
    496 	{
    497 		.description = "one word, conversion, NUL-terminated",
    498 		.input = { "heLlo", SIZE_MAX, 10 },
    499 		.output = { "Hello", 5 },
    500 	},
    501 	{
    502 		.description = "one word, no conversion, NUL-terminated",
    503 		.input = { "Hello", SIZE_MAX, 10 },
    504 		.output = { "Hello", 5 },
    505 	},
    506 	{
    507 		.description =
    508 			"one word, conversion, NUL-terminated, truncation",
    509 		.input = { "heLlo", SIZE_MAX, 3 },
    510 		.output = { "He", 5 },
    511 	},
    512 	{
    513 		.description = "two words, conversion",
    514 		.input = { "heLlo wORLd!", 12, 20 },
    515 		.output = { "Hello World!", 12 },
    516 	},
    517 	{
    518 		.description = "two words, no conversion",
    519 		.input = { "Hello World!", 12, 20 },
    520 		.output = { "Hello World!", 12 },
    521 	},
    522 	{
    523 		.description = "two words, conversion, truncation",
    524 		.input = { "heLlo wORLd!", 12, 8 },
    525 		.output = { "Hello W", 12 },
    526 	},
    527 	{
    528 		.description = "two words, conversion, NUL-terminated",
    529 		.input = { "heLlo wORLd!", SIZE_MAX, 20 },
    530 		.output = { "Hello World!", 12 },
    531 	},
    532 	{
    533 		.description = "two words, no conversion, NUL-terminated",
    534 		.input = { "Hello World!", SIZE_MAX, 20 },
    535 		.output = { "Hello World!", 12 },
    536 	},
    537 	{
    538 		.description =
    539 			"two words, conversion, NUL-terminated, truncation",
    540 		.input = { "heLlo wORLd!", SIZE_MAX, 4 },
    541 		.output = { "Hel", 12 },
    542 	},
    543 };
    544 
    545 static int
    546 unit_test_callback_is_case_utf8(const void *t, size_t off, const char *name,
    547                                 const char *argv0)
    548 {
    549 	const struct unit_test_is_case_utf8 *test =
    550 		(const struct unit_test_is_case_utf8 *)t + off;
    551 	bool ret = false;
    552 	size_t caselen = 0x7f;
    553 
    554 	if (t == is_lowercase_utf8) {
    555 		ret = grapheme_is_lowercase_utf8(test->input.src,
    556 		                                 test->input.srclen, &caselen);
    557 	} else if (t == is_uppercase_utf8) {
    558 		ret = grapheme_is_uppercase_utf8(test->input.src,
    559 		                                 test->input.srclen, &caselen);
    560 	} else if (t == is_titlecase_utf8) {
    561 		ret = grapheme_is_titlecase_utf8(test->input.src,
    562 		                                 test->input.srclen, &caselen);
    563 
    564 	} else {
    565 		goto err;
    566 	}
    567 
    568 	/* check results */
    569 	if (ret != test->output.ret || caselen != test->output.caselen) {
    570 		goto err;
    571 	}
    572 
    573 	return 0;
    574 err:
    575 	fprintf(stderr,
    576 	        "%s: %s: Failed unit test %zu \"%s\" "
    577 	        "(returned (%s, %zu) instead of (%s, %zu)).\n",
    578 	        argv0, name, off, test->description, ret ? "true" : "false",
    579 	        caselen, test->output.ret ? "true" : "false",
    580 	        test->output.caselen);
    581 	return 1;
    582 }
    583 
    584 static int
    585 unit_test_callback_to_case_utf8(const void *t, size_t off, const char *name,
    586                                 const char *argv0)
    587 {
    588 	const struct unit_test_to_case_utf8 *test =
    589 		(const struct unit_test_to_case_utf8 *)t + off;
    590 	size_t ret = 0, i;
    591 	char buf[512];
    592 
    593 	/* fill the array with canary values */
    594 	memset(buf, 0x7f, LEN(buf));
    595 
    596 	if (t == to_lowercase_utf8) {
    597 		ret = grapheme_to_lowercase_utf8(test->input.src,
    598 		                                 test->input.srclen, buf,
    599 		                                 test->input.destlen);
    600 	} else if (t == to_uppercase_utf8) {
    601 		ret = grapheme_to_uppercase_utf8(test->input.src,
    602 		                                 test->input.srclen, buf,
    603 		                                 test->input.destlen);
    604 	} else if (t == to_titlecase_utf8) {
    605 		ret = grapheme_to_titlecase_utf8(test->input.src,
    606 		                                 test->input.srclen, buf,
    607 		                                 test->input.destlen);
    608 	} else {
    609 		goto err;
    610 	}
    611 
    612 	/* check results */
    613 	if (ret != test->output.ret ||
    614 	    memcmp(buf, test->output.dest,
    615 	           MIN(test->input.destlen, test->output.ret))) {
    616 		goto err;
    617 	}
    618 
    619 	/* check that none of the canary values have been overwritten */
    620 	for (i = test->input.destlen; i < LEN(buf); i++) {
    621 		if (buf[i] != 0x7f) {
    622 			goto err;
    623 		}
    624 	}
    625 
    626 	return 0;
    627 err:
    628 	fprintf(stderr,
    629 	        "%s: %s: Failed unit test %zu \"%s\" "
    630 	        "(returned (\"%.*s\", %zu) instead of (\"%.*s\", %zu)).\n",
    631 	        argv0, name, off, test->description, (int)ret, buf, ret,
    632 	        (int)test->output.ret, test->output.dest, test->output.ret);
    633 	return 1;
    634 }
    635 
    636 int
    637 main(int argc, char *argv[])
    638 {
    639 	(void)argc;
    640 
    641 	return run_unit_tests(unit_test_callback_is_case_utf8,
    642 	                      is_lowercase_utf8, LEN(is_lowercase_utf8),
    643 	                      "grapheme_is_lowercase_utf8", argv[0]) +
    644 	       run_unit_tests(unit_test_callback_is_case_utf8,
    645 	                      is_uppercase_utf8, LEN(is_uppercase_utf8),
    646 	                      "grapheme_is_uppercase_utf8", argv[0]) +
    647 	       run_unit_tests(unit_test_callback_is_case_utf8,
    648 	                      is_titlecase_utf8, LEN(is_titlecase_utf8),
    649 	                      "grapheme_is_titlecase_utf8", argv[0]) +
    650 	       run_unit_tests(unit_test_callback_to_case_utf8,
    651 	                      to_lowercase_utf8, LEN(to_lowercase_utf8),
    652 	                      "grapheme_to_lowercase_utf8", argv[0]) +
    653 	       run_unit_tests(unit_test_callback_to_case_utf8,
    654 	                      to_uppercase_utf8, LEN(to_uppercase_utf8),
    655 	                      "grapheme_to_uppercase_utf8", argv[0]) +
    656 	       run_unit_tests(unit_test_callback_to_case_utf8,
    657 	                      to_titlecase_utf8, LEN(to_titlecase_utf8),
    658 	                      "grapheme_to_titlecase_utf8", argv[0]);
    659 }