utf8-encode.c (2238B)
1 /* See LICENSE file for copyright and license details. */ 2 #include <stddef.h> 3 #include <stdint.h> 4 #include <stdio.h> 5 #include <string.h> 6 7 #include "../grapheme.h" 8 #include "util.h" 9 10 static const struct { 11 uint_least32_t cp; /* input codepoint */ 12 char *exp_arr; /* expected UTF-8 byte sequence */ 13 size_t exp_len; /* expected length of UTF-8 sequence */ 14 } enc_test[] = { 15 { 16 /* invalid codepoint (UTF-16 surrogate half) */ 17 .cp = UINT32_C(0xD800), 18 .exp_arr = (char *)(unsigned char[]) { 0xEF, 0xBF, 0xBD }, 19 .exp_len = 3, 20 }, 21 { 22 /* invalid codepoint (UTF-16-unrepresentable) */ 23 .cp = UINT32_C(0x110000), 24 .exp_arr = (char *)(unsigned char[]) { 0xEF, 0xBF, 0xBD }, 25 .exp_len = 3, 26 }, 27 { 28 /* codepoint encoded to a 1-byte sequence */ 29 .cp = 0x01, 30 .exp_arr = (char *)(unsigned char[]) { 0x01 }, 31 .exp_len = 1, 32 }, 33 { 34 /* codepoint encoded to a 2-byte sequence */ 35 .cp = 0xFF, 36 .exp_arr = (char *)(unsigned char[]) { 0xC3, 0xBF }, 37 .exp_len = 2, 38 }, 39 { 40 /* codepoint encoded to a 3-byte sequence */ 41 .cp = 0xFFF, 42 .exp_arr = (char *)(unsigned char[]) { 0xE0, 0xBF, 0xBF }, 43 .exp_len = 3, 44 }, 45 { 46 /* codepoint encoded to a 4-byte sequence */ 47 .cp = UINT32_C(0xFFFFF), 48 .exp_arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF, 0xBF }, 49 .exp_len = 4, 50 }, 51 }; 52 53 int 54 main(int argc, char *argv[]) 55 { 56 size_t i, j, failed; 57 58 (void)argc; 59 60 /* UTF-8 encoder test */ 61 for (i = 0, failed = 0; i < LEN(enc_test); i++) { 62 char arr[4]; 63 size_t len; 64 65 len = grapheme_encode_utf8(enc_test[i].cp, arr, LEN(arr)); 66 67 if (len != enc_test[i].exp_len || 68 memcmp(arr, enc_test[i].exp_arr, len)) { 69 fprintf(stderr, 70 "%s, Failed test %zu: " 71 "Expected (", 72 argv[0], i); 73 for (j = 0; j < enc_test[i].exp_len; j++) { 74 fprintf(stderr, "0x%x", enc_test[i].exp_arr[j]); 75 if (j + 1 < enc_test[i].exp_len) { 76 fprintf(stderr, " "); 77 } 78 } 79 fprintf(stderr, "), but got ("); 80 for (j = 0; j < len; j++) { 81 fprintf(stderr, "0x%x", arr[j]); 82 if (j + 1 < len) { 83 fprintf(stderr, " "); 84 } 85 } 86 fprintf(stderr, ").\n"); 87 failed++; 88 } 89 } 90 printf("%s: %zu/%zu unit tests passed.\n", argv[0], 91 LEN(enc_test) - failed, LEN(enc_test)); 92 93 return (failed > 0) ? 1 : 0; 94 }