libgrapheme

unicode string library
git clone git://git.suckless.org/libgrapheme
Log | Files | Refs | README | LICENSE

utf8-encode.c (2238B)


      1 /* See LICENSE file for copyright and license details. */
      2 #include <stddef.h>
      3 #include <stdint.h>
      4 #include <stdio.h>
      5 #include <string.h>
      6 
      7 #include "../grapheme.h"
      8 #include "util.h"
      9 
     /*
      * Test vectors for the UTF-8 encoder: each entry pairs an input
      * codepoint with the byte sequence the encoder is expected to emit.
      *
      * The expected bytes are stored as unsigned char so that values
      * >= 0x80 keep their numeric value on platforms where plain char is
      * signed, and so that no cast between character types is needed.
      */
     static const struct {
         uint_least32_t cp;            /* input codepoint */
         const unsigned char *exp_arr; /* expected UTF-8 byte sequence */
         size_t exp_len;               /* expected length of UTF-8 sequence */
     } enc_test[] = {
         {
             /*
              * invalid codepoint (UTF-16 surrogate half);
              * EF BF BD is the UTF-8 encoding of U+FFFD
              */
             .cp = UINT32_C(0xD800),
             .exp_arr = (const unsigned char[]){ 0xEF, 0xBF, 0xBD },
             .exp_len = 3,
         },
         {
             /* invalid codepoint (UTF-16-unrepresentable) */
             .cp = UINT32_C(0x110000),
             .exp_arr = (const unsigned char[]){ 0xEF, 0xBF, 0xBD },
             .exp_len = 3,
         },
         {
             /* codepoint encoded to a 1-byte sequence */
             .cp = 0x01,
             .exp_arr = (const unsigned char[]){ 0x01 },
             .exp_len = 1,
         },
         {
             /* codepoint encoded to a 2-byte sequence */
             .cp = 0xFF,
             .exp_arr = (const unsigned char[]){ 0xC3, 0xBF },
             .exp_len = 2,
         },
         {
             /* codepoint encoded to a 3-byte sequence */
             .cp = 0xFFF,
             .exp_arr = (const unsigned char[]){ 0xE0, 0xBF, 0xBF },
             .exp_len = 3,
         },
         {
             /* codepoint encoded to a 4-byte sequence */
             .cp = UINT32_C(0xFFFFF),
             .exp_arr = (const unsigned char[]){ 0xF3, 0xBF, 0xBF, 0xBF },
             .exp_len = 4,
         },
     };
     52 
     53 int
     54 main(int argc, char *argv[])
     55 {
     56 	size_t i, j, failed;
     57 
     58 	(void)argc;
     59 
     60 	/* UTF-8 encoder test */
     61 	for (i = 0, failed = 0; i < LEN(enc_test); i++) {
     62 		char arr[4];
     63 		size_t len;
     64 
     65 		len = grapheme_encode_utf8(enc_test[i].cp, arr, LEN(arr));
     66 
     67 		if (len != enc_test[i].exp_len ||
     68 		    memcmp(arr, enc_test[i].exp_arr, len)) {
     69 			fprintf(stderr,
     70 			        "%s, Failed test %zu: "
     71 			        "Expected (",
     72 			        argv[0], i);
     73 			for (j = 0; j < enc_test[i].exp_len; j++) {
     74 				fprintf(stderr, "0x%x", enc_test[i].exp_arr[j]);
     75 				if (j + 1 < enc_test[i].exp_len) {
     76 					fprintf(stderr, " ");
     77 				}
     78 			}
     79 			fprintf(stderr, "), but got (");
     80 			for (j = 0; j < len; j++) {
     81 				fprintf(stderr, "0x%x", arr[j]);
     82 				if (j + 1 < len) {
     83 					fprintf(stderr, " ");
     84 				}
     85 			}
     86 			fprintf(stderr, ").\n");
     87 			failed++;
     88 		}
     89 	}
     90 	printf("%s: %zu/%zu unit tests passed.\n", argv[0],
     91 	       LEN(enc_test) - failed, LEN(enc_test));
     92 
     93 	return (failed > 0) ? 1 : 0;
     94 }