utf8-decode.c (2177B)
1 /* See LICENSE file for copyright and license details. */ 2 #include <errno.h> 3 #include <math.h> 4 #include <stdint.h> 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <string.h> 8 9 #include "../grapheme.h" 10 #include "../gen/character-test.h" 11 #include "util.h" 12 13 #include <utf8proc.h> 14 15 #define NUM_ITERATIONS 100000 16 17 struct utf8_benchmark_payload { 18 char *buf; 19 utf8proc_uint8_t *buf_utf8proc; 20 size_t buflen; 21 }; 22 23 void 24 libgrapheme(const void *payload) 25 { 26 const struct utf8_benchmark_payload *p = payload; 27 uint_least32_t cp; 28 size_t ret, off; 29 30 for (off = 0; off < p->buflen; off += ret) { 31 if ((ret = grapheme_decode_utf8(p->buf + off, 32 p->buflen - off, &cp)) > 33 (p->buflen - off)) { 34 break; 35 } 36 (void)cp; 37 } 38 } 39 40 void 41 libutf8proc(const void *payload) 42 { 43 const struct utf8_benchmark_payload *p = payload; 44 utf8proc_int32_t cp; 45 utf8proc_ssize_t ret; 46 size_t off; 47 48 for (off = 0; off < p->buflen; off += (size_t)ret) { 49 if ((ret = utf8proc_iterate(p->buf_utf8proc + off, 50 (utf8proc_ssize_t)(p->buflen - off), 51 &cp)) < 0) { 52 break; 53 } 54 (void)cp; 55 } 56 } 57 58 int 59 main(int argc, char *argv[]) 60 { 61 struct utf8_benchmark_payload p; 62 size_t i; 63 double baseline = (double)NAN; 64 65 (void)argc; 66 67 p.buf = generate_utf8_test_buffer(character_test, 68 LEN(character_test), 69 &(p.buflen)); 70 71 /* convert cp-buffer to stupid custom libutf8proc-uint8-type */ 72 if ((p.buf_utf8proc = malloc(p.buflen)) == NULL) { 73 fprintf(stderr, "malloc: %s\n", strerror(errno)); 74 exit(1); 75 } 76 for (i = 0; i < p.buflen; i++) { 77 /* 78 * even if char is larger than 8 bit, it will only have 79 * any of the first 8 bits set (by construction). 80 */ 81 p.buf_utf8proc[i] = (utf8proc_uint8_t)p.buf[i]; 82 } 83 84 printf("%s\n", argv[0]); 85 run_benchmark(libgrapheme, &p, "libgrapheme ", NULL, 86 "byte", &baseline, NUM_ITERATIONS, p.buflen); 87 run_benchmark(libutf8proc, &p, "libutf8proc ", 88 "but unsafe (does not detect overlong encodings)", 89 "byte", &baseline, NUM_ITERATIONS, p.buflen); 90 91 free(p.buf); 92 free(p.buf_utf8proc); 93 94 return 0; 95 }