libgrapheme

Unicode string library
git clone git://git.suckless.org/libgrapheme
Log | Files | Refs | README | LICENSE

utf8-decode.c (2177B)


      1 /* See LICENSE file for copyright and license details. */
      2 #include <errno.h>
      3 #include <math.h>
      4 #include <stdint.h>
      5 #include <stdio.h>
      6 #include <stdlib.h>
      7 #include <string.h>
      8 
      9 #include "../grapheme.h"
     10 #include "../gen/character-test.h"
     11 #include "util.h"
     12 
     13 #include <utf8proc.h>
     14 
     15 #define NUM_ITERATIONS 100000
     16 
     17 struct utf8_benchmark_payload {
     18 	char *buf;
     19 	utf8proc_uint8_t *buf_utf8proc;
     20 	size_t buflen;
     21 };
     22 
     23 void
     24 libgrapheme(const void *payload)
     25 {
     26 	const struct utf8_benchmark_payload *p = payload;
     27 	uint_least32_t cp;
     28 	size_t ret, off;
     29 
     30 	for (off = 0; off < p->buflen; off += ret) {
     31 		if ((ret = grapheme_decode_utf8(p->buf + off,
     32 		                                p->buflen - off, &cp)) >
     33 		    (p->buflen - off)) {
     34 			break;
     35 		}
     36 		(void)cp;
     37 	}
     38 }
     39 
     40 void
     41 libutf8proc(const void *payload)
     42 {
     43 	const struct utf8_benchmark_payload *p = payload;
     44 	utf8proc_int32_t cp;
     45 	utf8proc_ssize_t ret;
     46 	size_t off;
     47 
     48 	for (off = 0; off < p->buflen; off += (size_t)ret) {
     49 		if ((ret = utf8proc_iterate(p->buf_utf8proc + off,
     50 		                            (utf8proc_ssize_t)(p->buflen - off),
     51 				            &cp)) < 0) {
     52 			break;
     53 		}
     54 		(void)cp;
     55 	}
     56 }
     57 
     58 int
     59 main(int argc, char *argv[])
     60 {
     61 	struct utf8_benchmark_payload p;
     62 	size_t i;
     63 	double baseline = (double)NAN;
     64 
     65 	(void)argc;
     66 
     67 	p.buf = generate_utf8_test_buffer(character_test,
     68 	                                  LEN(character_test),
     69 	                                  &(p.buflen));
     70 
     71 	/* convert cp-buffer to stupid custom libutf8proc-uint8-type */
     72 	if ((p.buf_utf8proc = malloc(p.buflen)) == NULL) {
     73 		fprintf(stderr, "malloc: %s\n", strerror(errno));
     74 		exit(1);
     75 	}
     76 	for (i = 0; i < p.buflen; i++) {
     77 		/* 
     78 		 * even if char is larger than 8 bit, it will only have
     79 		 * any of the first 8 bits set (by construction).
     80 		 */
     81 		p.buf_utf8proc[i] = (utf8proc_uint8_t)p.buf[i];
     82 	}
     83 
     84 	printf("%s\n", argv[0]);
     85 	run_benchmark(libgrapheme, &p, "libgrapheme ", NULL,
     86 	              "byte", &baseline, NUM_ITERATIONS, p.buflen);
     87 	run_benchmark(libutf8proc, &p, "libutf8proc ",
     88 	              "but unsafe (does not detect overlong encodings)",
     89 	              "byte", &baseline, NUM_ITERATIONS, p.buflen);
     90 
     91 	free(p.buf);
     92 	free(p.buf_utf8proc);
     93 
     94 	return 0;
     95 }