libgrapheme

unicode string library
git clone git://git.suckless.org/libgrapheme
Log | Files | Refs | README | LICENSE

commit 602ae9b2041df6c7e2b1d9f9da2b5ae57eb94b64
parent e7b4a99ac11124212811a345563e65d199b1fb79
Author: Laslo Hunhold <dev@frign.de>
Date:   Tue,  4 Jan 2022 18:29:30 +0100

Generate separate utf8proc_int32_t buffer to preserve strict aliasing

This clearly shows why it's never a good idea to roll your own types,
and why it's better to stick with the ones provided by the standard library.

Even if the custom type in libutf8proc was defined as an unsigned
32-bit integer type, it could be changed at any point (e.g. to
uint_fast32_t, which might default to an unsigned 64-bit type). So we
can't simply cast between the pointers anyway, even if we didn't care
about strict aliasing.

Signed-off-by: Laslo Hunhold <dev@frign.de>

Diffstat:
M benchmark/character.c | 20 ++++++++++++++++++--
1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/benchmark/character.c b/benchmark/character.c @@ -1,8 +1,10 @@ /* See LICENSE file for copyright and license details. */ +#include <errno.h> #include <math.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> +#include <string.h> #include "../grapheme.h" #include "../gen/character-test.h" @@ -21,6 +23,7 @@ struct payload { uint_least32_t *buf; + utf8proc_int32_t *buf_int32; size_t bufsiz; }; @@ -45,7 +48,8 @@ libutf8proc(const void *payload) size_t i; for (i = 0; i + 1 < p->bufsiz; i++) { - (void)utf8proc_grapheme_break_stateful(p->buf[i], p->buf[i+1], + (void)utf8proc_grapheme_break_stateful(p->buf_int32[i], + p->buf_int32[i+1], &state); } } @@ -54,7 +58,8 @@ int main(int argc, char *argv[]) { struct payload p; - double baseline = NAN; + double baseline = (double)NAN; + size_t i; (void)argc; @@ -62,6 +67,17 @@ main(int argc, char *argv[]) &(p.bufsiz))) == NULL) { return 1; } + if ((p.buf_int32 = malloc(p.bufsiz * sizeof(*(p.buf_int32)))) == NULL) { + fprintf(stderr, "malloc: %s\n", strerror(errno)); + exit(1); + } + for (i = 0; i < p.bufsiz; i++) { + /* + * there is no overflow, as we know that the maximum + * codepoint is 0x10FFFF, which is way below 2^31 + */ + p.buf_int32[i] = (utf8proc_int32_t)p.buf[i]; + } printf("%s\n", argv[0]); run_benchmark(libgrapheme, &p, "libgrapheme ", &baseline,