commit abdc2ba0c764c527aaa2ed9fe42db27d71a10bc2
parent 50efb9a3396588e6e1266f51ec5446a9fa8013ea
Author: Laslo Hunhold <dev@frign.de>
Date: Tue, 15 Nov 2022 15:53:56 +0100
Apply clang-format
Even though this disrupts the backtrackability of the code a bit,
it's better to rip the band aid off now than to push it on into the
future.
With these changes, formatting is automatically governed and ensured by
a simple call to
make format
Signed-off-by: Laslo Hunhold <dev@frign.de>
Diffstat:
38 files changed, 1736 insertions(+), 1393 deletions(-)
diff --git a/benchmark/bidirectional.c b/benchmark/bidirectional.c
@@ -5,8 +5,8 @@
#include <stdlib.h>
#include <string.h>
-#include "../grapheme.h"
#include "../gen/bidirectional-test.h"
+#include "../grapheme.h"
#include "util.h"
#define NUM_ITERATIONS 100000
diff --git a/benchmark/case.c b/benchmark/case.c
@@ -6,8 +6,8 @@
#include <stdlib.h>
#include <string.h>
-#include "../grapheme.h"
#include "../gen/word-test.h"
+#include "../grapheme.h"
#include "util.h"
#define NUM_ITERATIONS 10000
@@ -40,7 +40,8 @@ main(int argc, char *argv[])
&(p.srclen))) == NULL) {
return 1;
}
- if ((p.dest = calloc((p.destlen = 2 * p.srclen), sizeof(*(p.dest)))) == NULL) {
+ if ((p.dest = calloc((p.destlen = 2 * p.srclen), sizeof(*(p.dest)))) ==
+ NULL) {
fprintf(stderr, "calloc: Out of memory\n");
}
diff --git a/benchmark/character.c b/benchmark/character.c
@@ -6,8 +6,8 @@
#include <stdlib.h>
#include <string.h>
-#include "../grapheme.h"
#include "../gen/character-test.h"
+#include "../grapheme.h"
#include "util.h"
#include <utf8proc.h>
@@ -28,7 +28,7 @@ libgrapheme(const void *payload)
size_t i;
for (i = 0; i + 1 < p->buflen; i++) {
- (void)grapheme_is_character_break(p->buf[i], p->buf[i+1],
+ (void)grapheme_is_character_break(p->buf[i], p->buf[i + 1],
&state);
}
}
@@ -41,9 +41,8 @@ libutf8proc(const void *payload)
size_t i;
for (i = 0; i + 1 < p->buflen; i++) {
- (void)utf8proc_grapheme_break_stateful(p->buf_utf8proc[i],
- p->buf_utf8proc[i+1],
- &state);
+ (void)utf8proc_grapheme_break_stateful(
+ p->buf_utf8proc[i], p->buf_utf8proc[i + 1], &state);
}
}
@@ -61,7 +60,8 @@ main(int argc, char *argv[])
&(p.buflen))) == NULL) {
return 1;
}
- if ((p.buf_utf8proc = malloc(p.buflen * sizeof(*(p.buf_utf8proc)))) == NULL) {
+ if ((p.buf_utf8proc = malloc(p.buflen * sizeof(*(p.buf_utf8proc)))) ==
+ NULL) {
fprintf(stderr, "malloc: %s\n", strerror(errno));
exit(1);
}
diff --git a/benchmark/line.c b/benchmark/line.c
@@ -6,8 +6,8 @@
#include <stdlib.h>
#include <string.h>
-#include "../grapheme.h"
#include "../gen/line-test.h"
+#include "../grapheme.h"
#include "util.h"
#define NUM_ITERATIONS 10000
@@ -23,7 +23,7 @@ libgrapheme(const void *payload)
const struct break_benchmark_payload *p = payload;
size_t off;
- for (off = 0; off < p->buflen; ) {
+ for (off = 0; off < p->buflen;) {
off += grapheme_next_line_break(p->buf + off, p->buflen - off);
}
}
diff --git a/benchmark/sentence.c b/benchmark/sentence.c
@@ -6,8 +6,8 @@
#include <stdlib.h>
#include <string.h>
-#include "../grapheme.h"
#include "../gen/sentence-test.h"
+#include "../grapheme.h"
#include "util.h"
#define NUM_ITERATIONS 100000
@@ -23,8 +23,9 @@ libgrapheme(const void *payload)
const struct break_benchmark_payload *p = payload;
size_t off;
- for (off = 0; off < p->buflen; ) {
- off += grapheme_next_sentence_break(p->buf + off, p->buflen - off);
+ for (off = 0; off < p->buflen;) {
+ off += grapheme_next_sentence_break(p->buf + off,
+ p->buflen - off);
}
}
diff --git a/benchmark/utf8-decode.c b/benchmark/utf8-decode.c
@@ -6,8 +6,8 @@
#include <stdlib.h>
#include <string.h>
-#include "../grapheme.h"
#include "../gen/character-test.h"
+#include "../grapheme.h"
#include "util.h"
#include <utf8proc.h>
@@ -28,9 +28,8 @@ libgrapheme(const void *payload)
size_t ret, off;
for (off = 0; off < p->buflen; off += ret) {
- if ((ret = grapheme_decode_utf8(p->buf + off,
- p->buflen - off, &cp)) >
- (p->buflen - off)) {
+ if ((ret = grapheme_decode_utf8(p->buf + off, p->buflen - off,
+ &cp)) > (p->buflen - off)) {
break;
}
(void)cp;
@@ -48,7 +47,7 @@ libutf8proc(const void *payload)
for (off = 0; off < p->buflen; off += (size_t)ret) {
if ((ret = utf8proc_iterate(p->buf_utf8proc + off,
(utf8proc_ssize_t)(p->buflen - off),
- &cp)) < 0) {
+ &cp)) < 0) {
break;
}
(void)cp;
@@ -64,9 +63,8 @@ main(int argc, char *argv[])
(void)argc;
- p.buf = generate_utf8_test_buffer(character_break_test,
- LEN(character_break_test),
- &(p.buflen));
+ p.buf = generate_utf8_test_buffer(
+ character_break_test, LEN(character_break_test), &(p.buflen));
/* convert cp-buffer to stupid custom libutf8proc-uint8-type */
if ((p.buf_utf8proc = malloc(p.buflen)) == NULL) {
@@ -74,7 +72,7 @@ main(int argc, char *argv[])
exit(1);
}
for (i = 0; i < p.buflen; i++) {
- /*
+ /*
* even if char is larger than 8 bit, it will only have
* any of the first 8 bits set (by construction).
*/
@@ -82,11 +80,11 @@ main(int argc, char *argv[])
}
printf("%s\n", argv[0]);
- run_benchmark(libgrapheme, &p, "libgrapheme ", NULL,
- "byte", &baseline, NUM_ITERATIONS, p.buflen);
+ run_benchmark(libgrapheme, &p, "libgrapheme ", NULL, "byte", &baseline,
+ NUM_ITERATIONS, p.buflen);
run_benchmark(libutf8proc, &p, "libutf8proc ",
- "but unsafe (does not detect overlong encodings)",
- "byte", &baseline, NUM_ITERATIONS, p.buflen);
+ "but unsafe (does not detect overlong encodings)", "byte",
+ &baseline, NUM_ITERATIONS, p.buflen);
free(p.buf);
free(p.buf_utf8proc);
diff --git a/benchmark/util.c b/benchmark/util.c
@@ -1,7 +1,7 @@
/* See LICENSE file for copyright and license details. */
#include <math.h>
-#include <stdlib.h>
#include <stdio.h>
+#include <stdlib.h>
#include <time.h>
#include "../gen/types.h"
@@ -20,7 +20,8 @@ generate_cp_test_buffer(const struct break_test *test, size_t testlen,
*buflen += test[i].cplen;
}
if (!(buf = calloc(*buflen, sizeof(*buf)))) {
- fprintf(stderr, "generate_test_buffer: calloc: Out of memory.\n");
+ fprintf(stderr,
+ "generate_test_buffer: calloc: Out of memory.\n");
exit(1);
}
for (i = 0, off = 0; i < testlen; i++) {
@@ -48,18 +49,18 @@ generate_utf8_test_buffer(const struct break_test *test, size_t testlen,
}
(*buflen)++; /* terminating NUL-byte */
if (!(buf = malloc(*buflen))) {
- fprintf(stderr, "generate_test_buffer: malloc: Out of memory.\n");
+ fprintf(stderr,
+ "generate_test_buffer: malloc: Out of memory.\n");
exit(1);
}
for (i = 0, off = 0; i < testlen; i++) {
for (j = 0; j < test[i].cplen; j++, off += ret) {
- if ((ret = grapheme_encode_utf8(test[i].cp[j],
- buf + off,
- *buflen - off)) >
+ if ((ret = grapheme_encode_utf8(
+ test[i].cp[j], buf + off, *buflen - off)) >
(*buflen - off)) {
/* shouldn't happen */
fprintf(stderr, "generate_utf8_test_buffer: "
- "Buffer too small.\n");
+ "Buffer too small.\n");
exit(1);
}
}
@@ -77,10 +78,9 @@ time_diff(struct timespec *a, struct timespec *b)
}
void
-run_benchmark(void (*func)(const void *), const void *payload,
- const char *name, const char *comment, const char *unit,
- double *baseline, size_t num_iterations,
- size_t units_per_iteration)
+run_benchmark(void (*func)(const void *), const void *payload, const char *name,
+ const char *comment, const char *unit, double *baseline,
+ size_t num_iterations, size_t units_per_iteration)
{
struct timespec start, end;
size_t i;
@@ -109,7 +109,6 @@ run_benchmark(void (*func)(const void *), const void *payload,
printf(" avg. %.3es/%s (%.2f%% %s%s%s)\n", diff, unit,
fabs(1.0 - diff / *baseline) * 100,
(diff < *baseline) ? "faster" : "slower",
- comment ? ", " : "",
- comment ? comment : "");
+ comment ? ", " : "", comment ? comment : "");
}
}
diff --git a/benchmark/util.h b/benchmark/util.h
@@ -7,10 +7,10 @@
#define LEN(x) (sizeof(x) / sizeof(*(x)))
#ifdef __has_attribute
- #if __has_attribute(optnone)
- void libgrapheme(const void *) __attribute__((optnone));
- void libutf8proc(const void *) __attribute__((optnone));
- #endif
+#if __has_attribute(optnone)
+void libgrapheme(const void *) __attribute__((optnone));
+void libutf8proc(const void *) __attribute__((optnone));
+#endif
#endif
uint_least32_t *generate_cp_test_buffer(const struct break_test *, size_t,
diff --git a/benchmark/word.c b/benchmark/word.c
@@ -6,8 +6,8 @@
#include <stdlib.h>
#include <string.h>
-#include "../grapheme.h"
#include "../gen/word-test.h"
+#include "../grapheme.h"
#include "util.h"
#define NUM_ITERATIONS 10000
@@ -23,7 +23,7 @@ libgrapheme(const void *payload)
const struct break_benchmark_payload *p = payload;
size_t off;
- for (off = 0; off < p->buflen; ) {
+ for (off = 0; off < p->buflen;) {
off += grapheme_next_word_break(p->buf + off, p->buflen - off);
}
}
diff --git a/gen/bidirectional-test.c b/gen/bidirectional-test.c
@@ -3,8 +3,8 @@
#include <inttypes.h>
#include <stddef.h>
#include <stdio.h>
-#include <string.h>
#include <stdlib.h>
+#include <string.h>
#include "../grapheme.h"
#include "util.h"
@@ -23,29 +23,29 @@ static const struct {
const char *class;
const uint_least32_t cp;
} classcpmap[] = {
- { .class = "L", .cp = UINT32_C(0x0041) },
- { .class = "AL", .cp = UINT32_C(0x0608) },
- { .class = "AN", .cp = UINT32_C(0x0600) },
- { .class = "B", .cp = UINT32_C(0x000A) },
- { .class = "BN", .cp = UINT32_C(0x0000) },
- { .class = "CS", .cp = UINT32_C(0x002C) },
- { .class = "EN", .cp = UINT32_C(0x0030) },
- { .class = "ES", .cp = UINT32_C(0x002B) },
- { .class = "ET", .cp = UINT32_C(0x0023) },
+ { .class = "L", .cp = UINT32_C(0x0041) },
+ { .class = "AL", .cp = UINT32_C(0x0608) },
+ { .class = "AN", .cp = UINT32_C(0x0600) },
+ { .class = "B", .cp = UINT32_C(0x000A) },
+ { .class = "BN", .cp = UINT32_C(0x0000) },
+ { .class = "CS", .cp = UINT32_C(0x002C) },
+ { .class = "EN", .cp = UINT32_C(0x0030) },
+ { .class = "ES", .cp = UINT32_C(0x002B) },
+ { .class = "ET", .cp = UINT32_C(0x0023) },
{ .class = "FSI", .cp = UINT32_C(0x2068) },
{ .class = "LRE", .cp = UINT32_C(0x202A) },
{ .class = "LRI", .cp = UINT32_C(0x2066) },
{ .class = "LRO", .cp = UINT32_C(0x202D) },
{ .class = "NSM", .cp = UINT32_C(0x0300) },
- { .class = "ON", .cp = UINT32_C(0x0021) },
+ { .class = "ON", .cp = UINT32_C(0x0021) },
{ .class = "PDF", .cp = UINT32_C(0x202C) },
{ .class = "PDI", .cp = UINT32_C(0x2069) },
- { .class = "R", .cp = UINT32_C(0x05BE) },
+ { .class = "R", .cp = UINT32_C(0x05BE) },
{ .class = "RLE", .cp = UINT32_C(0x202B) },
{ .class = "RLI", .cp = UINT32_C(0x2067) },
{ .class = "RLO", .cp = UINT32_C(0x202E) },
- { .class = "S", .cp = UINT32_C(0x0009) },
- { .class = "WS", .cp = UINT32_C(0x000C) },
+ { .class = "S", .cp = UINT32_C(0x0009) },
+ { .class = "WS", .cp = UINT32_C(0x000C) },
};
static int
@@ -59,7 +59,8 @@ classtocp(const char *str, size_t len, uint_least32_t *cp)
return 0;
}
}
- fprintf(stderr, "classtocp: unknown class string '%.*s'.\n", (int)len, str);
+ fprintf(stderr, "classtocp: unknown class string '%.*s'.\n", (int)len,
+ str);
return 1;
}
@@ -77,8 +78,10 @@ parse_class_list(const char *str, uint_least32_t **cp, size_t *cplen)
}
/* count the number of spaces in the string and infer list length */
- for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; count++, tmp1 = tmp2 + 1)
+ for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL;
+ count++, tmp1 = tmp2 + 1) {
;
+ }
/* allocate resources */
if (!(*cp = calloc((*cplen = count), sizeof(**cp)))) {
@@ -89,7 +92,8 @@ parse_class_list(const char *str, uint_least32_t **cp, size_t *cplen)
/* go through the string again, parsing the classes */
for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) {
tmp2 = strchr(tmp1, ' ');
- if (classtocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1), &((*cp)[i]))) {
+ if (classtocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1),
+ &((*cp)[i]))) {
return 1;
}
if (tmp2 != NULL) {
@@ -135,12 +139,10 @@ strtolevel(const char *str, size_t len, int_least8_t *level)
if (str[0] != '1') {
goto toolarge;
}
- *level = (str[0] - '0') * 100 +
- (str[1] - '0') * 10 +
- (str[2] - '0');
+ *level = (str[0] - '0') * 100 + (str[1] - '0') * 10 +
+ (str[2] - '0');
} else if (len == 2) {
- *level = (str[0] - '0') * 10 +
- (str[1] - '0');
+ *level = (str[0] - '0') * 10 + (str[1] - '0');
} else if (len == 1) {
*level = (str[0] - '0');
} else { /* len == 0 */
@@ -149,8 +151,7 @@ strtolevel(const char *str, size_t len, int_least8_t *level)
return 0;
toolarge:
- fprintf(stderr, "hextocp: '%.*s' is too large.\n",
- (int)len, str);
+ fprintf(stderr, "hextocp: '%.*s' is too large.\n", (int)len, str);
return 1;
}
@@ -167,8 +168,10 @@ parse_level_list(const char *str, int_least8_t **level, size_t *levellen)
}
/* count the number of spaces in the string and infer list length */
- for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; count++, tmp1 = tmp2 + 1)
+ for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL;
+ count++, tmp1 = tmp2 + 1) {
;
+ }
/* allocate resources */
if (!(*level = calloc((*levellen = count), sizeof(**level)))) {
@@ -179,7 +182,9 @@ parse_level_list(const char *str, int_least8_t **level, size_t *levellen)
/* go through the string again, parsing the levels */
for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) {
tmp2 = strchr(tmp1, ' ');
- if (strtolevel(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1), &((*level)[i]))) {
+ if (strtolevel(tmp1,
+ tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1),
+ &((*level)[i]))) {
return 1;
}
if (tmp2 != NULL) {
@@ -199,7 +204,8 @@ bidirectional_test_list_print(const struct bidirectional_test *test,
printf("/* Automatically generated by %s */\n"
"#include <stdint.h>\n#include <stddef.h>\n\n"
- "#include \"../grapheme.h\"\n\n", progname);
+ "#include \"../grapheme.h\"\n\n",
+ progname);
printf("static const struct {\n"
"\tuint_least32_t *cp;\n"
@@ -208,7 +214,8 @@ bidirectional_test_list_print(const struct bidirectional_test *test,
"\tsize_t modelen;\n"
"\tint_least8_t *level;\n"
"\tint_least8_t *reorder;\n"
- "\tsize_t reorderlen;\n} %s[] = {\n", identifier);
+ "\tsize_t reorderlen;\n} %s[] = {\n",
+ identifier);
for (i = 0; i < testlen; i++) {
printf("\t{\n");
@@ -222,11 +229,13 @@ bidirectional_test_list_print(const struct bidirectional_test *test,
printf(" },\n");
printf("\t\t.cplen = %zu,\n", test[i].cplen);
- printf("\t\t.mode = (enum grapheme_bidirectional_override[]){");
+ printf("\t\t.mode = (enum "
+ "grapheme_bidirectional_override[]){");
for (j = 0; j < test[i].modelen; j++) {
if (test[i].mode[j] ==
GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL) {
- printf(" GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL");
+ printf(" GRAPHEME_BIDIRECTIONAL_OVERRIDE_"
+ "NEUTRAL");
} else if (test[i].mode[j] ==
GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) {
printf(" GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR");
@@ -279,8 +288,8 @@ static int_least8_t *current_reorder;
static size_t current_reorder_len;
static int
-test_callback(const char *file, char **field, size_t nfields,
- char *comment, void *payload)
+test_callback(const char *file, char **field, size_t nfields, char *comment,
+ void *payload)
{
char *tmp;
@@ -292,23 +301,31 @@ test_callback(const char *file, char **field, size_t nfields,
if (nfields > 0 && field[0][0] == '@') {
if (!strncmp(field[0], "@Levels:", sizeof("@Levels:") - 1)) {
tmp = field[0] + sizeof("@Levels:") - 1;
- for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t'); tmp++)
+ for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t');
+ tmp++) {
;
+ }
free(current_level);
- parse_level_list(tmp, &current_level, &current_level_len);
- } else if (!strncmp(field[0], "@Reorder:", sizeof("@Reorder:") - 1)) {
+ parse_level_list(tmp, &current_level,
+ &current_level_len);
+ } else if (!strncmp(field[0],
+ "@Reorder:", sizeof("@Reorder:") - 1)) {
tmp = field[0] + sizeof("@Reorder:") - 1;
- for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t'); tmp++)
+ for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t');
+ tmp++) {
;
+ }
free(current_reorder);
- parse_level_list(tmp, &current_reorder, &current_reorder_len);
+ parse_level_list(tmp, &current_reorder,
+ &current_reorder_len);
} else {
fprintf(stderr, "Unknown @-input-line.\n");
exit(1);
}
} else {
if (nfields < 2) {
- /* discard any line that does not have at least 2 fields */
+ /* discard any line that does not have at least 2 fields
+ */
return 0;
}
@@ -321,26 +338,33 @@ test_callback(const char *file, char **field, size_t nfields,
/* parse field data */
parse_class_list(field[0], &(test[testlen - 1].cp),
&(test[testlen - 1].cplen));
-
+
/* copy current level- and reorder-arrays */
- if (!(test[testlen - 1].level = calloc(current_level_len, sizeof(*(test[testlen - 1].level))))) {
+ if (!(test[testlen - 1].level =
+ calloc(current_level_len,
+ sizeof(*(test[testlen - 1].level))))) {
fprintf(stderr, "calloc: %s\n", strerror(errno));
exit(1);
}
- memcpy(test[testlen - 1].level, current_level, current_level_len * sizeof(*(test[testlen - 1].level)));
+ memcpy(test[testlen - 1].level, current_level,
+ current_level_len * sizeof(*(test[testlen - 1].level)));
- if (!(test[testlen - 1].reorder = calloc(current_reorder_len, sizeof(*(test[testlen - 1].reorder))))) {
+ if (!(test[testlen - 1].reorder =
+ calloc(current_reorder_len,
+ sizeof(*(test[testlen - 1].reorder))))) {
fprintf(stderr, "calloc: %s\n", strerror(errno));
exit(1);
}
if (current_reorder != NULL) {
memcpy(test[testlen - 1].reorder, current_reorder,
- current_reorder_len * sizeof(*(test[testlen - 1].reorder)));
+ current_reorder_len *
+ sizeof(*(test[testlen - 1].reorder)));
}
test[testlen - 1].reorderlen = current_reorder_len;
-
+
if (current_level_len != test[testlen - 1].cplen) {
- fprintf(stderr, "mismatch between string and level lengths.\n");
+ fprintf(stderr,
+ "mismatch between string and level lengths.\n");
exit(1);
}
@@ -349,27 +373,38 @@ test_callback(const char *file, char **field, size_t nfields,
fprintf(stderr, "malformed paragraph-level-bitset.\n");
exit(1);
} else if (field[1][0] == '2') {
- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR;
+ test[testlen - 1].mode[0] =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR;
test[testlen - 1].modelen = 1;
} else if (field[1][0] == '3') {
/* auto=0 and LTR=1 */
- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
- test[testlen - 1].mode[1] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR;
+ test[testlen - 1].mode[0] =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
+ test[testlen - 1].mode[1] =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR;
test[testlen - 1].modelen = 2;
} else if (field[1][0] == '4') {
- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL;
+ test[testlen - 1].mode[0] =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL;
test[testlen - 1].modelen = 1;
- } else if (field[1][0] == '5') {
- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
- test[testlen - 1].mode[1] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL;
+ } else if (field[1][0] == '5') {
+ test[testlen - 1].mode[0] =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
+ test[testlen - 1].mode[1] =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL;
test[testlen - 1].modelen = 2;
} else if (field[1][0] == '7') {
- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
- test[testlen - 1].mode[1] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR;
- test[testlen - 1].mode[2] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL;
+ test[testlen - 1].mode[0] =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
+ test[testlen - 1].mode[1] =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR;
+ test[testlen - 1].mode[2] =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL;
test[testlen - 1].modelen = 3;
} else {
- fprintf(stderr, "unhandled paragraph-level-bitset %s.\n", field[1]);
+ fprintf(stderr,
+ "unhandled paragraph-level-bitset %s.\n",
+ field[1]);
exit(1);
}
}
@@ -414,7 +449,8 @@ character_test_callback(const char *file, char **field, size_t nfields,
} else if (field[1][0] == '1') {
test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL;
} else if (field[1][0] == '2') {
- test[testlen - 1].mode[0] = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
+ test[testlen - 1].mode[0] =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
} else {
fprintf(stderr, "unhandled paragraph-level-setting.\n");
exit(1);
diff --git a/gen/bidirectional.c b/gen/bidirectional.c
@@ -15,118 +15,118 @@ static const struct property_spec bidi_property[] = {
{
/* default */
.enumname = "L",
- .file = FILE_BIDI_CLASS,
- .ucdname = "L",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "L",
},
{
.enumname = "AL",
- .file = FILE_BIDI_CLASS,
- .ucdname = "AL",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "AL",
},
{
.enumname = "AN",
- .file = FILE_BIDI_CLASS,
- .ucdname = "AN",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "AN",
},
{
.enumname = "B",
- .file = FILE_BIDI_CLASS,
- .ucdname = "B",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "B",
},
{
.enumname = "BN",
- .file = FILE_BIDI_CLASS,
- .ucdname = "BN",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "BN",
},
{
.enumname = "CS",
- .file = FILE_BIDI_CLASS,
- .ucdname = "CS",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "CS",
},
{
.enumname = "EN",
- .file = FILE_BIDI_CLASS,
- .ucdname = "EN",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "EN",
},
{
.enumname = "ES",
- .file = FILE_BIDI_CLASS,
- .ucdname = "ES",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "ES",
},
{
.enumname = "ET",
- .file = FILE_BIDI_CLASS,
- .ucdname = "ET",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "ET",
},
{
.enumname = "FSI",
- .file = FILE_BIDI_CLASS,
- .ucdname = "FSI",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "FSI",
},
{
.enumname = "LRE",
- .file = FILE_BIDI_CLASS,
- .ucdname = "LRE",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "LRE",
},
{
.enumname = "LRI",
- .file = FILE_BIDI_CLASS,
- .ucdname = "LRI",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "LRI",
},
{
.enumname = "LRO",
- .file = FILE_BIDI_CLASS,
- .ucdname = "LRO",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "LRO",
},
{
.enumname = "NSM",
- .file = FILE_BIDI_CLASS,
- .ucdname = "NSM",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "NSM",
},
{
.enumname = "ON",
- .file = FILE_BIDI_CLASS,
- .ucdname = "ON",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "ON",
},
{
.enumname = "PDF",
- .file = FILE_BIDI_CLASS,
- .ucdname = "PDF",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "PDF",
},
{
.enumname = "PDI",
- .file = FILE_BIDI_CLASS,
- .ucdname = "PDI",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "PDI",
},
{
.enumname = "R",
- .file = FILE_BIDI_CLASS,
- .ucdname = "R",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "R",
},
{
.enumname = "RLE",
- .file = FILE_BIDI_CLASS,
- .ucdname = "RLE",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "RLE",
},
{
.enumname = "RLI",
- .file = FILE_BIDI_CLASS,
- .ucdname = "RLI",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "RLI",
},
{
.enumname = "RLO",
- .file = FILE_BIDI_CLASS,
- .ucdname = "RLO",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "RLO",
},
{
.enumname = "S",
- .file = FILE_BIDI_CLASS,
- .ucdname = "S",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "S",
},
{
.enumname = "WS",
- .file = FILE_BIDI_CLASS,
- .ucdname = "WS",
+ .file = FILE_BIDI_CLASS,
+ .ucdname = "WS",
},
};
@@ -135,11 +135,12 @@ static struct {
uint_least32_t cp_pair;
char type;
} *b = NULL;
+
static size_t blen;
static int
-bracket_callback(const char *file, char **field, size_t nfields,
- char *comment, void *payload)
+bracket_callback(const char *file, char **field, size_t nfields, char *comment,
+ void *payload)
{
(void)file;
(void)comment;
@@ -189,11 +190,12 @@ post_process(struct properties *prop)
}
static uint_least8_t
-fill_missing(uint_least32_t cp) {
+fill_missing(uint_least32_t cp)
+{
/* based on the @missing-properties in data/DerivedBidiClass.txt */
- if ((cp >= UINT32_C(0x0590) && cp <= UINT32_C(0x05FF)) ||
- (cp >= UINT32_C(0x07C0) && cp <= UINT32_C(0x085F)) ||
- (cp >= UINT32_C(0xFB1D) && cp <= UINT32_C(0xFB4F)) ||
+ if ((cp >= UINT32_C(0x0590) && cp <= UINT32_C(0x05FF)) ||
+ (cp >= UINT32_C(0x07C0) && cp <= UINT32_C(0x085F)) ||
+ (cp >= UINT32_C(0xFB1D) && cp <= UINT32_C(0xFB4F)) ||
(cp >= UINT32_C(0x10800) && cp <= UINT32_C(0x10CFF)) ||
(cp >= UINT32_C(0x10D40) && cp <= UINT32_C(0x10EBF)) ||
(cp >= UINT32_C(0x10F00) && cp <= UINT32_C(0x10F2F)) ||
@@ -203,22 +205,22 @@ fill_missing(uint_least32_t cp) {
(cp >= UINT32_C(0x1ED50) && cp <= UINT32_C(0x1EDFF)) ||
(cp >= UINT32_C(0x1EF00) && cp <= UINT32_C(0x1EFFF))) {
return 17; /* class R */
- } else if ((cp >= UINT32_C(0x0600) && cp <= UINT32_C(0x07BF)) ||
- (cp >= UINT32_C(0x0860) && cp <= UINT32_C(0x08FF)) ||
- (cp >= UINT32_C(0xFB50) && cp <= UINT32_C(0xFDCF)) ||
- (cp >= UINT32_C(0xFDF0) && cp <= UINT32_C(0xFDFF)) ||
- (cp >= UINT32_C(0xFE70) && cp <= UINT32_C(0xFEFF)) ||
+ } else if ((cp >= UINT32_C(0x0600) && cp <= UINT32_C(0x07BF)) ||
+ (cp >= UINT32_C(0x0860) && cp <= UINT32_C(0x08FF)) ||
+ (cp >= UINT32_C(0xFB50) && cp <= UINT32_C(0xFDCF)) ||
+ (cp >= UINT32_C(0xFDF0) && cp <= UINT32_C(0xFDFF)) ||
+ (cp >= UINT32_C(0xFE70) && cp <= UINT32_C(0xFEFF)) ||
(cp >= UINT32_C(0x10D00) && cp <= UINT32_C(0x10D3F)) ||
(cp >= UINT32_C(0x10EC0) && cp <= UINT32_C(0x10EFF)) ||
- (cp >= UINT32_C(0x10F30) && cp <= UINT32_C(0x10F6F)) ||
+ (cp >= UINT32_C(0x10F30) && cp <= UINT32_C(0x10F6F)) ||
(cp >= UINT32_C(0x1EC70) && cp <= UINT32_C(0x1ECBF)) ||
(cp >= UINT32_C(0x1ED00) && cp <= UINT32_C(0x1ED4F)) ||
(cp >= UINT32_C(0x1EE00) && cp <= UINT32_C(0x1EEFF))) {
- return 1; /* class AL */
+ return 1; /* class AL */
} else if (cp >= UINT32_C(0x20A0) && cp <= UINT32_C(0x20CF)) {
- return 8; /* class ET */
+ return 8; /* class ET */
} else {
- return 0; /* class L */
+ return 0; /* class L */
}
}
@@ -238,13 +240,11 @@ main(int argc, char *argv[])
fprintf(stderr, "calloc: %s\n", strerror(errno));
exit(1);
}
- parse_file_with_callback(FILE_BIDI_BRACKETS, bracket_callback,
- NULL);
+ parse_file_with_callback(FILE_BIDI_BRACKETS, bracket_callback, NULL);
- properties_generate_break_property(bidi_property,
- LEN(bidi_property), fill_missing,
- NULL, post_process, "bidi",
- argv[0]);
+ properties_generate_break_property(bidi_property, LEN(bidi_property),
+ fill_missing, NULL, post_process,
+ "bidi", argv[0]);
printf("\nenum bracket_type {\n\tBIDI_BRACKET_NONE,\n\t"
"BIDI_BRACKET_OPEN,\n\tBIDI_BRACKET_CLOSE,\n};\n\n"
@@ -252,10 +252,12 @@ main(int argc, char *argv[])
"\tuint_least32_t pair;\n};\n\n"
"static const struct bracket bidi_bracket[] = {\n");
for (i = 0; i < blen; i++) {
- printf("\t{\n\t\t.type = %s,\n\t\t.pair = UINT32_C(0x%06X),\n\t},\n",
- (b[i].type == 'o') ? "BIDI_BRACKET_OPEN" :
- (b[i].type == 'c') ? "BIDI_BRACKET_CLOSE" : "BIDI_BRACKET_NONE",
- b[i].cp_pair);
+ printf("\t{\n\t\t.type = %s,\n\t\t.pair = "
+ "UINT32_C(0x%06X),\n\t},\n",
+ (b[i].type == 'o') ? "BIDI_BRACKET_OPEN" :
+ (b[i].type == 'c') ? "BIDI_BRACKET_CLOSE" :
+ "BIDI_BRACKET_NONE",
+ b[i].cp_pair);
}
printf("};\n");
diff --git a/gen/case.c b/gen/case.c
@@ -12,28 +12,28 @@
static const struct property_spec case_property[] = {
{
.enumname = "OTHER",
- .file = NULL,
- .ucdname = NULL,
+ .file = NULL,
+ .ucdname = NULL,
},
{
.enumname = "BOTH_CASED_CASE_IGNORABLE",
- .file = NULL,
- .ucdname = NULL,
+ .file = NULL,
+ .ucdname = NULL,
},
- {
+ {
.enumname = "CASED",
- .file = FILE_DCP,
- .ucdname = "Cased",
+ .file = FILE_DCP,
+ .ucdname = "Cased",
},
{
.enumname = "CASE_IGNORABLE",
- .file = FILE_DCP,
- .ucdname = "Case_Ignorable",
+ .file = FILE_DCP,
+ .ucdname = "Case_Ignorable",
},
{
.enumname = "UNCASED",
- .file = FILE_DCP,
- .ucdname = "Uncased",
+ .file = FILE_DCP,
+ .ucdname = "Uncased",
},
};
@@ -67,12 +67,14 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2)
}
static struct properties *prop_upper = NULL, *prop_lower, *prop_title;
+
static struct special_case {
struct {
uint_least32_t *cp;
size_t cplen;
} upper, lower, title;
} *sc = NULL;
+
static size_t sclen = 0;
static int
@@ -89,9 +91,12 @@ unicodedata_callback(const char *file, char **field, size_t nfields,
upper = lower = title = cp;
- if ((strlen(field[12]) > 0 && hextocp(field[12], strlen(field[12]), &upper)) ||
- (strlen(field[13]) > 0 && hextocp(field[13], strlen(field[13]), &lower)) ||
- (nfields >= 15 && strlen(field[14]) > 0 && hextocp(field[14], strlen(field[14]), &title))) {
+ if ((strlen(field[12]) > 0 &&
+ hextocp(field[12], strlen(field[12]), &upper)) ||
+ (strlen(field[13]) > 0 &&
+ hextocp(field[13], strlen(field[13]), &lower)) ||
+ (nfields >= 15 && strlen(field[14]) > 0 &&
+ hextocp(field[14], strlen(field[14]), &title))) {
return 1;
}
@@ -126,7 +131,7 @@ specialcasing_callback(const char *file, char **field, size_t nfields,
/* extend special case array */
if (!(sc = realloc(sc, (++sclen) * sizeof(*sc)))) {
fprintf(stderr, "realloc: %s\n", strerror(errno));
- exit(1);
+ exit(1);
}
/* parse field data */
@@ -142,9 +147,12 @@ specialcasing_callback(const char *file, char **field, size_t nfields,
* special value 0x110000 + (offset in special case array),
* even if the special case has length 1
*/
- prop_upper[cp].property = (int_least64_t)(UINT32_C(0x110000) + (sclen - 1));
- prop_lower[cp].property = (int_least64_t)(UINT32_C(0x110000) + (sclen - 1));
- prop_title[cp].property = (int_least64_t)(UINT32_C(0x110000) + (sclen - 1));
+ prop_upper[cp].property =
+ (int_least64_t)(UINT32_C(0x110000) + (sclen - 1));
+ prop_lower[cp].property =
+ (int_least64_t)(UINT32_C(0x110000) + (sclen - 1));
+ prop_title[cp].property =
+ (int_least64_t)(UINT32_C(0x110000) + (sclen - 1));
return 0;
}
@@ -165,9 +173,8 @@ main(int argc, char *argv[])
(void)argc;
/* generate case property table from the specification */
- properties_generate_break_property(case_property,
- LEN(case_property), NULL,
- handle_conflict, NULL, "case",
+ properties_generate_break_property(case_property, LEN(case_property),
+ NULL, handle_conflict, NULL, "case",
argv[0]);
/*
@@ -186,38 +193,46 @@ main(int argc, char *argv[])
}
parse_file_with_callback("data/UnicodeData.txt", unicodedata_callback,
NULL);
- parse_file_with_callback("data/SpecialCasing.txt", specialcasing_callback,
- NULL);
+ parse_file_with_callback("data/SpecialCasing.txt",
+ specialcasing_callback, NULL);
/* compress properties */
properties_compress(prop_upper, &comp_upper);
properties_compress(prop_lower, &comp_lower);
properties_compress(prop_title, &comp_title);
- fprintf(stderr, "%s: LUT compression-ratios: upper=%.2f%%, lower=%.2f%%, title=%.2f%%\n",
+ fprintf(stderr,
+ "%s: LUT compression-ratios: upper=%.2f%%, lower=%.2f%%, "
+ "title=%.2f%%\n",
argv[0], properties_get_major_minor(&comp_upper, &mm_upper),
properties_get_major_minor(&comp_lower, &mm_lower),
properties_get_major_minor(&comp_title, &mm_title));
/* print tables */
- printf("/* Automatically generated by %s */\n#include <stdint.h>\n#include <stddef.h>\n\n", argv[0]);
+ printf("/* Automatically generated by %s */\n#include "
+ "<stdint.h>\n#include <stddef.h>\n\n",
+ argv[0]);
- printf("struct special_case {\n\tuint_least32_t *cp;\n\tsize_t cplen;\n};\n\n");
+ printf("struct special_case {\n\tuint_least32_t *cp;\n\tsize_t "
+ "cplen;\n};\n\n");
properties_print_lookup_table("upper_major", mm_upper.major, 0x1100);
printf("\n");
- properties_print_derived_lookup_table("upper_minor", "int_least32_t", mm_upper.minor,
- mm_upper.minorlen, get_value, comp_upper.data);
+ properties_print_derived_lookup_table("upper_minor", "int_least32_t",
+ mm_upper.minor, mm_upper.minorlen,
+ get_value, comp_upper.data);
printf("\n");
properties_print_lookup_table("lower_major", mm_lower.major, 0x1100);
printf("\n");
- properties_print_derived_lookup_table("lower_minor", "int_least32_t", mm_lower.minor,
- mm_lower.minorlen, get_value, comp_lower.data);
+ properties_print_derived_lookup_table("lower_minor", "int_least32_t",
+ mm_lower.minor, mm_lower.minorlen,
+ get_value, comp_lower.data);
printf("\n");
properties_print_lookup_table("title_major", mm_title.major, 0x1100);
printf("\n");
- properties_print_derived_lookup_table("title_minor", "int_least32_t", mm_title.minor,
- mm_title.minorlen, get_value, comp_title.data);
+ properties_print_derived_lookup_table("title_minor", "int_least32_t",
+ mm_title.minor, mm_title.minorlen,
+ get_value, comp_title.data);
printf("\n");
printf("static const struct special_case upper_special[] = {\n");
diff --git a/gen/character.c b/gen/character.c
@@ -9,78 +9,78 @@
static const struct property_spec char_break_property[] = {
{
.enumname = "OTHER",
- .file = NULL,
- .ucdname = NULL,
+ .file = NULL,
+ .ucdname = NULL,
},
{
.enumname = "CONTROL",
- .file = FILE_GRAPHEME,
- .ucdname = "Control",
+ .file = FILE_GRAPHEME,
+ .ucdname = "Control",
},
{
.enumname = "CR",
- .file = FILE_GRAPHEME,
- .ucdname = "CR",
+ .file = FILE_GRAPHEME,
+ .ucdname = "CR",
},
{
.enumname = "EXTEND",
- .file = FILE_GRAPHEME,
- .ucdname = "Extend",
+ .file = FILE_GRAPHEME,
+ .ucdname = "Extend",
},
{
.enumname = "EXTENDED_PICTOGRAPHIC",
- .file = FILE_EMOJI,
- .ucdname = "Extended_Pictographic",
+ .file = FILE_EMOJI,
+ .ucdname = "Extended_Pictographic",
},
{
.enumname = "HANGUL_L",
- .file = FILE_GRAPHEME,
- .ucdname = "L",
+ .file = FILE_GRAPHEME,
+ .ucdname = "L",
},
{
.enumname = "HANGUL_V",
- .file = FILE_GRAPHEME,
- .ucdname = "V",
+ .file = FILE_GRAPHEME,
+ .ucdname = "V",
},
{
.enumname = "HANGUL_T",
- .file = FILE_GRAPHEME,
- .ucdname = "T",
+ .file = FILE_GRAPHEME,
+ .ucdname = "T",
},
{
.enumname = "HANGUL_LV",
- .file = FILE_GRAPHEME,
- .ucdname = "LV",
+ .file = FILE_GRAPHEME,
+ .ucdname = "LV",
},
{
.enumname = "HANGUL_LVT",
- .file = FILE_GRAPHEME,
- .ucdname = "LVT",
+ .file = FILE_GRAPHEME,
+ .ucdname = "LVT",
},
{
.enumname = "LF",
- .file = FILE_GRAPHEME,
- .ucdname = "LF",
+ .file = FILE_GRAPHEME,
+ .ucdname = "LF",
},
{
.enumname = "PREPEND",
- .file = FILE_GRAPHEME,
- .ucdname = "Prepend",
+ .file = FILE_GRAPHEME,
+ .ucdname = "Prepend",
},
{
.enumname = "REGIONAL_INDICATOR",
- .file = FILE_GRAPHEME,
- .ucdname = "Regional_Indicator",
+ .file = FILE_GRAPHEME,
+ .ucdname = "Regional_Indicator",
},
{
.enumname = "SPACINGMARK",
- .file = FILE_GRAPHEME,
- .ucdname = "SpacingMark",
+ .file = FILE_GRAPHEME,
+ .ucdname = "SpacingMark",
},
{
.enumname = "ZWJ",
- .file = FILE_GRAPHEME,
- .ucdname = "ZWJ",
+ .file = FILE_GRAPHEME,
+ .ucdname = "ZWJ",
},
};
@@ -90,8 +90,8 @@ main(int argc, char *argv[])
(void)argc;
properties_generate_break_property(char_break_property,
- LEN(char_break_property), NULL,
- NULL, NULL, "char_break", argv[0]);
+ LEN(char_break_property), NULL, NULL,
+ NULL, "char_break", argv[0]);
return 0;
}
diff --git a/gen/line.c b/gen/line.c
@@ -12,8 +12,8 @@
static const struct property_spec line_break_property[] = {
{
.enumname = "AL",
- .file = FILE_LINE,
- .ucdname = "AL",
+ .file = FILE_LINE,
+ .ucdname = "AL",
},
/*
* Both extended pictographic and cn are large classes,
@@ -32,269 +32,269 @@ static const struct property_spec line_break_property[] = {
*/
{
.enumname = "TMP_CN",
- .file = FILE_LINE,
- .ucdname = "Cn",
+ .file = FILE_LINE,
+ .ucdname = "Cn",
},
{
.enumname = "TMP_EXTENDED_PICTOGRAPHIC",
- .file = FILE_EMOJI,
- .ucdname = "Extended_Pictographic",
+ .file = FILE_EMOJI,
+ .ucdname = "Extended_Pictographic",
},
/* end of special block */
{
.enumname = "B2",
- .file = FILE_LINE,
- .ucdname = "B2",
+ .file = FILE_LINE,
+ .ucdname = "B2",
},
{
.enumname = "BA",
- .file = FILE_LINE,
- .ucdname = "BA",
+ .file = FILE_LINE,
+ .ucdname = "BA",
},
{
.enumname = "BB",
- .file = FILE_LINE,
- .ucdname = "BB",
+ .file = FILE_LINE,
+ .ucdname = "BB",
},
{
.enumname = "BK",
- .file = FILE_LINE,
- .ucdname = "BK",
+ .file = FILE_LINE,
+ .ucdname = "BK",
},
{
.enumname = "BOTH_CN_EXTPICT",
- .file = NULL,
- .ucdname = NULL,
+ .file = NULL,
+ .ucdname = NULL,
},
{
.enumname = "CB",
- .file = FILE_LINE,
- .ucdname = "CB",
+ .file = FILE_LINE,
+ .ucdname = "CB",
},
{
.enumname = "CL",
- .file = FILE_LINE,
- .ucdname = "CL",
+ .file = FILE_LINE,
+ .ucdname = "CL",
},
{
.enumname = "CM",
- .file = FILE_LINE,
- .ucdname = "CM",
+ .file = FILE_LINE,
+ .ucdname = "CM",
},
{
.enumname = "CP_WITHOUT_EAW_HWF",
- .file = FILE_LINE,
- .ucdname = "CP",
+ .file = FILE_LINE,
+ .ucdname = "CP",
},
{
.enumname = "CP_WITH_EAW_HWF",
- .file = NULL,
- .ucdname = NULL,
+ .file = NULL,
+ .ucdname = NULL,
},
{
.enumname = "CR",
- .file = FILE_LINE,
- .ucdname = "CR",
+ .file = FILE_LINE,
+ .ucdname = "CR",
},
{
.enumname = "EB",
- .file = FILE_LINE,
- .ucdname = "EB",
+ .file = FILE_LINE,
+ .ucdname = "EB",
},
{
.enumname = "EM",
- .file = FILE_LINE,
- .ucdname = "EM",
+ .file = FILE_LINE,
+ .ucdname = "EM",
},
{
.enumname = "EX",
- .file = FILE_LINE,
- .ucdname = "EX",
+ .file = FILE_LINE,
+ .ucdname = "EX",
},
{
.enumname = "GL",
- .file = FILE_LINE,
- .ucdname = "GL",
+ .file = FILE_LINE,
+ .ucdname = "GL",
},
{
.enumname = "H2",
- .file = FILE_LINE,
- .ucdname = "H2",
+ .file = FILE_LINE,
+ .ucdname = "H2",
},
{
.enumname = "H3",
- .file = FILE_LINE,
- .ucdname = "H3",
+ .file = FILE_LINE,
+ .ucdname = "H3",
},
{
.enumname = "HL",
- .file = FILE_LINE,
- .ucdname = "HL",
+ .file = FILE_LINE,
+ .ucdname = "HL",
},
{
.enumname = "HY",
- .file = FILE_LINE,
- .ucdname = "HY",
+ .file = FILE_LINE,
+ .ucdname = "HY",
},
{
.enumname = "ID",
- .file = FILE_LINE,
- .ucdname = "ID",
+ .file = FILE_LINE,
+ .ucdname = "ID",
},
{
.enumname = "IN",
- .file = FILE_LINE,
- .ucdname = "IN",
+ .file = FILE_LINE,
+ .ucdname = "IN",
},
{
.enumname = "IS",
- .file = FILE_LINE,
- .ucdname = "IS",
+ .file = FILE_LINE,
+ .ucdname = "IS",
},
{
.enumname = "JL",
- .file = FILE_LINE,
- .ucdname = "JL",
+ .file = FILE_LINE,
+ .ucdname = "JL",
},
{
.enumname = "JT",
- .file = FILE_LINE,
- .ucdname = "JT",
+ .file = FILE_LINE,
+ .ucdname = "JT",
},
{
.enumname = "JV",
- .file = FILE_LINE,
- .ucdname = "JV",
+ .file = FILE_LINE,
+ .ucdname = "JV",
},
{
.enumname = "LF",
- .file = FILE_LINE,
- .ucdname = "LF",
+ .file = FILE_LINE,
+ .ucdname = "LF",
},
{
.enumname = "NL",
- .file = FILE_LINE,
- .ucdname = "NL",
+ .file = FILE_LINE,
+ .ucdname = "NL",
},
{
.enumname = "NS",
- .file = FILE_LINE,
- .ucdname = "NS",
+ .file = FILE_LINE,
+ .ucdname = "NS",
},
{
.enumname = "NU",
- .file = FILE_LINE,
- .ucdname = "NU",
+ .file = FILE_LINE,
+ .ucdname = "NU",
},
{
.enumname = "OP_WITHOUT_EAW_HWF",
- .file = FILE_LINE,
- .ucdname = "OP",
+ .file = FILE_LINE,
+ .ucdname = "OP",
},
{
.enumname = "OP_WITH_EAW_HWF",
- .file = NULL,
- .ucdname = NULL,
+ .file = NULL,
+ .ucdname = NULL,
},
{
.enumname = "PO",
- .file = FILE_LINE,
- .ucdname = "PO",
+ .file = FILE_LINE,
+ .ucdname = "PO",
},
{
.enumname = "PR",
- .file = FILE_LINE,
- .ucdname = "PR",
+ .file = FILE_LINE,
+ .ucdname = "PR",
},
{
.enumname = "QU",
- .file = FILE_LINE,
- .ucdname = "QU",
+ .file = FILE_LINE,
+ .ucdname = "QU",
},
{
.enumname = "RI",
- .file = FILE_LINE,
- .ucdname = "RI",
+ .file = FILE_LINE,
+ .ucdname = "RI",
},
{
.enumname = "SP",
- .file = FILE_LINE,
- .ucdname = "SP",
+ .file = FILE_LINE,
+ .ucdname = "SP",
},
{
.enumname = "SY",
- .file = FILE_LINE,
- .ucdname = "SY",
+ .file = FILE_LINE,
+ .ucdname = "SY",
},
{
.enumname = "WJ",
- .file = FILE_LINE,
- .ucdname = "WJ",
+ .file = FILE_LINE,
+ .ucdname = "WJ",
},
{
.enumname = "ZW",
- .file = FILE_LINE,
- .ucdname = "ZW",
+ .file = FILE_LINE,
+ .ucdname = "ZW",
},
{
.enumname = "ZWJ",
- .file = FILE_LINE,
- .ucdname = "ZWJ",
+ .file = FILE_LINE,
+ .ucdname = "ZWJ",
},
{
.enumname = "TMP_AI",
- .file = FILE_LINE,
- .ucdname = "AI",
+ .file = FILE_LINE,
+ .ucdname = "AI",
},
{
.enumname = "TMP_CJ",
- .file = FILE_LINE,
- .ucdname = "CJ",
+ .file = FILE_LINE,
+ .ucdname = "CJ",
},
{
.enumname = "TMP_XX",
- .file = NULL,
- .ucdname = NULL,
+ .file = NULL,
+ .ucdname = NULL,
},
{
.enumname = "TMP_MN",
- .file = FILE_LINE,
- .ucdname = "Mn",
+ .file = FILE_LINE,
+ .ucdname = "Mn",
},
{
.enumname = "TMP_MC",
- .file = FILE_LINE,
- .ucdname = "Mc",
+ .file = FILE_LINE,
+ .ucdname = "Mc",
},
{
.enumname = "TMP_SA_WITHOUT_MN_OR_MC",
- .file = FILE_LINE,
- .ucdname = "SA",
+ .file = FILE_LINE,
+ .ucdname = "SA",
},
{
.enumname = "TMP_SA_WITH_MN_OR_MC",
- .file = FILE_LINE,
- .ucdname = "SA",
+ .file = FILE_LINE,
+ .ucdname = "SA",
},
{
.enumname = "TMP_SG",
- .file = FILE_LINE,
- .ucdname = "SG",
+ .file = FILE_LINE,
+ .ucdname = "SG",
},
{
.enumname = "TMP_EAW_H",
- .file = FILE_EAW,
- .ucdname = "H",
+ .file = FILE_EAW,
+ .ucdname = "H",
},
{
.enumname = "TMP_EAW_W",
- .file = FILE_EAW,
- .ucdname = "W",
+ .file = FILE_EAW,
+ .ucdname = "W",
},
{
.enumname = "TMP_EAW_F",
- .file = FILE_EAW,
- .ucdname = "F",
+ .file = FILE_EAW,
+ .ucdname = "F",
},
};
@@ -306,23 +306,30 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2)
(void)cp;
- if ((!strcmp(line_break_property[prop1].enumname, "TMP_EAW_H") ||
- !strcmp(line_break_property[prop1].enumname, "TMP_EAW_W") ||
+ if ((!strcmp(line_break_property[prop1].enumname, "TMP_EAW_H") ||
+ !strcmp(line_break_property[prop1].enumname, "TMP_EAW_W") ||
!strcmp(line_break_property[prop1].enumname, "TMP_EAW_F")) ||
(!strcmp(line_break_property[prop2].enumname, "TMP_EAW_H") ||
!strcmp(line_break_property[prop2].enumname, "TMP_EAW_W") ||
!strcmp(line_break_property[prop2].enumname, "TMP_EAW_F"))) {
- if (!strcmp(line_break_property[prop1].enumname, "CP_WITHOUT_EAW_HWF") ||
- !strcmp(line_break_property[prop2].enumname, "CP_WITHOUT_EAW_HWF")) {
+ if (!strcmp(line_break_property[prop1].enumname,
+ "CP_WITHOUT_EAW_HWF") ||
+ !strcmp(line_break_property[prop2].enumname,
+ "CP_WITHOUT_EAW_HWF")) {
target = "CP_WITH_EAW_HWF";
- } else if (!strcmp(line_break_property[prop1].enumname, "OP_WITHOUT_EAW_HWF") ||
- !strcmp(line_break_property[prop2].enumname, "OP_WITHOUT_EAW_HWF")) {
+ } else if (!strcmp(line_break_property[prop1].enumname,
+ "OP_WITHOUT_EAW_HWF") ||
+ !strcmp(line_break_property[prop2].enumname,
+ "OP_WITHOUT_EAW_HWF")) {
target = "OP_WITH_EAW_HWF";
} else {
/* ignore EAW for the rest */
- if ((!strcmp(line_break_property[prop1].enumname, "TMP_EAW_H") ||
- !strcmp(line_break_property[prop1].enumname, "TMP_EAW_W") ||
- !strcmp(line_break_property[prop1].enumname, "TMP_EAW_F"))) {
+ if ((!strcmp(line_break_property[prop1].enumname,
+ "TMP_EAW_H") ||
+ !strcmp(line_break_property[prop1].enumname,
+ "TMP_EAW_W") ||
+ !strcmp(line_break_property[prop1].enumname,
+ "TMP_EAW_F"))) {
result = prop2;
} else {
result = prop1;
@@ -330,15 +337,19 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2)
}
} else if ((!strcmp(line_break_property[prop1].enumname, "TMP_MN") ||
!strcmp(line_break_property[prop1].enumname, "TMP_MC")) ||
- (!strcmp(line_break_property[prop2].enumname, "TMP_MN") ||
- !strcmp(line_break_property[prop2].enumname, "TMP_MC"))) {
- if (!strcmp(line_break_property[prop1].enumname, "SA_WITHOUT_MN_OR_MC") ||
- !strcmp(line_break_property[prop2].enumname, "SA_WITHOUT_MN_OR_MC")) {
+ (!strcmp(line_break_property[prop2].enumname, "TMP_MN") ||
+ !strcmp(line_break_property[prop2].enumname, "TMP_MC"))) {
+ if (!strcmp(line_break_property[prop1].enumname,
+ "SA_WITHOUT_MN_OR_MC") ||
+ !strcmp(line_break_property[prop2].enumname,
+ "SA_WITHOUT_MN_OR_MC")) {
target = "SA_WITH_MN_OR_MC";
} else {
/* ignore Mn and Mc for the rest */
- if ((!strcmp(line_break_property[prop1].enumname, "TMP_MN") ||
- !strcmp(line_break_property[prop1].enumname, "TMP_MC"))) {
+ if ((!strcmp(line_break_property[prop1].enumname,
+ "TMP_MN") ||
+ !strcmp(line_break_property[prop1].enumname,
+ "TMP_MC"))) {
result = prop2;
} else {
result = prop1;
@@ -346,33 +357,42 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2)
}
} else if (!strcmp(line_break_property[prop1].enumname, "TMP_CN") ||
!strcmp(line_break_property[prop2].enumname, "TMP_CN")) {
- if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED_PICTOGRAPHIC") ||
- !strcmp(line_break_property[prop2].enumname, "TMP_EXTENDED_PICTOGRAPHIC")) {
+ if (!strcmp(line_break_property[prop1].enumname,
+ "TMP_EXTENDED_PICTOGRAPHIC") ||
+ !strcmp(line_break_property[prop2].enumname,
+ "TMP_EXTENDED_PICTOGRAPHIC")) {
target = "BOTH_CN_EXTPICT";
} else {
/* ignore Cn for all the other properties */
- if (!strcmp(line_break_property[prop1].enumname, "TMP_CN")) {
+ if (!strcmp(line_break_property[prop1].enumname,
+ "TMP_CN")) {
result = prop2;
} else {
result = prop1;
}
}
- } else if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED_PICTOGRAPHIC") ||
- !strcmp(line_break_property[prop2].enumname, "TMP_EXTENDED_PICTOGRAPHIC")) {
+ } else if (!strcmp(line_break_property[prop1].enumname,
+ "TMP_EXTENDED_PICTOGRAPHIC") ||
+ !strcmp(line_break_property[prop2].enumname,
+ "TMP_EXTENDED_PICTOGRAPHIC")) {
if (!strcmp(line_break_property[prop1].enumname, "TMP_CN") ||
!strcmp(line_break_property[prop2].enumname, "TMP_CN")) {
target = "BOTH_CN_EXTPICT";
} else {
- /* ignore Extended_Pictographic for all the other properties */
- if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED_PICTOGRAPHIC")) {
+ /* ignore Extended_Pictographic for all the other
+ * properties */
+ if (!strcmp(line_break_property[prop1].enumname,
+ "TMP_EXTENDED_PICTOGRAPHIC")) {
result = prop2;
} else {
result = prop1;
}
}
} else {
- fprintf(stderr, "handle_conflict: Cannot handle conflict %s <- %s.\n",
- line_break_property[prop1].enumname, line_break_property[prop2].enumname);
+ fprintf(stderr,
+ "handle_conflict: Cannot handle conflict %s <- %s.\n",
+ line_break_property[prop1].enumname,
+ line_break_property[prop2].enumname);
exit(1);
}
@@ -402,27 +422,44 @@ post_process(struct properties *prop)
/* post-mapping according to the line breaking algorithm */
for (i = 0; i < UINT32_C(0x110000); i++) {
/* LB1 */
- if (!strcmp(line_break_property[prop[i].property].enumname, "TMP_AI") ||
- !strcmp(line_break_property[prop[i].property].enumname, "TMP_SG") ||
- !strcmp(line_break_property[prop[i].property].enumname, "TMP_XX")) {
+ if (!strcmp(line_break_property[prop[i].property].enumname,
+ "TMP_AI") ||
+ !strcmp(line_break_property[prop[i].property].enumname,
+ "TMP_SG") ||
+ !strcmp(line_break_property[prop[i].property].enumname,
+ "TMP_XX")) {
/* map AI, SG and XX to AL */
target = "AL";
- } else if (!strcmp(line_break_property[prop[i].property].enumname, "TMP_SA_WITH_MN_OR_MC")) {
+ } else if (!strcmp(line_break_property[prop[i].property]
+ .enumname,
+ "TMP_SA_WITH_MN_OR_MC")) {
/* map SA (with General_Category Mn or Mc) to CM */
target = "CM";
- } else if (!strcmp(line_break_property[prop[i].property].enumname, "TMP_SA_WITHOUT_MN_OR_MC")) {
+ } else if (!strcmp(line_break_property[prop[i].property]
+ .enumname,
+ "TMP_SA_WITHOUT_MN_OR_MC")) {
/* map SA (without General_Category Mn or Mc) to AL */
target = "AL";
- } else if (!strcmp(line_break_property[prop[i].property].enumname, "TMP_CJ")) {
+ } else if (!strcmp(line_break_property[prop[i].property]
+ .enumname,
+ "TMP_CJ")) {
/* map CJ to NS */
target = "NS";
- } else if (!strcmp(line_break_property[prop[i].property].enumname, "TMP_CN") ||
- !strcmp(line_break_property[prop[i].property].enumname, "TMP_EXTENDED_PICTOGRAPHIC") ||
- !strcmp(line_break_property[prop[i].property].enumname, "TMP_MN") ||
- !strcmp(line_break_property[prop[i].property].enumname, "TMP_MC") ||
- !strcmp(line_break_property[prop[i].property].enumname, "TMP_EAW_H") ||
- !strcmp(line_break_property[prop[i].property].enumname, "TMP_EAW_W") ||
- !strcmp(line_break_property[prop[i].property].enumname, "TMP_EAW_F")) {
+ } else if (
+ !strcmp(line_break_property[prop[i].property].enumname,
+ "TMP_CN") ||
+ !strcmp(line_break_property[prop[i].property].enumname,
+ "TMP_EXTENDED_PICTOGRAPHIC") ||
+ !strcmp(line_break_property[prop[i].property].enumname,
+ "TMP_MN") ||
+ !strcmp(line_break_property[prop[i].property].enumname,
+ "TMP_MC") ||
+ !strcmp(line_break_property[prop[i].property].enumname,
+ "TMP_EAW_H") ||
+ !strcmp(line_break_property[prop[i].property].enumname,
+ "TMP_EAW_W") ||
+ !strcmp(line_break_property[prop[i].property].enumname,
+ "TMP_EAW_F")) {
/* map all the temporary classes "residue" to AL */
target = "AL";
} else {
@@ -430,14 +467,17 @@ post_process(struct properties *prop)
}
if (target) {
- for (result = 0; result < LEN(line_break_property); result++) {
- if (!strcmp(line_break_property[result].enumname,
+ for (result = 0; result < LEN(line_break_property);
+ result++) {
+ if (!strcmp(line_break_property[result]
+ .enumname,
target)) {
break;
}
}
if (result == LEN(line_break_property)) {
- fprintf(stderr, "handle_conflict: Internal error.\n");
+ fprintf(stderr,
+ "handle_conflict: Internal error.\n");
exit(1);
}
@@ -451,10 +491,9 @@ main(int argc, char *argv[])
{
(void)argc;
- properties_generate_break_property(line_break_property,
- LEN(line_break_property), NULL,
- handle_conflict, post_process,
- "line_break", argv[0]);
+ properties_generate_break_property(
+ line_break_property, LEN(line_break_property), NULL,
+ handle_conflict, post_process, "line_break", argv[0]);
return 0;
}
diff --git a/gen/sentence.c b/gen/sentence.c
@@ -6,78 +6,78 @@
static const struct property_spec sentence_break_property[] = {
{
.enumname = "OTHER",
- .file = NULL,
- .ucdname = NULL,
+ .file = NULL,
+ .ucdname = NULL,
},
{
.enumname = "CR",
- .file = FILE_SENTENCE,
- .ucdname = "CR",
+ .file = FILE_SENTENCE,
+ .ucdname = "CR",
},
{
.enumname = "LF",
- .file = FILE_SENTENCE,
- .ucdname = "LF",
+ .file = FILE_SENTENCE,
+ .ucdname = "LF",
},
{
.enumname = "EXTEND",
- .file = FILE_SENTENCE,
- .ucdname = "Extend",
+ .file = FILE_SENTENCE,
+ .ucdname = "Extend",
},
{
.enumname = "SEP",
- .file = FILE_SENTENCE,
- .ucdname = "Sep",
+ .file = FILE_SENTENCE,
+ .ucdname = "Sep",
},
{
.enumname = "FORMAT",
- .file = FILE_SENTENCE,
- .ucdname = "Format",
+ .file = FILE_SENTENCE,
+ .ucdname = "Format",
},
{
.enumname = "SP",
- .file = FILE_SENTENCE,
- .ucdname = "Sp",
+ .file = FILE_SENTENCE,
+ .ucdname = "Sp",
},
{
.enumname = "LOWER",
- .file = FILE_SENTENCE,
- .ucdname = "Lower",
+ .file = FILE_SENTENCE,
+ .ucdname = "Lower",
},
{
.enumname = "UPPER",
- .file = FILE_SENTENCE,
- .ucdname = "Upper",
+ .file = FILE_SENTENCE,
+ .ucdname = "Upper",
},
{
.enumname = "OLETTER",
- .file = FILE_SENTENCE,
- .ucdname = "OLetter",
+ .file = FILE_SENTENCE,
+ .ucdname = "OLetter",
},
{
.enumname = "NUMERIC",
- .file = FILE_SENTENCE,
- .ucdname = "Numeric",
+ .file = FILE_SENTENCE,
+ .ucdname = "Numeric",
},
{
.enumname = "ATERM",
- .file = FILE_SENTENCE,
- .ucdname = "ATerm",
+ .file = FILE_SENTENCE,
+ .ucdname = "ATerm",
},
{
.enumname = "SCONTINUE",
- .file = FILE_SENTENCE,
- .ucdname = "SContinue",
+ .file = FILE_SENTENCE,
+ .ucdname = "SContinue",
},
{
.enumname = "STERM",
- .file = FILE_SENTENCE,
- .ucdname = "STerm",
+ .file = FILE_SENTENCE,
+ .ucdname = "STerm",
},
{
.enumname = "CLOSE",
- .file = FILE_SENTENCE,
- .ucdname = "Close",
+ .file = FILE_SENTENCE,
+ .ucdname = "Close",
},
};
@@ -86,9 +86,9 @@ main(int argc, char *argv[])
{
(void)argc;
- properties_generate_break_property(sentence_break_property,
- LEN(sentence_break_property), NULL,
- NULL, NULL, "sentence_break", argv[0]);
+ properties_generate_break_property(
+ sentence_break_property, LEN(sentence_break_property), NULL,
+ NULL, NULL, "sentence_break", argv[0]);
return 0;
}
diff --git a/gen/util.c b/gen/util.c
@@ -1,13 +1,12 @@
/* See LICENSE file for copyright and license details. */
-#include <stdbool.h>
#include <ctype.h>
#include <errno.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
-#include <stdlib.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
#include "util.h"
@@ -21,12 +20,13 @@ struct properties_payload {
struct properties *prop;
const struct property_spec *spec;
uint_least8_t speclen;
- int (*set_value)(struct properties_payload *, uint_least32_t, int_least64_t);
- uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t, uint_least8_t);
+ int (*set_value)(struct properties_payload *, uint_least32_t,
+ int_least64_t);
+ uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t,
+ uint_least8_t);
};
-struct break_test_payload
-{
+struct break_test_payload {
struct break_test **test;
size_t *testlen;
};
@@ -51,8 +51,8 @@ hextocp(const char *str, size_t len, uint_least32_t *cp)
/* the maximum valid codepoint is 0x10FFFF */
if (len > 6) {
- fprintf(stderr, "hextocp: '%.*s' is too long.\n",
- (int)len, str);
+ fprintf(stderr, "hextocp: '%.*s' is too long.\n", (int)len,
+ str);
return 1;
}
@@ -77,8 +77,8 @@ hextocp(const char *str, size_t len, uint_least32_t *cp)
}
if (*cp > UINT32_C(0x10FFFF)) {
- fprintf(stderr, "hextocp: '%.*s' is too large.\n",
- (int)len, str);
+ fprintf(stderr, "hextocp: '%.*s' is too large.\n", (int)len,
+ str);
return 1;
}
@@ -98,8 +98,10 @@ parse_cp_list(const char *str, uint_least32_t **cp, size_t *cplen)
}
/* count the number of spaces in the string and infer list length */
- for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL; count++, tmp1 = tmp2 + 1)
+ for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL;
+ count++, tmp1 = tmp2 + 1) {
;
+ }
/* allocate resources */
if (!(*cp = calloc((*cplen = count), sizeof(**cp)))) {
@@ -110,7 +112,8 @@ parse_cp_list(const char *str, uint_least32_t **cp, size_t *cplen)
/* go through the string again, parsing the numbers */
for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) {
tmp2 = strchr(tmp1, ' ');
- if (hextocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1), &((*cp)[i]))) {
+ if (hextocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1),
+ &((*cp)[i]))) {
return 1;
}
if (tmp2 != NULL) {
@@ -144,8 +147,10 @@ range_parse(const char *str, struct range *range)
}
void
-parse_file_with_callback(const char *fname, int (*callback)(const char *,
- char **, size_t, char *, void *), void *payload)
+parse_file_with_callback(const char *fname,
+ int (*callback)(const char *, char **, size_t, char *,
+ void *),
+ void *payload)
{
FILE *fp;
char *line = NULL, **field = NULL, *comment;
@@ -182,10 +187,15 @@ parse_file_with_callback(const char *fname, int (*callback)(const char *,
if (line[i] != '#') {
/* extend field buffer, if necessary */
if (++nfields > fieldbufsize) {
- if ((field = realloc(field, nfields *
- sizeof(*field))) == NULL) {
- fprintf(stderr, "parse_file_with_"
- "callback: realloc: %s.\n",
+ if ((field = realloc(
+ field,
+ nfields *
+ sizeof(*field))) ==
+ NULL) {
+ fprintf(stderr,
+ "parse_file_with_"
+ "callback: realloc: "
+ "%s.\n",
strerror(errno));
exit(1);
}
@@ -209,8 +219,9 @@ parse_file_with_callback(const char *fname, int (*callback)(const char *,
/* go back whitespace and terminate field there */
if (i > 0) {
- for (j = i - 1; line[j] == ' '; j--)
+ for (j = i - 1; line[j] == ' '; j--) {
;
+ }
line[j + 1] = '\0';
} else {
line[i] = '\0';
@@ -230,7 +241,7 @@ parse_file_with_callback(const char *fname, int (*callback)(const char *,
/* call callback function */
if (callback(fname, field, nfields, comment, payload)) {
fprintf(stderr, "parse_file_with_callback: "
- "Malformed input.\n");
+ "Malformed input.\n");
exit(1);
}
}
@@ -257,10 +268,11 @@ properties_callback(const char *file, char **field, size_t nfields,
for (i = 0; i < p->speclen; i++) {
/* identify fitting file and identifier */
- if (p->spec[i].file &&
- !strcmp(p->spec[i].file, file) &&
+ if (p->spec[i].file && !strcmp(p->spec[i].file, file) &&
(!strcmp(p->spec[i].ucdname, field[1]) ||
- (comment != NULL && !strncmp(p->spec[i].ucdname, comment, strlen(p->spec[i].ucdname)) &&
+ (comment != NULL &&
+ !strncmp(p->spec[i].ucdname, comment,
+ strlen(p->spec[i].ucdname)) &&
comment[strlen(p->spec[i].ucdname)] == ' '))) {
/* parse range in first field */
if (range_parse(field[0], &r)) {
@@ -287,7 +299,8 @@ properties_compress(const struct properties *prop,
uint_least32_t cp, i;
/* initialization */
- if (!(comp->offset = malloc((size_t)UINT32_C(0x110000) * sizeof(*(comp->offset))))) {
+ if (!(comp->offset = malloc((size_t)UINT32_C(0x110000) *
+ sizeof(*(comp->offset))))) {
fprintf(stderr, "malloc: %s\n", strerror(errno));
exit(1);
}
@@ -296,7 +309,8 @@ properties_compress(const struct properties *prop,
for (cp = 0; cp < UINT32_C(0x110000); cp++) {
for (i = 0; i < comp->datalen; i++) {
- if (!memcmp(&(prop[cp]), &(comp->data[i]), sizeof(*prop))) {
+ if (!memcmp(&(prop[cp]), &(comp->data[i]),
+ sizeof(*prop))) {
/* found a match! */
comp->offset[cp] = i;
break;
@@ -308,9 +322,9 @@ properties_compress(const struct properties *prop,
* add current properties to data and add the
* offset in the offset-table
*/
- if (!(comp->data = reallocate_array(comp->data,
- ++(comp->datalen),
- sizeof(*(comp->data))))) {
+ if (!(comp->data = reallocate_array(
+ comp->data, ++(comp->datalen),
+ sizeof(*(comp->data))))) {
fprintf(stderr, "reallocate_array: %s\n",
strerror(errno));
exit(1);
@@ -357,8 +371,7 @@ properties_get_major_minor(const struct properties_compressed *comp,
* and need less storage)
*/
for (j = 0; j + 0xFF < mm->minorlen; j++) {
- if (!memcmp(&(comp->offset[i << 8]),
- &(mm->minor[j]),
+ if (!memcmp(&(comp->offset[i << 8]), &(mm->minor[j]),
sizeof(*(comp->offset)) * 0x100)) {
break;
}
@@ -373,9 +386,9 @@ properties_get_major_minor(const struct properties_compressed *comp,
* in major
*/
mm->minorlen += 0x100;
- if (!(mm->minor = reallocate_array(mm->minor,
- mm->minorlen,
- sizeof(*(mm->minor))))) {
+ if (!(mm->minor =
+ reallocate_array(mm->minor, mm->minorlen,
+ sizeof(*(mm->minor))))) {
fprintf(stderr, "reallocate_array: %s\n",
strerror(errno));
exit(1);
@@ -403,7 +416,7 @@ properties_print_lookup_table(char *name, size_t *data, size_t datalen)
}
}
- type = (maxval <= UINT_LEAST8_MAX) ? "uint_least8_t" :
+ type = (maxval <= UINT_LEAST8_MAX) ? "uint_least8_t" :
(maxval <= UINT_LEAST16_MAX) ? "uint_least16_t" :
(maxval <= UINT_LEAST32_MAX) ? "uint_least32_t" :
"uint_least64_t";
@@ -418,21 +431,21 @@ properties_print_lookup_table(char *name, size_t *data, size_t datalen)
} else {
printf(",\n\t");
}
-
}
printf("};\n");
}
void
-properties_print_derived_lookup_table(char *name, char *type, size_t *offset, size_t offsetlen,
- int_least64_t (*get_value)(const struct properties *,
- size_t), const void *payload)
+properties_print_derived_lookup_table(
+ char *name, char *type, size_t *offset, size_t offsetlen,
+ int_least64_t (*get_value)(const struct properties *, size_t),
+ const void *payload)
{
size_t i;
printf("static const %s %s[] = {\n\t", type, name);
for (i = 0; i < offsetlen; i++) {
- printf("%"PRIiLEAST64, get_value(payload, offset[i]));
+ printf("%" PRIiLEAST64, get_value(payload, offset[i]));
if (i + 1 == offsetlen) {
printf("\n");
} else if ((i + 1) % 8 != 0) {
@@ -440,7 +453,6 @@ properties_print_derived_lookup_table(char *name, char *type, size_t *offset, si
} else {
printf(",\n\t");
}
-
}
printf("};\n");
}
@@ -464,17 +476,19 @@ set_value_bp(struct properties_payload *payload, uint_least32_t cp,
{
if (payload->prop[cp].property != payload->speclen) {
if (payload->handle_conflict == NULL) {
- fprintf(stderr, "set_value_bp: "
- "Unhandled character break property "
+ fprintf(stderr,
+ "set_value_bp: "
+ "Unhandled character break property "
"overwrite for 0x%06X (%s <- %s).\n",
- cp, payload->spec[payload->prop[cp].
- property].enumname,
+ cp,
+ payload->spec[payload->prop[cp].property]
+ .enumname,
payload->spec[value].enumname);
return 1;
} else {
- value = payload->handle_conflict(cp,
- (uint_least8_t)payload->prop[cp].property,
- (uint_least8_t)value);
+ value = payload->handle_conflict(
+ cp, (uint_least8_t)payload->prop[cp].property,
+ (uint_least8_t)value);
}
}
payload->prop[cp].property = value;
@@ -489,15 +503,13 @@ get_value_bp(const struct properties *prop, size_t offset)
}
void
-properties_generate_break_property(const struct property_spec *spec,
- uint_least8_t speclen,
- uint_least8_t (*fill_missing)(
- uint_least32_t),
- uint_least8_t (*handle_conflict)(
- uint_least32_t, uint_least8_t,
- uint_least8_t), void
- (*post_process)(struct properties *),
- const char *prefix, const char *argv0)
+properties_generate_break_property(
+ const struct property_spec *spec, uint_least8_t speclen,
+ uint_least8_t (*fill_missing)(uint_least32_t),
+ uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t,
+ uint_least8_t),
+ void (*post_process)(struct properties *), const char *prefix,
+ const char *argv0)
{
struct properties_compressed comp;
struct properties_major_minor mm;
@@ -537,8 +549,7 @@ properties_generate_break_property(const struct property_spec *spec,
if (i == j && spec[i].file) {
/* file has not been processed yet */
parse_file_with_callback(spec[i].file,
- properties_callback,
- &payload);
+ properties_callback, &payload);
}
}
@@ -546,7 +557,8 @@ properties_generate_break_property(const struct property_spec *spec,
for (i = 0; i < UINT32_C(0x110000); i++) {
if (payload.prop[i].property == speclen) {
if (fill_missing != NULL) {
- payload.prop[i].property = fill_missing((uint_least32_t)i);
+ payload.prop[i].property =
+ fill_missing((uint_least32_t)i);
} else {
payload.prop[i].property = 0;
}
@@ -559,14 +571,16 @@ properties_generate_break_property(const struct property_spec *spec,
}
/* compress data */
- printf("/* Automatically generated by %s */\n#include <stdint.h>\n\n", argv0);
+ printf("/* Automatically generated by %s */\n#include <stdint.h>\n\n",
+ argv0);
properties_compress(prop, &comp);
- fprintf(stderr, "%s: %s-LUT compression-ratio: %.2f%%\n", argv0,
- prefix, properties_get_major_minor(&comp, &mm));
+ fprintf(stderr, "%s: %s-LUT compression-ratio: %.2f%%\n", argv0, prefix,
+ properties_get_major_minor(&comp, &mm));
/* prepare names */
- if ((size_t)snprintf(buf1, LEN(buf1), "%s_property", prefix) >= LEN(buf1)) {
+ if ((size_t)snprintf(buf1, LEN(buf1), "%s_property", prefix) >=
+ LEN(buf1)) {
fprintf(stderr, "snprintf: String truncated.\n");
exit(1);
}
@@ -578,9 +592,12 @@ properties_generate_break_property(const struct property_spec *spec,
prefix_uc[i] = (char)toupper(prefix[i]);
}
prefix_uc[prefixlen] = '\0';
- if ((size_t)snprintf(buf2, LEN(buf2), "%s_PROP", prefix_uc) >= LEN(buf2) ||
- (size_t)snprintf(buf3, LEN(buf3), "%s_major", prefix) >= LEN(buf3) ||
- (size_t)snprintf(buf4, LEN(buf4), "%s_minor", prefix) >= LEN(buf4)) {
+ if ((size_t)snprintf(buf2, LEN(buf2), "%s_PROP", prefix_uc) >=
+ LEN(buf2) ||
+ (size_t)snprintf(buf3, LEN(buf3), "%s_major", prefix) >=
+ LEN(buf3) ||
+ (size_t)snprintf(buf4, LEN(buf4), "%s_minor", prefix) >=
+ LEN(buf4)) {
fprintf(stderr, "snprintf: String truncated.\n");
exit(1);
}
@@ -589,8 +606,9 @@ properties_generate_break_property(const struct property_spec *spec,
properties_print_enum(spec, speclen, buf1, buf2);
properties_print_lookup_table(buf3, mm.major, 0x1100);
printf("\n");
- properties_print_derived_lookup_table(buf4, "uint_least8_t", mm.minor, mm.minorlen,
- get_value_bp, comp.data);
+ properties_print_derived_lookup_table(buf4, "uint_least8_t", mm.minor,
+ mm.minorlen, get_value_bp,
+ comp.data);
/* free data */
free(prop);
@@ -625,42 +643,50 @@ break_test_callback(const char *fname, char **field, size_t nfields,
memset(t, 0, sizeof(*t));
/* parse testcase "<÷|×> <cp> <÷|×> ... <cp> <÷|×>" */
- for (token = strtok(field[0], " "), i = 0; token != NULL; i++,
- token = strtok(NULL, " ")) {
+ for (token = strtok(field[0], " "), i = 0; token != NULL;
+ i++, token = strtok(NULL, " ")) {
if (i % 2 == 0) {
/* delimiter or start of sequence */
- if (i == 0 || !strncmp(token, "\xC3\xB7", 2)) { /* UTF-8 */
+ if (i == 0 ||
+ !strncmp(token, "\xC3\xB7", 2)) { /* UTF-8 */
/*
* '÷' indicates a breakpoint,
* the current length is done; allocate
* a new length field and set it to 0
*/
- if ((t->len = realloc(t->len,
- ++t->lenlen * sizeof(*t->len))) == NULL) {
- fprintf(stderr, "break_test_"
+ if ((t->len = realloc(
+ t->len,
+ ++t->lenlen * sizeof(*t->len))) ==
+ NULL) {
+ fprintf(stderr,
+ "break_test_"
"callback: realloc: %s.\n",
strerror(errno));
return 1;
}
t->len[t->lenlen - 1] = 0;
} else if (!strncmp(token, "\xC3\x97", 2)) { /* UTF-8 */
- /*
- * '×' indicates a non-breakpoint, do nothing
- */
+ /* '×' indicates a non-breakpoint, do nothing */
} else {
- fprintf(stderr, "break_test_callback: "
- "Malformed delimiter '%s'.\n", token);
+ fprintf(stderr,
+ "break_test_callback: "
+ "Malformed delimiter '%s'.\n",
+ token);
return 1;
}
} else {
/* add codepoint to cp-array */
- if ((t->cp = realloc(t->cp, ++t->cplen *
- sizeof(*t->cp))) == NULL) {
- fprintf(stderr, "break_test_callback: "
- "realloc: %s.\n", strerror(errno));
+ if ((t->cp = realloc(t->cp,
+ ++t->cplen * sizeof(*t->cp))) ==
+ NULL) {
+ fprintf(stderr,
+ "break_test_callback: "
+ "realloc: %s.\n",
+ strerror(errno));
return 1;
}
- if (hextocp(token, strlen(token), &t->cp[t->cplen - 1])) {
+ if (hextocp(token, strlen(token),
+ &t->cp[t->cplen - 1])) {
return 1;
}
if (t->lenlen > 0) {
@@ -688,8 +714,7 @@ break_test_callback(const char *fname, char **field, size_t nfields,
}
void
-break_test_list_parse(char *fname, struct break_test **test,
- size_t *testlen)
+break_test_list_parse(char *fname, struct break_test **test, size_t *testlen)
{
struct break_test_payload pl = {
.test = test,
@@ -703,13 +728,14 @@ break_test_list_parse(char *fname, struct break_test **test,
void
break_test_list_print(const struct break_test *test, size_t testlen,
- const char *identifier, const char *progname)
+ const char *identifier, const char *progname)
{
size_t i, j;
printf("/* Automatically generated by %s */\n"
"#include <stdint.h>\n#include <stddef.h>\n\n"
- "#include \"../gen/types.h\"\n\n", progname);
+ "#include \"../gen/types.h\"\n\n",
+ progname);
printf("static const struct break_test %s[] = {\n", identifier);
for (i = 0; i < testlen; i++) {
diff --git a/gen/util.h b/gen/util.h
@@ -7,7 +7,7 @@
#include "types.h"
-#define LEN(x) (sizeof (x) / sizeof *(x))
+#define LEN(x) (sizeof(x) / sizeof *(x))
struct property_spec {
const char *enumname;
@@ -34,30 +34,31 @@ struct properties_major_minor {
int hextocp(const char *, size_t, uint_least32_t *cp);
int parse_cp_list(const char *, uint_least32_t **, size_t *);
-void parse_file_with_callback(const char *, int (*callback)(const char *,
- char **, size_t, char *, void *), void *payload);
+void parse_file_with_callback(const char *,
+ int (*callback)(const char *, char **, size_t,
+ char *, void *),
+ void *payload);
-void properties_compress(const struct properties *, struct properties_compressed *comp);
+void properties_compress(const struct properties *,
+ struct properties_compressed *comp);
double properties_get_major_minor(const struct properties_compressed *,
struct properties_major_minor *);
void properties_print_lookup_table(char *, size_t *, size_t);
-void properties_print_derived_lookup_table(char *, char *, size_t *, size_t,
- int_least64_t (*get_value)(const struct properties *,
- size_t), const void *);
-
-void properties_generate_break_property(const struct property_spec *,
- uint_least8_t, uint_least8_t
- (*fill_missing)(uint_least32_t),
- uint_least8_t
- (*handle_conflict)(uint_least32_t,
- uint_least8_t, uint_least8_t),
- void (*post_process)
- (struct properties *),
- const char *, const char *);
+void properties_print_derived_lookup_table(
+ char *, char *, size_t *, size_t,
+ int_least64_t (*get_value)(const struct properties *, size_t),
+ const void *);
+
+void properties_generate_break_property(
+ const struct property_spec *, uint_least8_t,
+ uint_least8_t (*fill_missing)(uint_least32_t),
+ uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t,
+ uint_least8_t),
+ void (*post_process)(struct properties *), const char *, const char *);
void break_test_list_parse(char *, struct break_test **, size_t *);
-void break_test_list_print(const struct break_test *, size_t,
- const char *, const char *);
+void break_test_list_print(const struct break_test *, size_t, const char *,
+ const char *);
void break_test_list_free(struct break_test *, size_t);
#endif /* UTIL_H */
diff --git a/gen/word.c b/gen/word.c
@@ -11,108 +11,108 @@
static const struct property_spec word_break_property[] = {
{
.enumname = "OTHER",
- .file = NULL,
- .ucdname = NULL,
+ .file = NULL,
+ .ucdname = NULL,
},
{
.enumname = "ALETTER",
- .file = FILE_WORD,
- .ucdname = "ALetter",
+ .file = FILE_WORD,
+ .ucdname = "ALetter",
},
{
.enumname = "BOTH_ALETTER_EXTPICT",
- .file = NULL,
- .ucdname = NULL,
+ .file = NULL,
+ .ucdname = NULL,
},
{
.enumname = "CR",
- .file = FILE_WORD,
- .ucdname = "CR",
+ .file = FILE_WORD,
+ .ucdname = "CR",
},
{
.enumname = "DOUBLE_QUOTE",
- .file = FILE_WORD,
- .ucdname = "Double_Quote",
+ .file = FILE_WORD,
+ .ucdname = "Double_Quote",
},
{
.enumname = "EXTEND",
- .file = FILE_WORD,
- .ucdname = "Extend",
+ .file = FILE_WORD,
+ .ucdname = "Extend",
},
{
.enumname = "EXTENDED_PICTOGRAPHIC",
- .file = FILE_EMOJI,
- .ucdname = "Extended_Pictographic",
+ .file = FILE_EMOJI,
+ .ucdname = "Extended_Pictographic",
},
{
.enumname = "EXTENDNUMLET",
- .file = FILE_WORD,
- .ucdname = "ExtendNumLet",
+ .file = FILE_WORD,
+ .ucdname = "ExtendNumLet",
},
{
.enumname = "FORMAT",
- .file = FILE_WORD,
- .ucdname = "Format",
+ .file = FILE_WORD,
+ .ucdname = "Format",
},
{
.enumname = "HEBREW_LETTER",
- .file = FILE_WORD,
- .ucdname = "Hebrew_Letter",
+ .file = FILE_WORD,
+ .ucdname = "Hebrew_Letter",
},
{
.enumname = "KATAKANA",
- .file = FILE_WORD,
- .ucdname = "Katakana",
+ .file = FILE_WORD,
+ .ucdname = "Katakana",
},
{
.enumname = "LF",
- .file = FILE_WORD,
- .ucdname = "LF",
+ .file = FILE_WORD,
+ .ucdname = "LF",
},
{
.enumname = "MIDLETTER",
- .file = FILE_WORD,
- .ucdname = "MidLetter",
+ .file = FILE_WORD,
+ .ucdname = "MidLetter",
},
{
.enumname = "MIDNUM",
- .file = FILE_WORD,
- .ucdname = "MidNum",
+ .file = FILE_WORD,
+ .ucdname = "MidNum",
},
{
.enumname = "MIDNUMLET",
- .file = FILE_WORD,
- .ucdname = "MidNumLet",
+ .file = FILE_WORD,
+ .ucdname = "MidNumLet",
},
{
.enumname = "NEWLINE",
- .file = FILE_WORD,
- .ucdname = "Newline",
+ .file = FILE_WORD,
+ .ucdname = "Newline",
},
{
.enumname = "NUMERIC",
- .file = FILE_WORD,
- .ucdname = "Numeric",
+ .file = FILE_WORD,
+ .ucdname = "Numeric",
},
{
.enumname = "REGIONAL_INDICATOR",
- .file = FILE_WORD,
- .ucdname = "Regional_Indicator",
+ .file = FILE_WORD,
+ .ucdname = "Regional_Indicator",
},
{
.enumname = "SINGLE_QUOTE",
- .file = FILE_WORD,
- .ucdname = "Single_Quote",
+ .file = FILE_WORD,
+ .ucdname = "Single_Quote",
},
{
.enumname = "WSEGSPACE",
- .file = FILE_WORD,
- .ucdname = "WSegSpace",
+ .file = FILE_WORD,
+ .ucdname = "WSegSpace",
},
{
.enumname = "ZWJ",
- .file = FILE_WORD,
- .ucdname = "ZWJ",
+ .file = FILE_WORD,
+ .ucdname = "ZWJ",
},
};
@@ -124,8 +124,10 @@ handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2)
(void)cp;
if ((!strcmp(word_break_property[prop1].enumname, "ALETTER") &&
- !strcmp(word_break_property[prop2].enumname, "EXTENDED_PICTOGRAPHIC")) ||
- (!strcmp(word_break_property[prop1].enumname, "EXTENDED_PICTOGRAPHIC") &&
+ !strcmp(word_break_property[prop2].enumname,
+ "EXTENDED_PICTOGRAPHIC")) ||
+ (!strcmp(word_break_property[prop1].enumname,
+ "EXTENDED_PICTOGRAPHIC") &&
!strcmp(word_break_property[prop2].enumname, "ALETTER"))) {
for (result = 0; result < LEN(word_break_property); result++) {
if (!strcmp(word_break_property[result].enumname,
@@ -150,10 +152,9 @@ main(int argc, char *argv[])
{
(void)argc;
- properties_generate_break_property(word_break_property,
- LEN(word_break_property), NULL,
- handle_conflict, NULL, "word_break",
- argv[0]);
+ properties_generate_break_property(
+ word_break_property, LEN(word_break_property), NULL,
+ handle_conflict, NULL, "word_break", argv[0]);
return 0;
}
diff --git a/grapheme.h b/grapheme.h
@@ -18,14 +18,15 @@ enum grapheme_bidirectional_override {
size_t grapheme_decode_utf8(const char *, size_t, uint_least32_t *);
size_t grapheme_encode_utf8(uint_least32_t, char *, size_t);
-size_t grapheme_get_bidirectional_embedding_levels(const uint_least32_t *, size_t,
- enum grapheme_bidirectional_override,
- int_least32_t *, size_t);
-size_t grapheme_get_bidirectional_embedding_levels_utf8(const char *, size_t,
- enum grapheme_bidirectional_override,
- int_least32_t *, size_t);
+size_t grapheme_get_bidirectional_embedding_levels(
+ const uint_least32_t *, size_t, enum grapheme_bidirectional_override,
+ int_least32_t *, size_t);
+size_t grapheme_get_bidirectional_embedding_levels_utf8(
+ const char *, size_t, enum grapheme_bidirectional_override,
+ int_least32_t *, size_t);
-bool grapheme_is_character_break(uint_least32_t, uint_least32_t, uint_least16_t *);
+bool grapheme_is_character_break(uint_least32_t, uint_least32_t,
+ uint_least16_t *);
bool grapheme_is_lowercase(const uint_least32_t *, size_t, size_t *);
bool grapheme_is_titlecase(const uint_least32_t *, size_t, size_t *);
@@ -45,9 +46,12 @@ size_t grapheme_next_line_break_utf8(const char *, size_t);
size_t grapheme_next_sentence_break_utf8(const char *, size_t);
size_t grapheme_next_word_break_utf8(const char *, size_t);
-size_t grapheme_to_lowercase(const uint_least32_t *, size_t, uint_least32_t *, size_t);
-size_t grapheme_to_titlecase(const uint_least32_t *, size_t, uint_least32_t *, size_t);
-size_t grapheme_to_uppercase(const uint_least32_t *, size_t, uint_least32_t *, size_t);
+size_t grapheme_to_lowercase(const uint_least32_t *, size_t, uint_least32_t *,
+ size_t);
+size_t grapheme_to_titlecase(const uint_least32_t *, size_t, uint_least32_t *,
+ size_t);
+size_t grapheme_to_uppercase(const uint_least32_t *, size_t, uint_least32_t *,
+ size_t);
size_t grapheme_to_lowercase_utf8(const char *, size_t, char *, size_t);
size_t grapheme_to_titlecase_utf8(const char *, size_t, char *, size_t);
diff --git a/src/bidirectional.c b/src/bidirectional.c
@@ -12,15 +12,18 @@ struct isolate_runner {
int_least32_t *buf;
size_t buflen;
enum bidi_property prev_prop;
+
struct {
size_t off;
enum bidi_property prop;
int_least8_t level;
} cur;
+
struct {
size_t off;
enum bidi_property prop;
} next;
+
uint_least8_t paragraph_level;
int_least8_t isolating_run_level;
enum bidi_property last_strong_type;
@@ -57,24 +60,42 @@ struct state {
static inline void
state_serialize(const struct state *s, int_least32_t *out)
{
- *out = (int_least32_t)(
- ((((uint_least32_t)(s->paragraph_level)) & 0x01 /* 00000001 */) << 0) |
- ((((uint_least32_t)(s->level + 1)) & 0x7F /* 01111111 */) << 1) |
- ((((uint_least32_t)(s->prop)) & 0x1F /* 00011111 */) << 8) |
- ((((uint_least32_t)(s->bracket - bidi_bracket)) & 0xFF /* 11111111 */) << 13) |
- ((((uint_least32_t)(s->visited)) & 0x01 /* 00000001 */) << 21) |
- ((((uint_least32_t)(s->rawprop)) & 0x1F /* 00011111 */) << 22));
+ *out = (int_least32_t)(((((uint_least32_t)(s->paragraph_level)) &
+ 0x01 /* 00000001 */)
+ << 0) |
+ ((((uint_least32_t)(s->level + 1)) &
+ 0x7F /* 01111111 */)
+ << 1) |
+ ((((uint_least32_t)(s->prop)) &
+ 0x1F /* 00011111 */)
+ << 8) |
+ ((((uint_least32_t)(s->bracket - bidi_bracket)) &
+ 0xFF /* 11111111 */)
+ << 13) |
+ ((((uint_least32_t)(s->visited)) &
+ 0x01 /* 00000001 */)
+ << 21) |
+ ((((uint_least32_t)(s->rawprop)) &
+ 0x1F /* 00011111 */)
+ << 22));
}
static inline void
state_deserialize(int_least32_t in, struct state *s)
{
- s->paragraph_level = (uint_least8_t)((((uint_least32_t)in) >> 0) & 0x01 /* 00000001 */);
- s->level = (int_least8_t)((((uint_least32_t)in) >> 1) & 0x7F /* 01111111 */) - 1;
- s->prop = (enum bidi_property)((((uint_least32_t)in) >> 8) & 0x1F /* 00011111 */);
- s->bracket = bidi_bracket + (uint_least8_t)((((uint_least32_t)in) >> 13) & 0xFF /* 11111111 */);
- s->visited = (bool)((((uint_least32_t)in) >> 21) & 0x01 /* 00000001 */);
- s->rawprop = (enum bidi_property)((((uint_least32_t)in) >> 22) & 0x1F /* 00011111 */);
+ s->paragraph_level = (uint_least8_t)((((uint_least32_t)in) >> 0) &
+ 0x01 /* 00000001 */);
+ s->level = (int_least8_t)((((uint_least32_t)in) >> 1) &
+ 0x7F /* 01111111 */) -
+ 1;
+ s->prop = (enum bidi_property)((((uint_least32_t)in) >> 8) &
+ 0x1F /* 00011111 */);
+ s->bracket =
+ bidi_bracket + (uint_least8_t)((((uint_least32_t)in) >> 13) &
+ 0xFF /* 11111111 */);
+ s->visited = (bool)((((uint_least32_t)in) >> 21) & 0x01 /* 00000001 */);
+ s->rawprop = (enum bidi_property)((((uint_least32_t)in) >> 22) &
+ 0x1F /* 00011111 */);
}
static void
@@ -171,7 +192,6 @@ isolate_runner_advance(struct isolate_runner *ir)
return 1;
}
-
/* shift in */
ir->prev_prop = ir->cur.prop;
ir->cur.off = ir->next.off;
@@ -188,13 +208,13 @@ isolate_runner_advance(struct isolate_runner *ir)
* on the first advancement as the prev_prop holds the sos type,
* which can only be either R or L, which are both strong types
*/
- if (ir->prev_prop == BIDI_PROP_R ||
- ir->prev_prop == BIDI_PROP_L ||
+ if (ir->prev_prop == BIDI_PROP_R || ir->prev_prop == BIDI_PROP_L ||
ir->prev_prop == BIDI_PROP_AL) {
ir->last_strong_type = ir->prev_prop;
}
- /* initialize next state by going to the next character in the sequence */
+ /* initialize next state by going to the next character in the sequence
+ */
ir->next.off = SIZE_MAX;
ir->next.prop = NUM_BIDI_PROPS;
@@ -210,8 +230,7 @@ isolate_runner_advance(struct isolate_runner *ir)
}
/* follow BD8/BD9 and P2 to traverse the current sequence */
- if (s.prop == BIDI_PROP_LRI ||
- s.prop == BIDI_PROP_RLI ||
+ if (s.prop == BIDI_PROP_LRI || s.prop == BIDI_PROP_RLI ||
s.prop == BIDI_PROP_FSI) {
/*
* we encountered an isolate initiator, increment
@@ -224,8 +243,7 @@ isolate_runner_advance(struct isolate_runner *ir)
if (isolate_level != 1) {
continue;
}
- } else if (s.prop == BIDI_PROP_PDI &&
- isolate_level > 0) {
+ } else if (s.prop == BIDI_PROP_PDI && isolate_level > 0) {
isolate_level--;
/*
@@ -250,12 +268,14 @@ isolate_runner_advance(struct isolate_runner *ir)
/* we were in the first initializing round */
continue;
} else if (s.level == ir->isolating_run_level) {
- /* isolate_level-skips have been handled before, we're good */
+ /* isolate_level-skips have been handled before, we're
+ * good */
/* still in the sequence */
ir->next.off = (size_t)i;
ir->next.prop = s.prop;
} else {
- /* out of sequence or isolated, compare levels via eos */
+ /* out of sequence or isolated, compare levels via eos
+ */
if (MAX(last_isolate_level, s.level) % 2 == 0) {
ir->next.prop = BIDI_PROP_L;
} else {
@@ -286,7 +306,8 @@ isolate_runner_advance(struct isolate_runner *ir)
}
static void
-isolate_runner_set_current_prop(struct isolate_runner *ir, enum bidi_property prop)
+isolate_runner_set_current_prop(struct isolate_runner *ir,
+ enum bidi_property prop)
{
struct state s;
@@ -301,9 +322,9 @@ static inline enum bidi_property
get_bidi_property(uint_least32_t cp)
{
if (likely(cp <= 0x10FFFF)) {
- return (enum bidi_property)
- ((bidi_minor[bidi_major[cp >> 8] + (cp & 0xff)]) &
- 0x1F /* 00011111 */);
+ return (enum bidi_property)(
+ (bidi_minor[bidi_major[cp >> 8] + (cp & 0xff)]) &
+ 0x1F /* 00011111 */);
} else {
return BIDI_PROP_L;
}
@@ -320,8 +341,8 @@ get_bidi_bracket_off(uint_least32_t cp)
}
static size_t
-process_isolating_run_sequence(int_least32_t *buf, size_t buflen,
- size_t off, uint_least8_t paragraph_level)
+process_isolating_run_sequence(int_least32_t *buf, size_t buflen, size_t off,
+ uint_least8_t paragraph_level)
{
enum bidi_property sequence_prop;
struct isolate_runner ir, tmp;
@@ -335,7 +356,8 @@ process_isolating_run_sequence(int_least32_t *buf, size_t buflen,
ir.prev_prop == BIDI_PROP_RLI ||
ir.prev_prop == BIDI_PROP_FSI ||
ir.prev_prop == BIDI_PROP_PDI) {
- isolate_runner_set_current_prop(&ir, BIDI_PROP_ON);
+ isolate_runner_set_current_prop(&ir,
+ BIDI_PROP_ON);
} else {
isolate_runner_set_current_prop(&ir,
ir.prev_prop);
@@ -371,7 +393,7 @@ process_isolating_run_sequence(int_least32_t *buf, size_t buflen,
}
if (ir.prev_prop == BIDI_PROP_AN &&
- ir.cur.prop == BIDI_PROP_CS &&
+ ir.cur.prop == BIDI_PROP_CS &&
ir.next.prop == BIDI_PROP_AN) {
isolate_runner_set_current_prop(&ir, BIDI_PROP_AN);
}
@@ -389,14 +411,19 @@ process_isolating_run_sequence(int_least32_t *buf, size_t buflen,
} else if (ir.cur.prop == BIDI_PROP_EN) {
/* set the preceding sequence */
if (runsince != SIZE_MAX) {
- isolate_runner_init(buf, buflen, runsince, paragraph_level, (runsince > off), &tmp);
+ isolate_runner_init(buf, buflen, runsince,
+ paragraph_level,
+ (runsince > off), &tmp);
while (!isolate_runner_advance(&tmp) &&
tmp.cur.off < ir.cur.off) {
- isolate_runner_set_current_prop(&tmp, BIDI_PROP_EN);
+ isolate_runner_set_current_prop(
+ &tmp, BIDI_PROP_EN);
}
runsince = SIZE_MAX;
} else {
- isolate_runner_init(buf, buflen, ir.cur.off, paragraph_level, (ir.cur.off > off), &tmp);
+ isolate_runner_init(buf, buflen, ir.cur.off,
+ paragraph_level,
+ (ir.cur.off > off), &tmp);
isolate_runner_advance(&tmp);
}
/* follow the succeeding sequence */
@@ -404,7 +431,8 @@ process_isolating_run_sequence(int_least32_t *buf, size_t buflen,
if (tmp.cur.prop != BIDI_PROP_ET) {
break;
}
- isolate_runner_set_current_prop(&tmp, BIDI_PROP_EN);
+ isolate_runner_set_current_prop(&tmp,
+ BIDI_PROP_EN);
}
} else {
/* sequence ended */
@@ -439,23 +467,26 @@ process_isolating_run_sequence(int_least32_t *buf, size_t buflen,
isolate_runner_init(buf, buflen, off, paragraph_level, false, &ir);
while (!isolate_runner_advance(&ir)) {
if (sequence_end == SIZE_MAX) {
- if (ir.cur.prop == BIDI_PROP_B ||
- ir.cur.prop == BIDI_PROP_S ||
- ir.cur.prop == BIDI_PROP_WS ||
- ir.cur.prop == BIDI_PROP_ON ||
+ if (ir.cur.prop == BIDI_PROP_B ||
+ ir.cur.prop == BIDI_PROP_S ||
+ ir.cur.prop == BIDI_PROP_WS ||
+ ir.cur.prop == BIDI_PROP_ON ||
ir.cur.prop == BIDI_PROP_FSI ||
ir.cur.prop == BIDI_PROP_LRI ||
ir.cur.prop == BIDI_PROP_RLI ||
ir.cur.prop == BIDI_PROP_PDI) {
- /* the current character is an NI (neutral or isolate) */
+ /* the current character is an NI (neutral or
+ * isolate) */
/* scan ahead to the end of the NI-sequence */
- isolate_runner_init(buf, buflen, ir.cur.off, paragraph_level, (ir.cur.off > off), &tmp);
+ isolate_runner_init(buf, buflen, ir.cur.off,
+ paragraph_level,
+ (ir.cur.off > off), &tmp);
while (!isolate_runner_advance(&tmp)) {
- if (tmp.next.prop != BIDI_PROP_B &&
- tmp.next.prop != BIDI_PROP_S &&
- tmp.next.prop != BIDI_PROP_WS &&
- tmp.next.prop != BIDI_PROP_ON &&
+ if (tmp.next.prop != BIDI_PROP_B &&
+ tmp.next.prop != BIDI_PROP_S &&
+ tmp.next.prop != BIDI_PROP_WS &&
+ tmp.next.prop != BIDI_PROP_ON &&
tmp.next.prop != BIDI_PROP_FSI &&
tmp.next.prop != BIDI_PROP_LRI &&
tmp.next.prop != BIDI_PROP_RLI &&
@@ -465,17 +496,17 @@ process_isolating_run_sequence(int_least32_t *buf, size_t buflen,
}
/*
- * check what follows and see if the text has the
- * same direction on both sides
+ * check what follows and see if the text has
+ * the same direction on both sides
*/
if (ir.prev_prop == BIDI_PROP_L &&
tmp.next.prop == BIDI_PROP_L) {
sequence_end = tmp.cur.off;
sequence_prop = BIDI_PROP_L;
- } else if ((ir.prev_prop == BIDI_PROP_R ||
+ } else if ((ir.prev_prop == BIDI_PROP_R ||
ir.prev_prop == BIDI_PROP_EN ||
ir.prev_prop == BIDI_PROP_AN) &&
- (tmp.next.prop == BIDI_PROP_R ||
+ (tmp.next.prop == BIDI_PROP_R ||
tmp.next.prop == BIDI_PROP_EN ||
tmp.next.prop == BIDI_PROP_AN)) {
sequence_end = tmp.cur.off;
@@ -486,7 +517,8 @@ process_isolating_run_sequence(int_least32_t *buf, size_t buflen,
if (sequence_end != SIZE_MAX) {
if (ir.cur.off <= sequence_end) {
- isolate_runner_set_current_prop(&ir, sequence_prop);
+ isolate_runner_set_current_prop(&ir,
+ sequence_prop);
} else {
/* end of sequence, reset */
sequence_end = SIZE_MAX;
@@ -498,10 +530,9 @@ process_isolating_run_sequence(int_least32_t *buf, size_t buflen,
/* N2 */
isolate_runner_init(buf, buflen, off, paragraph_level, false, &ir);
while (!isolate_runner_advance(&ir)) {
- if (ir.cur.prop == BIDI_PROP_B ||
- ir.cur.prop == BIDI_PROP_S ||
- ir.cur.prop == BIDI_PROP_WS ||
- ir.cur.prop == BIDI_PROP_ON ||
+ if (ir.cur.prop == BIDI_PROP_B || ir.cur.prop == BIDI_PROP_S ||
+ ir.cur.prop == BIDI_PROP_WS ||
+ ir.cur.prop == BIDI_PROP_ON ||
ir.cur.prop == BIDI_PROP_FSI ||
ir.cur.prop == BIDI_PROP_LRI ||
ir.cur.prop == BIDI_PROP_RLI ||
@@ -509,10 +540,12 @@ process_isolating_run_sequence(int_least32_t *buf, size_t buflen,
/* N2 */
if (ir.cur.level % 2 == 0) {
/* even embedding level */
- isolate_runner_set_current_prop(&ir, BIDI_PROP_L);
+ isolate_runner_set_current_prop(&ir,
+ BIDI_PROP_L);
} else {
/* odd embedding level */
- isolate_runner_set_current_prop(&ir, BIDI_PROP_R);
+ isolate_runner_set_current_prop(&ir,
+ BIDI_PROP_R);
}
}
}
@@ -522,8 +555,8 @@ process_isolating_run_sequence(int_least32_t *buf, size_t buflen,
static uint_least8_t
get_paragraph_level(enum grapheme_bidirectional_override override,
- bool terminate_on_pdi,
- const int_least32_t *buf, size_t buflen)
+ bool terminate_on_pdi, const int_least32_t *buf,
+ size_t buflen)
{
struct state s;
int_least8_t isolate_level;
@@ -541,8 +574,7 @@ get_paragraph_level(enum grapheme_bidirectional_override override,
for (bufoff = 0, isolate_level = 0; bufoff < buflen; bufoff++) {
state_deserialize(buf[bufoff], &s);
- if (s.prop == BIDI_PROP_PDI &&
- isolate_level == 0 &&
+ if (s.prop == BIDI_PROP_PDI && isolate_level == 0 &&
terminate_on_pdi) {
/*
* we are in a FSI-subsection of a paragraph and
@@ -552,8 +584,7 @@ get_paragraph_level(enum grapheme_bidirectional_override override,
}
/* BD8/BD9 */
- if ((s.prop == BIDI_PROP_LRI ||
- s.prop == BIDI_PROP_RLI ||
+ if ((s.prop == BIDI_PROP_LRI || s.prop == BIDI_PROP_RLI ||
s.prop == BIDI_PROP_FSI) &&
isolate_level < MAX_DEPTH) {
/* we hit an isolate initiator, increment counter */
@@ -570,8 +601,7 @@ get_paragraph_level(enum grapheme_bidirectional_override override,
/* P3 */
if (s.prop == BIDI_PROP_L) {
return 0;
- } else if (s.prop == BIDI_PROP_AL ||
- s.prop == BIDI_PROP_R) {
+ } else if (s.prop == BIDI_PROP_AL || s.prop == BIDI_PROP_R) {
return 1;
}
}
@@ -585,13 +615,15 @@ get_paragraph_embedding_levels(enum grapheme_bidirectional_override override,
{
enum bidi_property tmp_prop;
struct state s, t;
+
struct {
int_least8_t level;
enum grapheme_bidirectional_override override;
bool directional_isolate;
} directional_status[MAX_DEPTH + 2], *dirstat = directional_status;
+
size_t overflow_isolate_count, overflow_embedding_count,
- valid_isolate_count, bufoff, i, runsince;
+ valid_isolate_count, bufoff, i, runsince;
uint_least8_t paragraph_level;
paragraph_level = get_paragraph_level(override, false, buf, buflen);
@@ -600,7 +632,8 @@ get_paragraph_embedding_levels(enum grapheme_bidirectional_override override,
dirstat->level = (int_least8_t)paragraph_level;
dirstat->override = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
dirstat->directional_isolate = false;
- overflow_isolate_count = overflow_embedding_count = valid_isolate_count = 0;
+ overflow_isolate_count = overflow_embedding_count =
+ valid_isolate_count = 0;
for (bufoff = 0; bufoff < buflen; bufoff++) {
state_deserialize(buf[bufoff], &s);
@@ -608,79 +641,105 @@ get_paragraph_embedding_levels(enum grapheme_bidirectional_override override,
again:
if (tmp_prop == BIDI_PROP_RLE) {
/* X2 */
- if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= MAX_DEPTH &&
+ if (dirstat->level + (dirstat->level % 2 != 0) + 1 <=
+ MAX_DEPTH &&
overflow_isolate_count == 0 &&
overflow_embedding_count == 0) {
/* valid RLE */
dirstat++;
- dirstat->level = (dirstat - 1)->level + ((dirstat - 1)->level % 2 != 0) + 1;
- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
+ dirstat->level =
+ (dirstat - 1)->level +
+ ((dirstat - 1)->level % 2 != 0) + 1;
+ dirstat->override =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
dirstat->directional_isolate = false;
} else {
/* overflow RLE */
- overflow_embedding_count += (overflow_isolate_count == 0);
+ overflow_embedding_count +=
+ (overflow_isolate_count == 0);
}
} else if (tmp_prop == BIDI_PROP_LRE) {
/* X3 */
- if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= MAX_DEPTH &&
+ if (dirstat->level + (dirstat->level % 2 == 0) + 1 <=
+ MAX_DEPTH &&
overflow_isolate_count == 0 &&
overflow_embedding_count == 0) {
/* valid LRE */
dirstat++;
- dirstat->level = (dirstat - 1)->level + ((dirstat - 1)->level % 2 == 0) + 1;
- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
+ dirstat->level =
+ (dirstat - 1)->level +
+ ((dirstat - 1)->level % 2 == 0) + 1;
+ dirstat->override =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
dirstat->directional_isolate = false;
} else {
/* overflow LRE */
- overflow_embedding_count += (overflow_isolate_count == 0);
+ overflow_embedding_count +=
+ (overflow_isolate_count == 0);
}
} else if (tmp_prop == BIDI_PROP_RLO) {
/* X4 */
- if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= MAX_DEPTH &&
+ if (dirstat->level + (dirstat->level % 2 != 0) + 1 <=
+ MAX_DEPTH &&
overflow_isolate_count == 0 &&
overflow_embedding_count == 0) {
/* valid RLO */
dirstat++;
- dirstat->level = (dirstat - 1)->level + ((dirstat - 1)->level % 2 != 0) + 1;
- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL;
+ dirstat->level =
+ (dirstat - 1)->level +
+ ((dirstat - 1)->level % 2 != 0) + 1;
+ dirstat->override =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL;
dirstat->directional_isolate = false;
} else {
/* overflow RLO */
- overflow_embedding_count += (overflow_isolate_count == 0);
+ overflow_embedding_count +=
+ (overflow_isolate_count == 0);
}
} else if (tmp_prop == BIDI_PROP_LRO) {
/* X5 */
- if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= MAX_DEPTH &&
+ if (dirstat->level + (dirstat->level % 2 == 0) + 1 <=
+ MAX_DEPTH &&
overflow_isolate_count == 0 &&
overflow_embedding_count == 0) {
/* valid LRE */
dirstat++;
- dirstat->level = (dirstat - 1)->level + ((dirstat - 1)->level % 2 == 0) + 1;
- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR;
+ dirstat->level =
+ (dirstat - 1)->level +
+ ((dirstat - 1)->level % 2 == 0) + 1;
+ dirstat->override =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR;
dirstat->directional_isolate = false;
} else {
/* overflow LRO */
- overflow_embedding_count += (overflow_isolate_count == 0);
+ overflow_embedding_count +=
+ (overflow_isolate_count == 0);
}
} else if (tmp_prop == BIDI_PROP_RLI) {
/* X5a */
s.level = dirstat->level;
- if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) {
+ if (dirstat->override ==
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) {
s.prop = BIDI_PROP_L;
- } else if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) {
+ } else if (dirstat->override ==
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) {
s.prop = BIDI_PROP_R;
}
state_serialize(&s, &(buf[bufoff]));
- if (dirstat->level + (dirstat->level % 2 != 0) + 1 <= MAX_DEPTH &&
+ if (dirstat->level + (dirstat->level % 2 != 0) + 1 <=
+ MAX_DEPTH &&
overflow_isolate_count == 0 &&
overflow_embedding_count == 0) {
/* valid RLI */
valid_isolate_count++;
dirstat++;
- dirstat->level = (dirstat - 1)->level + ((dirstat - 1)->level % 2 != 0) + 1;
- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
+ dirstat->level =
+ (dirstat - 1)->level +
+ ((dirstat - 1)->level % 2 != 0) + 1;
+ dirstat->override =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
dirstat->directional_isolate = true;
} else {
/* overflow RLI */
@@ -689,22 +748,28 @@ again:
} else if (tmp_prop == BIDI_PROP_LRI) {
/* X5b */
s.level = dirstat->level;
- if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) {
+ if (dirstat->override ==
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) {
s.prop = BIDI_PROP_L;
- } else if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) {
+ } else if (dirstat->override ==
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) {
s.prop = BIDI_PROP_R;
}
state_serialize(&s, &(buf[bufoff]));
- if (dirstat->level + (dirstat->level % 2 == 0) + 1 <= MAX_DEPTH &&
+ if (dirstat->level + (dirstat->level % 2 == 0) + 1 <=
+ MAX_DEPTH &&
overflow_isolate_count == 0 &&
overflow_embedding_count == 0) {
/* valid LRI */
valid_isolate_count++;
dirstat++;
- dirstat->level = (dirstat - 1)->level + ((dirstat - 1)->level % 2 == 0) + 1;
- dirstat->override = GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
+ dirstat->level =
+ (dirstat - 1)->level +
+ ((dirstat - 1)->level % 2 == 0) + 1;
+ dirstat->override =
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL;
dirstat->directional_isolate = true;
} else {
/* overflow LRI */
@@ -712,23 +777,27 @@ again:
}
} else if (tmp_prop == BIDI_PROP_FSI) {
/* X5c */
- if (get_paragraph_level(GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL, true,
- buf + (bufoff + 1), buflen - (bufoff + 1)) == 1) {
+ if (get_paragraph_level(
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_NEUTRAL,
+ true, buf + (bufoff + 1),
+ buflen - (bufoff + 1)) == 1) {
tmp_prop = BIDI_PROP_RLI;
goto again;
} else { /* ... == 0 */
tmp_prop = BIDI_PROP_LRI;
goto again;
}
- } else if (tmp_prop != BIDI_PROP_B &&
- tmp_prop != BIDI_PROP_BN &&
+ } else if (tmp_prop != BIDI_PROP_B &&
+ tmp_prop != BIDI_PROP_BN &&
tmp_prop != BIDI_PROP_PDF &&
tmp_prop != BIDI_PROP_PDI) {
/* X6 */
s.level = dirstat->level;
- if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) {
+ if (dirstat->override ==
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) {
s.prop = BIDI_PROP_L;
- } else if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) {
+ } else if (dirstat->override ==
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) {
s.prop = BIDI_PROP_R;
}
state_serialize(&s, &(buf[bufoff]));
@@ -773,9 +842,11 @@ again:
}
s.level = dirstat->level;
- if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) {
+ if (dirstat->override ==
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_LTR) {
s.prop = BIDI_PROP_L;
- } else if (dirstat->override == GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) {
+ } else if (dirstat->override ==
+ GRAPHEME_BIDIRECTIONAL_OVERRIDE_RTL) {
s.prop = BIDI_PROP_R;
}
state_serialize(&s, &(buf[bufoff]));
@@ -796,12 +867,9 @@ again:
}
/* X9 */
- if (tmp_prop == BIDI_PROP_RLE ||
- tmp_prop == BIDI_PROP_LRE ||
- tmp_prop == BIDI_PROP_RLO ||
- tmp_prop == BIDI_PROP_LRO ||
- tmp_prop == BIDI_PROP_PDF ||
- tmp_prop == BIDI_PROP_BN) {
+ if (tmp_prop == BIDI_PROP_RLE || tmp_prop == BIDI_PROP_LRE ||
+ tmp_prop == BIDI_PROP_RLO || tmp_prop == BIDI_PROP_LRO ||
+ tmp_prop == BIDI_PROP_PDF || tmp_prop == BIDI_PROP_BN) {
s.level = -1;
state_serialize(&s, &(buf[bufoff]));
}
@@ -811,8 +879,8 @@ again:
for (bufoff = 0; bufoff < buflen; bufoff++) {
state_deserialize(buf[bufoff], &s);
if (!s.visited && s.level != -1) {
- bufoff += process_isolating_run_sequence(buf, buflen, bufoff,
- paragraph_level);
+ bufoff += process_isolating_run_sequence(
+ buf, buflen, bufoff, paragraph_level);
}
}
@@ -823,7 +891,7 @@ again:
for (bufoff = 0; bufoff < buflen; bufoff++) {
state_deserialize(buf[bufoff], &s);
- if (s.level % 2 == 0 ) {
+ if (s.level % 2 == 0) {
/* even level */
if (s.prop == BIDI_PROP_R) {
s.level += 1;
@@ -833,8 +901,7 @@ again:
}
} else {
/* odd level */
- if (s.prop == BIDI_PROP_L ||
- s.prop == BIDI_PROP_EN ||
+ if (s.prop == BIDI_PROP_L || s.prop == BIDI_PROP_EN ||
s.prop == BIDI_PROP_AN) {
s.level += 1;
}
@@ -853,10 +920,8 @@ again:
continue;
}
- if (s.rawprop == BIDI_PROP_WS ||
- s.rawprop == BIDI_PROP_FSI ||
- s.rawprop == BIDI_PROP_LRI ||
- s.rawprop == BIDI_PROP_RLI ||
+ if (s.rawprop == BIDI_PROP_WS || s.rawprop == BIDI_PROP_FSI ||
+ s.rawprop == BIDI_PROP_LRI || s.rawprop == BIDI_PROP_RLI ||
s.rawprop == BIDI_PROP_PDI) {
if (runsince == SIZE_MAX) {
/* a new run has begun */
@@ -878,8 +943,7 @@ again:
runsince = SIZE_MAX;
}
- if (s.rawprop == BIDI_PROP_S ||
- s.rawprop == BIDI_PROP_B) {
+ if (s.rawprop == BIDI_PROP_S || s.rawprop == BIDI_PROP_B) {
s.level = (int_least8_t)paragraph_level;
state_serialize(&s, &(buf[bufoff]));
}
@@ -902,7 +966,8 @@ again:
}
static size_t
-get_embedding_levels(HERODOTUS_READER *r, enum grapheme_bidirectional_override override,
+get_embedding_levels(HERODOTUS_READER *r,
+ enum grapheme_bidirectional_override override,
int_least32_t *buf, size_t buflen)
{
struct state s;
@@ -911,8 +976,9 @@ get_embedding_levels(HERODOTUS_READER *r, enum grapheme_bidirectional_override o
if (buf == NULL) {
for (; herodotus_read_codepoint(r, true, &cp) ==
- HERODOTUS_STATUS_SUCCESS;)
+ HERODOTUS_STATUS_SUCCESS;) {
;
+ }
/* see below for return value reasoning */
return herodotus_reader_number_read(r);
@@ -922,8 +988,9 @@ get_embedding_levels(HERODOTUS_READER *r, enum grapheme_bidirectional_override o
* the first step is to determine the bidirectional properties
* and store them in the buffer
*/
- for (bufoff = 0; herodotus_read_codepoint(r, true, &cp) ==
- HERODOTUS_STATUS_SUCCESS; bufoff++) {
+ for (bufoff = 0;
+ herodotus_read_codepoint(r, true, &cp) == HERODOTUS_STATUS_SUCCESS;
+ bufoff++) {
if (bufoff < buflen) {
/*
* actually only do something when we have
@@ -974,9 +1041,10 @@ get_embedding_levels(HERODOTUS_READER *r, enum grapheme_bidirectional_override o
}
size_t
-grapheme_get_bidirectional_embedding_levels(const uint_least32_t *src, size_t srclen,
- enum grapheme_bidirectional_override override,
- int_least32_t *dest, size_t destlen)
+grapheme_get_bidirectional_embedding_levels(
+ const uint_least32_t *src, size_t srclen,
+ enum grapheme_bidirectional_override override, int_least32_t *dest,
+ size_t destlen)
{
HERODOTUS_READER r;
@@ -986,9 +1054,10 @@ grapheme_get_bidirectional_embedding_levels(const uint_least32_t *src, size_t sr
}
size_t
-grapheme_get_bidirectional_embedding_levels_utf8(const char *src, size_t srclen,
- enum grapheme_bidirectional_override override,
- int_least32_t *dest, size_t destlen)
+grapheme_get_bidirectional_embedding_levels_utf8(
+ const char *src, size_t srclen,
+ enum grapheme_bidirectional_override override, int_least32_t *dest,
+ size_t destlen)
{
HERODOTUS_READER r;
diff --git a/src/case.c b/src/case.c
@@ -2,8 +2,8 @@
#include <stddef.h>
#include <stdint.h>
-#include "../grapheme.h"
#include "../gen/case.h"
+#include "../grapheme.h"
#include "util.h"
static inline enum case_property
@@ -11,7 +11,7 @@ get_case_property(uint_least32_t cp)
{
if (likely(cp <= UINT32_C(0x10FFFF))) {
return (enum case_property)
- case_minor[case_major[cp >> 8] + (cp & 0xFF)];
+ case_minor[case_major[cp >> 8] + (cp & 0xFF)];
} else {
return CASE_PROP_OTHER;
}
@@ -45,58 +45,64 @@ to_case(HERODOTUS_READER *r, HERODOTUS_WRITER *w,
uint_least32_t cp, tmp_cp;
int_least32_t map;
- for (; herodotus_read_codepoint(r, true, &cp) == HERODOTUS_STATUS_SUCCESS;) {
+ for (; herodotus_read_codepoint(r, true, &cp) ==
+ HERODOTUS_STATUS_SUCCESS;) {
if (sc == lower_special) {
/*
- * For the special Final_Sigma-rule (see SpecialCasing.txt),
- * which is the only non-localized case-dependent rule,
- * we apply a different mapping when a sigma is at the
- * end of a word.
+ * For the special Final_Sigma-rule (see
+ * SpecialCasing.txt), which is the only non-localized
+ * case-dependent rule, we apply a different mapping
+ * when a sigma is at the end of a word.
*
* Before: cased case-ignorable*
* After: not(case-ignorable* cased)
*
- * We check the after-condition on demand, but the before-
- * condition is best checked using the "level"-heuristic
- * also used in the sentence and line breaking-implementations.
+ * We check the after-condition on demand, but the
+ * before- condition is best checked using the
+ * "level"-heuristic also used in the sentence and line
+ * breaking-implementations.
*/
- if (cp == UINT32_C(0x03A3) && /* GREEK CAPITAL LETTER SIGMA */
+ if (cp == UINT32_C(0x03A3) && /* GREEK CAPITAL LETTER
+ SIGMA */
(final_sigma_level == 1 ||
final_sigma_level == 2)) {
/*
* check succeeding characters by first skipping
- * all case-ignorable characters and then checking
- * if the succeeding character is cased, invalidating
- * the after-condition
+ * all case-ignorable characters and then
+ * checking if the succeeding character is
+ * cased, invalidating the after-condition
*/
herodotus_reader_copy(r, &tmp);
for (prop = NUM_CASE_PROPS;
- (s = herodotus_read_codepoint(&tmp, true, &tmp_cp)) ==
- HERODOTUS_STATUS_SUCCESS; ) {
+ (s = herodotus_read_codepoint(&tmp, true,
+ &tmp_cp)) ==
+ HERODOTUS_STATUS_SUCCESS;) {
prop = get_case_property(tmp_cp);
if (prop != CASE_PROP_CASE_IGNORABLE &&
prop != CASE_PROP_BOTH_CASED_CASE_IGNORABLE) {
- break;
+ break;
}
}
/*
- * Now prop is something other than case-ignorable or
- * the source-string ended.
- * If it is something other than cased, we know
+ * Now prop is something other than
+ * case-ignorable or the source-string ended. If
+ * it is something other than cased, we know
* that the after-condition holds
*/
if (s != HERODOTUS_STATUS_SUCCESS ||
(prop != CASE_PROP_CASED &&
prop != CASE_PROP_BOTH_CASED_CASE_IGNORABLE)) {
/*
- * write GREEK SMALL LETTER FINAL SIGMA to
- * destination
+ * write GREEK SMALL LETTER FINAL SIGMA
+ * to destination
+ */
+ herodotus_write_codepoint(
+ w, UINT32_C(0x03C2));
+
+ /* reset Final_Sigma-state and continue
*/
- herodotus_write_codepoint(w, UINT32_C(0x03C2));
-
- /* reset Final_Sigma-state and continue */
final_sigma_level = 0;
continue;
}
@@ -110,11 +116,13 @@ to_case(HERODOTUS_READER *r, HERODOTUS_WRITER *w,
prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE)) {
/* sequence has begun */
final_sigma_level = 1;
- } else if ((final_sigma_level == 1 ||
- final_sigma_level == 2) &&
- (prop == CASE_PROP_CASE_IGNORABLE ||
- prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE)) {
- /* case-ignorable sequence begins or continued */
+ } else if (
+ (final_sigma_level == 1 ||
+ final_sigma_level == 2) &&
+ (prop == CASE_PROP_CASE_IGNORABLE ||
+ prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE)) {
+ /* case-ignorable sequence begins or continued
+ */
final_sigma_level = 2;
} else {
/* sequence broke */
@@ -134,8 +142,8 @@ to_case(HERODOTUS_READER *r, HERODOTUS_WRITER *w,
}
} else {
/* we have a simple mapping */
- herodotus_write_codepoint(w, (uint_least32_t)
- ((int_least32_t)cp + map));
+ herodotus_write_codepoint(
+ w, (uint_least32_t)((int_least32_t)cp + map));
}
}
@@ -168,14 +176,16 @@ to_titlecase(HERODOTUS_READER *r, HERODOTUS_WRITER *w)
for (; (nwb = herodotus_next_word_break(r)) > 0;) {
herodotus_reader_push_advance_limit(r, nwb);
- for (; (s = herodotus_read_codepoint(r, false, &cp)) == HERODOTUS_STATUS_SUCCESS;) {
+ for (; (s = herodotus_read_codepoint(r, false, &cp)) ==
+ HERODOTUS_STATUS_SUCCESS;) {
/* check if we have a cased character */
prop = get_case_property(cp);
if (prop == CASE_PROP_CASED ||
prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE) {
break;
} else {
- /* write the data to the output verbatim, it if permits */
+ /* write the data to the output verbatim, it if
+ * permits */
herodotus_write_codepoint(w, cp);
/* increment reader */
@@ -199,9 +209,10 @@ to_titlecase(HERODOTUS_READER *r, HERODOTUS_WRITER *w)
* we encountered a cased character before the word
* break, convert it to titlecase
*/
- herodotus_reader_push_advance_limit(r,
- herodotus_reader_next_codepoint_break(r));
- to_case(r, w, 0, title_major, title_minor, title_special);
+ herodotus_reader_push_advance_limit(
+ r, herodotus_reader_next_codepoint_break(r));
+ to_case(r, w, 0, title_major, title_minor,
+ title_special);
herodotus_reader_pop_limit(r);
}
@@ -218,7 +229,8 @@ to_titlecase(HERODOTUS_READER *r, HERODOTUS_WRITER *w)
}
size_t
-grapheme_to_uppercase(const uint_least32_t *src, size_t srclen, uint_least32_t *dest, size_t destlen)
+grapheme_to_uppercase(const uint_least32_t *src, size_t srclen,
+ uint_least32_t *dest, size_t destlen)
{
HERODOTUS_READER r;
HERODOTUS_WRITER w;
@@ -230,7 +242,8 @@ grapheme_to_uppercase(const uint_least32_t *src, size_t srclen, uint_least32_t *
}
size_t
-grapheme_to_lowercase(const uint_least32_t *src, size_t srclen, uint_least32_t *dest, size_t destlen)
+grapheme_to_lowercase(const uint_least32_t *src, size_t srclen,
+ uint_least32_t *dest, size_t destlen)
{
HERODOTUS_READER r;
HERODOTUS_WRITER w;
@@ -242,7 +255,8 @@ grapheme_to_lowercase(const uint_least32_t *src, size_t srclen, uint_least32_t *
}
size_t
-grapheme_to_titlecase(const uint_least32_t *src, size_t srclen, uint_least32_t *dest, size_t destlen)
+grapheme_to_titlecase(const uint_least32_t *src, size_t srclen,
+ uint_least32_t *dest, size_t destlen)
{
HERODOTUS_READER r;
HERODOTUS_WRITER w;
@@ -254,7 +268,8 @@ grapheme_to_titlecase(const uint_least32_t *src, size_t srclen, uint_least32_t *
}
size_t
-grapheme_to_uppercase_utf8(const char *src, size_t srclen, char *dest, size_t destlen)
+grapheme_to_uppercase_utf8(const char *src, size_t srclen, char *dest,
+ size_t destlen)
{
HERODOTUS_READER r;
HERODOTUS_WRITER w;
@@ -266,7 +281,8 @@ grapheme_to_uppercase_utf8(const char *src, size_t srclen, char *dest, size_t de
}
size_t
-grapheme_to_lowercase_utf8(const char *src, size_t srclen, char *dest, size_t destlen)
+grapheme_to_lowercase_utf8(const char *src, size_t srclen, char *dest,
+ size_t destlen)
{
HERODOTUS_READER r;
HERODOTUS_WRITER w;
@@ -278,7 +294,8 @@ grapheme_to_lowercase_utf8(const char *src, size_t srclen, char *dest, size_t de
}
size_t
-grapheme_to_titlecase_utf8(const char *src, size_t srclen, char *dest, size_t destlen)
+grapheme_to_titlecase_utf8(const char *src, size_t srclen, char *dest,
+ size_t destlen)
{
HERODOTUS_READER r;
HERODOTUS_WRITER w;
@@ -299,7 +316,8 @@ is_case(HERODOTUS_READER *r, const uint_least16_t *major,
uint_least32_t cp;
int_least32_t map;
- for (; herodotus_read_codepoint(r, false, &cp) == HERODOTUS_STATUS_SUCCESS;) {
+ for (; herodotus_read_codepoint(r, false, &cp) ==
+ HERODOTUS_STATUS_SUCCESS;) {
/* get and handle case mapping */
if (unlikely((map = get_case_offset(cp, major, minor)) >=
INT32_C(0x110000))) {
@@ -315,7 +333,8 @@ is_case(HERODOTUS_READER *r, const uint_least16_t *major,
goto done;
} else {
/* move forward */
- herodotus_read_codepoint(r, true, &cp);
+ herodotus_read_codepoint(
+ r, true, &cp);
}
} else {
/*
@@ -357,7 +376,8 @@ is_titlecase(HERODOTUS_READER *r, size_t *output)
for (; (nwb = herodotus_next_word_break(r)) > 0;) {
herodotus_reader_push_advance_limit(r, nwb);
- for (; (s = herodotus_read_codepoint(r, false, &cp)) == HERODOTUS_STATUS_SUCCESS;) {
+ for (; (s = herodotus_read_codepoint(r, false, &cp)) ==
+ HERODOTUS_STATUS_SUCCESS;) {
/* check if we have a cased character */
prop = get_case_property(cp);
if (prop == CASE_PROP_CASED ||
@@ -384,17 +404,20 @@ is_titlecase(HERODOTUS_READER *r, size_t *output)
* we encountered a cased character before the word
* break, check if it's titlecase
*/
- herodotus_reader_push_advance_limit(r,
- herodotus_reader_next_codepoint_break(r));
- if (!is_case(r, title_major, title_minor, title_special, NULL)) {
+ herodotus_reader_push_advance_limit(
+ r, herodotus_reader_next_codepoint_break(r));
+ if (!is_case(r, title_major, title_minor, title_special,
+ NULL)) {
ret = false;
goto done;
}
herodotus_reader_pop_limit(r);
}
- /* check if the rest of the codepoints in the word are lowercase */
- if (!is_case(r, lower_major, lower_minor, lower_special, NULL)) {
+ /* check if the rest of the codepoints in the word are lowercase
+ */
+ if (!is_case(r, lower_major, lower_minor, lower_special,
+ NULL)) {
ret = false;
goto done;
}
diff --git a/src/character.c b/src/character.c
@@ -16,83 +16,80 @@ struct character_break_state {
static const uint_least16_t dont_break[NUM_CHAR_BREAK_PROPS] = {
[CHAR_BREAK_PROP_OTHER] =
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
- [CHAR_BREAK_PROP_CR] =
- UINT16_C(1) << CHAR_BREAK_PROP_LF, /* GB3 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ [CHAR_BREAK_PROP_CR] = UINT16_C(1) << CHAR_BREAK_PROP_LF, /* GB3 */
[CHAR_BREAK_PROP_EXTEND] =
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] =
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_HANGUL_L] =
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_L | /* GB6 */
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB6 */
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LV | /* GB6 */
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LVT | /* GB6 */
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_L | /* GB6 */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB6 */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LV | /* GB6 */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LVT | /* GB6 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_HANGUL_V] =
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB7 */
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB7 */
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB7 */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB7 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_HANGUL_T] =
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB8 */
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB8 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_HANGUL_LV] =
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB7 */
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB7 */
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB7 */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB7 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_HANGUL_LVT] =
- UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB8 */
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB8 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_PREPEND] =
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK | /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK | /* GB9a */
(UINT16_C(0xFFFF) &
- ~(UINT16_C(1) << CHAR_BREAK_PROP_CR |
- UINT16_C(1) << CHAR_BREAK_PROP_LF |
- UINT16_C(1) << CHAR_BREAK_PROP_CONTROL
- )
- ), /* GB9b */
+ ~(UINT16_C(1) << CHAR_BREAK_PROP_CR |
+ UINT16_C(1) << CHAR_BREAK_PROP_LF |
+ UINT16_C(1) << CHAR_BREAK_PROP_CONTROL)), /* GB9b */
[CHAR_BREAK_PROP_REGIONAL_INDICATOR] =
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_SPACINGMARK] =
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_ZWJ] =
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
- UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
};
static const uint_least16_t flag_update_gb11[2 * NUM_CHAR_BREAK_PROPS] = {
[CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] =
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ |
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ |
UINT16_C(1) << CHAR_BREAK_PROP_EXTEND,
[CHAR_BREAK_PROP_ZWJ + NUM_CHAR_BREAK_PROPS] =
UINT16_C(1) << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC,
[CHAR_BREAK_PROP_EXTEND + NUM_CHAR_BREAK_PROPS] =
- UINT16_C(1) << CHAR_BREAK_PROP_EXTEND |
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND |
UINT16_C(1) << CHAR_BREAK_PROP_ZWJ,
[CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC + NUM_CHAR_BREAK_PROPS] =
- UINT16_C(1) << CHAR_BREAK_PROP_ZWJ |
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ |
UINT16_C(1) << CHAR_BREAK_PROP_EXTEND,
};
static const uint_least16_t dont_break_gb11[2 * NUM_CHAR_BREAK_PROPS] = {
@@ -113,7 +110,8 @@ get_break_prop(uint_least32_t cp)
{
if (likely(cp <= UINT32_C(0x10FFFF))) {
return (enum char_break_property)
- char_break_minor[char_break_major[cp >> 8] + (cp & 0xFF)];
+ char_break_minor[char_break_major[cp >> 8] +
+ (cp & 0xFF)];
} else {
return CHAR_BREAK_PROP_OTHER;
}
@@ -122,23 +120,27 @@ get_break_prop(uint_least32_t cp)
static inline void
state_serialize(const struct character_break_state *in, uint_least16_t *out)
{
- *out = (uint_least16_t)(in->prop & UINT8_C(0xFF)) | /* first 8 bits */
- (uint_least16_t)(((uint_least16_t)(in->prop_set)) << 8) | /* 9th bit */
- (uint_least16_t)(((uint_least16_t)(in->gb11_flag)) << 9) | /* 10th bit */
- (uint_least16_t)(((uint_least16_t)(in->gb12_13_flag)) << 10); /* 11th bit */
+ *out = (uint_least16_t)(in->prop & UINT8_C(0xFF)) | /* first 8 bits */
+ (uint_least16_t)(((uint_least16_t)(in->prop_set))
+ << 8) | /* 9th bit */
+ (uint_least16_t)(((uint_least16_t)(in->gb11_flag))
+ << 9) | /* 10th bit */
+ (uint_least16_t)(((uint_least16_t)(in->gb12_13_flag))
+ << 10); /* 11th bit */
}
static inline void
state_deserialize(uint_least16_t in, struct character_break_state *out)
{
- out->prop = in & UINT8_C(0xFF);
- out->prop_set = in & (UINT16_C(1) << 8);
- out->gb11_flag = in & (UINT16_C(1) << 9);
+ out->prop = in & UINT8_C(0xFF);
+ out->prop_set = in & (UINT16_C(1) << 8);
+ out->gb11_flag = in & (UINT16_C(1) << 9);
out->gb12_13_flag = in & (UINT16_C(1) << 10);
}
bool
-grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1, uint_least16_t *s)
+grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1,
+ uint_least16_t *s)
{
struct character_break_state state;
enum char_break_property cp0_prop, cp1_prop;
@@ -161,23 +163,26 @@ grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1, uint_least16
/* update flags */
state.gb11_flag =
flag_update_gb11[cp0_prop + NUM_CHAR_BREAK_PROPS *
- state.gb11_flag] &
+ state.gb11_flag] &
UINT16_C(1) << cp1_prop;
state.gb12_13_flag =
- flag_update_gb12_13[cp0_prop + NUM_CHAR_BREAK_PROPS *
- state.gb12_13_flag] &
- UINT16_C(1) << cp1_prop;
+ flag_update_gb12_13[cp0_prop +
+ NUM_CHAR_BREAK_PROPS *
+ state.gb12_13_flag] &
+ UINT16_C(1) << cp1_prop;
/*
* Apply grapheme cluster breaking algorithm (UAX #29), see
* http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules
*/
notbreak = (dont_break[cp0_prop] & (UINT16_C(1) << cp1_prop)) ||
- (dont_break_gb11[cp0_prop + state.gb11_flag *
- NUM_CHAR_BREAK_PROPS] &
+ (dont_break_gb11[cp0_prop +
+ state.gb11_flag *
+ NUM_CHAR_BREAK_PROPS] &
(UINT16_C(1) << cp1_prop)) ||
- (dont_break_gb12_13[cp0_prop + state.gb12_13_flag *
- NUM_CHAR_BREAK_PROPS] &
+ (dont_break_gb12_13[cp0_prop +
+ state.gb12_13_flag *
+ NUM_CHAR_BREAK_PROPS] &
(UINT16_C(1) << cp1_prop));
/* update or reset flags (when we have a break) */
@@ -198,8 +203,10 @@ grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1, uint_least16
* were all set to false
*/
notbreak = (dont_break[cp0_prop] & (UINT16_C(1) << cp1_prop)) ||
- (dont_break_gb11[cp0_prop] & (UINT16_C(1) << cp1_prop)) ||
- (dont_break_gb12_13[cp0_prop] & (UINT16_C(1) << cp1_prop));
+ (dont_break_gb11[cp0_prop] &
+ (UINT16_C(1) << cp1_prop)) ||
+ (dont_break_gb12_13[cp0_prop] &
+ (UINT16_C(1) << cp1_prop));
}
return !notbreak;
@@ -212,7 +219,8 @@ next_character_break(HERODOTUS_READER *r)
uint_least32_t cp0 = 0, cp1 = 0;
for (herodotus_read_codepoint(r, true, &cp0);
- herodotus_read_codepoint(r, false, &cp1) == HERODOTUS_STATUS_SUCCESS;
+ herodotus_read_codepoint(r, false, &cp1) ==
+ HERODOTUS_STATUS_SUCCESS;
herodotus_read_codepoint(r, true, &cp0)) {
if (grapheme_is_character_break(cp0, cp1, &state)) {
break;
diff --git a/src/line.c b/src/line.c
@@ -11,7 +11,8 @@ get_break_prop(uint_least32_t cp)
{
if (likely(cp <= UINT32_C(0x10FFFF))) {
return (enum line_break_property)
- line_break_minor[line_break_major[cp >> 8] + (cp & 0xff)];
+ line_break_minor[line_break_major[cp >> 8] +
+ (cp & 0xff)];
} else {
return LINE_BREAK_PROP_AL;
}
@@ -22,7 +23,7 @@ next_line_break(HERODOTUS_READER *r)
{
HERODOTUS_READER tmp;
enum line_break_property cp0_prop, cp1_prop, last_non_cm_or_zwj_prop,
- last_non_sp_prop, last_non_sp_cm_or_zwj_prop;
+ last_non_sp_prop, last_non_sp_cm_or_zwj_prop;
uint_least32_t cp;
uint_least8_t lb25_level = 0;
bool lb21a_flag = false, ri_even = true;
@@ -43,8 +44,10 @@ next_line_break(HERODOTUS_READER *r)
last_non_cm_or_zwj_prop = LINE_BREAK_PROP_AL; /* according to LB10 */
last_non_sp_prop = last_non_sp_cm_or_zwj_prop = NUM_LINE_BREAK_PROPS;
- for (herodotus_read_codepoint(r, true, &cp), cp0_prop = get_break_prop(cp);
- herodotus_read_codepoint(r, false, &cp) == HERODOTUS_STATUS_SUCCESS;
+ for (herodotus_read_codepoint(r, true, &cp),
+ cp0_prop = get_break_prop(cp);
+ herodotus_read_codepoint(r, false, &cp) ==
+ HERODOTUS_STATUS_SUCCESS;
herodotus_read_codepoint(r, true, &cp), cp0_prop = cp1_prop) {
/* get property of the right codepoint */
cp1_prop = get_break_prop(cp);
@@ -59,10 +62,11 @@ next_line_break(HERODOTUS_READER *r)
cp0_prop != LINE_BREAK_PROP_ZWJ) {
/*
* check if the property we are overwriting now is an
- * HL. If so, we set the LB21a-flag which depends on this
- * knowledge.
+ * HL. If so, we set the LB21a-flag which depends on
+ * this knowledge.
*/
- lb21a_flag = (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL);
+ lb21a_flag =
+ (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL);
/* check regional indicator state */
if (cp0_prop == LINE_BREAK_PROP_RI) {
@@ -109,8 +113,7 @@ next_line_break(HERODOTUS_READER *r)
* and one (CL | CP) to the left of the middle
* spot
*/
- if ((lb25_level == 0 ||
- lb25_level == 1) &&
+ if ((lb25_level == 0 || lb25_level == 1) &&
cp0_prop == LINE_BREAK_PROP_NU) {
/* sequence has begun */
lb25_level = 1;
@@ -118,12 +121,15 @@ next_line_break(HERODOTUS_READER *r)
(cp0_prop == LINE_BREAK_PROP_NU ||
cp0_prop == LINE_BREAK_PROP_SY ||
cp0_prop == LINE_BREAK_PROP_IS)) {
- /* (NU | SY | IS) sequence begins or continued */
+ /* (NU | SY | IS) sequence begins or continued
+ */
lb25_level = 2;
- } else if ((lb25_level == 1 || lb25_level == 2) &&
- (cp0_prop == LINE_BREAK_PROP_CL ||
- cp0_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
- cp0_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF)) {
+ } else if (
+ (lb25_level == 1 || lb25_level == 2) &&
+ (cp0_prop == LINE_BREAK_PROP_CL ||
+ cp0_prop ==
+ LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
+ cp0_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF)) {
/* CL or CP at the end of the sequence */
lb25_level = 3;
} else {
@@ -229,17 +235,19 @@ next_line_break(HERODOTUS_READER *r)
/* LB13 (affected by tailoring for LB25, see example 7) */
if (cp1_prop == LINE_BREAK_PROP_EX ||
(last_non_cm_or_zwj_prop != LINE_BREAK_PROP_NU &&
- (cp1_prop == LINE_BREAK_PROP_CL ||
+ (cp1_prop == LINE_BREAK_PROP_CL ||
cp1_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
- cp1_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF ||
- cp1_prop == LINE_BREAK_PROP_IS ||
+ cp1_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF ||
+ cp1_prop == LINE_BREAK_PROP_IS ||
cp1_prop == LINE_BREAK_PROP_SY))) {
continue;
}
/* LB14 */
- if (last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF ||
- last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF) {
+ if (last_non_sp_cm_or_zwj_prop ==
+ LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF ||
+ last_non_sp_cm_or_zwj_prop ==
+ LINE_BREAK_PROP_OP_WITH_EAW_HWF) {
continue;
}
@@ -251,9 +259,11 @@ next_line_break(HERODOTUS_READER *r)
}
/* LB16 */
- if ((last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CL ||
- last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
- last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF) &&
+ if ((last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CL ||
+ last_non_sp_cm_or_zwj_prop ==
+ LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
+ last_non_sp_cm_or_zwj_prop ==
+ LINE_BREAK_PROP_CP_WITH_EAW_HWF) &&
cp1_prop == LINE_BREAK_PROP_NS) {
continue;
}
@@ -308,7 +318,7 @@ next_line_break(HERODOTUS_READER *r)
}
/* LB23 */
- if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL ||
+ if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL ||
last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) &&
cp1_prop == LINE_BREAK_PROP_NU) {
continue;
@@ -336,11 +346,11 @@ next_line_break(HERODOTUS_READER *r)
/* LB24 */
if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PR ||
last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PO) &&
- (cp1_prop == LINE_BREAK_PROP_AL ||
+ (cp1_prop == LINE_BREAK_PROP_AL ||
cp1_prop == LINE_BREAK_PROP_HL)) {
continue;
}
- if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL ||
+ if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL ||
last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) &&
(cp1_prop == LINE_BREAK_PROP_PR ||
cp1_prop == LINE_BREAK_PROP_PO)) {
@@ -362,32 +372,33 @@ next_line_break(HERODOTUS_READER *r)
herodotus_reader_copy(r, &tmp);
herodotus_read_codepoint(&tmp, true, &cp);
if (herodotus_read_codepoint(&tmp, true, &cp) ==
- HERODOTUS_STATUS_SUCCESS &&
+ HERODOTUS_STATUS_SUCCESS &&
(cp1_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF ||
- cp1_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF ||
+ cp1_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF ||
cp1_prop == LINE_BREAK_PROP_HY)) {
if (get_break_prop(cp) == LINE_BREAK_PROP_NU) {
continue;
}
}
}
- if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF ||
- last_non_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF ||
+ if ((last_non_cm_or_zwj_prop ==
+ LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF ||
+ last_non_cm_or_zwj_prop ==
+ LINE_BREAK_PROP_OP_WITH_EAW_HWF ||
last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HY) &&
cp1_prop == LINE_BREAK_PROP_NU) {
continue;
}
- if (lb25_level == 1 &&
- (cp1_prop == LINE_BREAK_PROP_NU ||
- cp1_prop == LINE_BREAK_PROP_SY ||
- cp1_prop == LINE_BREAK_PROP_IS)) {
+ if (lb25_level == 1 && (cp1_prop == LINE_BREAK_PROP_NU ||
+ cp1_prop == LINE_BREAK_PROP_SY ||
+ cp1_prop == LINE_BREAK_PROP_IS)) {
continue;
}
if ((lb25_level == 1 || lb25_level == 2) &&
- (cp1_prop == LINE_BREAK_PROP_NU ||
- cp1_prop == LINE_BREAK_PROP_SY ||
- cp1_prop == LINE_BREAK_PROP_IS ||
- cp1_prop == LINE_BREAK_PROP_CL ||
+ (cp1_prop == LINE_BREAK_PROP_NU ||
+ cp1_prop == LINE_BREAK_PROP_SY ||
+ cp1_prop == LINE_BREAK_PROP_IS ||
+ cp1_prop == LINE_BREAK_PROP_CL ||
cp1_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
cp1_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF)) {
continue;
@@ -437,37 +448,37 @@ next_line_break(HERODOTUS_READER *r)
}
/* LB28 */
- if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL ||
+ if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL ||
last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) &&
- (cp1_prop == LINE_BREAK_PROP_AL ||
+ (cp1_prop == LINE_BREAK_PROP_AL ||
cp1_prop == LINE_BREAK_PROP_HL)) {
continue;
}
/* LB29 */
if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_IS &&
- (cp1_prop == LINE_BREAK_PROP_AL ||
+ (cp1_prop == LINE_BREAK_PROP_AL ||
cp1_prop == LINE_BREAK_PROP_HL)) {
continue;
}
/* LB30 */
- if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL ||
- last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL ||
+ if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL ||
+ last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL ||
last_non_cm_or_zwj_prop == LINE_BREAK_PROP_NU) &&
cp1_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF) {
continue;
}
- if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF &&
- (cp1_prop == LINE_BREAK_PROP_AL ||
- cp1_prop == LINE_BREAK_PROP_HL ||
+ if (last_non_cm_or_zwj_prop ==
+ LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF &&
+ (cp1_prop == LINE_BREAK_PROP_AL ||
+ cp1_prop == LINE_BREAK_PROP_HL ||
cp1_prop == LINE_BREAK_PROP_NU)) {
continue;
}
/* LB30a */
- if (!ri_even &&
- last_non_cm_or_zwj_prop == LINE_BREAK_PROP_RI &&
+ if (!ri_even && last_non_cm_or_zwj_prop == LINE_BREAK_PROP_RI &&
cp1_prop == LINE_BREAK_PROP_RI) {
continue;
}
@@ -477,7 +488,8 @@ next_line_break(HERODOTUS_READER *r)
cp1_prop == LINE_BREAK_PROP_EM) {
continue;
}
- if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_BOTH_CN_EXTPICT &&
+ if (last_non_cm_or_zwj_prop ==
+ LINE_BREAK_PROP_BOTH_CN_EXTPICT &&
cp1_prop == LINE_BREAK_PROP_EM) {
continue;
}
diff --git a/src/sentence.c b/src/sentence.c
@@ -6,8 +6,7 @@
#include "../grapheme.h"
#include "util.h"
-struct sentence_break_state
-{
+struct sentence_break_state {
uint_least8_t aterm_close_sp_level;
uint_least8_t saterm_close_sp_parasep_level;
};
@@ -17,8 +16,8 @@ get_sentence_break_prop(uint_least32_t cp)
{
if (likely(cp <= UINT32_C(0x10FFFF))) {
return (uint_least8_t)
- sentence_break_minor[sentence_break_major[cp >> 8] +
- (cp & 0xff)];
+ sentence_break_minor[sentence_break_major[cp >> 8] +
+ (cp & 0xff)];
} else {
return SENTENCE_BREAK_PROP_OTHER;
}
@@ -80,7 +79,7 @@ sentence_skip_shift_callback(uint_least8_t prop, void *s)
state->aterm_close_sp_level = 2;
} else if ((state->aterm_close_sp_level == 1 ||
state->aterm_close_sp_level == 2 ||
- state->aterm_close_sp_level == 3) &&
+ state->aterm_close_sp_level == 3) &&
prop == SENTENCE_BREAK_PROP_SP) {
/* sp-sequence begins or continued */
state->aterm_close_sp_level = 3;
@@ -102,7 +101,7 @@ sentence_skip_shift_callback(uint_least8_t prop, void *s)
state->saterm_close_sp_parasep_level = 2;
} else if ((state->saterm_close_sp_parasep_level == 1 ||
state->saterm_close_sp_parasep_level == 2 ||
- state->saterm_close_sp_parasep_level == 3) &&
+ state->saterm_close_sp_parasep_level == 3) &&
prop == SENTENCE_BREAK_PROP_SP) {
/* sp-sequence begins or continued */
state->saterm_close_sp_parasep_level = 3;
@@ -110,7 +109,7 @@ sentence_skip_shift_callback(uint_least8_t prop, void *s)
state->saterm_close_sp_parasep_level == 2 ||
state->saterm_close_sp_parasep_level == 3) &&
(prop == SENTENCE_BREAK_PROP_SEP ||
- prop == SENTENCE_BREAK_PROP_CR ||
+ prop == SENTENCE_BREAK_PROP_CR ||
prop == SENTENCE_BREAK_PROP_LF)) {
/* ParaSep at the end of the sequence */
state->saterm_close_sp_parasep_level = 4;
@@ -146,7 +145,7 @@ next_sentence_break(HERODOTUS_READER *r)
/* SB4 */
if (p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_SEP ||
- p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_CR ||
+ p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_CR ||
p.raw.prev_prop[0] == SENTENCE_BREAK_PROP_LF) {
break;
}
@@ -179,7 +178,8 @@ next_sentence_break(HERODOTUS_READER *r)
* This is the most complicated rule, requiring
* the right-hand-side to satisfy the regular expression
*
- * ( ¬(OLetter | Upper | Lower | ParaSep | SATerm) )* Lower
+ * ( ¬(OLetter | Upper | Lower | ParaSep | SATerm) )*
+ * Lower
*
* which we simply check "manually" given LUT-lookups
* are very cheap by starting at the mid_reader.
@@ -198,12 +198,12 @@ next_sentence_break(HERODOTUS_READER *r)
* match the following condition
*/
if (prop == SENTENCE_BREAK_PROP_OLETTER ||
- prop == SENTENCE_BREAK_PROP_UPPER ||
- prop == SENTENCE_BREAK_PROP_LOWER ||
- prop == SENTENCE_BREAK_PROP_SEP ||
- prop == SENTENCE_BREAK_PROP_CR ||
- prop == SENTENCE_BREAK_PROP_LF ||
- prop == SENTENCE_BREAK_PROP_STERM ||
+ prop == SENTENCE_BREAK_PROP_UPPER ||
+ prop == SENTENCE_BREAK_PROP_LOWER ||
+ prop == SENTENCE_BREAK_PROP_SEP ||
+ prop == SENTENCE_BREAK_PROP_CR ||
+ prop == SENTENCE_BREAK_PROP_LF ||
+ prop == SENTENCE_BREAK_PROP_STERM ||
prop == SENTENCE_BREAK_PROP_ATERM) {
break;
}
@@ -219,8 +219,8 @@ next_sentence_break(HERODOTUS_READER *r)
state.saterm_close_sp_parasep_level == 2 ||
state.saterm_close_sp_parasep_level == 3) &&
(p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SCONTINUE ||
- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_STERM ||
- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_ATERM)) {
+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_STERM ||
+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_ATERM)) {
continue;
}
@@ -228,9 +228,9 @@ next_sentence_break(HERODOTUS_READER *r)
if ((state.saterm_close_sp_parasep_level == 1 ||
state.saterm_close_sp_parasep_level == 2) &&
(p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CLOSE ||
- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP ||
- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SEP ||
- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR ||
+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP ||
+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SEP ||
+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR ||
p.skip.next_prop[0] == SENTENCE_BREAK_PROP_LF)) {
continue;
}
@@ -239,9 +239,9 @@ next_sentence_break(HERODOTUS_READER *r)
if ((state.saterm_close_sp_parasep_level == 1 ||
state.saterm_close_sp_parasep_level == 2 ||
state.saterm_close_sp_parasep_level == 3) &&
- (p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP ||
+ (p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SP ||
p.skip.next_prop[0] == SENTENCE_BREAK_PROP_SEP ||
- p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR ||
+ p.skip.next_prop[0] == SENTENCE_BREAK_PROP_CR ||
p.skip.next_prop[0] == SENTENCE_BREAK_PROP_LF)) {
continue;
}
diff --git a/src/utf8.c b/src/utf8.c
@@ -9,14 +9,14 @@
/* lookup-table for the types of sequence first bytes */
static const struct {
- uint_least8_t lower; /* lower bound of sequence first byte */
- uint_least8_t upper; /* upper bound of sequence first byte */
+ uint_least8_t lower; /* lower bound of sequence first byte */
+ uint_least8_t upper; /* upper bound of sequence first byte */
uint_least32_t mincp; /* smallest non-overlong encoded codepoint */
uint_least32_t maxcp; /* largest encodable codepoint */
- /*
- * implicit: table-offset represents the number of following
- * bytes of the form 10xxxxxx (6 bits capacity each)
- */
+ /*
+ * implicit: table-offset represents the number of following
+ * bytes of the form 10xxxxxx (6 bits capacity each)
+ */
} lut[] = {
[0] = {
/* 0xxxxxxx */
@@ -104,8 +104,8 @@ grapheme_decode_utf8(const char *str, size_t len, uint_least32_t *cp)
* sequence starter occurs right before a NUL-byte.
*/
for (i = 0; 1 + i < len; i++) {
- if(!BETWEEN(((const unsigned char *)str)[1 + i],
- 0x80, 0xBF)) {
+ if (!BETWEEN(((const unsigned char *)str)[1 + i], 0x80,
+ 0xBF)) {
break;
}
}
@@ -124,7 +124,7 @@ grapheme_decode_utf8(const char *str, size_t len, uint_least32_t *cp)
* (i.e. between 0x80 (10000000) and 0xBF (10111111))
*/
for (i = 1; i <= off; i++) {
- if(!BETWEEN(((const unsigned char *)str)[i], 0x80, 0xBF)) {
+ if (!BETWEEN(((const unsigned char *)str)[i], 0x80, 0xBF)) {
/*
* byte does not match format; return
* number of bytes processed excluding the
@@ -201,8 +201,8 @@ grapheme_encode_utf8(uint_least32_t cp, char *str, size_t len)
* We do not overwrite the mask because we guaranteed earlier
* that there are no bits higher than the mask allows.
*/
- ((unsigned char *)str)[0] = lut[off].lower |
- (uint_least8_t)(cp >> (6 * off));
+ ((unsigned char *)str)[0] =
+ lut[off].lower | (uint_least8_t)(cp >> (6 * off));
for (i = 1; i <= off; i++) {
/*
@@ -211,8 +211,8 @@ grapheme_encode_utf8(uint_least32_t cp, char *str, size_t len)
* extract from the properly-shifted value using the
* mask 00111111 (0x3F)
*/
- ((unsigned char *)str)[i] = 0x80 |
- ((cp >> (6 * (off - i))) & 0x3F);
+ ((unsigned char *)str)[i] =
+ 0x80 | ((cp >> (6 * (off - i))) & 0x3F);
}
return 1 + off;
diff --git a/src/util.c b/src/util.c
@@ -37,16 +37,20 @@ herodotus_reader_copy(const HERODOTUS_READER *src, HERODOTUS_READER *dest)
*/
dest->type = src->type;
if (src->type == HERODOTUS_TYPE_CODEPOINT) {
- dest->src = (src->src == NULL) ? NULL :
- ((const uint_least32_t *)(src->src)) + src->off;
+ dest->src =
+ (src->src == NULL) ?
+ NULL :
+ ((const uint_least32_t *)(src->src)) + src->off;
} else { /* src->type == HERODOTUS_TYPE_UTF8 */
- dest->src = (src->src == NULL) ? NULL :
- ((const char *)(src->src)) + src->off;
+ dest->src = (src->src == NULL) ?
+ NULL :
+ ((const char *)(src->src)) + src->off;
}
if (src->srclen == SIZE_MAX) {
dest->srclen = SIZE_MAX;
} else {
- dest->srclen = (src->off < src->srclen) ? src->srclen - src->off : 0;
+ dest->srclen =
+ (src->off < src->srclen) ? src->srclen - src->off : 0;
}
dest->off = 0;
dest->terminated_by_null = src->terminated_by_null;
@@ -62,8 +66,10 @@ herodotus_reader_copy(const HERODOTUS_READER *src, HERODOTUS_READER *dest)
* to release the limit and, instead, we just
* prevent any more reads
*/
- dest->soft_limit[i] = (src->off < src->soft_limit[i]) ?
- src->soft_limit[i] - src->off : 0;
+ dest->soft_limit[i] =
+ (src->off < src->soft_limit[i]) ?
+ src->soft_limit[i] - src->off :
+ 0;
}
}
}
@@ -141,9 +147,9 @@ herodotus_read_codepoint(HERODOTUS_READER *r, bool advance, uint_least32_t *cp)
*cp = ((const uint_least32_t *)(r->src))[r->off];
ret = 1;
} else { /* r->type == HERODOTUS_TYPE_UTF8 */
- ret = grapheme_decode_utf8((const char *)r->src + r->off,
- MIN(r->srclen, r->soft_limit[0]) -
- r->off, cp);
+ ret = grapheme_decode_utf8(
+ (const char *)r->src + r->off,
+ MIN(r->srclen, r->soft_limit[0]) - r->off, cp);
}
if (unlikely(r->srclen == SIZE_MAX && *cp == 0)) {
@@ -176,8 +182,8 @@ herodotus_read_codepoint(HERODOTUS_READER *r, bool advance, uint_least32_t *cp)
}
void
-herodotus_writer_init(HERODOTUS_WRITER *w, enum herodotus_type type,
- void *dest, size_t destlen)
+herodotus_writer_init(HERODOTUS_WRITER *w, enum herodotus_type type, void *dest,
+ size_t destlen)
{
w->type = type;
w->dest = dest;
@@ -212,8 +218,8 @@ herodotus_writer_nul_terminate(HERODOTUS_WRITER *w)
* (the last case meaning truncation).
*/
if (w->type == HERODOTUS_TYPE_CODEPOINT) {
- ((uint_least32_t *)(w->dest))
- [w->first_unwritable_offset] = 0;
+ ((uint_least32_t
+ *)(w->dest))[w->first_unwritable_offset] = 0;
} else { /* w->type == HERODOTUS_TYPE_UTF8 */
((char *)(w->dest))[w->first_unwritable_offset] = '\0';
}
@@ -226,8 +232,7 @@ herodotus_writer_nul_terminate(HERODOTUS_WRITER *w)
* byte.
*/
if (w->type == HERODOTUS_TYPE_CODEPOINT) {
- ((uint_least32_t *)(w->dest))
- [w->destlen - 1] = 0;
+ ((uint_least32_t *)(w->dest))[w->destlen - 1] = 0;
} else { /* w->type == HERODOTUS_TYPE_UTF8 */
((char *)(w->dest))[w->destlen - 1] = '\0';
}
@@ -267,8 +272,8 @@ herodotus_write_codepoint(HERODOTUS_WRITER *w, uint_least32_t cp)
if (w->dest != NULL && w->off + ret < w->destlen) {
/* we still have enough room in the buffer */
- grapheme_encode_utf8(cp, (char *)(w->dest) +
- w->off, w->destlen - w->off);
+ grapheme_encode_utf8(cp, (char *)(w->dest) + w->off,
+ w->destlen - w->off);
} else if (w->first_unwritable_offset == SIZE_MAX) {
/*
* the first unwritable offset has not been
@@ -328,8 +333,9 @@ proper_init(const HERODOTUS_READER *r, void *state, uint_least8_t no_prop,
/* fill in the two next raw properties (after no-initialization) */
p->raw.next_prop[0] = p->raw.next_prop[1] = p->no_prop;
- for (i = 0; i < 2 && herodotus_read_codepoint(&(p->raw_reader), true, &cp) ==
- HERODOTUS_STATUS_SUCCESS; ) {
+ for (i = 0;
+ i < 2 && herodotus_read_codepoint(&(p->raw_reader), true, &cp) ==
+ HERODOTUS_STATUS_SUCCESS;) {
p->raw.next_prop[i++] = p->get_break_prop(cp);
}
@@ -338,8 +344,9 @@ proper_init(const HERODOTUS_READER *r, void *state, uint_least8_t no_prop,
/* fill in the two next skip properties (after no-initialization) */
p->skip.next_prop[0] = p->skip.next_prop[1] = p->no_prop;
- for (i = 0; i < 2 && herodotus_read_codepoint(&(p->skip_reader), true, &cp) ==
- HERODOTUS_STATUS_SUCCESS; ) {
+ for (i = 0;
+ i < 2 && herodotus_read_codepoint(&(p->skip_reader), true, &cp) ==
+ HERODOTUS_STATUS_SUCCESS;) {
prop = p->get_break_prop(cp);
if (!p->is_skippable_prop(prop)) {
p->skip.next_prop[i++] = prop;
diff --git a/src/util.h b/src/util.h
@@ -10,25 +10,25 @@
#include "../grapheme.h"
#undef MIN
-#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
#undef MAX
-#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#define MAX(x, y) ((x) > (y) ? (x) : (y))
#undef LEN
#define LEN(x) (sizeof(x) / sizeof(*(x)))
#undef likely
#undef unlikely
#ifdef __has_builtin
- #if __has_builtin(__builtin_expect)
- #define likely(expr) __builtin_expect(!!(expr), 1)
- #define unlikely(expr) __builtin_expect(!!(expr), 0)
- #else
- #define likely(expr) (expr)
- #define unlikely(expr) (expr)
- #endif
+#if __has_builtin(__builtin_expect)
+#define likely(expr) __builtin_expect(!!(expr), 1)
+#define unlikely(expr) __builtin_expect(!!(expr), 0)
#else
- #define likely(expr) (expr)
- #define unlikely(expr) (expr)
+#define likely(expr) (expr)
+#define unlikely(expr) (expr)
+#endif
+#else
+#define likely(expr) (expr)
+#define unlikely(expr) (expr)
#endif
/*
@@ -84,6 +84,7 @@ struct proper {
uint_least8_t prev_prop[2];
uint_least8_t next_prop[2];
} raw, skip;
+
HERODOTUS_READER mid_reader, raw_reader, skip_reader;
void *state;
uint_least8_t no_prop;
@@ -100,7 +101,8 @@ void herodotus_reader_pop_limit(HERODOTUS_READER *);
size_t herodotus_reader_number_read(const HERODOTUS_READER *);
size_t herodotus_reader_next_word_break(const HERODOTUS_READER *);
size_t herodotus_reader_next_codepoint_break(const HERODOTUS_READER *);
-enum herodotus_status herodotus_read_codepoint(HERODOTUS_READER *, bool, uint_least32_t *);
+enum herodotus_status herodotus_read_codepoint(HERODOTUS_READER *, bool,
+ uint_least32_t *);
void herodotus_writer_init(HERODOTUS_WRITER *, enum herodotus_type, void *,
size_t);
diff --git a/src/word.c b/src/word.c
@@ -6,8 +6,7 @@
#include "../grapheme.h"
#include "util.h"
-struct word_break_state
-{
+struct word_break_state {
bool ri_even;
};
@@ -16,7 +15,8 @@ get_word_break_prop(uint_least32_t cp)
{
if (likely(cp <= UINT32_C(0x10FFFF))) {
return (uint_least8_t)
- word_break_minor[word_break_major[cp >> 8] + (cp & 0xff)];
+ word_break_minor[word_break_major[cp >> 8] +
+ (cp & 0xff)];
} else {
return WORD_BREAK_PROP_OTHER;
}
@@ -26,8 +26,7 @@ static bool
is_skippable_word_prop(uint_least8_t prop)
{
return prop == WORD_BREAK_PROP_EXTEND ||
- prop == WORD_BREAK_PROP_FORMAT ||
- prop == WORD_BREAK_PROP_ZWJ;
+ prop == WORD_BREAK_PROP_FORMAT || prop == WORD_BREAK_PROP_ZWJ;
}
static void
@@ -79,22 +78,24 @@ next_word_break(HERODOTUS_READER *r)
/* WB3a */
if (p.raw.prev_prop[0] == WORD_BREAK_PROP_NEWLINE ||
- p.raw.prev_prop[0] == WORD_BREAK_PROP_CR ||
+ p.raw.prev_prop[0] == WORD_BREAK_PROP_CR ||
p.raw.prev_prop[0] == WORD_BREAK_PROP_LF) {
break;
}
/* WB3b */
if (p.raw.next_prop[0] == WORD_BREAK_PROP_NEWLINE ||
- p.raw.next_prop[0] == WORD_BREAK_PROP_CR ||
+ p.raw.next_prop[0] == WORD_BREAK_PROP_CR ||
p.raw.next_prop[0] == WORD_BREAK_PROP_LF) {
break;
}
/* WB3c */
if (p.raw.prev_prop[0] == WORD_BREAK_PROP_ZWJ &&
- (p.raw.next_prop[0] == WORD_BREAK_PROP_EXTENDED_PICTOGRAPHIC ||
- p.raw.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT)) {
+ (p.raw.next_prop[0] ==
+ WORD_BREAK_PROP_EXTENDED_PICTOGRAPHIC ||
+ p.raw.next_prop[0] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT)) {
continue;
}
@@ -112,37 +113,43 @@ next_word_break(HERODOTUS_READER *r)
}
/* WB5 */
- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER ||
- p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER ||
+ p.skip.prev_prop[0] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) &&
- (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER ||
- p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+ (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER ||
+ p.skip.next_prop[0] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER)) {
continue;
}
/* WB6 */
- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER ||
- p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER ||
+ p.skip.prev_prop[0] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) &&
- (p.skip.next_prop[0] == WORD_BREAK_PROP_MIDLETTER ||
- p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET ||
+ (p.skip.next_prop[0] == WORD_BREAK_PROP_MIDLETTER ||
+ p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET ||
p.skip.next_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) &&
- (p.skip.next_prop[1] == WORD_BREAK_PROP_ALETTER ||
- p.skip.next_prop[1] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+ (p.skip.next_prop[1] == WORD_BREAK_PROP_ALETTER ||
+ p.skip.next_prop[1] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
p.skip.next_prop[1] == WORD_BREAK_PROP_HEBREW_LETTER)) {
continue;
}
/* WB7 */
- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDLETTER ||
- p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET ||
+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDLETTER ||
+ p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET ||
p.skip.prev_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) &&
- (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER ||
- p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+ (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER ||
+ p.skip.next_prop[0] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) &&
- (p.skip.prev_prop[1] == WORD_BREAK_PROP_ALETTER ||
- p.skip.prev_prop[1] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+ (p.skip.prev_prop[1] == WORD_BREAK_PROP_ALETTER ||
+ p.skip.prev_prop[1] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
p.skip.prev_prop[1] == WORD_BREAK_PROP_HEBREW_LETTER)) {
continue;
}
@@ -174,8 +181,9 @@ next_word_break(HERODOTUS_READER *r)
}
/* WB9 */
- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER ||
- p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER ||
+ p.skip.prev_prop[0] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER) &&
p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC) {
continue;
@@ -183,15 +191,16 @@ next_word_break(HERODOTUS_READER *r)
/* WB10 */
if (p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC &&
- (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER ||
- p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+ (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER ||
+ p.skip.next_prop[0] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER)) {
continue;
}
/* WB11 */
- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUM ||
- p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET ||
+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUM ||
+ p.skip.prev_prop[0] == WORD_BREAK_PROP_MIDNUMLET ||
p.skip.prev_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) &&
p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC &&
p.skip.prev_prop[1] == WORD_BREAK_PROP_NUMERIC) {
@@ -200,8 +209,8 @@ next_word_break(HERODOTUS_READER *r)
/* WB12 */
if (p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC &&
- (p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUM ||
- p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET ||
+ (p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUM ||
+ p.skip.next_prop[0] == WORD_BREAK_PROP_MIDNUMLET ||
p.skip.next_prop[0] == WORD_BREAK_PROP_SINGLE_QUOTE) &&
p.skip.next_prop[1] == WORD_BREAK_PROP_NUMERIC) {
continue;
@@ -214,11 +223,12 @@ next_word_break(HERODOTUS_READER *r)
}
/* WB13a */
- if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER ||
- p.skip.prev_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
- p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER ||
- p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC ||
- p.skip.prev_prop[0] == WORD_BREAK_PROP_KATAKANA ||
+ if ((p.skip.prev_prop[0] == WORD_BREAK_PROP_ALETTER ||
+ p.skip.prev_prop[0] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+ p.skip.prev_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER ||
+ p.skip.prev_prop[0] == WORD_BREAK_PROP_NUMERIC ||
+ p.skip.prev_prop[0] == WORD_BREAK_PROP_KATAKANA ||
p.skip.prev_prop[0] == WORD_BREAK_PROP_EXTENDNUMLET) &&
p.skip.next_prop[0] == WORD_BREAK_PROP_EXTENDNUMLET) {
continue;
@@ -226,10 +236,11 @@ next_word_break(HERODOTUS_READER *r)
/* WB13b */
if (p.skip.prev_prop[0] == WORD_BREAK_PROP_EXTENDNUMLET &&
- (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER ||
- p.skip.next_prop[0] == WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
- p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER ||
- p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC ||
+ (p.skip.next_prop[0] == WORD_BREAK_PROP_ALETTER ||
+ p.skip.next_prop[0] ==
+ WORD_BREAK_PROP_BOTH_ALETTER_EXTPICT ||
+ p.skip.next_prop[0] == WORD_BREAK_PROP_HEBREW_LETTER ||
+ p.skip.next_prop[0] == WORD_BREAK_PROP_NUMERIC ||
p.skip.next_prop[0] == WORD_BREAK_PROP_KATAKANA)) {
continue;
}
diff --git a/test/bidirectional.c b/test/bidirectional.c
@@ -25,14 +25,16 @@ main(int argc, char *argv[])
for (i = 0, failed = 0; i < LEN(bidirectional_test); i++) {
/*if (i != 490798)
- continue;*/
+ continue;*/
for (m = 0; m < bidirectional_test[i].modelen; m++) {
ret = grapheme_get_bidirectional_embedding_levels(
- bidirectional_test[i].cp, bidirectional_test[i].cplen,
+ bidirectional_test[i].cp,
+ bidirectional_test[i].cplen,
bidirectional_test[i].mode[m], lev, levlen);
- if (ret != bidirectional_test[i].cplen || ret > levlen) {
+ if (ret != bidirectional_test[i].cplen ||
+ ret > levlen) {
goto err;
}
@@ -43,18 +45,22 @@ main(int argc, char *argv[])
}
continue;
err:
- fprintf(stderr, "%s: Failed conformance test %zu (mode %i) [",
+ fprintf(stderr,
+ "%s: Failed conformance test %zu (mode %i) [",
argv[0], i, bidirectional_test[i].mode[m]);
for (j = 0; j < bidirectional_test[i].cplen; j++) {
- fprintf(stderr, " 0x%04" PRIXLEAST32, bidirectional_test[i].cp[j]);
+ fprintf(stderr, " 0x%04" PRIXLEAST32,
+ bidirectional_test[i].cp[j]);
}
fprintf(stderr, " ],\n\tgot (");
for (j = 0; j < ret; j++) {
- fprintf(stderr, " %" PRIdLEAST8, (int_least8_t)lev[j]);
+ fprintf(stderr, " %" PRIdLEAST8,
+ (int_least8_t)lev[j]);
}
fprintf(stderr, " ),\n\texpected (");
for (j = 0; j < ret; j++) {
- fprintf(stderr, " %" PRIdLEAST8, bidirectional_test[i].level[j]);
+ fprintf(stderr, " %" PRIdLEAST8,
+ bidirectional_test[i].level[j]);
}
fprintf(stderr, " ).\n");
failed++;
diff --git a/test/case.c b/test/case.c
@@ -9,10 +9,12 @@
struct unit_test_is_case_utf8 {
const char *description;
+
struct {
const char *src;
size_t srclen;
} input;
+
struct {
bool ret;
size_t caselen;
@@ -21,11 +23,13 @@ struct unit_test_is_case_utf8 {
struct unit_test_to_case_utf8 {
const char *description;
+
struct {
const char *src;
size_t srclen;
size_t destlen;
} input;
+
struct {
const char *dest;
size_t ret;
@@ -35,57 +39,69 @@ struct unit_test_to_case_utf8 {
static const struct unit_test_is_case_utf8 is_lowercase_utf8[] = {
{
.description = "empty input",
- .input = { "", 0 },
+ .input = { "", 0 },
.output = { true, 0 },
},
{
.description = "one character, violation",
- .input = { "A", 1 },
+ .input = { "A", 1 },
.output = { false, 0 },
},
{
.description = "one character, confirmation",
- .input = { "\xC3\x9F", 2 },
+ .input = { "\xC3\x9F", 2 },
.output = { true, 2 },
},
{
.description = "one character, violation, NUL-terminated",
- .input = { "A", SIZE_MAX },
+ .input = { "A", SIZE_MAX },
.output = { false, 0 },
},
{
.description = "one character, confirmation, NUL-terminated",
- .input = { "\xC3\x9F", SIZE_MAX },
+ .input = { "\xC3\x9F", SIZE_MAX },
.output = { true, 2 },
},
{
.description = "one word, violation",
- .input = { "Hello", 5 },
+ .input = { "Hello", 5 },
.output = { false, 0 },
},
{
.description = "one word, partial confirmation",
- .input = { "gru" "\xC3\x9F" "fOrmel", 11 },
+ .input = { "gru"
+ "\xC3\x9F"
+ "fOrmel",
+ 11 },
.output = { false, 6 },
},
{
.description = "one word, full confirmation",
- .input = { "gru" "\xC3\x9F" "formel", 11 },
+ .input = { "gru"
+ "\xC3\x9F"
+ "formel",
+ 11 },
.output = { true, 11 },
},
{
.description = "one word, violation, NUL-terminated",
- .input = { "Hello", SIZE_MAX },
+ .input = { "Hello", SIZE_MAX },
.output = { false, 0 },
},
{
.description = "one word, partial confirmation, NUL-terminated",
- .input = { "gru" "\xC3\x9F" "fOrmel", SIZE_MAX },
+ .input = { "gru"
+ "\xC3\x9F"
+ "fOrmel",
+ SIZE_MAX },
.output = { false, 6 },
},
{
.description = "one word, full confirmation, NUL-terminated",
- .input = { "gru" "\xC3\x9F" "formel", SIZE_MAX },
+ .input = { "gru"
+ "\xC3\x9F"
+ "formel",
+ SIZE_MAX },
.output = { true, 11 },
},
};
@@ -93,57 +109,63 @@ static const struct unit_test_is_case_utf8 is_lowercase_utf8[] = {
static const struct unit_test_is_case_utf8 is_uppercase_utf8[] = {
{
.description = "empty input",
- .input = { "", 0 },
+ .input = { "", 0 },
.output = { true, 0 },
},
{
.description = "one character, violation",
- .input = { "\xC3\x9F", 2 },
+ .input = { "\xC3\x9F", 2 },
.output = { false, 0 },
},
{
.description = "one character, confirmation",
- .input = { "A", 1 },
+ .input = { "A", 1 },
.output = { true, 1 },
},
{
.description = "one character, violation, NUL-terminated",
- .input = { "\xC3\x9F", SIZE_MAX },
+ .input = { "\xC3\x9F", SIZE_MAX },
.output = { false, 0 },
},
{
.description = "one character, confirmation, NUL-terminated",
- .input = { "A", SIZE_MAX },
+ .input = { "A", SIZE_MAX },
.output = { true, 1 },
},
{
.description = "one word, violation",
- .input = { "hello", 5 },
+ .input = { "hello", 5 },
.output = { false, 0 },
},
{
.description = "one word, partial confirmation",
- .input = { "GRU" "\xC3\x9F" "formel", 11 },
+ .input = { "GRU"
+ "\xC3\x9F"
+ "formel",
+ 11 },
.output = { false, 3 },
},
{
.description = "one word, full confirmation",
- .input = { "HELLO", 5 },
+ .input = { "HELLO", 5 },
.output = { true, 5 },
},
{
.description = "one word, violation, NUL-terminated",
- .input = { "hello", SIZE_MAX },
+ .input = { "hello", SIZE_MAX },
.output = { false, 0 },
},
{
.description = "one word, partial confirmation, NUL-terminated",
- .input = { "GRU" "\xC3\x9F" "formel", SIZE_MAX },
+ .input = { "GRU"
+ "\xC3\x9F"
+ "formel",
+ SIZE_MAX },
.output = { false, 3 },
},
{
.description = "one word, full confirmation, NUL-terminated",
- .input = { "HELLO", SIZE_MAX },
+ .input = { "HELLO", SIZE_MAX },
.output = { true, 5 },
},
};
@@ -151,77 +173,103 @@ static const struct unit_test_is_case_utf8 is_uppercase_utf8[] = {
static const struct unit_test_is_case_utf8 is_titlecase_utf8[] = {
{
.description = "empty input",
- .input = { "", 0 },
+ .input = { "", 0 },
.output = { true, 0 },
},
{
.description = "one character, violation",
- .input = { "\xC3\x9F", 2 },
+ .input = { "\xC3\x9F", 2 },
.output = { false, 0 },
},
{
.description = "one character, confirmation",
- .input = { "A", 1 },
+ .input = { "A", 1 },
.output = { true, 1 },
},
{
.description = "one character, violation, NUL-terminated",
- .input = { "\xC3\x9F", SIZE_MAX },
+ .input = { "\xC3\x9F", SIZE_MAX },
.output = { false, 0 },
},
{
.description = "one character, confirmation, NUL-terminated",
- .input = { "A", SIZE_MAX },
+ .input = { "A", SIZE_MAX },
.output = { true, 1 },
},
{
.description = "one word, violation",
- .input = { "hello", 5 },
+ .input = { "hello", 5 },
.output = { false, 0 },
},
{
.description = "one word, partial confirmation",
- .input = { "Gru" "\xC3\x9F" "fOrmel", 11 },
+ .input = { "Gru"
+ "\xC3\x9F"
+ "fOrmel",
+ 11 },
.output = { false, 6 },
},
{
.description = "one word, full confirmation",
- .input = { "Gru" "\xC3\x9F" "formel", 11 },
+ .input = { "Gru"
+ "\xC3\x9F"
+ "formel",
+ 11 },
.output = { true, 11 },
},
{
.description = "one word, violation, NUL-terminated",
- .input = { "hello", SIZE_MAX },
+ .input = { "hello", SIZE_MAX },
.output = { false, 0 },
},
{
.description = "one word, partial confirmation, NUL-terminated",
- .input = { "Gru" "\xC3\x9F" "fOrmel", SIZE_MAX },
+ .input = { "Gru"
+ "\xC3\x9F"
+ "fOrmel",
+ SIZE_MAX },
.output = { false, 6 },
},
{
.description = "one word, full confirmation, NUL-terminated",
- .input = { "Gru" "\xC3\x9F" "formel", SIZE_MAX },
+ .input = { "Gru"
+ "\xC3\x9F"
+ "formel",
+ SIZE_MAX },
.output = { true, 11 },
},
{
.description = "multiple words, partial confirmation",
- .input = { "Hello Gru" "\xC3\x9F" "fOrmel!", 18 },
+ .input = { "Hello Gru"
+ "\xC3\x9F"
+ "fOrmel!",
+ 18 },
.output = { false, 12 },
},
{
.description = "multiple words, full confirmation",
- .input = { "Hello Gru" "\xC3\x9F" "formel!", 18 },
+ .input = { "Hello Gru"
+ "\xC3\x9F"
+ "formel!",
+ 18 },
.output = { true, 18 },
},
{
- .description = "multiple words, partial confirmation, NUL-terminated",
- .input = { "Hello Gru" "\xC3\x9F" "fOrmel!", SIZE_MAX },
+ .description =
+ "multiple words, partial confirmation, NUL-terminated",
+ .input = { "Hello Gru"
+ "\xC3\x9F"
+ "fOrmel!",
+ SIZE_MAX },
.output = { false, 12 },
},
{
- .description = "multiple words, full confirmation, NUL-terminated",
- .input = { "Hello Gru" "\xC3\x9F" "formel!", SIZE_MAX },
+ .description =
+ "multiple words, full confirmation, NUL-terminated",
+ .input = { "Hello Gru"
+ "\xC3\x9F"
+ "formel!",
+ SIZE_MAX },
.output = { true, 18 },
},
};
@@ -229,72 +277,74 @@ static const struct unit_test_is_case_utf8 is_titlecase_utf8[] = {
static const struct unit_test_to_case_utf8 to_lowercase_utf8[] = {
{
.description = "empty input",
- .input = { "", 0, 10 },
+ .input = { "", 0, 10 },
.output = { "", 0 },
},
{
.description = "empty output",
- .input = { "hello", 5, 0 },
+ .input = { "hello", 5, 0 },
.output = { "", 5 },
},
{
.description = "one character, conversion",
- .input = { "A", 1, 10 },
+ .input = { "A", 1, 10 },
.output = { "a", 1 },
},
{
.description = "one character, no conversion",
- .input = { "\xC3\x9F", 2, 10 },
+ .input = { "\xC3\x9F", 2, 10 },
.output = { "\xC3\x9F", 2 },
},
{
.description = "one character, conversion, truncation",
- .input = { "A", 1, 0 },
+ .input = { "A", 1, 0 },
.output = { "", 1 },
},
{
.description = "one character, conversion, NUL-terminated",
- .input = { "A", SIZE_MAX, 10 },
+ .input = { "A", SIZE_MAX, 10 },
.output = { "a", 1 },
},
{
.description = "one character, no conversion, NUL-terminated",
- .input = { "\xC3\x9F", SIZE_MAX, 10 },
+ .input = { "\xC3\x9F", SIZE_MAX, 10 },
.output = { "\xC3\x9F", 2 },
},
{
- .description = "one character, conversion, NUL-terminated, truncation",
- .input = { "A", SIZE_MAX, 0 },
+ .description =
+ "one character, conversion, NUL-terminated, truncation",
+ .input = { "A", SIZE_MAX, 0 },
.output = { "", 1 },
},
{
.description = "one word, conversion",
- .input = { "wOrD", 4, 10 },
+ .input = { "wOrD", 4, 10 },
.output = { "word", 4 },
},
{
.description = "one word, no conversion",
- .input = { "word", 4, 10 },
+ .input = { "word", 4, 10 },
.output = { "word", 4 },
},
{
.description = "one word, conversion, truncation",
- .input = { "wOrD", 4, 3 },
+ .input = { "wOrD", 4, 3 },
.output = { "wo", 4 },
},
{
.description = "one word, conversion, NUL-terminated",
- .input = { "wOrD", SIZE_MAX, 10 },
+ .input = { "wOrD", SIZE_MAX, 10 },
.output = { "word", 4 },
},
{
.description = "one word, no conversion, NUL-terminated",
- .input = { "word", SIZE_MAX, 10 },
+ .input = { "word", SIZE_MAX, 10 },
.output = { "word", 4 },
},
{
- .description = "one word, conversion, NUL-terminated, truncation",
- .input = { "wOrD", SIZE_MAX, 3 },
+ .description =
+ "one word, conversion, NUL-terminated, truncation",
+ .input = { "wOrD", SIZE_MAX, 3 },
.output = { "wo", 4 },
},
};
@@ -302,72 +352,86 @@ static const struct unit_test_to_case_utf8 to_lowercase_utf8[] = {
static const struct unit_test_to_case_utf8 to_uppercase_utf8[] = {
{
.description = "empty input",
- .input = { "", 0, 10 },
+ .input = { "", 0, 10 },
.output = { "", 0 },
},
{
.description = "empty output",
- .input = { "hello", 5, 0 },
+ .input = { "hello", 5, 0 },
.output = { "", 5 },
},
{
.description = "one character, conversion",
- .input = { "\xC3\x9F", 2, 10 },
+ .input = { "\xC3\x9F", 2, 10 },
.output = { "SS", 2 },
},
{
.description = "one character, no conversion",
- .input = { "A", 1, 10 },
+ .input = { "A", 1, 10 },
.output = { "A", 1 },
},
{
.description = "one character, conversion, truncation",
- .input = { "\xC3\x9F", 2, 0 },
+ .input = { "\xC3\x9F", 2, 0 },
.output = { "", 2 },
},
{
.description = "one character, conversion, NUL-terminated",
- .input = { "\xC3\x9F", SIZE_MAX, 10 },
+ .input = { "\xC3\x9F", SIZE_MAX, 10 },
.output = { "SS", 2 },
},
{
.description = "one character, no conversion, NUL-terminated",
- .input = { "A", SIZE_MAX, 10 },
+ .input = { "A", SIZE_MAX, 10 },
.output = { "A", 1 },
},
{
- .description = "one character, conversion, NUL-terminated, truncation",
- .input = { "\xC3\x9F", SIZE_MAX, 0 },
+ .description =
+ "one character, conversion, NUL-terminated, truncation",
+ .input = { "\xC3\x9F", SIZE_MAX, 0 },
.output = { "", 2 },
},
{
.description = "one word, conversion",
- .input = { "gRu" "\xC3\x9F" "fOrMel", 11, 15 },
+ .input = { "gRu"
+ "\xC3\x9F"
+ "fOrMel",
+ 11, 15 },
.output = { "GRUSSFORMEL", 11 },
},
{
.description = "one word, no conversion",
- .input = { "WORD", 4, 10 },
+ .input = { "WORD", 4, 10 },
.output = { "WORD", 4 },
},
{
.description = "one word, conversion, truncation",
- .input = { "gRu" "\xC3\x9F" "formel", 11, 5 },
+ .input = { "gRu"
+ "\xC3\x9F"
+ "formel",
+ 11, 5 },
.output = { "GRUS", 11 },
},
{
.description = "one word, conversion, NUL-terminated",
- .input = { "gRu" "\xC3\x9F" "formel", SIZE_MAX, 15 },
+ .input = { "gRu"
+ "\xC3\x9F"
+ "formel",
+ SIZE_MAX, 15 },
.output = { "GRUSSFORMEL", 11 },
},
{
.description = "one word, no conversion, NUL-terminated",
- .input = { "WORD", SIZE_MAX, 10 },
+ .input = { "WORD", SIZE_MAX, 10 },
.output = { "WORD", 4 },
},
{
- .description = "one word, conversion, NUL-terminated, truncation",
- .input = { "gRu" "\xC3\x9F" "formel", SIZE_MAX, 5 },
+ .description =
+ "one word, conversion, NUL-terminated, truncation",
+ .input = { "gRu"
+ "\xC3\x9F"
+ "formel",
+ SIZE_MAX, 5 },
.output = { "GRUS", 11 },
},
};
@@ -375,102 +439,105 @@ static const struct unit_test_to_case_utf8 to_uppercase_utf8[] = {
static const struct unit_test_to_case_utf8 to_titlecase_utf8[] = {
{
.description = "empty input",
- .input = { "", 0, 10 },
+ .input = { "", 0, 10 },
.output = { "", 0 },
},
{
.description = "empty output",
- .input = { "hello", 5, 0 },
+ .input = { "hello", 5, 0 },
.output = { "", 5 },
},
{
.description = "one character, conversion",
- .input = { "a", 1, 10 },
+ .input = { "a", 1, 10 },
.output = { "A", 1 },
},
{
.description = "one character, no conversion",
- .input = { "A", 1, 10 },
+ .input = { "A", 1, 10 },
.output = { "A", 1 },
},
{
.description = "one character, conversion, truncation",
- .input = { "a", 1, 0 },
+ .input = { "a", 1, 0 },
.output = { "", 1 },
},
{
.description = "one character, conversion, NUL-terminated",
- .input = { "a", SIZE_MAX, 10 },
+ .input = { "a", SIZE_MAX, 10 },
.output = { "A", 1 },
},
{
.description = "one character, no conversion, NUL-terminated",
- .input = { "A", SIZE_MAX, 10 },
+ .input = { "A", SIZE_MAX, 10 },
.output = { "A", 1 },
},
{
- .description = "one character, conversion, NUL-terminated, truncation",
- .input = { "a", SIZE_MAX, 0 },
+ .description =
+ "one character, conversion, NUL-terminated, truncation",
+ .input = { "a", SIZE_MAX, 0 },
.output = { "", 1 },
},
{
.description = "one word, conversion",
- .input = { "heLlo", 5, 10 },
+ .input = { "heLlo", 5, 10 },
.output = { "Hello", 5 },
},
{
.description = "one word, no conversion",
- .input = { "Hello", 5, 10 },
+ .input = { "Hello", 5, 10 },
.output = { "Hello", 5 },
},
{
.description = "one word, conversion, truncation",
- .input = { "heLlo", 5, 2 },
+ .input = { "heLlo", 5, 2 },
.output = { "H", 5 },
},
{
.description = "one word, conversion, NUL-terminated",
- .input = { "heLlo", SIZE_MAX, 10 },
+ .input = { "heLlo", SIZE_MAX, 10 },
.output = { "Hello", 5 },
},
{
.description = "one word, no conversion, NUL-terminated",
- .input = { "Hello", SIZE_MAX, 10 },
+ .input = { "Hello", SIZE_MAX, 10 },
.output = { "Hello", 5 },
},
{
- .description = "one word, conversion, NUL-terminated, truncation",
- .input = { "heLlo", SIZE_MAX, 3 },
+ .description =
+ "one word, conversion, NUL-terminated, truncation",
+ .input = { "heLlo", SIZE_MAX, 3 },
.output = { "He", 5 },
},
{
.description = "two words, conversion",
- .input = { "heLlo wORLd!", 12, 20 },
+ .input = { "heLlo wORLd!", 12, 20 },
.output = { "Hello World!", 12 },
},
{
.description = "two words, no conversion",
- .input = { "Hello World!", 12, 20 },
+ .input = { "Hello World!", 12, 20 },
.output = { "Hello World!", 12 },
},
{
.description = "two words, conversion, truncation",
- .input = { "heLlo wORLd!", 12, 8 },
+ .input = { "heLlo wORLd!", 12, 8 },
.output = { "Hello W", 12 },
},
{
.description = "two words, conversion, NUL-terminated",
- .input = { "heLlo wORLd!", SIZE_MAX, 20 },
+ .input = { "heLlo wORLd!", SIZE_MAX, 20 },
.output = { "Hello World!", 12 },
},
{
.description = "two words, no conversion, NUL-terminated",
- .input = { "Hello World!", SIZE_MAX, 20 },
+ .input = { "Hello World!", SIZE_MAX, 20 },
.output = { "Hello World!", 12 },
},
{
- .description = "two words, conversion, NUL-terminated, truncation",
- .input = { "heLlo wORLd!", SIZE_MAX, 4 },
+ .description =
+ "two words, conversion, NUL-terminated, truncation",
+ .input = { "heLlo wORLd!", SIZE_MAX, 4 },
.output = { "Hel", 12 },
},
};
@@ -485,14 +552,14 @@ unit_test_callback_is_case_utf8(const void *t, size_t off, const char *name,
size_t caselen = 0x7f;
if (t == is_lowercase_utf8) {
- ret = grapheme_is_lowercase_utf8(test->input.src, test->input.srclen,
- &caselen);
+ ret = grapheme_is_lowercase_utf8(test->input.src,
+ test->input.srclen, &caselen);
} else if (t == is_uppercase_utf8) {
- ret = grapheme_is_uppercase_utf8(test->input.src, test->input.srclen,
- &caselen);
+ ret = grapheme_is_uppercase_utf8(test->input.src,
+ test->input.srclen, &caselen);
} else if (t == is_titlecase_utf8) {
- ret = grapheme_is_titlecase_utf8(test->input.src, test->input.srclen,
- &caselen);
+ ret = grapheme_is_titlecase_utf8(test->input.src,
+ test->input.srclen, &caselen);
} else {
goto err;
@@ -505,10 +572,11 @@ unit_test_callback_is_case_utf8(const void *t, size_t off, const char *name,
return 0;
err:
- fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
- "(returned (%s, %zu) instead of (%s, %zu)).\n", argv0,
- name, off, test->description, ret ? "true" : "false",
- caselen, test->output.ret ? "true" : "false",
+ fprintf(stderr,
+ "%s: %s: Failed unit test %zu \"%s\" "
+ "(returned (%s, %zu) instead of (%s, %zu)).\n",
+ argv0, name, off, test->description, ret ? "true" : "false",
+ caselen, test->output.ret ? "true" : "false",
test->output.caselen);
return 1;
}
@@ -526,21 +594,25 @@ unit_test_callback_to_case_utf8(const void *t, size_t off, const char *name,
memset(buf, 0x7f, LEN(buf));
if (t == to_lowercase_utf8) {
- ret = grapheme_to_lowercase_utf8(test->input.src, test->input.srclen,
- buf, test->input.destlen);
+ ret = grapheme_to_lowercase_utf8(test->input.src,
+ test->input.srclen, buf,
+ test->input.destlen);
} else if (t == to_uppercase_utf8) {
- ret = grapheme_to_uppercase_utf8(test->input.src, test->input.srclen,
- buf, test->input.destlen);
+ ret = grapheme_to_uppercase_utf8(test->input.src,
+ test->input.srclen, buf,
+ test->input.destlen);
} else if (t == to_titlecase_utf8) {
- ret = grapheme_to_titlecase_utf8(test->input.src, test->input.srclen,
- buf, test->input.destlen);
+ ret = grapheme_to_titlecase_utf8(test->input.src,
+ test->input.srclen, buf,
+ test->input.destlen);
} else {
goto err;
}
/* check results */
if (ret != test->output.ret ||
- memcmp(buf, test->output.dest, MIN(test->input.destlen, test->output.ret))) {
+ memcmp(buf, test->output.dest,
+ MIN(test->input.destlen, test->output.ret))) {
goto err;
}
@@ -553,9 +625,10 @@ unit_test_callback_to_case_utf8(const void *t, size_t off, const char *name,
return 0;
err:
- fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
- "(returned (\"%.*s\", %zu) instead of (\"%.*s\", %zu)).\n", argv0,
- name, off, test->description, (int)ret, buf, ret,
+ fprintf(stderr,
+ "%s: %s: Failed unit test %zu \"%s\" "
+ "(returned (\"%.*s\", %zu) instead of (\"%.*s\", %zu)).\n",
+ argv0, name, off, test->description, (int)ret, buf, ret,
(int)test->output.ret, test->output.dest, test->output.ret);
return 1;
}
@@ -565,16 +638,22 @@ main(int argc, char *argv[])
{
(void)argc;
- return run_unit_tests(unit_test_callback_is_case_utf8, is_lowercase_utf8,
- LEN(is_lowercase_utf8), "grapheme_is_lowercase_utf8", argv[0]) +
- run_unit_tests(unit_test_callback_is_case_utf8, is_uppercase_utf8,
- LEN(is_uppercase_utf8), "grapheme_is_uppercase_utf8", argv[0]) +
- run_unit_tests(unit_test_callback_is_case_utf8, is_titlecase_utf8,
- LEN(is_titlecase_utf8), "grapheme_is_titlecase_utf8", argv[0]) +
- run_unit_tests(unit_test_callback_to_case_utf8, to_lowercase_utf8,
- LEN(to_lowercase_utf8), "grapheme_to_lowercase_utf8", argv[0]) +
- run_unit_tests(unit_test_callback_to_case_utf8, to_uppercase_utf8,
- LEN(to_uppercase_utf8), "grapheme_to_uppercase_utf8", argv[0]) +
- run_unit_tests(unit_test_callback_to_case_utf8, to_titlecase_utf8,
- LEN(to_titlecase_utf8), "grapheme_to_titlecase_utf8", argv[0]);
+ return run_unit_tests(unit_test_callback_is_case_utf8,
+ is_lowercase_utf8, LEN(is_lowercase_utf8),
+ "grapheme_is_lowercase_utf8", argv[0]) +
+ run_unit_tests(unit_test_callback_is_case_utf8,
+ is_uppercase_utf8, LEN(is_uppercase_utf8),
+ "grapheme_is_uppercase_utf8", argv[0]) +
+ run_unit_tests(unit_test_callback_is_case_utf8,
+ is_titlecase_utf8, LEN(is_titlecase_utf8),
+ "grapheme_is_titlecase_utf8", argv[0]) +
+ run_unit_tests(unit_test_callback_to_case_utf8,
+ to_lowercase_utf8, LEN(to_lowercase_utf8),
+ "grapheme_to_lowercase_utf8", argv[0]) +
+ run_unit_tests(unit_test_callback_to_case_utf8,
+ to_uppercase_utf8, LEN(to_uppercase_utf8),
+ "grapheme_to_uppercase_utf8", argv[0]) +
+ run_unit_tests(unit_test_callback_to_case_utf8,
+ to_titlecase_utf8, LEN(to_titlecase_utf8),
+ "grapheme_to_titlecase_utf8", argv[0]);
}
diff --git a/test/character.c b/test/character.c
@@ -92,12 +92,10 @@ static const struct unit_test_next_break_utf8 next_character_break_utf8[] = {
static int
unit_test_callback_next_character_break(const void *t, size_t off,
- const char *name,
- const char *argv0)
+ const char *name, const char *argv0)
{
- return unit_test_callback_next_break(t, off,
- grapheme_next_character_break,
- name, argv0);
+ return unit_test_callback_next_break(
+ t, off, grapheme_next_character_break, name, argv0);
}
static int
@@ -105,9 +103,8 @@ unit_test_callback_next_character_break_utf8(const void *t, size_t off,
const char *name,
const char *argv0)
{
- return unit_test_callback_next_break_utf8(t, off,
- grapheme_next_character_break_utf8,
- name, argv0);
+ return unit_test_callback_next_break_utf8(
+ t, off, grapheme_next_character_break_utf8, name, argv0);
}
int
@@ -116,11 +113,13 @@ main(int argc, char *argv[])
(void)argc;
return run_break_tests(grapheme_next_character_break,
- character_break_test, LEN(character_break_test), argv[0]) +
+ character_break_test, LEN(character_break_test),
+ argv[0]) +
run_unit_tests(unit_test_callback_next_character_break,
next_character_break, LEN(next_character_break),
"grapheme_next_character_break", argv[0]) +
run_unit_tests(unit_test_callback_next_character_break_utf8,
- next_character_break_utf8, LEN(next_character_break_utf8),
+ next_character_break_utf8,
+ LEN(next_character_break_utf8),
"grapheme_next_character_break_utf8", argv[0]);
}
diff --git a/test/line.c b/test/line.c
@@ -91,23 +91,19 @@ static const struct unit_test_next_break_utf8 next_line_break_utf8[] = {
};
static int
-unit_test_callback_next_line_break(const void *t, size_t off,
- const char *name,
- const char *argv0)
+unit_test_callback_next_line_break(const void *t, size_t off, const char *name,
+ const char *argv0)
{
- return unit_test_callback_next_break(t, off,
- grapheme_next_line_break,
+ return unit_test_callback_next_break(t, off, grapheme_next_line_break,
name, argv0);
}
static int
unit_test_callback_next_line_break_utf8(const void *t, size_t off,
- const char *name,
- const char *argv0)
+ const char *name, const char *argv0)
{
- return unit_test_callback_next_break_utf8(t, off,
- grapheme_next_line_break_utf8,
- name, argv0);
+ return unit_test_callback_next_break_utf8(
+ t, off, grapheme_next_line_break_utf8, name, argv0);
}
int
@@ -115,9 +111,8 @@ main(int argc, char *argv[])
{
(void)argc;
- return run_break_tests(grapheme_next_line_break,
- line_break_test, LEN(line_break_test),
- argv[0]) +
+ return run_break_tests(grapheme_next_line_break, line_break_test,
+ LEN(line_break_test), argv[0]) +
run_unit_tests(unit_test_callback_next_line_break,
next_line_break, LEN(next_line_break),
"grapheme_next_line_break", argv[0]) +
diff --git a/test/sentence.c b/test/sentence.c
@@ -92,22 +92,18 @@ static const struct unit_test_next_break_utf8 next_sentence_break_utf8[] = {
static int
unit_test_callback_next_sentence_break(const void *t, size_t off,
- const char *name,
- const char *argv0)
+ const char *name, const char *argv0)
{
- return unit_test_callback_next_break(t, off,
- grapheme_next_sentence_break,
- name, argv0);
+ return unit_test_callback_next_break(
+ t, off, grapheme_next_sentence_break, name, argv0);
}
static int
unit_test_callback_next_sentence_break_utf8(const void *t, size_t off,
- const char *name,
- const char *argv0)
+ const char *name, const char *argv0)
{
- return unit_test_callback_next_break_utf8(t, off,
- grapheme_next_sentence_break_utf8,
- name, argv0);
+ return unit_test_callback_next_break_utf8(
+ t, off, grapheme_next_sentence_break_utf8, name, argv0);
}
int
@@ -116,12 +112,13 @@ main(int argc, char *argv[])
(void)argc;
return run_break_tests(grapheme_next_sentence_break,
- sentence_break_test,
- LEN(sentence_break_test), argv[0]) +
+ sentence_break_test, LEN(sentence_break_test),
+ argv[0]) +
run_unit_tests(unit_test_callback_next_sentence_break,
next_sentence_break, LEN(next_sentence_break),
"grapheme_next_sentence_break", argv[0]) +
run_unit_tests(unit_test_callback_next_sentence_break_utf8,
- next_sentence_break_utf8, LEN(next_sentence_break_utf8),
+ next_sentence_break_utf8,
+ LEN(next_sentence_break_utf8),
"grapheme_next_character_break_utf8", argv[0]);
}
diff --git a/test/utf8-decode.c b/test/utf8-decode.c
@@ -8,281 +8,279 @@
#include "util.h"
static const struct {
- char *arr; /* UTF-8 byte sequence */
- size_t len; /* length of UTF-8 byte sequence */
- size_t exp_len; /* expected length returned */
- uint_least32_t exp_cp; /* expected codepoint returned */
+ char *arr; /* UTF-8 byte sequence */
+ size_t len; /* length of UTF-8 byte sequence */
+ size_t exp_len; /* expected length returned */
+ uint_least32_t exp_cp; /* expected codepoint returned */
} dec_test[] = {
{
/* empty sequence
- * [ ] ->
- * INVALID
- */
- .arr = NULL,
- .len = 0,
+ * [ ] ->
+ * INVALID
+ */
+ .arr = NULL,
+ .len = 0,
.exp_len = 0,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid lead byte
- * [ 11111101 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xFD },
- .len = 1,
+ * [ 11111101 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xFD },
+ .len = 1,
.exp_len = 1,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* valid 1-byte sequence
- * [ 00000001 ] ->
- * 0000001
- */
- .arr = (char *)(unsigned char[]){ 0x01 },
- .len = 1,
+ * [ 00000001 ] ->
+ * 0000001
+ */
+ .arr = (char *)(unsigned char[]) { 0x01 },
+ .len = 1,
.exp_len = 1,
- .exp_cp = 0x1,
+ .exp_cp = 0x1,
},
{
/* valid 2-byte sequence
- * [ 11000011 10111111 ] ->
- * 00011111111
- */
- .arr = (char *)(unsigned char[]){ 0xC3, 0xBF },
- .len = 2,
+ * [ 11000011 10111111 ] ->
+ * 00011111111
+ */
+ .arr = (char *)(unsigned char[]) { 0xC3, 0xBF },
+ .len = 2,
.exp_len = 2,
- .exp_cp = 0xFF,
+ .exp_cp = 0xFF,
},
{
/* invalid 2-byte sequence (second byte missing)
- * [ 11000011 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xC3 },
- .len = 1,
+ * [ 11000011 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xC3 },
+ .len = 1,
.exp_len = 2,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 2-byte sequence (second byte malformed)
- * [ 11000011 11111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xC3, 0xFF },
- .len = 2,
+ * [ 11000011 11111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xC3, 0xFF },
+ .len = 2,
.exp_len = 1,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 2-byte sequence (overlong encoded)
- * [ 11000001 10111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xC1, 0xBF },
- .len = 2,
+ * [ 11000001 10111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xC1, 0xBF },
+ .len = 2,
.exp_len = 2,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* valid 3-byte sequence
- * [ 11100000 10111111 10111111 ] ->
- * 0000111111111111
- */
- .arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0xBF },
- .len = 3,
+ * [ 11100000 10111111 10111111 ] ->
+ * 0000111111111111
+ */
+ .arr = (char *)(unsigned char[]) { 0xE0, 0xBF, 0xBF },
+ .len = 3,
.exp_len = 3,
- .exp_cp = 0xFFF,
+ .exp_cp = 0xFFF,
},
{
/* invalid 3-byte sequence (second byte missing)
- * [ 11100000 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xE0 },
- .len = 1,
+ * [ 11100000 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xE0 },
+ .len = 1,
.exp_len = 3,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 3-byte sequence (second byte malformed)
- * [ 11100000 01111111 10111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xE0, 0x7F, 0xBF },
- .len = 3,
+ * [ 11100000 01111111 10111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xE0, 0x7F, 0xBF },
+ .len = 3,
.exp_len = 1,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 3-byte sequence (short string, second byte malformed)
- * [ 11100000 01111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xE0, 0x7F },
- .len = 2,
+ * [ 11100000 01111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xE0, 0x7F },
+ .len = 2,
.exp_len = 1,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 3-byte sequence (third byte missing)
- * [ 11100000 10111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xE0, 0xBF },
- .len = 2,
+ * [ 11100000 10111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xE0, 0xBF },
+ .len = 2,
.exp_len = 3,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 3-byte sequence (third byte malformed)
- * [ 11100000 10111111 01111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0x7F },
- .len = 3,
+ * [ 11100000 10111111 01111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xE0, 0xBF, 0x7F },
+ .len = 3,
.exp_len = 2,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 3-byte sequence (overlong encoded)
- * [ 11100000 10011111 10111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xE0, 0x9F, 0xBF },
- .len = 3,
+ * [ 11100000 10011111 10111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xE0, 0x9F, 0xBF },
+ .len = 3,
.exp_len = 3,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 3-byte sequence (UTF-16 surrogate half)
- * [ 11101101 10100000 10000000 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xED, 0xA0, 0x80 },
- .len = 3,
+ * [ 11101101 10100000 10000000 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xED, 0xA0, 0x80 },
+ .len = 3,
.exp_len = 3,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* valid 4-byte sequence
- * [ 11110011 10111111 10111111 10111111 ] ->
- * 011111111111111111111
- */
- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0xBF },
- .len = 4,
+ * [ 11110011 10111111 10111111 10111111 ] ->
+ * 011111111111111111111
+ */
+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF, 0xBF },
+ .len = 4,
.exp_len = 4,
- .exp_cp = UINT32_C(0xFFFFF),
+ .exp_cp = UINT32_C(0xFFFFF),
},
{
/* invalid 4-byte sequence (second byte missing)
- * [ 11110011 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF3 },
- .len = 1,
+ * [ 11110011 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF3 },
+ .len = 1,
.exp_len = 4,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 4-byte sequence (second byte malformed)
- * [ 11110011 01111111 10111111 10111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF, 0xBF },
- .len = 4,
+ * [ 11110011 01111111 10111111 10111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF3, 0x7F, 0xBF, 0xBF },
+ .len = 4,
.exp_len = 1,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
- /* invalid 4-byte sequence (short string 1, second byte malformed)
- * [ 11110011 011111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF3, 0x7F },
- .len = 2,
+ /* invalid 4-byte sequence (short string 1, second byte
+ * malformed) [ 11110011 011111111 ] -> INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF3, 0x7F },
+ .len = 2,
.exp_len = 1,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
- /* invalid 4-byte sequence (short string 2, second byte malformed)
- * [ 11110011 011111111 10111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF },
- .len = 3,
+ /* invalid 4-byte sequence (short string 2, second byte
+ * malformed) [ 11110011 011111111 10111111 ] -> INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF3, 0x7F, 0xBF },
+ .len = 3,
.exp_len = 1,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 4-byte sequence (third byte missing)
- * [ 11110011 10111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF },
- .len = 2,
+ * [ 11110011 10111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF },
+ .len = 2,
.exp_len = 4,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 4-byte sequence (third byte malformed)
- * [ 11110011 10111111 01111111 10111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F, 0xBF },
- .len = 4,
+ * [ 11110011 10111111 01111111 10111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0x7F, 0xBF },
+ .len = 4,
.exp_len = 2,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 4-byte sequence (short string, third byte malformed)
- * [ 11110011 10111111 01111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F },
- .len = 3,
+ * [ 11110011 10111111 01111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0x7F },
+ .len = 3,
.exp_len = 2,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 4-byte sequence (fourth byte missing)
- * [ 11110011 10111111 10111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF },
- .len = 3,
+ * [ 11110011 10111111 10111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF },
+ .len = 3,
.exp_len = 4,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 4-byte sequence (fourth byte malformed)
- * [ 11110011 10111111 10111111 01111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0x7F },
- .len = 4,
+ * [ 11110011 10111111 10111111 01111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF, 0x7F },
+ .len = 4,
.exp_len = 3,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 4-byte sequence (overlong encoded)
- * [ 11110000 10000000 10000001 10111111 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF0, 0x80, 0x81, 0xBF },
- .len = 4,
+ * [ 11110000 10000000 10000001 10111111 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF0, 0x80, 0x81, 0xBF },
+ .len = 4,
.exp_len = 4,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
{
/* invalid 4-byte sequence (UTF-16-unrepresentable)
- * [ 11110100 10010000 10000000 10000000 ] ->
- * INVALID
- */
- .arr = (char *)(unsigned char[]){ 0xF4, 0x90, 0x80, 0x80 },
- .len = 4,
+ * [ 11110100 10010000 10000000 10000000 ] ->
+ * INVALID
+ */
+ .arr = (char *)(unsigned char[]) { 0xF4, 0x90, 0x80, 0x80 },
+ .len = 4,
.exp_len = 4,
- .exp_cp = GRAPHEME_INVALID_CODEPOINT,
+ .exp_cp = GRAPHEME_INVALID_CODEPOINT,
},
};
@@ -298,12 +296,12 @@ main(int argc, char *argv[])
size_t len;
uint_least32_t cp;
- len = grapheme_decode_utf8(dec_test[i].arr,
- dec_test[i].len, &cp);
+ len = grapheme_decode_utf8(dec_test[i].arr, dec_test[i].len,
+ &cp);
- if (len != dec_test[i].exp_len ||
- cp != dec_test[i].exp_cp) {
- fprintf(stderr, "%s: Failed test %zu: "
+ if (len != dec_test[i].exp_len || cp != dec_test[i].exp_cp) {
+ fprintf(stderr,
+ "%s: Failed test %zu: "
"Expected (%zx,%u), but got (%zx,%u).\n",
argv[0], i, dec_test[i].exp_len,
dec_test[i].exp_cp, len, cp);
diff --git a/test/utf8-encode.c b/test/utf8-encode.c
@@ -8,44 +8,44 @@
#include "util.h"
static const struct {
- uint_least32_t cp; /* input codepoint */
- char *exp_arr; /* expected UTF-8 byte sequence */
- size_t exp_len; /* expected length of UTF-8 sequence */
+ uint_least32_t cp; /* input codepoint */
+ char *exp_arr; /* expected UTF-8 byte sequence */
+ size_t exp_len; /* expected length of UTF-8 sequence */
} enc_test[] = {
{
/* invalid codepoint (UTF-16 surrogate half) */
- .cp = UINT32_C(0xD800),
- .exp_arr = (char *)(unsigned char[]){ 0xEF, 0xBF, 0xBD },
+ .cp = UINT32_C(0xD800),
+ .exp_arr = (char *)(unsigned char[]) { 0xEF, 0xBF, 0xBD },
.exp_len = 3,
},
{
/* invalid codepoint (UTF-16-unrepresentable) */
- .cp = UINT32_C(0x110000),
- .exp_arr = (char *)(unsigned char[]){ 0xEF, 0xBF, 0xBD },
+ .cp = UINT32_C(0x110000),
+ .exp_arr = (char *)(unsigned char[]) { 0xEF, 0xBF, 0xBD },
.exp_len = 3,
},
{
/* codepoint encoded to a 1-byte sequence */
- .cp = 0x01,
- .exp_arr = (char *)(unsigned char[]){ 0x01 },
+ .cp = 0x01,
+ .exp_arr = (char *)(unsigned char[]) { 0x01 },
.exp_len = 1,
},
{
/* codepoint encoded to a 2-byte sequence */
- .cp = 0xFF,
- .exp_arr = (char *)(unsigned char[]){ 0xC3, 0xBF },
+ .cp = 0xFF,
+ .exp_arr = (char *)(unsigned char[]) { 0xC3, 0xBF },
.exp_len = 2,
},
{
/* codepoint encoded to a 3-byte sequence */
- .cp = 0xFFF,
- .exp_arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0xBF },
+ .cp = 0xFFF,
+ .exp_arr = (char *)(unsigned char[]) { 0xE0, 0xBF, 0xBF },
.exp_len = 3,
},
{
/* codepoint encoded to a 4-byte sequence */
- .cp = UINT32_C(0xFFFFF),
- .exp_arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0xBF },
+ .cp = UINT32_C(0xFFFFF),
+ .exp_arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF, 0xBF },
.exp_len = 4,
},
};
@@ -66,11 +66,12 @@ main(int argc, char *argv[])
if (len != enc_test[i].exp_len ||
memcmp(arr, enc_test[i].exp_arr, len)) {
- fprintf(stderr, "%s, Failed test %zu: "
- "Expected (", argv[0], i);
+ fprintf(stderr,
+ "%s, Failed test %zu: "
+ "Expected (",
+ argv[0], i);
for (j = 0; j < enc_test[i].exp_len; j++) {
- fprintf(stderr, "0x%x",
- enc_test[i].exp_arr[j]);
+ fprintf(stderr, "0x%x", enc_test[i].exp_arr[j]);
if (j + 1 < enc_test[i].exp_len) {
fprintf(stderr, " ");
}
diff --git a/test/util.c b/test/util.c
@@ -5,13 +5,14 @@
#include <stdio.h>
#include <string.h>
-#include "../grapheme.h"
#include "../gen/types.h"
+#include "../grapheme.h"
#include "util.h"
int
run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t),
- const struct break_test *test, size_t testlen, const char *argv0)
+ const struct break_test *test, size_t testlen,
+ const char *argv0)
{
size_t i, j, off, res, failed;
@@ -21,11 +22,14 @@ run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t),
res = next_break(test[i].cp + off, test[i].cplen - off);
/* check if our resulting offset matches */
- if (j == test[i].lenlen ||
- res != test[i].len[j++]) {
- fprintf(stderr, "%s: Failed conformance test %zu \"%s\".\n",
+ if (j == test[i].lenlen || res != test[i].len[j++]) {
+ fprintf(stderr,
+ "%s: Failed conformance test %zu "
+ "\"%s\".\n",
argv0, i, test[i].descr);
- fprintf(stderr, "J=%zu: EXPECTED len %zu, got %zu\n", j-1, test[i].len[j-1], res);
+ fprintf(stderr,
+ "J=%zu: EXPECTED len %zu, got %zu\n",
+ j - 1, test[i].len[j - 1], res);
failed++;
break;
}
@@ -39,13 +43,15 @@ run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t),
int
run_unit_tests(int (*unit_test_callback)(const void *, size_t, const char *,
- const char *), const void *test, size_t testlen, const char *name,
+ const char *),
+ const void *test, size_t testlen, const char *name,
const char *argv0)
{
size_t i, failed;
for (i = 0, failed = 0; i < testlen; i++) {
- failed += (unit_test_callback(test, i, name, argv0) == 0) ? 0 : 1;
+ failed +=
+ (unit_test_callback(test, i, name, argv0) == 0) ? 0 : 1;
}
printf("%s: %s: %zu/%zu unit tests passed.\n", argv0, name,
@@ -56,8 +62,9 @@ run_unit_tests(int (*unit_test_callback)(const void *, size_t, const char *,
int
unit_test_callback_next_break(const struct unit_test_next_break *t, size_t off,
- size_t (*next_break)(const uint_least32_t *, size_t),
- const char *name, const char *argv0)
+ size_t (*next_break)(const uint_least32_t *,
+ size_t),
+ const char *name, const char *argv0)
{
const struct unit_test_next_break *test = t + off;
@@ -69,16 +76,18 @@ unit_test_callback_next_break(const struct unit_test_next_break *t, size_t off,
return 0;
err:
- fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
- "(returned %zu instead of %zu).\n", argv0,
- name, off, test->description, ret, test->output.ret);
+ fprintf(stderr,
+ "%s: %s: Failed unit test %zu \"%s\" "
+ "(returned %zu instead of %zu).\n",
+ argv0, name, off, test->description, ret, test->output.ret);
return 1;
}
int
unit_test_callback_next_break_utf8(const struct unit_test_next_break_utf8 *t,
size_t off,
- size_t (*next_break_utf8)(const char *, size_t),
+ size_t (*next_break_utf8)(const char *,
+ size_t),
const char *name, const char *argv0)
{
const struct unit_test_next_break_utf8 *test = t + off;
@@ -91,8 +100,9 @@ unit_test_callback_next_break_utf8(const struct unit_test_next_break_utf8 *t,
return 0;
err:
- fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
- "(returned %zu instead of %zu).\n", argv0,
- name, off, test->description, ret, test->output.ret);
+ fprintf(stderr,
+ "%s: %s: Failed unit test %zu \"%s\" "
+ "(returned %zu instead of %zu).\n",
+ argv0, name, off, test->description, ret, test->output.ret);
return 1;
}
diff --git a/test/util.h b/test/util.h
@@ -6,16 +6,18 @@
#include "../grapheme.h"
#undef MIN
-#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
#undef LEN
#define LEN(x) (sizeof(x) / sizeof(*(x)))
struct unit_test_next_break {
const char *description;
+
struct {
const uint_least32_t *src;
size_t srclen;
} input;
+
struct {
size_t ret;
} output;
@@ -23,10 +25,12 @@ struct unit_test_next_break {
struct unit_test_next_break_utf8 {
const char *description;
+
struct {
const char *src;
size_t srclen;
} input;
+
struct {
size_t ret;
} output;
@@ -36,14 +40,17 @@ int run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t),
const struct break_test *test, size_t testlen,
const char *);
int run_unit_tests(int (*unit_test_callback)(const void *, size_t, const char *,
- const char *), const void *, size_t, const char *, const char *);
+ const char *),
+ const void *, size_t, const char *, const char *);
int unit_test_callback_next_break(const struct unit_test_next_break *, size_t,
- size_t (*next_break)(const uint_least32_t *, size_t),
+ size_t (*next_break)(const uint_least32_t *,
+ size_t),
const char *, const char *);
int unit_test_callback_next_break_utf8(const struct unit_test_next_break_utf8 *,
size_t,
- size_t (*next_break_utf8)(const char *, size_t),
+ size_t (*next_break_utf8)(const char *,
+ size_t),
const char *, const char *);
#endif /* UTIL_H */
diff --git a/test/word.c b/test/word.c
@@ -91,23 +91,19 @@ static const struct unit_test_next_break_utf8 next_word_break_utf8[] = {
};
static int
-unit_test_callback_next_word_break(const void *t, size_t off,
- const char *name,
- const char *argv0)
+unit_test_callback_next_word_break(const void *t, size_t off, const char *name,
+ const char *argv0)
{
- return unit_test_callback_next_break(t, off,
- grapheme_next_word_break,
+ return unit_test_callback_next_break(t, off, grapheme_next_word_break,
name, argv0);
}
static int
unit_test_callback_next_word_break_utf8(const void *t, size_t off,
- const char *name,
- const char *argv0)
+ const char *name, const char *argv0)
{
- return unit_test_callback_next_break_utf8(t, off,
- grapheme_next_word_break_utf8,
- name, argv0);
+ return unit_test_callback_next_break_utf8(
+ t, off, grapheme_next_word_break_utf8, name, argv0);
}
int