libgrapheme

unicode string library
git clone git://git.suckless.org/libgrapheme
Log | Files | Refs | README | LICENSE

commit 0ea9cff075677315e44a096b6e4d61a1252b95e9
parent 9ed87fdf96badfee0e8b2c7ec441254c4cb9c990
Author: Laslo Hunhold <dev@frign.de>
Date:   Sat,  8 Jan 2022 15:45:39 +0100

gen/util: Add properties-handling and clean up old range-list-functions

As already announced, we will generate separate data-tables for separate
properties. To keep it all strict-aliasing-compliant, we have a single
properties-struct (which currently only contains one entry,
"break_property") that will, however, gain more entries if needed.
Playing around with void-pointers quickly turns into undefined behaviour,
which is why it makes no sense to have separate definitions for each
property-type.
Each "user" only uses a certain subset of the fields in the struct.
Given that the unused ones will just be zero, they make no difference
to the compression.

To avoid code-duplication, the generation of break-property-tables
is handled by a single function, which is then called in the respective
generation tool.

Signed-off-by: Laslo Hunhold <dev@frign.de>

Diffstat:
Mgen/properties.c | 310+------------------------------------------------------------------------------
Mgen/util.c | 386+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------
Mgen/util.h | 28+++++++++++-----------------
Msrc/character.c | 3++-
4 files changed, 323 insertions(+), 404 deletions(-)

diff --git a/gen/properties.c b/gen/properties.c @@ -12,22 +12,6 @@ #define FILE_EMOJI "data/emoji-data.txt" #define FILE_GRAPHEME "data/GraphemeBreakProperty.txt" -struct properties { - uint_least8_t char_break_property; -}; - -struct property_spec { - const char *enumname; - const char *file; - const char *ucdname; -}; - -struct property_payload { - struct properties *prop; - const struct property_spec *spec; - uint_least8_t speclen; -}; - static const struct property_spec char_break_property[] = { { .enumname = "OTHER", @@ -106,302 +90,14 @@ static const struct property_spec char_break_property[] = { }, }; -static int -break_property_callback(char *file, char **field, size_t nfields, - char *comment, void *payload) -{ - /* prop always has the length 0x110000 */ - struct property_payload *p = (struct property_payload *)payload; - struct range r; - uint_least8_t i; - uint_least32_t cp; - - (void)comment; - - if (nfields < 2) { - return 1; - } - - for (i = 0; i < p->speclen; i++) { - /* identify fitting file and identifier */ - if (p->spec[i].file && - !strcmp(p->spec[i].file, file) && - !strcmp(p->spec[i].ucdname, field[1])) { - /* parse range in first field */ - if (range_parse(field[0], &r)) { - return 1; - } - - /* apply to all codepoints in the range */ - for (cp = r.lower; cp <= r.upper; cp++) { - if (p->spec == char_break_property) { - if (p->prop[cp].char_break_property != 0) { - fprintf(stderr, "break_property_callback: " - "Character break property overlap.\n"); - exit(1); - } - p->prop[cp].char_break_property = i; - } else { - fprintf(stderr, "break_property_callback: " - "Unknown specification.\n"); - exit(1); - } - } - - break; - } - } - - return 0; -} - -struct compressed_properties { - size_t *offset; - struct properties *data; - size_t datalen; -}; - -static void -compress_properties(const struct properties *prop, - struct compressed_properties *comp) -{ - uint_least32_t cp, i; - - /* initialization */ - if (!(comp->offset = malloc((size_t)0x110000 
* sizeof(*(comp->offset))))) { - fprintf(stderr, "malloc: %s\n", strerror(errno)); - exit(1); - } - comp->data = NULL; - comp->datalen = 0; - - for (cp = 0; cp < 0x110000; cp++) { - for (i = 0; i < comp->datalen; i++) { - if (!memcmp(&(prop[cp]), &(comp->data[i]), sizeof(*prop))) { - /* found a match! */ - comp->offset[cp] = i; - break; - } - } - if (i == comp->datalen) { - /* - * found no matching properties-struct, so - * add current properties to data and add the - * offset in the offset-table - */ - if (!(comp->data = reallocarray(comp->data, - ++(comp->datalen), - sizeof(*(comp->data))))) { - fprintf(stderr, "reallocarray: %s\n", - strerror(errno)); - exit(1); - } - memcpy(&(comp->data[comp->datalen - 1]), &(prop[cp]), - sizeof(*prop)); - comp->offset[cp] = comp->datalen - 1; - } - } -} - -struct major_minor_properties { - size_t *major; - size_t *minor; - size_t minorlen; -}; - -static double -get_major_minor_properties(const struct compressed_properties *comp, - struct major_minor_properties *mm) -{ - size_t i, j, compression_count = 0; - - /* - * we currently have an array comp->offset which maps the - * codepoints 0..0x110000 to offsets into comp->data. - * To improve cache-locality instead and allow a bit of - * compressing, instead of directly mapping a codepoint - * 0xAAAABB with comp->offset, we generate two arrays major - * and minor such that - * comp->offset(0xAAAABB) == minor[major[0xAAAA] + 0xBB] - * This yields a major-array of length 2^16 and a minor array - * of variable length depending on how many common subsequences - * can be filtered out. 
- */ - - /* initialize */ - if (!(mm->major = malloc((size_t)0x1100 * sizeof(*(mm->major))))) { - fprintf(stderr, "malloc: %s\n", strerror(errno)); - exit(1); - } - mm->minor = NULL; - mm->minorlen = 0; - - printf("#include <stdint.h>\n\n"); - - for (i = 0; i < (size_t)0x1100; i++) { - /* - * we now look at the cp-range (i << 8)..(i << 8 + 0xFF) - * and check if its corresponding offset-data already - * exists in minor (because then we just point there - * and need less storage) - */ - for (j = 0; j + 0xFF < mm->minorlen; j++) { - if (!memcmp(&(comp->offset[i << 8]), - &(mm->minor[j]), - sizeof(*(comp->offset)) * 0x100)) { - break; - } - } - if (j + 0xFF < mm->minorlen) { - /* found an index */ - compression_count++; - mm->major[i] = j; - } else { - /* - * add "new" sequence to minor and point to it - * in major - */ - mm->minorlen += 0x100; - if (!(mm->minor = reallocarray(mm->minor, - mm->minorlen, - sizeof(*(mm->minor))))) { - fprintf(stderr, "reallocarray: %s\n", - strerror(errno)); - exit(1); - } - memcpy(&(mm->minor[mm->minorlen - 0x100]), - &(comp->offset[i << 8]), - sizeof(*(mm->minor)) * 0x100); - mm->major[i] = mm->minorlen - 0x100; - } - } - - /* return compression ratio */ - return (double)compression_count / 0x1100 * 100; -} - -static void -print_lookup_table(char *name, size_t *data, size_t datalen) -{ - char *type; - size_t i, maxval; - - for (i = 0, maxval = 0; i < datalen; i++) { - if (data[i] > maxval) { - maxval = data[i]; - } - } - - type = (maxval <= UINT_LEAST8_MAX) ? "uint_least8_t" : - (maxval <= UINT_LEAST16_MAX) ? "uint_least16_t" : - (maxval <= UINT_LEAST32_MAX) ? 
"uint_least32_t" : - "uint_least64_t"; - - printf("static const %s %s[] = {\n\t", type, name); - for (i = 0; i < datalen; i++) { - printf("%zu", data[i]); - if (i + 1 == datalen) { - printf("\n"); - } else if ((i + 1) % 8 != 0) { - printf(", "); - } else { - printf(",\n\t"); - } - - } - printf("};\n"); -} - -static uint_least8_t -get_value(const void *payload, size_t offset) -{ - return ((const struct properties *)payload)[offset].char_break_property; -} - -static void -print_derived_lookup_table(char *name, size_t *offset, size_t offsetlen, - uint_least8_t (*get_value)(const void *, size_t), - const void *payload) -{ - size_t i; - - printf("static const uint_least8_t %s[] = {\n\t", name); - for (i = 0; i < offsetlen; i++) { - printf("%"PRIuLEAST8, get_value(payload, offset[i])); - if (i + 1 == offsetlen) { - printf("\n"); - } else if ((i + 1) % 8 != 0) { - printf(", "); - } else { - printf(",\n\t"); - } - - } - printf("};\n"); -} - -static void -print_enum(const struct property_spec *spec, size_t speclen, - const char *enumname, const char *enumprefix) -{ - size_t i; - - printf("enum %s {\n", enumname); - for (i = 0; i < speclen; i++) { - printf("\t%s_%s,\n", enumprefix, spec[i].enumname); - } - printf("\tNUM_%sS,\n};\n\n", enumprefix); -} - int main(int argc, char *argv[]) { - struct compressed_properties comp; - struct major_minor_properties mm; - struct property_payload payload; - struct properties *prop; - (void)argc; - /* allocate property buffer for all codepoints */ - if (!(prop = calloc(0x110000, sizeof(*prop)))) { - fprintf(stderr, "calloc: %s\n", strerror(errno)); - exit(1); - } - - /* extract properties */ - payload.prop = prop; - payload.spec = char_break_property; - payload.speclen = LEN(char_break_property); - - parse_file_with_callback(FILE_EMOJI, break_property_callback, &payload); - parse_file_with_callback(FILE_GRAPHEME, break_property_callback, &payload); - - /* - * deduplicate by generating an array of offsets into prop where - * common data 
points at the same offset - */ - compress_properties(prop, &comp); - - /* generate major-minor-offset-tables */ - fprintf(stderr, "%s: compression-ratio: %.2f%%\n", argv[0], - get_major_minor_properties(&comp, &mm)); - - /* print data */ - print_enum(char_break_property, LEN(char_break_property), - "char_break_property", "CHAR_BREAK_PROP"); - - print_lookup_table("major", mm.major, 0x1100); - printf("\n"); - print_derived_lookup_table("minor", mm.minor, mm.minorlen, get_value, - comp.data); - - /* free data */ - free(prop); - free(comp.data); - free(comp.offset); - free(mm.major); - free(mm.minor); + properties_generate_break_property(char_break_property, + LEN(char_break_property), + "char", argv[0]); return 0; } diff --git a/gen/util.c b/gen/util.c @@ -1,5 +1,7 @@ /* See LICENSE file for copyright and license details. */ +#include <ctype.h> #include <errno.h> +#include <inttypes.h> #include <stdbool.h> #include <stdint.h> #include <stdlib.h> @@ -8,10 +10,28 @@ #include "util.h" -struct property_list_payload -{ - struct property *prop; - size_t numprops; +struct range { + uint_least32_t lower; + uint_least32_t upper; +}; + +struct properties_payload { + struct properties *prop; + const struct property_spec *spec; + uint_least8_t speclen; + int (*set_value)(struct properties_payload *, uint_least32_t, uint_least8_t); +}; + +struct properties_compressed { + size_t *offset; + struct properties *data; + size_t datalen; +}; + +struct properties_major_minor { + size_t *major; + size_t *minor; + size_t minorlen; }; struct segment_test_payload @@ -63,7 +83,7 @@ hextocp(const char *str, size_t len, uint_least32_t *cp) return 0; } -int +static int range_parse(const char *str, struct range *range) { char *p; @@ -85,28 +105,9 @@ range_parse(const char *str, struct range *range) return 0; } -static void -range_list_append(struct range **range, size_t *nranges, const struct range *new) -{ - if (*nranges > 0 && (*range)[*nranges - 1].upper == new->lower) { - /* we can merge with 
previous entry */ - (*range)[*nranges - 1].upper = new->upper; - } else { - /* need to append new entry */ - if ((*range = realloc(*range, (++(*nranges)) * - sizeof(**range))) == NULL) { - fprintf(stderr, "range_list_append: realloc: %s.\n", - strerror(errno)); - exit(1); - } - (*range)[*nranges - 1].lower = new->lower; - (*range)[*nranges - 1].upper = new->upper; - } -} - void -parse_file_with_callback(char *fname, int (*callback)(char *, char **, - size_t, char *, void *), void *payload) +parse_file_with_callback(const char *fname, int (*callback)(const char *, + char **, size_t, char *, void *), void *payload) { FILE *fp; char *line = NULL, **field = NULL, *comment; @@ -197,12 +198,14 @@ parse_file_with_callback(char *fname, int (*callback)(char *, char **, } static int -property_list_callback(char *fname, char **field, size_t nfields, - char *comment, void *payload) +properties_callback(const char *file, char **field, size_t nfields, + char *comment, void *payload) { - struct property *prop = ((struct property_list_payload *)payload)->prop; + /* prop always has the length 0x110000 */ + struct properties_payload *p = (struct properties_payload *)payload; struct range r; - size_t i, numprops = ((struct property_list_payload *)payload)->numprops; + uint_least8_t i; + uint_least32_t cp; (void)comment; @@ -210,14 +213,22 @@ property_list_callback(char *fname, char **field, size_t nfields, return 1; } - for (i = 0; i < numprops; i++) { - if (!strcmp(field[1], prop[i].identifier) && - !strcmp(fname, prop[i].fname)) { + for (i = 0; i < p->speclen; i++) { + /* identify fitting file and identifier */ + if (p->spec[i].file && + !strcmp(p->spec[i].file, file) && + !strcmp(p->spec[i].ucdname, field[1])) { + /* parse range in first field */ if (range_parse(field[0], &r)) { return 1; } - range_list_append(&(prop[i].table), - &(prop[i].tablelen), &r); + + /* apply to all codepoints in the range */ + for (cp = r.lower; cp <= r.upper; cp++) { + if (p->set_value(payload, cp, i)) 
{ + exit(1); + } + } break; } } @@ -225,73 +236,290 @@ property_list_callback(char *fname, char **field, size_t nfields, return 0; } -void -property_list_parse(struct property *prop, size_t numprops) +static void +properties_compress(const struct properties *prop, + struct properties_compressed *comp) { - struct property_list_payload pl = { - .prop = prop, - .numprops = numprops - }; - size_t i; + uint_least32_t cp, i; - /* make sure to parse each file only once */ - for (i = 0; i < numprops; i++) { - if (prop[i].tablelen > 0) { - /* property's file was already parsed */ - continue; + /* initialization */ + if (!(comp->offset = malloc((size_t)0x110000 * sizeof(*(comp->offset))))) { + fprintf(stderr, "malloc: %s\n", strerror(errno)); + exit(1); + } + comp->data = NULL; + comp->datalen = 0; + + for (cp = 0; cp < 0x110000; cp++) { + for (i = 0; i < comp->datalen; i++) { + if (!memcmp(&(prop[cp]), &(comp->data[i]), sizeof(*prop))) { + /* found a match! */ + comp->offset[cp] = i; + break; + } + } + if (i == comp->datalen) { + /* + * found no matching properties-struct, so + * add current properties to data and add the + * offset in the offset-table + */ + if (!(comp->data = reallocarray(comp->data, + ++(comp->datalen), + sizeof(*(comp->data))))) { + fprintf(stderr, "reallocarray: %s\n", + strerror(errno)); + exit(1); + } + memcpy(&(comp->data[comp->datalen - 1]), &(prop[cp]), + sizeof(*prop)); + comp->offset[cp] = comp->datalen - 1; } + } +} - parse_file_with_callback(prop[i].fname, - property_list_callback, &pl); +static double +properties_get_major_minor(const struct properties_compressed *comp, + struct properties_major_minor *mm) +{ + size_t i, j, compression_count = 0; + + /* + * we currently have an array comp->offset which maps the + * codepoints 0..0x110000 to offsets into comp->data. 
+ * To improve cache-locality instead and allow a bit of + * compressing, instead of directly mapping a codepoint + * 0xAAAABB with comp->offset, we generate two arrays major + * and minor such that + * comp->offset(0xAAAABB) == minor[major[0xAAAA] + 0xBB] + * This yields a major-array of length 2^16 and a minor array + * of variable length depending on how many common subsequences + * can be filtered out. + */ + + /* initialize */ + if (!(mm->major = malloc((size_t)0x1100 * sizeof(*(mm->major))))) { + fprintf(stderr, "malloc: %s\n", strerror(errno)); + exit(1); + } + mm->minor = NULL; + mm->minorlen = 0; + + printf("#include <stdint.h>\n\n"); + + for (i = 0; i < (size_t)0x1100; i++) { + /* + * we now look at the cp-range (i << 8)..(i << 8 + 0xFF) + * and check if its corresponding offset-data already + * exists in minor (because then we just point there + * and need less storage) + */ + for (j = 0; j + 0xFF < mm->minorlen; j++) { + if (!memcmp(&(comp->offset[i << 8]), + &(mm->minor[j]), + sizeof(*(comp->offset)) * 0x100)) { + break; + } + } + if (j + 0xFF < mm->minorlen) { + /* found an index */ + compression_count++; + mm->major[i] = j; + } else { + /* + * add "new" sequence to minor and point to it + * in major + */ + mm->minorlen += 0x100; + if (!(mm->minor = reallocarray(mm->minor, + mm->minorlen, + sizeof(*(mm->minor))))) { + fprintf(stderr, "reallocarray: %s\n", + strerror(errno)); + exit(1); + } + memcpy(&(mm->minor[mm->minorlen - 0x100]), + &(comp->offset[i << 8]), + sizeof(*(mm->minor)) * 0x100); + mm->major[i] = mm->minorlen - 0x100; + } } + + /* return compression ratio */ + return (double)compression_count / 0x1100 * 100; } -void -property_list_print(const struct property *prop, size_t numprops, - const char *identifier, const char *progname) +static void +properties_print_lookup_table(char *name, size_t *data, size_t datalen) { - size_t i, j; + char *type; + size_t i, maxval; - printf("/* Automatically generated by %s */\n" - "#include 
<stdint.h>\n\n#include \"../gen/types.h\"\n\n", - progname); + for (i = 0, maxval = 0; i < datalen; i++) { + if (data[i] > maxval) { + maxval = data[i]; + } + } + + type = (maxval <= UINT_LEAST8_MAX) ? "uint_least8_t" : + (maxval <= UINT_LEAST16_MAX) ? "uint_least16_t" : + (maxval <= UINT_LEAST32_MAX) ? "uint_least32_t" : + "uint_least64_t"; + + printf("static const %s %s[] = {\n\t", type, name); + for (i = 0; i < datalen; i++) { + printf("%zu", data[i]); + if (i + 1 == datalen) { + printf("\n"); + } else if ((i + 1) % 8 != 0) { + printf(", "); + } else { + printf(",\n\t"); + } - /* print enum */ - printf("enum %s {\n", identifier); - for (i = 0; i < numprops; i++) { - printf("\t%s,\n", prop[i].enumname); } - printf("};\n\n"); - - /* print table */ - printf("static const struct range_list %s[] = {\n", identifier); - for (i = 0; i < numprops; i++) { - printf("\t[%s] = {\n\t\t.data = (struct range[]){\n", - prop[i].enumname); - for (j = 0; j < prop[i].tablelen; j++) { - printf("\t\t\t{ UINT32_C(0x%06X), UINT32_C(0x%06X) },\n", - prop[i].table[j].lower, - prop[i].table[j].upper); + printf("};\n"); +} + +static void +properties_print_derived_lookup_table(char *name, size_t *offset, size_t offsetlen, + uint_least8_t (*get_value)(const struct properties *, + size_t), const void *payload) +{ + size_t i; + + printf("static const uint_least8_t %s[] = {\n\t", name); + for (i = 0; i < offsetlen; i++) { + printf("%"PRIuLEAST8, get_value(payload, offset[i])); + if (i + 1 == offsetlen) { + printf("\n"); + } else if ((i + 1) % 8 != 0) { + printf(", "); + } else { + printf(",\n\t"); } - printf("\t\t},\n\t\t.len = %zu,\n\t},\n", prop[i].tablelen); + } printf("};\n"); } -void -property_list_free(struct property *prop, size_t numprops) +static void +properties_print_enum(const struct property_spec *spec, size_t speclen, + const char *enumname, const char *enumprefix) { size_t i; - for (i = 0; i < numprops; i++) { - free(prop[i].table); - prop[i].table = NULL; - prop[i].tablelen = 0; 
+ printf("enum %s {\n", enumname); + for (i = 0; i < speclen; i++) { + printf("\t%s_%s,\n", enumprefix, spec[i].enumname); + } + printf("\tNUM_%sS,\n};\n\n", enumprefix); +} + +static int +set_value_bp(struct properties_payload *payload, uint_least32_t cp, + uint_least8_t value) +{ + if (payload->prop[cp].break_property != 0) { + fprintf(stderr, "set_value_bp: " + "Character break property overlap.\n"); + return 1; + } + payload->prop[cp].break_property = value; + + return 0; +} + +static uint_least8_t +get_value_bp(const struct properties *prop, size_t offset) +{ + return prop[offset].break_property; +} + +void +properties_generate_break_property(const struct property_spec *spec, + uint_least8_t speclen, const char *prefix, + const char *argv0) +{ + struct properties_compressed comp; + struct properties_major_minor mm; + struct properties_payload payload; + struct properties *prop; + size_t i, j, prefixlen = strlen(prefix); + char buf1[64], prefix_uc[64], buf2[64], buf3[64], buf4[64]; + + /* allocate property buffer for all 0x110000 codepoints */ + if (!(prop = calloc(0x110000, sizeof(*prop)))) { + fprintf(stderr, "calloc: %s\n", strerror(errno)); + exit(1); + } + + /* generate data */ + payload.prop = prop; + payload.spec = spec; + payload.speclen = speclen; + payload.set_value = set_value_bp; + + /* parse each file exactly once and ignore NULL-fields */ + for (i = 0; i < speclen; i++) { + for (j = 0; j < i; j++) { + if (spec[i].file && spec[j].file && + !strcmp(spec[i].file, spec[j].file)) { + /* file has already been parsed */ + break; + } + } + if (i == j && spec[i].file) { + /* file has not been processed yet */ + parse_file_with_callback(spec[i].file, + properties_callback, + &payload); + } + } + + /* compress data */ + properties_compress(prop, &comp); + + fprintf(stderr, "%s: compression-ratio: %.2f%%\n", argv0, + properties_get_major_minor(&comp, &mm)); + + /* prepare names */ + if ((size_t)snprintf(buf1, LEN(buf1), "%s_break_property", prefix) >= 
LEN(buf1)) { + fprintf(stderr, "snprintf: String truncated.\n"); + exit(1); } + if (LEN(prefix_uc) + 1 < prefixlen) { + fprintf(stderr, "snprintf: Buffer too small.\n"); + exit(1); + } + for (i = 0; i < prefixlen; i++) { + prefix_uc[i] = (char)toupper(prefix[i]); + } + prefix_uc[prefixlen] = '\0'; + if ((size_t)snprintf(buf2, LEN(buf2), "%s_BREAK_PROP", prefix_uc) >= LEN(buf2) || + (size_t)snprintf(buf3, LEN(buf3), "%s_break_major", prefix) >= LEN(buf3) || + (size_t)snprintf(buf4, LEN(buf4), "%s_break_minor", prefix) >= LEN(buf4)) { + fprintf(stderr, "snprintf: String truncated.\n"); + exit(1); + } + + /* print data */ + properties_print_enum(spec, speclen, buf1, buf2); + properties_print_lookup_table(buf3, mm.major, 0x1100); + printf("\n"); + properties_print_derived_lookup_table(buf4, mm.minor, mm.minorlen, + get_value_bp, comp.data); + + /* free data */ + free(prop); + free(comp.data); + free(comp.offset); + free(mm.major); + free(mm.minor); } static int -segment_test_callback(char *fname, char **field, size_t nfields, +segment_test_callback(const char *fname, char **field, size_t nfields, char *comment, void *payload) { struct segment_test *t, diff --git a/gen/util.h b/gen/util.h @@ -7,17 +7,14 @@ #define LEN(x) (sizeof (x) / sizeof *(x)) -struct range { - uint_least32_t lower; - uint_least32_t upper; +struct property_spec { + const char *enumname; + const char *file; + const char *ucdname; }; -struct property { - char *enumname; - char *identifier; - char *fname; - struct range *table; - size_t tablelen; +struct properties { + uint_least8_t break_property; }; struct segment_test { @@ -28,15 +25,12 @@ struct segment_test { char *descr; }; -int range_parse(const char *, struct range *); +void parse_file_with_callback(const char *, int (*callback)(const char *, + char **, size_t, char *, void *), void *payload); -void parse_file_with_callback(char *, int (*callback)(char *, char **, - size_t, char *, void *), void *payload); - -void property_list_parse(struct 
property *, size_t); -void property_list_print(const struct property *, size_t, const char *, - const char *); -void property_list_free(struct property *, size_t); +void properties_generate_break_property(const struct property_spec *, + uint_least8_t, const char *, + const char *); void segment_test_list_parse(char *, struct segment_test **, size_t *); void segment_test_list_print(const struct segment_test *, size_t, diff --git a/src/character.c b/src/character.c @@ -106,7 +106,8 @@ static enum char_break_property get_break_prop(uint_least32_t cp) { if (likely(cp <= 0x10FFFF)) { - return (enum char_break_property)minor[major[cp >> 8] + (cp & 0xff)]; + return (enum char_break_property) + char_break_minor[char_break_major[cp >> 8] + (cp & 0xff)]; } else { return CHAR_BREAK_PROP_OTHER; }