commit ea1be565ad117a3e9846ae0e855d41021d94ee8a
parent f517655a98a155694cf57c180531724baa081c26
Author: Laslo Hunhold <dev@frign.de>
Date: Mon, 21 Nov 2022 11:05:26 +0100
Refactor state into unsigned integer
Now that the level determination has been separated out, there
is no need to use a signed integer for the state. This
simplifies the masking.
Diffstat:
3 files changed, 37 insertions(+), 93 deletions(-)
diff --git a/grapheme.h b/grapheme.h
@@ -16,14 +16,14 @@ enum grapheme_bidirectional_override {
};
void grapheme_bidirectional_get_line_embedding_levels(
- const int_least32_t *, size_t, int_least8_t *);
+ const uint_least32_t *, size_t, int_least8_t *);
size_t grapheme_bidirectional_preprocess(
const uint_least32_t *, size_t, enum grapheme_bidirectional_override,
- int_least32_t *, size_t);
+ uint_least32_t *, size_t);
size_t grapheme_bidirectional_preprocess_utf8(
const char *, size_t, enum grapheme_bidirectional_override,
- int_least32_t *, size_t);
+ uint_least32_t *, size_t);
size_t grapheme_bidirectional_reorder_line(
const uint_least32_t *, const int_least8_t *, size_t,
diff --git a/src/bidirectional.c b/src/bidirectional.c
@@ -8,127 +8,71 @@
#define MAX_DEPTH 125
-#if 0
enum state_type {
STATE_PROP, /* in 0..23, bidi_property */
+ STATE_PRESERVED_PROP, /* in 0..23, preserved bidi_property for L1-rules */
STATE_BRACKET_OFF, /* in 0..255, offset in bidi_bracket */
STATE_LEVEL, /* in 0..MAX_DEPTH+1=126, embedding level */
STATE_PARAGRAPH_LEVEL, /* in 0..1, paragraph embedding level */
STATE_VISITED, /* in 0..1, visited within isolating run */
};
-/* without rawprop, as it should be */
static struct {
- int_least32_t filter_mask;
- int_least32_t clear_mask;
+ uint_least32_t filter_mask;
size_t mask_shift;
int_least16_t value_offset;
} state_lut[] = {
[STATE_PROP] = {
- .filter_mask = 0x00001F, /* 00000000 00000000 00011111 */
- .clear_mask = 0x3FFFE0, /* 00111111 11111111 11100000 */
+ .filter_mask = 0x000001F, /* 00000000 00000000 00000000 00011111 */
.mask_shift = 0,
.value_offset = 0,
},
- [STATE_BRACKET_OFF] = {
- .filter_mask = 0x001FE0, /* 00000000 00011111 11100000 */
- .clear_mask = 0x3FE01F, /* 00111111 11100000 00011111 */
+ [STATE_PRESERVED_PROP] = {
+ .filter_mask = 0x00003E0, /* 00000000 00000000 00000011 11100000 */
.mask_shift = 5,
.value_offset = 0,
},
- [STATE_LEVEL] = {
- .filter_mask = 0x0FE000, /* 00001111 11100000 00000000 */
- .clear_mask = 0x301FFF, /* 00110000 00011111 11111111 */
- .mask_shift = 13,
- .value_offset = -1,
- },
- [STATE_PARAGRAPH_LEVEL] = {
- .filter_mask = 0x100000, /* 00010000 00000000 00000000 */
- .clear_mask = 0x2FFFFF, /* 00101111 11111111 11111111 */
- .mask_shift = 20,
- .value_offset = 0,
- },
- [STATE_VISITED] = {
- .filter_mask = 0x200000, /* 00100000 00000000 00000000 */
- .clear_mask = 0x1FFFFF, /* 00011111 11111111 11111111 */
- .mask_shift = 21,
- .value_offset = 0,
- },
-};
-#endif
-
-enum state_type {
- STATE_PROP, /* in 0..23, bidi_property */
- STATE_BRACKET_OFF, /* in 0..255, offset in bidi_bracket */
- STATE_LEVEL, /* in 0..MAX_DEPTH+1=126, embedding level */
- STATE_PARAGRAPH_LEVEL, /* in 0..1, paragraph embedding level */
- STATE_VISITED, /* in 0..1, visited within isolating run */
- STATE_RAWPROP,
-};
-
-static struct {
- int_least32_t filter_mask;
- int_least32_t clear_mask;
- size_t mask_shift;
- int_least16_t value_offset;
-} state_lut[] = {
- [STATE_PROP] = {
- .filter_mask = 0x000001F, /* 00000000 00000000 00000000 00011111 */
- .clear_mask = 0x7FFFFE0, /* 00000111 11111111 11111111 11100000 */
- .mask_shift = 0,
- .value_offset = 0,
- },
[STATE_BRACKET_OFF] = {
- .filter_mask = 0x0001FE0, /* 00000000 00000000 00011111 11100000 */
- .clear_mask = 0x7FFE01F, /* 00000111 11111111 11100000 00011111 */
- .mask_shift = 5,
+ .filter_mask = 0x003FC00, /* 00000000 00000011 11111100 00000000 */
+ .mask_shift = 10,
.value_offset = 0,
},
[STATE_LEVEL] = {
- .filter_mask = 0x00FE000, /* 00000000 00001111 11100000 00000000 */
- .clear_mask = 0x7F01FFF, /* 00000111 11110000 00011111 11111111 */
- .mask_shift = 13,
+ .filter_mask = 0x1FC0000, /* 00000001 11111100 00000000 00000000 */
+ .mask_shift = 18,
.value_offset = -1,
},
[STATE_PARAGRAPH_LEVEL] = {
- .filter_mask = 0x0100000, /* 00000000 00010000 00000000 00000000 */
- .clear_mask = 0x7EFFFFF, /* 00000111 11101111 11111111 11111111 */
- .mask_shift = 20,
+ .filter_mask = 0x2000000, /* 00000010 00000000 00000000 00000000 */
+ .mask_shift = 25,
.value_offset = 0,
},
[STATE_VISITED] = {
- .filter_mask = 0x0200000, /* 00000000 00100000 00000000 00000000 */
- .clear_mask = 0x7DFFFFF, /* 00000111 11011111 11111111 11111111 */
- .mask_shift = 21,
- .value_offset = 0,
- },
- [STATE_RAWPROP] = {
- .filter_mask = 0x7C00000, /* 00000111 11000000 00000000 00000000 */
- .clear_mask = 0x03FFFFF, /* 00000000 00111111 11111111 11111111 */
- .mask_shift = 22,
+ .filter_mask = 0x4000000, /* 00000100 00000000 00000000 00000000 */
+ .mask_shift = 26,
.value_offset = 0,
},
};
static inline int_least16_t
-get_state(enum state_type t, int_least32_t input)
+get_state(enum state_type t, uint_least32_t input)
{
- return (int_least16_t)(((input & state_lut[t].filter_mask) >>
- state_lut[t].mask_shift) +
- state_lut[t].value_offset);
+ return (int_least16_t)((input & state_lut[t].filter_mask) >>
+ state_lut[t].mask_shift) +
+ state_lut[t].value_offset;
}
static inline void
-set_state(enum state_type t, int_least16_t value, int_least32_t *output)
+set_state(enum state_type t, int_least16_t value, uint_least32_t *output)
{
- *output &= state_lut[t].clear_mask;
- *output |= ((value - state_lut[t].value_offset)
+ *output &= ~state_lut[t].filter_mask;
+ *output |= ((uint_least32_t)(value - state_lut[t].value_offset)
<< state_lut[t].mask_shift) &
state_lut[t].filter_mask;
}
struct isolate_runner {
- int_least32_t *buf;
+ uint_least32_t *buf;
size_t buflen;
struct {
@@ -179,7 +123,7 @@ ir_set_current_prop(struct isolate_runner *ir, enum bidi_property prop)
}
static void
-ir_init(int_least32_t *buf, size_t buflen, size_t off,
+ir_init(uint_least32_t *buf, size_t buflen, size_t off,
uint_least8_t paragraph_level, bool within, struct isolate_runner *ir)
{
size_t i;
@@ -385,7 +329,7 @@ ir_advance(struct isolate_runner *ir)
}
static size_t
-preprocess_isolating_run_sequence(int_least32_t *buf, size_t buflen, size_t off,
+preprocess_isolating_run_sequence(uint_least32_t *buf, size_t buflen, size_t off,
uint_least8_t paragraph_level)
{
enum bidi_property sequence_prop, prop;
@@ -597,7 +541,7 @@ preprocess_isolating_run_sequence(int_least32_t *buf, size_t buflen, size_t off,
static uint_least8_t
get_paragraph_level(enum grapheme_bidirectional_override override,
- bool terminate_on_pdi, const int_least32_t *buf,
+ bool terminate_on_pdi, const uint_least32_t *buf,
size_t buflen)
{
enum bidi_property prop;
@@ -653,7 +597,7 @@ get_paragraph_level(enum grapheme_bidirectional_override override,
static void
preprocess_paragraph(enum grapheme_bidirectional_override override,
- int_least32_t *buf, size_t buflen)
+ uint_least32_t *buf, size_t buflen)
{
enum bidi_property prop;
int_least8_t level;
@@ -961,7 +905,7 @@ again:
runsince = SIZE_MAX;
for (bufoff = 0; bufoff < buflen; bufoff++) {
level = (int_least8_t)get_state(STATE_LEVEL, buf[bufoff]);
- prop = (uint_least8_t)get_state(STATE_RAWPROP, buf[bufoff]);
+ prop = (uint_least8_t)get_state(STATE_PRESERVED_PROP, buf[bufoff]);
if (level == -1) {
/* ignored character */
@@ -1038,7 +982,7 @@ get_bidi_bracket_off(uint_least32_t cp)
static size_t
preprocess(HERODOTUS_READER *r,
enum grapheme_bidirectional_override override,
- int_least32_t *buf, size_t buflen)
+ uint_least32_t *buf, size_t buflen)
{
size_t bufoff, bufsize, lastparoff;
uint_least32_t cp;
@@ -1075,7 +1019,7 @@ preprocess(HERODOTUS_READER *r,
set_state(STATE_LEVEL, 0, &(buf[bufoff]));
set_state(STATE_PARAGRAPH_LEVEL, 0, &(buf[bufoff]));
set_state(STATE_VISITED, 0, &(buf[bufoff]));
- set_state(STATE_RAWPROP,
+ set_state(STATE_PRESERVED_PROP,
(uint_least8_t)get_bidi_property(cp),
&(buf[bufoff]));
}
@@ -1110,7 +1054,7 @@ preprocess(HERODOTUS_READER *r,
size_t
grapheme_bidirectional_preprocess(
const uint_least32_t *src, size_t srclen,
- enum grapheme_bidirectional_override override, int_least32_t *dest,
+ enum grapheme_bidirectional_override override, uint_least32_t *dest,
size_t destlen)
{
HERODOTUS_READER r;
@@ -1123,7 +1067,7 @@ grapheme_bidirectional_preprocess(
size_t
grapheme_bidirectional_preprocess_utf8(
const char *src, size_t srclen,
- enum grapheme_bidirectional_override override, int_least32_t *dest,
+ enum grapheme_bidirectional_override override, uint_least32_t *dest,
size_t destlen)
{
HERODOTUS_READER r;
@@ -1135,7 +1079,7 @@ grapheme_bidirectional_preprocess_utf8(
void
grapheme_bidirectional_get_line_embedding_levels(
- const int_least32_t *linedata, size_t linelen, int_least8_t *linelevel)
+ const uint_least32_t *linedata, size_t linelen, int_least8_t *linelevel)
{
enum bidi_property prop;
size_t i, runsince;
@@ -1143,7 +1087,7 @@ grapheme_bidirectional_get_line_embedding_levels(
/* rule L1.4 */
runsince = SIZE_MAX;
for (i = 0; i < linelen; i++) {
- prop = (uint_least8_t)get_state(STATE_RAWPROP, linedata[i]);
+ prop = (uint_least8_t)get_state(STATE_PRESERVED_PROP, linedata[i]);
/* write level into level array */
if ((linelevel[i] = (int_least8_t)get_state(
@@ -1171,7 +1115,7 @@ grapheme_bidirectional_get_line_embedding_levels(
*/
for (i = runsince; i < linelen; i++) {
if (linelevel[i] != -1) {
- linelevel[i] = get_state(
+ linelevel[i] = (int_least8_t)get_state(
STATE_PARAGRAPH_LEVEL, linedata[i]);
}
}
diff --git a/test/bidirectional.c b/test/bidirectional.c
@@ -12,7 +12,7 @@
int
main(int argc, char *argv[])
{
- int_least32_t data[512]; /* TODO iterate and get max, allocate */
+ uint_least32_t data[512]; /* TODO iterate and get max, allocate */
int_least8_t lev[512];
size_t i, num_tests, failed, datalen, ret, j, m;