commit aa5dda2687c4907d6a47e57b1d7973b8f9d158ae
parent 25d89e6e460e68329e7a3f388fe3e150a8f5474a
Author: Laslo Hunhold <dev@frign.de>
Date: Tue, 16 Aug 2022 16:25:31 +0200
Move get_codepoint_*()-util-functions to src/util.c
Signed-off-by: Laslo Hunhold <dev@frign.de>
Diffstat:
6 files changed, 75 insertions(+), 137 deletions(-)
diff --git a/src/case.c b/src/case.c
@@ -33,68 +33,6 @@ get_case_offset(uint_least32_t cp, const uint_least16_t *major,
}
static inline size_t
-get_codepoint(const void *str, size_t len, size_t offset, uint_least32_t *cp)
-{
- if (offset < len) {
- *cp = ((const uint_least32_t *)str)[offset];
- return 1;
- } else {
- *cp = GRAPHEME_INVALID_CODEPOINT;
- return 0;
- }
-}
-
-static inline size_t
-get_codepoint_utf8(const void *str, size_t len, size_t offset, uint_least32_t *cp)
-{
- size_t ret;
-
- if (offset < len) {
- ret = grapheme_decode_utf8((const char *)str + offset,
- len - offset, cp);
-
- if (unlikely(len == SIZE_MAX && cp == 0)) {
- return 0;
- } else {
- return ret;
- }
- } else {
- *cp = GRAPHEME_INVALID_CODEPOINT;
- return 0;
- }
-}
-
-static inline size_t
-set_codepoint(uint_least32_t cp, void *str, size_t len, size_t offset)
-{
- if (str == NULL || len == 0) {
- return 1;
- }
-
- if (offset < len) {
- ((uint_least32_t *)str)[offset] = cp;
- return 1;
- } else {
- return 0;
- }
-}
-
-static inline size_t
-set_codepoint_utf8(uint_least32_t cp, void *str, size_t len, size_t offset)
-{
- if (str == NULL || len == 0) {
- return grapheme_encode_utf8(cp, NULL, 0);
- }
-
- if (offset < len) {
- return grapheme_encode_utf8(cp, (char *)str + offset,
- len - offset);
- } else {
- return grapheme_encode_utf8(cp, NULL, 0);
- }
-}
-
-static inline size_t
to_case(const void *src, size_t srclen, void *dest, size_t destlen,
size_t srcnumprocess, uint_least8_t final_sigma_level,
size_t (*get_codepoint)(const void *, size_t, size_t, uint_least32_t *),
diff --git a/src/line.c b/src/line.c
@@ -19,30 +19,6 @@ get_break_prop(uint_least32_t cp)
}
}
-static inline size_t
-get_codepoint(const void *str, size_t len, size_t offset, uint_least32_t *cp)
-{
- if (offset < len) {
- *cp = ((const uint_least32_t *)str)[offset];
- return 1;
- } else {
- *cp = GRAPHEME_INVALID_CODEPOINT;
- return 0;
- }
-}
-
-static inline size_t
-get_codepoint_utf8(const void *str, size_t len, size_t offset, uint_least32_t *cp)
-{
- if (offset < len) {
- return grapheme_decode_utf8((const char *)str + offset,
- len - offset, cp);
- } else {
- *cp = GRAPHEME_INVALID_CODEPOINT;
- return 0;
- }
-}
-
static size_t
next_line_break(const void *str, size_t len, size_t (*get_codepoint)
(const void *, size_t, size_t, uint_least32_t *))
@@ -152,7 +128,9 @@ next_line_break(const void *str, size_t len, size_t (*get_codepoint)
* and one (CL | CP) to the left of the middle
* spot
*/
- if (lb25_level == 0 && cp0_prop == LINE_BREAK_PROP_NU) {
+ if ((lb25_level == 0 ||
+ lb25_level == 1) &&
+ cp0_prop == LINE_BREAK_PROP_NU) {
/* sequence has begun */
lb25_level = 1;
} else if ((lb25_level == 1 || lb25_level == 2) &&
diff --git a/src/sentence.c b/src/sentence.c
@@ -20,30 +20,6 @@ get_break_prop(uint_least32_t cp)
}
}
-static inline size_t
-get_codepoint(const void *str, size_t len, size_t offset, uint_least32_t *cp)
-{
- if (offset < len) {
- *cp = ((const uint_least32_t *)str)[offset];
- return 1;
- } else {
- *cp = GRAPHEME_INVALID_CODEPOINT;
- return 0;
- }
-}
-
-static inline size_t
-get_codepoint_utf8(const void *str, size_t len, size_t offset, uint_least32_t *cp)
-{
- if (offset < len) {
- return grapheme_decode_utf8((const char *)str + offset,
- len - offset, cp);
- } else {
- *cp = GRAPHEME_INVALID_CODEPOINT;
- return 0;
- }
-}
-
static size_t
next_sentence_break(const void *str, size_t len, size_t (*get_codepoint)
(const void *, size_t, size_t, uint_least32_t *))
@@ -142,7 +118,8 @@ next_sentence_break(const void *str, size_t len, size_t (*get_codepoint)
* left of the middle spot.
*
*/
- if (aterm_close_sp_level == 0 &&
+ if ((aterm_close_sp_level == 0 ||
+ aterm_close_sp_level == 1) &&
skip.b == SENTENCE_BREAK_PROP_ATERM) {
/* sequence has begun */
aterm_close_sp_level = 1;
@@ -162,7 +139,8 @@ next_sentence_break(const void *str, size_t len, size_t (*get_codepoint)
aterm_close_sp_level = 0;
}
- if (saterm_close_sp_parasep_level == 0 &&
+ if ((saterm_close_sp_parasep_level == 0 ||
+ saterm_close_sp_parasep_level == 1) &&
(skip.b == SENTENCE_BREAK_PROP_STERM ||
skip.b == SENTENCE_BREAK_PROP_ATERM)) {
/* sequence has begun */
diff --git a/src/util.c b/src/util.c
@@ -6,3 +6,65 @@
#include "../gen/types.h"
#include "../grapheme.h"
#include "util.h"
+
+inline size_t /* read element `offset` of the uint_least32_t array str into *cp */
+get_codepoint(const void *str, size_t len, size_t offset, uint_least32_t *cp)
+{
+ if (offset < len) { /* in range: str is accessed as an array of uint_least32_t */
+ *cp = ((const uint_least32_t *)str)[offset];
+ return 1; /* one array element was read */
+ } else { /* offset is at or past len: nothing is read from str */
+ *cp = GRAPHEME_INVALID_CODEPOINT;
+ return 0;
+ }
+}
+
+inline size_t /* decode the UTF-8 sequence at byte offset `offset` of str into *cp */
+get_codepoint_utf8(const void *str, size_t len, size_t offset, uint_least32_t *cp)
+{
+ size_t ret; /* value returned by grapheme_decode_utf8(), passed through unless a NUL ends the input */
+
+ if (offset < len) {
+ ret = grapheme_decode_utf8((const char *)str + offset,
+ len - offset, cp);
+
+ if (unlikely(len == SIZE_MAX && *cp == 0)) { /* test the decoded value, not the pointer cp */
+ return 0; /* NUL decoded with len == SIZE_MAX (presumably NUL-terminated input - confirm): report 0 */
+ } else {
+ return ret;
+ }
+ } else { /* offset is at or past len: nothing is decoded */
+ *cp = GRAPHEME_INVALID_CODEPOINT;
+ return 0;
+ }
+}
+
+inline size_t /* store cp at element `offset` of the uint_least32_t array str */
+set_codepoint(uint_least32_t cp, void *str, size_t len, size_t offset)
+{
+ if (str == NULL || len == 0) { /* no destination buffer: nothing is written */
+ return 1; /* 1 is still reported, the same value as for a successful store */
+ }
+
+ if (offset < len) { /* in range: str is accessed as an array of uint_least32_t */
+ ((uint_least32_t *)str)[offset] = cp;
+ return 1;
+ } else { /* offset is at or past len: nothing is written */
+ return 0;
+ }
+}
+
+inline size_t /* encode cp via grapheme_encode_utf8() at byte offset `offset` of str */
+set_codepoint_utf8(uint_least32_t cp, void *str, size_t len, size_t offset)
+{
+ if (str == NULL || len == 0) { /* no destination buffer */
+ return grapheme_encode_utf8(cp, NULL, 0); /* no-buffer call; presumably yields the encoded length - confirm */
+ }
+
+ if (offset < len) {
+ return grapheme_encode_utf8(cp, (char *)str + offset,
+ len - offset); /* only the len - offset bytes after offset are offered */
+ } else { /* offset is at or past len: same no-buffer call as for a missing buffer */
+ return grapheme_encode_utf8(cp, NULL, 0);
+ }
+}
diff --git a/src/util.h b/src/util.h
@@ -25,4 +25,10 @@
#define unlikely(expr) (expr)
#endif
+size_t get_codepoint(const void *, size_t, size_t, uint_least32_t *);
+size_t get_codepoint_utf8(const void *, size_t, size_t, uint_least32_t *);
+
+size_t set_codepoint(uint_least32_t, void *, size_t, size_t);
+size_t set_codepoint_utf8(uint_least32_t, void *, size_t, size_t);
+
#endif /* UTIL_H */
diff --git a/src/word.c b/src/word.c
@@ -19,30 +19,6 @@ get_break_prop(uint_least32_t cp)
}
}
-static inline size_t
-get_codepoint(const void *str, size_t len, size_t offset, uint_least32_t *cp)
-{
- if (offset < len) {
- *cp = ((const uint_least32_t *)str)[offset];
- return 1;
- } else {
- *cp = GRAPHEME_INVALID_CODEPOINT;
- return 0;
- }
-}
-
-static inline size_t
-get_codepoint_utf8(const void *str, size_t len, size_t offset, uint_least32_t *cp)
-{
- if (offset < len) {
- return grapheme_decode_utf8((const char *)str + offset,
- len - offset, cp);
- } else {
- *cp = GRAPHEME_INVALID_CODEPOINT;
- return 0;
- }
-}
-
static size_t
next_word_break(const void *str, size_t len, size_t (*get_codepoint)
(const void *, size_t, size_t, uint_least32_t *))