util.h (3691B)
1 /* See LICENSE file for copyright and license details. */ 2 #ifndef UTIL_H 3 #define UTIL_H 4 5 #include <stdbool.h> 6 #include <stddef.h> 7 #include <stdint.h> 8 9 #include "../gen/types.h" 10 #include "../grapheme.h" 11 12 #undef MIN 13 #define MIN(x, y) ((x) < (y) ? (x) : (y)) 14 #undef MAX 15 #define MAX(x, y) ((x) > (y) ? (x) : (y)) 16 #undef LEN 17 #define LEN(x) (sizeof(x) / sizeof(*(x))) 18 19 #undef likely 20 #undef unlikely 21 #ifdef __has_builtin 22 #if __has_builtin(__builtin_expect) 23 #define likely(expr) __builtin_expect(!!(expr), 1) 24 #define unlikely(expr) __builtin_expect(!!(expr), 0) 25 #else 26 #define likely(expr) (expr) 27 #define unlikely(expr) (expr) 28 #endif 29 #else 30 #define likely(expr) (expr) 31 #define unlikely(expr) (expr) 32 #endif 33 34 /* 35 * Herodotus, the ancient greek historian and geographer, 36 * was criticized for including legends and other fantastic 37 * accounts into his works, among others by his contemporary 38 * Thucydides. 39 * 40 * The Herodotus readers and writers are tailored towards the needs 41 * of the library interface, doing all the dirty work behind the 42 * scenes. While the reader is relatively faithful in his accounts, 43 * the Herodotus writer will never fail and always claim to write the 44 * data. Internally, it only writes as much as it can, and will simply 45 * keep account of the rest. This way, we can properly signal truncation. 46 * 47 * In this sense, explaining the naming, the writer is always a bit 48 * inaccurate in his accounts. 49 * 50 */ 51 enum herodotus_status { 52 HERODOTUS_STATUS_SUCCESS, 53 HERODOTUS_STATUS_END_OF_BUFFER, 54 HERODOTUS_STATUS_SOFT_LIMIT_REACHED, 55 }; 56 57 enum herodotus_type { 58 HERODOTUS_TYPE_CODEPOINT, 59 HERODOTUS_TYPE_UTF8, 60 }; 61 62 typedef struct herodotus_reader { 63 enum herodotus_type type; 64 const void *src; 65 size_t srclen; 66 size_t off; 67 bool terminated_by_null; 68 size_t soft_limit[10]; 69 } HERODOTUS_READER; 70 71 typedef struct herodotus_writer { 72 enum herodotus_type type; 73 void *dest; 74 size_t destlen; 75 size_t off; 76 size_t first_unwritable_offset; 77 } HERODOTUS_WRITER; 78 79 struct proper { 80 /* 81 * prev_prop[1] prev_prop[0] | next_prop[0] next_prop[1] 82 */ 83 struct { 84 uint_least8_t prev_prop[2]; 85 uint_least8_t next_prop[2]; 86 } raw, skip; 87 88 HERODOTUS_READER mid_reader, raw_reader, skip_reader; 89 void *state; 90 uint_least8_t no_prop; 91 uint_least8_t (*get_break_prop)(uint_least32_t); 92 bool (*is_skippable_prop)(uint_least8_t); 93 void (*skip_shift_callback)(uint_least8_t, void *); 94 }; 95 96 void herodotus_reader_init(HERODOTUS_READER *, enum herodotus_type, 97 const void *, size_t); 98 void herodotus_reader_copy(const HERODOTUS_READER *, HERODOTUS_READER *); 99 void herodotus_reader_push_advance_limit(HERODOTUS_READER *, size_t); 100 void herodotus_reader_pop_limit(HERODOTUS_READER *); 101 size_t herodotus_reader_number_read(const HERODOTUS_READER *); 102 size_t herodotus_reader_next_word_break(const HERODOTUS_READER *); 103 size_t herodotus_reader_next_codepoint_break(const HERODOTUS_READER *); 104 enum herodotus_status herodotus_read_codepoint(HERODOTUS_READER *, bool, 105 uint_least32_t *); 106 107 void herodotus_writer_init(HERODOTUS_WRITER *, enum herodotus_type, void *, 108 size_t); 109 void herodotus_writer_nul_terminate(HERODOTUS_WRITER *); 110 size_t herodotus_writer_number_written(const HERODOTUS_WRITER *); 111 void herodotus_write_codepoint(HERODOTUS_WRITER *, uint_least32_t); 112 113 void proper_init(const HERODOTUS_READER *, void *, uint_least8_t, 114 uint_least8_t (*get_break_prop)(uint_least32_t), 115 bool (*is_skippable_prop)(uint_least8_t), 116 void (*skip_shift_callback)(uint_least8_t, void *), 117 struct proper *); 118 int proper_advance(struct proper *); 119 120 #endif /* UTIL_H */