libgrapheme

unicode string library
git clone git://git.suckless.org/libgrapheme
Log | Files | Refs | README | LICENSE

util.h (3691B)


      1 /* See LICENSE file for copyright and license details. */
      2 #ifndef UTIL_H
      3 #define UTIL_H
      4 
      5 #include <stdbool.h>
      6 #include <stddef.h>
      7 #include <stdint.h>
      8 
      9 #include "../gen/types.h"
     10 #include "../grapheme.h"
     11 
     12 #undef MIN /* drop any MIN defined by an earlier header so the definition below is the one in effect */
     13 #define MIN(x, y) ((x) < (y) ? (x) : (y)) /* smaller of x and y; the selected argument is evaluated twice, so pass no side effects */
     14 #undef MAX /* same precaution as for MIN */
     15 #define MAX(x, y) ((x) > (y) ? (x) : (y)) /* larger of x and y; same double-evaluation caveat as MIN */
     16 #undef LEN /* same precaution as for MIN */
     17 #define LEN(x) (sizeof(x) / sizeof(*(x))) /* element count of x; only correct when x is a real array, not a pointer */
     18 
     19 #undef likely /* branch-prediction hint macros; earlier definitions are discarded first */
     20 #undef unlikely
     21 #ifdef __has_builtin /* tested separately: `#if __has_builtin(...)` is a syntax error where __has_builtin is undefined */
     22 #if __has_builtin(__builtin_expect)
     23 #define likely(expr)   __builtin_expect(!!(expr), 1) /* !! collapses expr to 0 or 1 so it can match the expected value 1 */
     24 #define unlikely(expr) __builtin_expect(!!(expr), 0) /* same normalisation, expected value 0 */
     25 #else /* __has_builtin exists but reports no __builtin_expect */
     26 #define likely(expr)   (expr) /* plain pass-through: yields expr itself, not normalised to 0/1 */
     27 #define unlikely(expr) (expr)
     28 #endif /* __has_builtin(__builtin_expect) */
     29 #else /* compiler offers no __has_builtin at all */
     30 #define likely(expr)   (expr) /* same pass-through fallback as above */
     31 #define unlikely(expr) (expr)
     32 #endif /* __has_builtin */
     33 
     34 /*
     35  * Herodotus, the ancient Greek historian and geographer,
     36  * was criticized for including legends and other fantastic
     37  * accounts in his works, among others by his contemporary
     38  * Thucydides.
     39  *
     40  * The Herodotus readers and writers are tailored towards the needs
     41  * of the library interface, doing all the dirty work behind the
     42  * scenes. While the reader is relatively faithful in his accounts,
     43  * the Herodotus writer will never fail and always claim to write the
     44  * data. Internally, it only writes as much as it can, and will simply
     45  * keep account of the rest. This way, we can properly signal truncation.
     46  *
     47  * In this sense, explaining the naming, the writer is always a bit
     48  * inaccurate in his accounts.
     49  *
     50  */
     51 enum herodotus_status { /* result codes; this is the return type of herodotus_read_codepoint() */
     52 	HERODOTUS_STATUS_SUCCESS, /* first enumerator, so its value is 0; presumably "a codepoint was read" -- confirm in util.c */
     53 	HERODOTUS_STATUS_END_OF_BUFFER, /* value 1; presumably "the source is exhausted" -- confirm in util.c */
     54 	HERODOTUS_STATUS_SOFT_LIMIT_REACHED, /* value 2; presumably tied to HERODOTUS_READER.soft_limit -- confirm in util.c */
     55 };
     56 
     57 enum herodotus_type { /* tag stored in the `type` member of both readers and writers and passed to their init functions */
     58 	HERODOTUS_TYPE_CODEPOINT, /* NOTE(review): presumably the buffer is an array of codepoints -- confirm in util.c */
     59 	HERODOTUS_TYPE_UTF8, /* NOTE(review): presumably the buffer holds UTF-8 encoded bytes -- confirm in util.c */
     60 };
     61 
     62 typedef struct herodotus_reader { /* state of one reader over a read-only source buffer */
     63 	enum herodotus_type type; /* how src is to be interpreted */
     64 	const void *src; /* untyped, read-only source buffer */
     65 	size_t srclen; /* length of src; unit presumably depends on type -- confirm */
     66 	size_t off; /* presumably the current read offset into src -- confirm */
     67 	bool terminated_by_null; /* NOTE(review): presumably set when src is a NUL-terminated string rather than length-delimited -- confirm */
     68 	size_t soft_limit[10]; /* fixed room for 10 entries; presumably managed by push_advance_limit/pop_limit -- confirm */
     69 } HERODOTUS_READER;
     70 
     71 typedef struct herodotus_writer { /* state of one writer; per the Herodotus comment in this header it never fails and tracks what did not fit */
     72 	enum herodotus_type type; /* how dest is to be filled */
     73 	void *dest; /* untyped, writable destination buffer */
     74 	size_t destlen; /* length of dest; unit presumably depends on type -- confirm */
     75 	size_t off; /* presumably keeps counting past destlen so truncation can be signalled -- confirm */
     76 	size_t first_unwritable_offset; /* NOTE(review): presumably the first offset at which a write no longer fit -- confirm */
     77 } HERODOTUS_WRITER;
     78 
     79 struct proper { /* state passed to proper_init() and proper_advance(); several members mirror proper_init()'s parameters */
     80 	/*
     81 	 * prev_prop[1] prev_prop[0] | next_prop[0] next_prop[1]
     82 	 */
     83 	struct { /* index 0 is the entry adjacent to the '|' in the diagram above, index 1 the one further away */
     84 		uint_least8_t prev_prop[2];
     85 		uint_least8_t next_prop[2];
     86 	} raw, skip; /* two instances of the same window; presumably without and with skippable properties filtered out -- confirm */
     87 
     88 	HERODOTUS_READER mid_reader, raw_reader, skip_reader; /* three readers embedded by value */
     89 	void *state; /* opaque pointer; same type as the void * taken by skip_shift_callback */
     90 	uint_least8_t no_prop; /* NOTE(review): presumably the placeholder used where no property is available -- confirm */
     91 	uint_least8_t (*get_break_prop)(uint_least32_t); /* maps a uint_least32_t (presumably a codepoint) to a property value */
     92 	bool (*is_skippable_prop)(uint_least8_t); /* predicate over a property value */
     93 	void (*skip_shift_callback)(uint_least8_t, void *); /* receives a property value and a void * (presumably `state`) -- confirm */
     94 };
     95 
     96 void herodotus_reader_init(HERODOTUS_READER *, enum herodotus_type, /* set up a reader from a type tag, a read-only buffer and a size_t */
     97                            const void *, size_t); /* the size_t is presumably the buffer length -- confirm in util.c */
     98 void herodotus_reader_copy(const HERODOTUS_READER *, HERODOTUS_READER *); /* first argument is const: presumably copied into the second */
     99 void herodotus_reader_push_advance_limit(HERODOTUS_READER *, size_t); /* presumably adds an entry to soft_limit[]; meaning of the size_t to be confirmed */
    100 void herodotus_reader_pop_limit(HERODOTUS_READER *); /* presumably undoes the most recent push_advance_limit -- confirm */
    101 size_t herodotus_reader_number_read(const HERODOTUS_READER *); /* read-only query; unit of the returned count to be confirmed */
    102 size_t herodotus_reader_next_word_break(const HERODOTUS_READER *); /* read-only query returning a size_t; definition not visible here */
    103 size_t herodotus_reader_next_codepoint_break(const HERODOTUS_READER *); /* read-only query returning a size_t; definition not visible here */
    104 enum herodotus_status herodotus_read_codepoint(HERODOTUS_READER *, bool, /* NOTE(review): meaning of the bool is not visible here -- confirm */
    105                                                uint_least32_t *); /* presumably the out-parameter receiving the codepoint read */
    106 
    107 void herodotus_writer_init(HERODOTUS_WRITER *, enum herodotus_type, void *, /* set up a writer from a type tag, a writable buffer and a size_t */
    108                            size_t); /* presumably the buffer length -- confirm in util.c */
    109 void herodotus_writer_nul_terminate(HERODOTUS_WRITER *); /* NOTE(review): behaviour when the buffer is already full is not visible here */
    110 size_t herodotus_writer_number_written(const HERODOTUS_WRITER *); /* read-only query; may presumably exceed the buffer length to signal truncation -- confirm */
    111 void herodotus_write_codepoint(HERODOTUS_WRITER *, uint_least32_t); /* returns nothing, in line with the "writer never fails" note in this header */
    112 
    113 void proper_init(const HERODOTUS_READER *, void *, uint_least8_t, /* reader is const; the void * and uint_least8_t presumably become state and no_prop -- confirm */
    114                  uint_least8_t (*get_break_prop)(uint_least32_t), /* same name and type as the struct proper member */
    115                  bool (*is_skippable_prop)(uint_least8_t), /* same name and type as the struct proper member */
    116                  void (*skip_shift_callback)(uint_least8_t, void *), /* same name and type as the struct proper member */
    117                  struct proper *); /* the only non-const object pointer besides the void *: presumably the structure being initialised */
    118 int proper_advance(struct proper *); /* NOTE(review): meaning of the int return value is not visible here -- confirm in util.c */
    119 
    120 #endif /* UTIL_H */