libgrapheme

unicode string library
git clone git://git.suckless.org/libgrapheme
Log | Files | Refs | README | LICENSE

line.c (14965B)


      1 /* See LICENSE file for copyright and license details. */
      2 #include <stdbool.h>
      3 #include <stddef.h>
      4 #include <stdlib.h>
      5 #include <string.h>
      6 
      7 #include "../gen/line.h"
      8 #include "../grapheme.h"
      9 #include "util.h"
     10 
     11 static inline enum line_break_property
     12 get_break_prop(uint_least32_t cp)
     13 {
     14 	if (likely(cp <= 0x10FFFF)) {
     15 		return (enum line_break_property)
     16 		       line_break_minor[line_break_major[cp >> 8] + (cp & 0xff)];
     17 	} else {
     18 		return LINE_BREAK_PROP_AL;
     19 	}
     20 }
     21 
     22 static inline size_t
     23 get_codepoint(const void *str, size_t len, size_t offset, uint_least32_t *cp)
     24 {
     25 	if (offset < len) {
     26 		*cp = ((const uint_least32_t *)str)[offset];
     27 		return 1;
     28 	} else {
     29 		*cp = GRAPHEME_INVALID_CODEPOINT;
     30 		return 0;
     31 	}
     32 }
     33 
     34 static inline size_t
     35 get_codepoint_utf8(const void *str, size_t len, size_t offset, uint_least32_t *cp)
     36 {
     37 	if (offset < len) {
     38 		return grapheme_decode_utf8((const char *)str + offset,
     39 		                            len - offset, cp);
     40 	} else {
     41 		*cp = GRAPHEME_INVALID_CODEPOINT;
     42 		return 0;
     43 	}
     44 }
     45 
     46 static size_t
     47 next_line_break(const void *str, size_t len, size_t (*get_codepoint)
     48                 (const void *, size_t, size_t, uint_least32_t *))
     49 {
     50 	enum line_break_property cp0_prop, cp1_prop, last_non_cm_or_zwj_prop,
     51 	                         last_non_sp_prop, last_non_sp_cm_or_zwj_prop;
     52 	enum line_break_property res;
     53 	uint_least32_t cp;
     54 	uint_least8_t lb25_level = 0;
     55 	size_t off, new_off;
     56 	bool lb21a_flag = false, ri_even = true;
     57 
     58 	/* check degenerate cases */
     59 	if (str == NULL || len == 0) {
     60 		return 0;
     61 	}
     62 
     63 	/*
     64 	 * Apply line breaking algorithm (UAX #14), see
     65 	 * https://unicode.org/reports/tr14/#Algorithm and tailoring
     66 	 * https://unicode.org/reports/tr14/#Examples (example 7),
     67 	 * given the automatic test-cases implement this example for
     68 	 * better number handling.
     69 	 *
     70 	 */
     71 
     72 	/*
     73 	 * Initialize the different properties such that we have
     74 	 * a good state after the state-update in the loop
     75 	 */
     76 	cp0_prop = NUM_LINE_BREAK_PROPS;
     77 	if ((off = get_codepoint(str, len, 0, &cp)) >= len) {
     78 		return 1;
     79 	}
     80 	cp1_prop = get_break_prop(cp);
     81 	last_non_cm_or_zwj_prop = LINE_BREAK_PROP_AL; /* according to LB10 */
     82 	last_non_sp_prop = last_non_sp_cm_or_zwj_prop = NUM_LINE_BREAK_PROPS;
     83 
     84 	for (; off < len; off = new_off) {
     85 		/* update state */
     86 		cp0_prop = cp1_prop;
     87 		if ((new_off = off + get_codepoint(str, len, off, &cp)) <= len) {
     88 			get_codepoint(str, len, off, &cp);
     89 			cp1_prop = get_break_prop(cp);
     90 		} else {
     91 			/* LB3 */
     92 			break;
     93 		}
     94 
     95 		/* update retention-states */
     96 
     97 		/*
     98 		 * store the last observed non-CM-or-ZWJ-property for
     99 		 * LB9 and following.
    100 		 */
    101 		if (cp0_prop != LINE_BREAK_PROP_CM &&
    102 		    cp0_prop != LINE_BREAK_PROP_ZWJ) {
    103 			/*
    104 			 * check if the property we are overwriting now is an
    105 			 * HL. If so, we set the LB21a-flag which depends on this
    106 			 * knowledge.
    107 			 */
    108 			lb21a_flag = (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL);
    109 
    110 			/* check regional indicator state */
    111 			if (cp0_prop == LINE_BREAK_PROP_RI) {
    112 				/*
    113 				 * The property we just shifted in is
    114 				 * a regional indicator, increasing the
    115 				 * number of consecutive RIs on the left
    116 				 * side of the breakpoint by one, changing
    117 				 * the oddness.
    118 				 *
    119 				 */
    120 				ri_even = !ri_even;
    121 			} else {
    122 				/*
    123 				 * We saw no regional indicator, so the
    124 				 * number of consecutive RIs on the left
    125 				 * side of the breakpoint is zero, which
    126 				 * is an even number.
    127 				 *
    128 				 */
    129 				ri_even = true;
    130 			}
    131 
    132 			/*
    133 			 * Here comes a bit of magic. The tailored rule
    134 			 * LB25 (using example 7) has a very complicated
    135 			 * left-hand-side-rule of the form
    136 			 *
    137 			 *  NU (NU | SY | IS)* (CL | CP)?
    138 			 *
    139 			 * but instead of backtracking, we keep the state
    140 			 * as some kind of "power level" in the variable
    141 			 *
    142 			 *  lb25_level
    143 			 *
    144 			 * that goes from 0 to 3
    145 			 *
    146 			 *  0: we are not in the sequence
    147 			 *  1: we have one NU to the left of the middle
    148 			 *     spot
    149 			 *  2: we have one NU and one or more (NU | SY | IS)
    150 			 *     to the left of the middle spot
    151 			 *  3: we have one NU, zero or more (NU | SY | IS)
    152 			 *     and one (CL | CP) to the left of the middle
    153 			 *     spot
    154 			 */
    155 			if (lb25_level == 0 && cp0_prop == LINE_BREAK_PROP_NU) {
    156 				/* sequence has begun */
    157 				lb25_level = 1;
    158 			} else if ((lb25_level == 1 || lb25_level == 2) &&
    159 			           (cp0_prop == LINE_BREAK_PROP_NU ||
    160 			            cp0_prop == LINE_BREAK_PROP_SY ||
    161 			            cp0_prop == LINE_BREAK_PROP_IS)) {
    162 				/* (NU | SY | IS) sequence begins or continued */
    163 				lb25_level = 2;
    164 			} else if ((lb25_level == 1 || lb25_level == 2) &&
    165 			           (cp0_prop == LINE_BREAK_PROP_CL                 ||
    166 				    cp0_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
    167 				    cp0_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF)) {
    168 				/* CL or CP at the end of the sequence */
    169 				lb25_level = 3;
    170 			} else {
    171 				/* sequence broke */
    172 				lb25_level = 0;
    173 			}
    174 
    175 			last_non_cm_or_zwj_prop = cp0_prop;
    176 		}
    177 
    178 		/*
    179 		 * store the last observed non-SP-property for LB8, LB14,
    180 		 * LB15, LB16 and LB17. LB8 gets its own unskipped property,
    181 		 * whereas the others build on top of the CM-ZWJ-skipped
    182 		 * properties as they come after LB9
    183 		 */
    184 		if (cp0_prop != LINE_BREAK_PROP_SP) {
    185 			last_non_sp_prop = cp0_prop;
    186 		}
    187 		if (last_non_cm_or_zwj_prop != LINE_BREAK_PROP_SP) {
    188 			last_non_sp_cm_or_zwj_prop = last_non_cm_or_zwj_prop;
    189 		}
    190 
    191 		/* apply the algorithm */
    192 
    193 		/* LB4 */
    194 		if (cp0_prop == LINE_BREAK_PROP_BK) {
    195 			break;
    196 		}
    197 
    198 		/* LB5 */
    199 		if (cp0_prop == LINE_BREAK_PROP_CR &&
    200 		    cp1_prop == LINE_BREAK_PROP_LF) {
    201 			continue;
    202 		}
    203 		if (cp0_prop == LINE_BREAK_PROP_CR ||
    204 		    cp0_prop == LINE_BREAK_PROP_LF ||
    205 		    cp0_prop == LINE_BREAK_PROP_NL) {
    206 			break;
    207 		}
    208 
    209 		/* LB6 */
    210 		if (cp1_prop == LINE_BREAK_PROP_BK ||
    211 		    cp1_prop == LINE_BREAK_PROP_CR ||
    212 		    cp1_prop == LINE_BREAK_PROP_LF ||
    213 		    cp1_prop == LINE_BREAK_PROP_NL) {
    214 			continue;
    215 		}
    216 
    217 		/* LB7 */
    218 		if (cp1_prop == LINE_BREAK_PROP_SP ||
    219 		    cp1_prop == LINE_BREAK_PROP_ZW) {
    220 			continue;
    221 		}
    222 
    223 		/* LB8 */
    224 		if (last_non_sp_prop == LINE_BREAK_PROP_ZW) {
    225 			break;
    226 		}
    227 
    228 		/* LB8a */
    229 		if (cp0_prop == LINE_BREAK_PROP_ZWJ) {
    230 			continue;
    231 		}
    232 
    233 		/* LB9 */
    234 		if ((cp0_prop != LINE_BREAK_PROP_BK &&
    235 		     cp0_prop != LINE_BREAK_PROP_CR &&
    236 		     cp0_prop != LINE_BREAK_PROP_LF &&
    237 		     cp0_prop != LINE_BREAK_PROP_NL &&
    238 		     cp0_prop != LINE_BREAK_PROP_SP &&
    239 		     cp0_prop != LINE_BREAK_PROP_ZW) &&
    240 		    (cp1_prop == LINE_BREAK_PROP_CM ||
    241 		     cp1_prop == LINE_BREAK_PROP_ZWJ)) {
    242 			/*
    243 			 * given we skip them, we don't break in such
    244 			 * a sequence
    245 			 */
    246 			continue;
    247 		}
    248 
    249 		/* LB10 is baked into the following rules */
    250 
    251 		/* LB11 */
    252 		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_WJ ||
    253 		    cp1_prop == LINE_BREAK_PROP_WJ) {
    254 			continue;
    255 		}
    256 
    257 		/* LB12 */
    258 		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_GL) {
    259 			continue;
    260 		}
    261 
    262 		/* LB12a */
    263 		if ((last_non_cm_or_zwj_prop != LINE_BREAK_PROP_SP &&
    264 		     last_non_cm_or_zwj_prop != LINE_BREAK_PROP_BA &&
    265 		     last_non_cm_or_zwj_prop != LINE_BREAK_PROP_HY) &&
    266 		    cp1_prop == LINE_BREAK_PROP_GL) {
    267 			continue;
    268 		}
    269 
    270 		/* LB13 (affected by tailoring for LB25, see example 7) */
    271 		if (cp1_prop == LINE_BREAK_PROP_EX ||
    272 		    (last_non_cm_or_zwj_prop != LINE_BREAK_PROP_NU &&
    273 		     (cp1_prop == LINE_BREAK_PROP_CL                 ||
    274 		      cp1_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
    275 		      cp1_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF    ||
    276 		      cp1_prop == LINE_BREAK_PROP_IS                 ||
    277 		      cp1_prop == LINE_BREAK_PROP_SY))) {
    278 			continue;
    279 		}
    280 
    281 		/* LB14 */
    282 		if (last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF ||
    283 		    last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF) {
    284 			continue;
    285 		}
    286 
    287 		/* LB15 */
    288 		if (last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_QU &&
    289 		    (cp1_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF ||
    290 		     cp1_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF)) {
    291 			continue;
    292 		}
    293 
    294 		/* LB16 */
    295 		if ((last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CL                 ||
    296 		     last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
    297 		     last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF) &&
    298 		    cp1_prop == LINE_BREAK_PROP_NS) {
    299 			continue;
    300 		}
    301 
    302 		/* LB17 */
    303 		if (last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_B2 &&
    304 		    cp1_prop == LINE_BREAK_PROP_B2) {
    305 			continue;
    306 		}
    307 
    308 		/* LB18 */
    309 		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_SP) {
    310 			break;
    311 		}
    312 
    313 		/* LB19 */
    314 		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_QU ||
    315 		    cp1_prop == LINE_BREAK_PROP_QU) {
    316 			continue;
    317 		}
    318 
    319 		/* LB20 */
    320 		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_CB ||
    321 		    cp1_prop == LINE_BREAK_PROP_CB) {
    322 			break;
    323 		}
    324 
    325 		/* LB21 */
    326 		if (cp1_prop == LINE_BREAK_PROP_BA ||
    327 		    cp1_prop == LINE_BREAK_PROP_HY ||
    328 		    cp1_prop == LINE_BREAK_PROP_NS ||
    329 		    last_non_cm_or_zwj_prop == LINE_BREAK_PROP_BB) {
    330 			continue;
    331 		}
    332 
    333 		/* LB21a */
    334 		if (lb21a_flag &&
    335 		    (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HY ||
    336 		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_BA)) {
    337 			continue;
    338 		}
    339 
    340 		/* LB21b */
    341 		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_SY &&
    342 		    cp1_prop == LINE_BREAK_PROP_HL) {
    343 			continue;
    344 		}
    345 
    346 		/* LB22 */
    347 		if (cp1_prop == LINE_BREAK_PROP_IN) {
    348 			continue;
    349 		}
    350 
    351 		/* LB23 */
    352 		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL  ||
    353 		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) &&
    354 		    cp1_prop == LINE_BREAK_PROP_NU) {
    355 			continue;
    356 		}
    357 		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_NU &&
    358 		    (cp1_prop == LINE_BREAK_PROP_AL ||
    359 		     cp1_prop == LINE_BREAK_PROP_HL)) {
    360 			continue;
    361 		}
    362 
    363 		/* LB23a */
    364 		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PR &&
    365 		    (cp1_prop == LINE_BREAK_PROP_ID ||
    366 		     cp1_prop == LINE_BREAK_PROP_EB ||
    367 		     cp1_prop == LINE_BREAK_PROP_EM)) {
    368 			continue;
    369 		}
    370 		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_ID ||
    371 		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_EB ||
    372 		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_EM) &&
    373 		    cp1_prop == LINE_BREAK_PROP_PO) {
    374 			continue;
    375 		}
    376 
    377 		/* LB24 */
    378 		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PR ||
    379 		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PO) &&
    380 		    (cp1_prop == LINE_BREAK_PROP_AL  ||
    381 		     cp1_prop == LINE_BREAK_PROP_HL)) {
    382 			continue;
    383 		}
    384 		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL  ||
    385 		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) &&
    386 		    (cp1_prop == LINE_BREAK_PROP_PR ||
    387 		     cp1_prop == LINE_BREAK_PROP_PO)) {
    388 			continue;
    389 		}
    390 
    391 		/* LB25 (tailored with example 7) */
    392 		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PR ||
    393 		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PO)) {
    394 			if (cp1_prop == LINE_BREAK_PROP_NU) {
    395 				continue;
    396 			}
    397 
    398 			/* this stupid rule is the reason why we cannot
    399 			 * simply have a stateful break-detection between
    400 			 * two adjacent codepoints as we have it with
    401 			 * characters.
    402 			 */
    403 			if (new_off < len &&
    404 			    (cp1_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF ||
    405 			     cp1_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF    ||
    406 			     cp1_prop == LINE_BREAK_PROP_HY)) {
    407 				get_codepoint(str, len, new_off, &cp);
    408 				res = get_break_prop(cp);
    409 
    410 				if (res == LINE_BREAK_PROP_NU) {
    411 					continue;
    412 				}
    413 			}
    414 		}
    415 		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF ||
    416 		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF    ||
    417 		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HY) &&
    418 		    cp1_prop == LINE_BREAK_PROP_NU) {
    419 			continue;
    420 		}
    421 		if (lb25_level == 1 &&
    422 		    (cp1_prop == LINE_BREAK_PROP_NU ||
    423 		     cp1_prop == LINE_BREAK_PROP_SY ||
    424 		     cp1_prop == LINE_BREAK_PROP_IS)) {
    425 			continue;
    426 		}
    427 		if ((lb25_level == 1 || lb25_level == 2) &&
    428 		    (cp1_prop == LINE_BREAK_PROP_NU                 ||
    429 		     cp1_prop == LINE_BREAK_PROP_SY                 ||
    430 		     cp1_prop == LINE_BREAK_PROP_IS                 ||
    431 		     cp1_prop == LINE_BREAK_PROP_CL                 ||
    432 		     cp1_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
    433 		     cp1_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF)) {
    434 			continue;
    435 		}
    436 		if ((lb25_level == 1 || lb25_level == 2 || lb25_level == 3) &&
    437 		    (cp1_prop == LINE_BREAK_PROP_PO ||
    438 		     cp1_prop == LINE_BREAK_PROP_PR)) {
    439 			continue;
    440 		}
    441 
    442 		/* LB26 */
    443 		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_JL &&
    444 		    (cp1_prop == LINE_BREAK_PROP_JL ||
    445 		     cp1_prop == LINE_BREAK_PROP_JV ||
    446 		     cp1_prop == LINE_BREAK_PROP_H2 ||
    447 		     cp1_prop == LINE_BREAK_PROP_H3)) {
    448 			continue;
    449 		}
    450 		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_JV ||
    451 		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_H2) &&
    452 		    (cp1_prop == LINE_BREAK_PROP_JV ||
    453 		     cp1_prop == LINE_BREAK_PROP_JT)) {
    454 			continue;
    455 		}
    456 		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_JT ||
    457 		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_H3) &&
    458 		    cp1_prop == LINE_BREAK_PROP_JT) {
    459 			continue;
    460 		}
    461 
    462 		/* LB27 */
    463 		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_JL ||
    464 		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_JV ||
    465 		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_JT ||
    466 		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_H2 ||
    467 		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_H3) &&
    468 		    cp1_prop == LINE_BREAK_PROP_PO) {
    469 			continue;
    470 		}
    471 		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PR &&
    472 		    (cp1_prop == LINE_BREAK_PROP_JL ||
    473 		     cp1_prop == LINE_BREAK_PROP_JV ||
    474 		     cp1_prop == LINE_BREAK_PROP_JT ||
    475 		     cp1_prop == LINE_BREAK_PROP_H2 ||
    476 		     cp1_prop == LINE_BREAK_PROP_H3)) {
    477 			continue;
    478 		}
    479 
    480 		/* LB28 */
    481 		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL  ||
    482 		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) &&
    483 		    (cp1_prop == LINE_BREAK_PROP_AL  ||
    484 		     cp1_prop == LINE_BREAK_PROP_HL)) {
    485 			continue;
    486 		}
    487 
    488 		/* LB29 */
    489 		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_IS &&
    490 		    (cp1_prop == LINE_BREAK_PROP_AL  ||
    491 		     cp1_prop == LINE_BREAK_PROP_HL)) {
    492 			continue;
    493 		}
    494 
    495 		/* LB30 */
    496 		if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL  ||
    497 		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL  ||
    498 		     last_non_cm_or_zwj_prop == LINE_BREAK_PROP_NU) &&
    499 		    cp1_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF) {
    500 			continue;
    501 		}
    502 		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF &&
    503 		    (cp1_prop == LINE_BREAK_PROP_AL  ||
    504 		     cp1_prop == LINE_BREAK_PROP_HL  ||
    505 		     cp1_prop == LINE_BREAK_PROP_NU)) {
    506 			continue;
    507 		}
    508 
    509 		/* LB30a */
    510 		if (!ri_even &&
    511 		    last_non_cm_or_zwj_prop == LINE_BREAK_PROP_RI &&
    512 		    cp1_prop == LINE_BREAK_PROP_RI) {
    513 			continue;
    514 		}
    515 
    516 		/* LB30b */
    517 		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_EB &&
    518 		    cp1_prop == LINE_BREAK_PROP_EM) {
    519 			continue;
    520 		}
    521 		if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_BOTH_CN_EXTPICT &&
    522 		    cp1_prop == LINE_BREAK_PROP_EM) {
    523 			continue;
    524 		}
    525 
    526 		/* LB31 */
    527 		break;
    528 	}
    529 
    530 	return off;
    531 }
    532 
    533 size_t
    534 grapheme_next_line_break(const uint_least32_t *str, size_t len)
    535 {
    536 	return next_line_break(str, len, get_codepoint);
    537 }
    538 
    539 size_t
    540 grapheme_next_line_break_utf8(const char *str, size_t len)
    541 {
    542 	return next_line_break(str, len, get_codepoint_utf8);
    543 }