grapheme_next_character_break_utf8.3 (2492B)
1 .Dd 2022-08-26 2 .Dt GRAPHEME_NEXT_CHARACTER_BREAK_UTF8 3 3 .Os suckless.org 4 .Sh NAME 5 .Nm grapheme_next_character_break_utf8 6 .Nd determine byte-offset to next grapheme cluster break 7 .Sh SYNOPSIS 8 .In grapheme.h 9 .Ft size_t 10 .Fn grapheme_next_character_break_utf8 "const char *str" "size_t len" 11 .Sh DESCRIPTION 12 The 13 .Fn grapheme_next_character_break_utf8 14 function computes the offset (in bytes) to the next grapheme 15 cluster break (see 16 .Xr libgrapheme 7 ) 17 in the UTF-8-encoded string 18 .Va str 19 of length 20 .Va len . 21 If a grapheme cluster begins at 22 .Va str 23 this offset is equal to the length of said grapheme cluster. 24 .Pp 25 If 26 .Va len 27 is set to 28 .Dv SIZE_MAX 29 (stdint.h is already included by grapheme.h) the string 30 .Va str 31 is interpreted to be NUL-terminated and processing stops when a 32 NUL-byte is encountered. 33 .Pp 34 For non-UTF-8 input data 35 .Xr grapheme_is_character_break 3 36 and 37 .Xr grapheme_next_character_break 3 38 can be used instead. 39 .Sh RETURN VALUES 40 The 41 .Fn grapheme_next_character_break_utf8 42 function returns the offset (in bytes) to the next grapheme cluster 43 break in 44 .Va str 45 or 0 if 46 .Va str 47 is 48 .Dv NULL . 
49 .Sh EXAMPLES 50 .Bd -literal 51 /* cc (-static) -o example example.c -lgrapheme */ 52 #include <grapheme.h> 53 #include <stdint.h> 54 #include <stdio.h> 55 56 int 57 main(void) 58 { 59 /* UTF-8 encoded input */ 60 char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0" 61 "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0" 62 "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0" 63 "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!"; 64 size_t ret, len, off; 65 66 printf("Input: \\"%s\\"\\n", s); 67 68 /* print each grapheme cluster with byte-length */ 69 printf("Grapheme clusters in NUL-delimited input:\\n"); 70 for (off = 0; s[off] != '\\0'; off += ret) { 71 ret = grapheme_next_character_break_utf8(s + off, SIZE_MAX); 72 printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off); 73 } 74 printf("\\n"); 75 76 /* do the same, but this time string is length-delimited */ 77 len = 17; 78 printf("Grapheme clusters in input delimited to %zu bytes:\\n", len); 79 for (off = 0; off < len; off += ret) { 80 ret = grapheme_next_character_break_utf8(s + off, len - off); 81 printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off); 82 } 83 84 return 0; 85 } 86 .Ed 87 .Sh SEE ALSO 88 .Xr grapheme_is_character_break 3 , 89 .Xr grapheme_next_character_break 3 , 90 .Xr libgrapheme 7 91 .Sh STANDARDS 92 .Fn grapheme_next_character_break_utf8 93 is compliant with the Unicode 14.0.0 specification. 94 .Sh AUTHORS 95 .An Laslo Hunhold Aq Mt dev@frign.de