grapheme_character_nextbreak.3 (1734B)
.\" mdoc manual page for grapheme_character_nextbreak(3).
.Dd 2021-12-18
.Dt GRAPHEME_CHARACTER_NEXTBREAK 3
.Os suckless.org
.Sh NAME
.Nm grapheme_character_nextbreak
.Nd determine byte-offset to next grapheme cluster break
.Sh SYNOPSIS
.In grapheme.h
.Ft size_t
.Fn grapheme_character_nextbreak "const char *str"
.Sh DESCRIPTION
The
.Fn grapheme_character_nextbreak
function computes the offset (in bytes) to the next grapheme
cluster break (see
.Xr libgrapheme 7 )
in the UTF-8-encoded NUL-terminated string
.Va str .
If a grapheme cluster begins at
.Va str
this offset is equal to the length of said grapheme cluster.
.Pp
For non-UTF-8 input data
.Xr grapheme_character_isbreak 3
can be used instead.
.Sh RETURN VALUES
The
.Fn grapheme_character_nextbreak
function returns the offset (in bytes) to the next grapheme cluster
break in
.Va str
or 0 if
.Va str
is
.Dv NULL .
.Sh EXAMPLES
.Bd -literal
/* cc (-static) -o example example.c -lgrapheme */
#include <grapheme.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* UTF-8 encoded input */
	const char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0"
	                "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0"
	                "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0"
	                "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!";
	size_t len;

	printf("Input: \\"%s\\"\\n", s);

	/* print each grapheme cluster with byte-length */
	while (*s != '\\0') {
		len = grapheme_character_nextbreak(s);
		/* the precision of %.*s is passed as an int, hence the cast */
		printf("%2zu bytes | %.*s\\n", len, (int)len, s);
		s += len;
	}

	return 0;
}
.Ed
.Sh SEE ALSO
.Xr grapheme_character_isbreak 3 ,
.Xr libgrapheme 7
.Sh STANDARDS
.Fn grapheme_character_nextbreak
is compliant with the Unicode 14.0.0 specification.
.Sh AUTHORS
.An Laslo Hunhold Aq Mt dev@frign.de