grapheme_next_line_break_utf8.3 (2258B)
.Dd 2022-08-26
.Dt GRAPHEME_NEXT_LINE_BREAK_UTF8 3
.Os suckless.org
.Sh NAME
.Nm grapheme_next_line_break_utf8
.Nd determine byte-offset to next possible line break
.Sh SYNOPSIS
.In grapheme.h
.Ft size_t
.Fn grapheme_next_line_break_utf8 "const char *str" "size_t len"
.Sh DESCRIPTION
The
.Fn grapheme_next_line_break_utf8
function computes the offset (in bytes) to the next possible line
break (see
.Xr libgrapheme 7 )
in the UTF-8-encoded string
.Va str
of length
.Va len .
.Pp
If
.Va len
is set to
.Dv SIZE_MAX
(stdint.h is already included by grapheme.h) the string
.Va str
is interpreted to be NUL-terminated and processing stops when a
NUL-byte is encountered.
.Pp
For non-UTF-8 input data
.Xr grapheme_next_line_break 3
can be used instead.
.Sh RETURN VALUES
The
.Fn grapheme_next_line_break_utf8
function returns the offset (in bytes) to the next possible line
break in
.Va str
or 0 if
.Va str
is
.Dv NULL .
.Sh EXAMPLES
.Bd -literal
/* cc (-static) -o example example.c -lgrapheme */
#include <grapheme.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* UTF-8 encoded input */
	const char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0"
	                "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0"
	                "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0"
	                "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!";
	size_t ret, len, off;

	printf("Input: \\"%s\\"\\n", s);

	/*
	 * print each segment up to the next possible line break
	 * with its byte-length; SIZE_MAX marks s as NUL-terminated
	 */
	printf("Line segments in NUL-delimited input:\\n");
	for (off = 0; s[off] != '\\0'; off += ret) {
		ret = grapheme_next_line_break_utf8(s + off, SIZE_MAX);
		printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
	}
	printf("\\n");

	/*
	 * do the same, but this time string is length-delimited;
	 * 17 bytes end inside the first emoji sequence
	 */
	len = 17;
	printf("Line segments in input delimited to %zu bytes:\\n", len);
	for (off = 0; off < len; off += ret) {
		ret = grapheme_next_line_break_utf8(s + off, len - off);
		printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
	}

	return 0;
}
.Ed
.Sh SEE ALSO
.Xr grapheme_next_line_break 3 ,
.Xr libgrapheme 7
.Sh STANDARDS
.Fn grapheme_next_line_break_utf8
is compliant with the Unicode 14.0.0 specification.
.Sh AUTHORS
.An Laslo Hunhold Aq Mt dev@frign.de