grapheme_next_sentence_break_utf8.3 (2368B)
1 .Dd 2022-08-26 2 .Dt GRAPHEME_NEXT_SENTENCE_BREAK_UTF8 3 3 .Os suckless.org 4 .Sh NAME 5 .Nm grapheme_next_sentence_break_utf8 6 .Nd determine byte-offset to next sentence break 7 .Sh SYNOPSIS 8 .In grapheme.h 9 .Ft size_t 10 .Fn grapheme_next_sentence_break_utf8 "const char *str" "size_t len" 11 .Sh DESCRIPTION 12 The 13 .Fn grapheme_next_sentence_break_utf8 14 function computes the offset (in bytes) to the next sentence 15 break (see 16 .Xr libgrapheme 7 ) 17 in the UTF-8-encoded string 18 .Va str 19 of length 20 .Va len . 21 If a sentence begins at 22 .Va str 23 this offset is equal to the length of said sentence. 24 .Pp 25 If 26 .Va len 27 is set to 28 .Dv SIZE_MAX 29 (stdint.h is already included by grapheme.h) the string 30 .Va str 31 is interpreted to be NUL-terminated and processing stops when a 32 NUL-byte is encountered. 33 .Pp 34 For non-UTF-8 input data 35 .Xr grapheme_next_sentence_break 3 36 can be used instead. 37 .Sh RETURN VALUES 38 The 39 .Fn grapheme_next_sentence_break_utf8 40 function returns the offset (in bytes) to the next sentence 41 break in 42 .Va str 43 or 0 if 44 .Va str 45 is 46 .Dv NULL . 
47 .Sh EXAMPLES 48 .Bd -literal 49 /* cc (-static) -o example example.c -lgrapheme */ 50 #include <grapheme.h> 51 #include <stdint.h> 52 #include <stdio.h> 53 54 int 55 main(void) 56 { 57 /* UTF-8 encoded input */ 58 char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0" 59 "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0" 60 "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0" 61 "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!"; 62 size_t ret, len, off; 63 64 printf("Input: \\"%s\\"\\n", s); 65 66 /* print each sentence with byte-length */ 67 printf("Sentences in NUL-delimited input:\\n"); 68 for (off = 0; s[off] != '\\0'; off += ret) { 69 ret = grapheme_next_sentence_break_utf8(s + off, SIZE_MAX); 70 printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off, ret); 71 } 72 printf("\\n"); 73 74 /* do the same, but this time string is length-delimited */ 75 len = 17; 76 printf("Sentences in input delimited to %zu bytes:\\n", len); 77 for (off = 0; off < len; off += ret) { 78 ret = grapheme_next_sentence_break_utf8(s + off, len - off); 79 printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off, ret); 80 } 81 82 return 0; 83 } 84 .Ed 85 .Sh SEE ALSO 86 .Xr grapheme_next_sentence_break 3 , 87 .Xr libgrapheme 7 88 .Sh STANDARDS 89 .Fn grapheme_next_sentence_break_utf8 90 is compliant with the Unicode 14.0.0 specification. 91 .Sh AUTHORS 92 .An Laslo Hunhold Aq Mt dev@frign.de