next_break_utf8.sh (2809B)
# Template for the grapheme_next_<TYPE>_break_utf8(3) manual page.
#
# Writes an mdoc(7) document to standard output.  The following variables
# are read but not set here, so the caller has to provide them:
#
#   MAN_DATE  date inserted into the .Dd line
#   TYPE      break type used in the function name; the values "character"
#             and "line" additionally toggle optional paragraphs below
#   REALTYPE  human-readable name of the break unit used in running text
#
# Notes on quoting inside the here-document:
#  - The delimiter is unquoted, so parameter expansion and command
#    substitution are active.  ${TYPE} is written with braces because an
#    identifier character follows it directly.
#  - The shell reduces every "\\" to "\" while reading the here-document,
#    so "\\\\" in the C example reaches the output as "\\".
#    NOTE(review): presumably the man page formatter then renders that as a
#    single backslash -- confirm against the formatter in use.
#  - The operands of tr(1) are quoted so that the shell cannot treat the
#    bracket expressions as filename patterns.
cat << EOF
.Dd $MAN_DATE
.Dt GRAPHEME_NEXT_$(printf "%s" "$TYPE" | tr "[:lower:]" "[:upper:]")_BREAK_UTF8 3
.Os suckless.org
.Sh NAME
.Nm grapheme_next_${TYPE}_break_utf8
.Nd determine byte-offset to next $REALTYPE break
.Sh SYNOPSIS
.In grapheme.h
.Ft size_t
.Fn grapheme_next_${TYPE}_break_utf8 "const char *str" "size_t len"
.Sh DESCRIPTION
The
.Fn grapheme_next_${TYPE}_break_utf8
function computes the offset (in bytes) to the next $REALTYPE
break (see
.Xr libgrapheme 7 )
in the UTF-8-encoded string
.Va str
of length
.Va len .$(if [ "$TYPE" != "line" ]; then printf "\nIf a %s begins at\n.Va str\nthis offset is equal to the length of said %s." "$REALTYPE" "$REALTYPE"; fi)
.Pp
If
.Va len
is set to
.Dv SIZE_MAX
(stdint.h is already included by grapheme.h) the string
.Va str
is interpreted to be NUL-terminated and processing stops when a
NUL-byte is encountered.
.Pp
For non-UTF-8 input data$(if [ "$TYPE" = "character" ]; then printf "\n.Xr grapheme_is_character_break 3\nand"; fi)
.Xr grapheme_next_${TYPE}_break 3
can be used instead.
.Sh RETURN VALUES
The
.Fn grapheme_next_${TYPE}_break_utf8
function returns the offset (in bytes) to the next $REALTYPE
break in
.Va str
or 0 if
.Va str
is
.Dv NULL .
.Sh EXAMPLES
.Bd -literal
/* cc (-static) -o example example.c -lgrapheme */
#include <grapheme.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* UTF-8 encoded input */
	char *s = "T\\\\xC3\\\\xABst \\\\xF0\\\\x9F\\\\x91\\\\xA8\\\\xE2\\\\x80\\\\x8D\\\\xF0"
	          "\\\\x9F\\\\x91\\\\xA9\\\\xE2\\\\x80\\\\x8D\\\\xF0\\\\x9F\\\\x91\\\\xA6 \\\\xF0"
	          "\\\\x9F\\\\x87\\\\xBA\\\\xF0\\\\x9F\\\\x87\\\\xB8 \\\\xE0\\\\xA4\\\\xA8\\\\xE0"
	          "\\\\xA5\\\\x80 \\\\xE0\\\\xAE\\\\xA8\\\\xE0\\\\xAE\\\\xBF!";
	size_t ret, len, off;

	printf("Input: \\\\"%s\\\\"\\\\n", s);

	/* print each $REALTYPE with byte-length */
	printf("${REALTYPE}s in NUL-delimited input:\\\\n");
	for (off = 0; s[off] != '\\\\0'; off += ret) {
		ret = grapheme_next_${TYPE}_break_utf8(s + off, SIZE_MAX);
		printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off);
	}
	printf("\\\\n");

	/* do the same, but this time string is length-delimited */
	len = 17;
	printf("${REALTYPE}s in input delimited to %zu bytes:\\\\n", len);
	for (off = 0; off < len; off += ret) {
		ret = grapheme_next_${TYPE}_break_utf8(s + off, len - off);
		printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off);
	}

	return 0;
}
.Ed
.Sh SEE ALSO$(if [ "$TYPE" = "character" ]; then printf "\n.Xr grapheme_is_character_break 3 ,"; fi)
.Xr grapheme_next_${TYPE}_break 3 ,
.Xr libgrapheme 7
.Sh STANDARDS
.Fn grapheme_next_${TYPE}_break_utf8
is compliant with the Unicode 14.0.0 specification.
.Sh AUTHORS
.An Laslo Hunhold Aq Mt dev@frign.de
EOF