next_break.sh (3397B)
# Man page template for the grapheme_next_<TYPE>_break[_utf8] functions.
#
# Writes an mdoc(7) document to standard output. The following variables
# are read from the environment and must be set by the caller:
#
#   ENCODING         "utf8" selects the byte-oriented _utf8 variant; any
#                    other value selects the codepoint-array variant
#   TYPE             break type as used in the function name
#   REALTYPE         break type as used in running text
#   MAN_DATE         value for the .Dd macro
#   UNICODE_VERSION  Unicode version named in the STANDARDS section

# SUFFIX is appended to the name of the function documented here,
# ANTISUFFIX to the name of its counterpart for the other encoding.
if [ "$ENCODING" = "utf8" ]; then
	UNIT="byte"
	SUFFIX="_utf8"
	ANTISUFFIX=""
else
	UNIT="codepoint"
	SUFFIX=""
	ANTISUFFIX="_utf8"
fi

# The heredocs are unquoted on purpose: parameter expansions and command
# substitutions inside them are evaluated while the page is generated.
# The tr operands are quoted so the shell cannot glob-expand the bracket
# expressions against single-character file names in the working directory.
cat << EOF
.Dd ${MAN_DATE}
.Dt GRAPHEME_NEXT_$(printf "%s_break%s" "$TYPE" "$SUFFIX" | tr '[:lower:]' '[:upper:]') 3
.Os suckless.org
.Sh NAME
.Nm grapheme_next_${TYPE}_break${SUFFIX}
.Nd determine ${UNIT}-offset to next ${REALTYPE} break
.Sh SYNOPSIS
.In grapheme.h
.Ft size_t
.Fn grapheme_next_${TYPE}_break${SUFFIX} "const $(if [ "$ENCODING" = "utf8" ]; then printf "char"; else printf "uint_least32_t"; fi) *str" "size_t len"
.Sh DESCRIPTION
The
.Fn grapheme_next_${TYPE}_break${SUFFIX}
function computes the offset (in ${UNIT}s) to the next ${REALTYPE}
break (see
.Xr libgrapheme 7 )
in the $(if [ "$ENCODING" = "utf8" ]; then printf "UTF-8-encoded string"; else printf "codepoint array"; fi)
.Va str
of length
.Va len .$(if [ "$TYPE" != "line" ]; then printf "\nIf a %s begins at\n.Va str\nthis offset is equal to the length of said %s." "$REALTYPE" "$REALTYPE"; fi)
.Pp
If
.Va len
is set to
.Dv SIZE_MAX
(stdint.h is already included by grapheme.h) the string
.Va str
is interpreted to be NUL-terminated and processing stops when
a $(if [ "$ENCODING" = "utf8" ]; then printf "NUL-byte"; else printf "codepoint with the value 0"; fi) is encountered.
.Pp
For $(if [ "$ENCODING" != "utf8" ]; then printf "UTF-8-encoded"; else printf "non-UTF-8"; fi) input
data$(if [ "$TYPE" = "character" ] && [ "$ENCODING" = "utf8" ]; then printf "\n.Xr grapheme_is_character_break 3 and"; fi)
.Xr grapheme_next_${TYPE}_break${ANTISUFFIX} 3
can be used instead.
.Sh RETURN VALUES
The
.Fn grapheme_next_${TYPE}_break${SUFFIX}
function returns the offset (in ${UNIT}s) to the next ${REALTYPE}
break in
.Va str
or 0 if
.Va str
is
.Dv NULL .
EOF

# The example program is only emitted for the UTF-8 variant. Backslashes
# are quadrupled because they are halved once by the heredoc and once
# more by the roff formatter.
if [ "$ENCODING" = "utf8" ]; then
cat << EOF
.Sh EXAMPLES
.Bd -literal
/* cc (-static) -o example example.c -lgrapheme */
#include <grapheme.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* UTF-8 encoded input */
	char *s = "T\\\\xC3\\\\xABst \\\\xF0\\\\x9F\\\\x91\\\\xA8\\\\xE2\\\\x80\\\\x8D\\\\xF0"
	          "\\\\x9F\\\\x91\\\\xA9\\\\xE2\\\\x80\\\\x8D\\\\xF0\\\\x9F\\\\x91\\\\xA6 \\\\xF0"
	          "\\\\x9F\\\\x87\\\\xBA\\\\xF0\\\\x9F\\\\x87\\\\xB8 \\\\xE0\\\\xA4\\\\xA8\\\\xE0"
	          "\\\\xA5\\\\x80 \\\\xE0\\\\xAE\\\\xA8\\\\xE0\\\\xAE\\\\xBF!";
	size_t ret, len, off;

	printf("Input: \\\\"%s\\\\"\\\\n", s);

	/* print each ${REALTYPE} with byte-length */
	printf("${REALTYPE}s in NUL-delimited input:\\\\n");
	for (off = 0; s[off] != '\\\\0'; off += ret) {
		ret = grapheme_next_${TYPE}_break_utf8(s + off, SIZE_MAX);
		printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off);
	}
	printf("\\\\n");

	/* do the same, but this time string is length-delimited */
	len = 17;
	printf("${REALTYPE}s in input delimited to %zu bytes:\\\\n", len);
	for (off = 0; off < len; off += ret) {
		ret = grapheme_next_${TYPE}_break_utf8(s + off, len - off);
		printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off);
	}

	return 0;
}
.Ed
EOF
fi

# Trailing sections shared by both encodings.
cat << EOF
.Sh SEE ALSO$(if [ "$TYPE" = "character" ] && [ "$ENCODING" != "utf8" ]; then printf "\n.Xr grapheme_is_character_break 3 ,"; fi)
.Xr grapheme_next_${TYPE}_break${ANTISUFFIX} 3 ,
.Xr libgrapheme 7
.Sh STANDARDS
.Fn grapheme_next_${TYPE}_break${SUFFIX}
is compliant with the Unicode ${UNICODE_VERSION} specification.
.Sh AUTHORS
.An Laslo Hunhold Aq Mt dev@frign.de
EOF