libgrapheme

unicode string library
git clone git://git.suckless.org/libgrapheme
Log | Files | Refs | README | LICENSE

next_break.sh (3397B)


      1 if [ "$ENCODING" = "utf8" ]; then
      2 	UNIT="byte"
      3 	SUFFIX="_utf8"
      4 	ANTISUFFIX=""
      5 else
      6 	UNIT="codepoint"
      7 	SUFFIX=""
      8 	ANTISUFFIX="_utf8"
      9 fi
     10 
     11 cat << EOF
     12 .Dd ${MAN_DATE}
     13 .Dt GRAPHEME_NEXT_$(printf "%s_break%s" "$TYPE" "$SUFFIX" | tr [:lower:] [:upper:]) 3
     14 .Os suckless.org
     15 .Sh NAME
     16 .Nm grapheme_next_${TYPE}_break${SUFFIX}
     17 .Nd determine ${UNIT}-offset to next ${REALTYPE} break
     18 .Sh SYNOPSIS
     19 .In grapheme.h
     20 .Ft size_t
     21 .Fn grapheme_next_${TYPE}_break${SUFFIX} "const $(if [ "$ENCODING" = "utf8" ]; then printf "char"; else printf "uint_least32_t"; fi) *str" "size_t len"
     22 .Sh DESCRIPTION
     23 The
     24 .Fn grapheme_next_${TYPE}_break${SUFFIX}
     25 function computes the offset (in ${UNIT}s) to the next ${REALTYPE}
     26 break (see
     27 .Xr libgrapheme 7 )
     28 in the $(if [ "$ENCODING" = "utf8" ]; then printf "UTF-8-encoded string"; else printf "codepoint array"; fi)
     29 .Va str
     30 of length
     31 .Va len .$(if [ "$TYPE" != "line" ]; then printf "\nIf a ${REALTYPE} begins at
     32 .Va str
     33 this offset is equal to the length of said ${REALTYPE}."; fi)
     34 .Pp
     35 If
     36 .Va len
     37 is set to
     38 .Dv SIZE_MAX
     39 (stdint.h is already included by grapheme.h) the string
     40 .Va str
     41 is interpreted to be NUL-terminated and processing stops when
     42 a $(if [ "$ENCODING" = "utf8" ]; then printf "NUL-byte"; else printf "codepoint with the value 0"; fi) is encountered.
     43 .Pp
     44 For $(if [ "$ENCODING" != "utf8" ]; then printf "UTF-8-encoded"; else printf "non-UTF-8"; fi) input
     45 data$(if [ "$TYPE" = "character" ] && [ "$ENCODING" = "utf8" ]; then printf "\n.Xr grapheme_is_character_break 3 and"; fi)
     46 .Xr grapheme_next_${TYPE}_break${ANTISUFFIX} 3
     47 can be used instead.
     48 .Sh RETURN VALUES
     49 The
     50 .Fn grapheme_next_${TYPE}_break${SUFFIX}
     51 function returns the offset (in ${UNIT}s) to the next ${REALTYPE}
     52 break in
     53 .Va str
     54 or 0 if
     55 .Va str
     56 is
     57 .Dv NULL .
     58 EOF
     59 
     60 if [ "$ENCODING" = "utf8" ]; then
     61 cat << EOF
     62 .Sh EXAMPLES
     63 .Bd -literal
     64 /* cc (-static) -o example example.c -lgrapheme */
     65 #include <grapheme.h>
     66 #include <stdint.h>
     67 #include <stdio.h>
     68 
     69 int
     70 main(void)
     71 {
     72 	/* UTF-8 encoded input */
     73 	char *s = "T\\\\xC3\\\\xABst \\\\xF0\\\\x9F\\\\x91\\\\xA8\\\\xE2\\\\x80\\\\x8D\\\\xF0"
     74 	          "\\\\x9F\\\\x91\\\\xA9\\\\xE2\\\\x80\\\\x8D\\\\xF0\\\\x9F\\\\x91\\\\xA6 \\\\xF0"
     75 	          "\\\\x9F\\\\x87\\\\xBA\\\\xF0\\\\x9F\\\\x87\\\\xB8 \\\\xE0\\\\xA4\\\\xA8\\\\xE0"
     76 	          "\\\\xA5\\\\x80 \\\\xE0\\\\xAE\\\\xA8\\\\xE0\\\\xAE\\\\xBF!";
     77 	size_t ret, len, off;
     78 
     79 	printf("Input: \\\\"%s\\\\"\\\\n", s);
     80 
     81 	/* print each ${REALTYPE} with byte-length */
     82 	printf("${REALTYPE}s in NUL-delimited input:\\\\n");
     83 	for (off = 0; s[off] != '\\\\0'; off += ret) {
     84 		ret = grapheme_next_${TYPE}_break_utf8(s + off, SIZE_MAX);
     85 		printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off);
     86 	}
     87 	printf("\\\\n");
     88 
     89 	/* do the same, but this time string is length-delimited */
     90 	len = 17;
     91 	printf("${REALTYPE}s in input delimited to %zu bytes:\\\\n", len);
     92 	for (off = 0; off < len; off += ret) {
     93 		ret = grapheme_next_${TYPE}_break_utf8(s + off, len - off);
     94 		printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off);
     95 	}
     96 
     97 	return 0;
     98 }
     99 .Ed
    100 EOF
    101 fi
    102 
    103 cat << EOF
    104 .Sh SEE ALSO$(if [ "$TYPE" = "character" ] && [ "$ENCODING" != "utf8" ]; then printf "\n.Xr grapheme_is_character_break 3 ,"; fi)
    105 .Xr grapheme_next_${TYPE}_break${ANTISUFFIX} 3 ,
    106 .Xr libgrapheme 7
    107 .Sh STANDARDS
    108 .Fn grapheme_next_${TYPE}_break${SUFFIX}
    109 is compliant with the Unicode ${UNICODE_VERSION} specification.
    110 .Sh AUTHORS
    111 .An Laslo Hunhold Aq Mt dev@frign.de
    112 EOF