libgrapheme

unicode string library
git clone git://git.suckless.org/libgrapheme
Log | Files | Refs | README | LICENSE

grapheme_next_character_break_utf8.3 (2492B)


      1 .Dd 2022-08-26
      2 .Dt GRAPHEME_NEXT_CHARACTER_BREAK_UTF8 3
      3 .Os suckless.org
      4 .Sh NAME
      5 .Nm grapheme_next_character_break_utf8
      6 .Nd determine byte-offset to next grapheme cluster break
      7 .Sh SYNOPSIS
      8 .In grapheme.h
      9 .Ft size_t
     10 .Fn grapheme_next_character_break_utf8 "const char *str" "size_t len"
     11 .Sh DESCRIPTION
     12 The
     13 .Fn grapheme_next_character_break_utf8
     14 function computes the offset (in bytes) to the next grapheme
     15 cluster break (see
     16 .Xr libgrapheme 7 )
     17 in the UTF-8-encoded string
     18 .Va str
     19 of length
     20 .Va len .
     21 If a grapheme cluster begins at
     22 .Va str
     23 this offset is equal to the length of said grapheme cluster.
     24 .Pp
     25 If
     26 .Va len
     27 is set to
     28 .Dv SIZE_MAX
     29 (stdint.h is already included by grapheme.h) the string
     30 .Va str
     31 is interpreted to be NUL-terminated and processing stops when a
     32 NUL-byte is encountered.
     33 .Pp
     34 For non-UTF-8 input data
     35 .Xr grapheme_is_character_break 3
     36 and
     37 .Xr grapheme_next_character_break 3
     38 can be used instead.
     39 .Sh RETURN VALUES
     40 The
     41 .Fn grapheme_next_character_break_utf8
     42 function returns the offset (in bytes) to the next grapheme cluster
     43 break in
     44 .Va str
     45 or 0 if
     46 .Va str
     47 is
     48 .Dv NULL .
     49 .Sh EXAMPLES
     50 .Bd -literal
     51 /* cc (-static) -o example example.c -lgrapheme */
     52 #include <grapheme.h>
     53 #include <stdint.h>
     54 #include <stdio.h>
     55 
     56 int
     57 main(void)
     58 {
     59 	/* UTF-8 encoded input (string literal, hence const) */
     60 	const char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0"
     61 	                "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0"
     62 	                "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0"
     63 	                "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!";
     64 	size_t ret, len, off;
     65 
     66 	printf("Input: \\"%s\\"\\n", s);
     67 
     68 	/* print each grapheme cluster with byte-length */
     69 	printf("Grapheme clusters in NUL-delimited input:\\n");
     70 	for (off = 0; s[off] != '\\0'; off += ret) {
     71 		ret = grapheme_next_character_break_utf8(s + off, SIZE_MAX);
     72 		printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
     73 	}
     74 	printf("\\n");
     75 
     76 	/* do the same, but this time string is length-delimited */
     77 	len = 17; /* cuts the input off within the emoji ZWJ sequence */
     78 	printf("Grapheme clusters in input delimited to %zu bytes:\\n", len);
     79 	for (off = 0; off < len; off += ret) {
     80 		ret = grapheme_next_character_break_utf8(s + off, len - off);
     81 		printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
     82 	}
     83 
     84 	return 0;
     85 }
     86 .Ed
     87 .Sh SEE ALSO
     88 .Xr grapheme_is_character_break 3 ,
     89 .Xr grapheme_next_character_break 3 ,
     90 .Xr libgrapheme 7
     91 .Sh STANDARDS
     92 .Fn grapheme_next_character_break_utf8
     93 is compliant with the Unicode 14.0.0 specification.
     94 .Sh AUTHORS
     95 .An Laslo Hunhold Aq Mt dev@frign.de