libgrapheme

unicode string library
git clone git://git.suckless.org/libgrapheme
Log | Files | Refs | README | LICENSE

grapheme_next_line_break_utf8.3 (2258B)


      1 .Dd 2022-08-26
      2 .Dt GRAPHEME_NEXT_LINE_BREAK_UTF8 3
      3 .Os suckless.org
      4 .Sh NAME
      5 .Nm grapheme_next_line_break_utf8
      6 .Nd determine byte-offset to next possible line break
      7 .Sh SYNOPSIS
      8 .In grapheme.h
      9 .Ft size_t
     10 .Fn grapheme_next_line_break_utf8 "const char *str" "size_t len"
     11 .Sh DESCRIPTION
     12 The
     13 .Fn grapheme_next_line_break_utf8
     14 function computes the offset (in bytes) to the next possible line
     15 break (see
     16 .Xr libgrapheme 7 )
     17 in the UTF-8-encoded string
     18 .Va str
     19 of length
     20 .Va len .
     21 .Pp
     22 If
     23 .Va len
     24 is set to
     25 .Dv SIZE_MAX
     26 (stdint.h is already included by grapheme.h) the string
     27 .Va str
     28 is interpreted to be NUL-terminated and processing stops when a
     29 NUL-byte is encountered.
     30 .Pp
     31 For non-UTF-8 input data
     32 .Xr grapheme_next_line_break 3
     33 can be used instead.
     34 .Sh RETURN VALUES
     35 The
     36 .Fn grapheme_next_line_break_utf8
     37 function returns the offset (in bytes) to the next possible line
     38 break in
     39 .Va str
     40 or 0 if
     41 .Va str
     42 is
     43 .Dv NULL .
     44 .Sh EXAMPLES
     45 .Bd -literal
     46 /* cc (-static) -o example example.c -lgrapheme */
     47 #include <grapheme.h>
     48 #include <stdint.h>
     49 #include <stdio.h>
     50 
     51 int
     52 main(void)
     53 {
     54 	/* UTF-8 encoded input */
     55 	char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0"
     56 	          "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0"
     57 	          "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0"
     58 	          "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!";
     59 	size_t ret, len, off;
     60 
     61 	printf("Input: \\"%s\\"\\n", s);
     62 
     63 	/* print each line segment (up to the next possible line break) with byte-length */
     64 	printf("Line segments in NUL-delimited input:\\n");
     65 	for (off = 0; s[off] != '\\0'; off += ret) {
     66 		ret = grapheme_next_line_break_utf8(s + off, SIZE_MAX);
     67 		printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
     68 	}
     69 	printf("\\n");
     70 
     71 	/* do the same, but this time string is length-delimited */
     72 	len = 17;
     73 	printf("Line segments in input delimited to %zu bytes:\\n", len);
     74 	for (off = 0; off < len; off += ret) {
     75 		ret = grapheme_next_line_break_utf8(s + off, len - off);
     76 		printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
     77 	}
     78 
     79 	return 0;
     80 }
     81 .Ed
     82 .Sh SEE ALSO
     83 .Xr grapheme_next_line_break 3 ,
     84 .Xr libgrapheme 7
     85 .Sh STANDARDS
     86 .Fn grapheme_next_line_break_utf8
     87 is compliant with the Unicode 14.0.0 specification.
     88 .Sh AUTHORS
     89 .An Laslo Hunhold Aq Mt dev@frign.de