commit 7df283987a5d360d9a46bca94c6335987a2b4a2e
parent f59343351a34045671fc506160d19c9bd1dfec0d
Author: Laslo Hunhold <dev@frign.de>
Date: Thu, 6 Oct 2022 22:25:52 +0200
Use mandoc's markdown-output
Today I learned that this exists. Very nice!
Signed-off-by: Laslo Hunhold <dev@frign.de>
Diffstat:
24 files changed, 2001 insertions(+), 1283 deletions(-)
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_decode_utf8(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_decode_utf8(3)/index.md
@@ -1,80 +1,112 @@
- GRAPHEME_DECODE_UTF8(3) Library Functions Manual GRAPHEME_DECODE_UTF8(3)
+GRAPHEME\_DECODE\_UTF8(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_decode\_utf8** - decode first codepoint in UTF-8-encoded string
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_decode\_utf8**(*const char \*str*, *size\_t len*, *uint\_least32\_t \*cp*);
+
+# DESCRIPTION
+
+The
+**grapheme\_decode\_utf8**()
+function decodes the first codepoint in the UTF-8-encoded string
+*str*
+of length
+*len*.
+If the UTF-8-sequence is invalid (overlong encoding, unexpected byte,
+string ends unexpectedly, empty string, etc.) the decoding is stopped
+at the last processed byte and the decoded codepoint set to
+`GRAPHEME_INVALID_CODEPOINT`.
+
+If
+*cp*
+is not
+`NULL`
+the decoded codepoint is stored in the memory pointed to by
+*cp*.
+
+Given NUL has a unique 1 byte representation, it is safe to operate on
+NUL-terminated strings by setting
+*len*
+to
+`SIZE_MAX`
+(stdint.h is already included by grapheme.h) and terminating when
+*cp*
+is 0 (see
+*EXAMPLES*
+for an example).
+
+# RETURN VALUES
+
+The
+**grapheme\_decode\_utf8**()
+function returns the number of processed bytes and 0 if
+*str*
+is
+`NULL`
+or
+*len*
+is 0.
+If the string ends unexpectedly in a multibyte sequence, the desired
+length (that is larger than
+*len*)
+is returned.
+
+# EXAMPLES
+
+ /* cc (-static) -o example example.c -lgrapheme */
+ #include <grapheme.h>
+ #include <inttypes.h>
+ #include <stdio.h>
- NAME
- grapheme_decode_utf8 – decode first codepoint in UTF-8-encoded string
+ void
+ print_cps(const char *str, size_t len)
+ {
+ size_t ret, off;
+ uint_least32_t cp;
- SYNOPSIS
- #include <grapheme.h>
+ for (off = 0; off < len; off += ret) {
+ if ((ret = grapheme_decode_utf8(str + off,
+ len - off, &cp)) > (len - off)) {
+ /*
+ * string ended unexpectedly in the middle of a
+ * multibyte sequence and we have the choice
+ * here to possibly expand str by ret - len + off
+ * bytes to get a full sequence, but we just
+ * bail out in this case.
+ */
+ break;
+ }
+ printf("%"PRIxLEAST32"\n", cp);
+ }
+ }
- size_t
- grapheme_decode_utf8(const char *str, size_t len, uint_least32_t *cp);
+ void
+ print_cps_nul_terminated(const char *str)
+ {
+ size_t ret, off;
+ uint_least32_t cp;
- DESCRIPTION
- The grapheme_decode_utf8() function decodes the first codepoint in the
- UTF-8-encoded string str of length len. If the UTF-8-sequence is invalid
- (overlong encoding, unexpected byte, string ends unexpectedly, empty
- string, etc.) the decoding is stopped at the last processed byte and the
- decoded codepoint set to GRAPHEME_INVALID_CODEPOINT.
-
- If cp is not NULL the decoded codepoint is stored in the memory pointed
- to by cp.
-
- Given NUL has a unique 1 byte representation, it is safe to operate on
- NUL-terminated strings by setting len to SIZE_MAX (stdint.h is already
- included by grapheme.h) and terminating when cp is 0 (see EXAMPLES for an
- example).
-
- RETURN VALUES
- The grapheme_decode_utf8() function returns the number of processed bytes
- and 0 if str is NULL or len is 0. If the string ends unexpectedly in a
- multibyte sequence, the desired length (that is larger than len) is
- returned.
-
- EXAMPLES
- /* cc (-static) -o example example.c -lgrapheme */
- #include <grapheme.h>
- #include <inttypes.h>
- #include <stdio.h>
-
- void
- print_cps(const char *str, size_t len)
- {
- size_t ret, off;
- uint_least32_t cp;
-
- for (off = 0; off < len; off += ret) {
- if ((ret = grapheme_decode_utf8(str + off,
- len - off, &cp)) > (len - off)) {
- /*
- * string ended unexpectedly in the middle of a
- * multibyte sequence and we have the choice
- * here to possibly expand str by ret - len + off
- * bytes to get a full sequence, but we just
- * bail out in this case.
- */
- break;
- }
- printf("%"PRIxLEAST32"\n", cp);
- }
- }
-
- void
- print_cps_nul_terminated(const char *str)
- {
- size_t ret, off;
- uint_least32_t cp;
-
- for (off = 0; (ret = grapheme_decode_utf8(str + off,
- SIZE_MAX, &cp)) > 0 &&
- cp != 0; off += ret) {
- printf("%"PRIxLEAST32"\n", cp);
- }
- }
-
- SEE ALSO
- grapheme_encode_utf8(3), libgrapheme(7)
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+ for (off = 0; (ret = grapheme_decode_utf8(str + off,
+ SIZE_MAX, &cp)) > 0 &&
+ cp != 0; off += ret) {
+ printf("%"PRIxLEAST32"\n", cp);
+ }
+ }
+
+# SEE ALSO
+
+grapheme\_encode\_utf8(3),
+libgrapheme(7)
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_encode_utf8(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_encode_utf8(3)/index.md
@@ -1,87 +1,113 @@
- GRAPHEME_ENCODE_UTF8(3) Library Functions Manual GRAPHEME_ENCODE_UTF8(3)
-
- NAME
- grapheme_encode_utf8 – encode codepoint into UTF-8 string
-
- SYNOPSIS
- #include <grapheme.h>
-
- size_t
- grapheme_encode_utf8(uint_least32_t cp, char *str, size_t len);
-
- DESCRIPTION
- The grapheme_encode_utf8() function encodes the codepoint cp into a
- UTF-8-string. If str is not NULL and len is large enough it writes the
- UTF-8-string to the memory pointed to by str. Otherwise no data is
- written.
-
- RETURN VALUES
- The grapheme_encode_utf8() function returns the length (in bytes) of the
- UTF-8-string resulting from encoding cp, even if len is not large enough
- or str is NULL.
-
- EXAMPLES
- /* cc (-static) -o example example.c -lgrapheme */
- #include <grapheme.h>
- #include <stddef.h>
- #include <stdlib.h>
-
- size_t
- cps_to_utf8(const uint_least32_t *cp, size_t cplen, char *str, size_t len)
- {
- size_t i, off, ret;
-
- for (i = 0, off = 0; i < cplen; i++, off += ret) {
- if ((ret = grapheme_encode_utf8(cp[i], str + off,
- len - off)) > (len - off)) {
- /* buffer too small */
- break;
- }
- }
-
- return off;
- }
-
- size_t
- cps_bytelen(const uint_least32_t *cp, size_t cplen)
- {
- size_t i, len;
-
- for (i = 0, len = 0; i < cplen; i++) {
- len += grapheme_encode_utf8(cp[i], NULL, 0);
- }
-
- return len;
- }
-
- char *
- cps_to_utf8_alloc(const uint_least32_t *cp, size_t cplen)
- {
- char *str;
- size_t len, i, ret, off;
-
- len = cps_bytelen(cp, cplen);
-
- if (!(str = malloc(len))) {
- return NULL;
- }
-
- for (i = 0, off = 0; i < cplen; i++, off += ret) {
- if ((ret = grapheme_encode_utf8(cp[i], str + off,
- len - off)) > (len - off)) {
- /* buffer too small */
- break;
- }
- }
- str[off] = '\0';
-
- return str;
- }
-
- SEE ALSO
- grapheme_decode_utf8(3), libgrapheme(7)
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+GRAPHEME\_ENCODE\_UTF8(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_encode\_utf8** - encode codepoint into UTF-8 string
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_encode\_utf8**(*uint\_least32\_t cp*, *char \*str*, *size\_t len*);
+
+# DESCRIPTION
+
+The
+**grapheme\_encode\_utf8**()
+function encodes the codepoint
+*cp*
+into a UTF-8-string.
+If
+*str*
+is not
+`NULL`
+and
+*len*
+is large enough it writes the UTF-8-string to the memory pointed to by
+*str*.
+Otherwise no data is written.
+
+# RETURN VALUES
+
+The
+**grapheme\_encode\_utf8**()
+function returns the length (in bytes) of the UTF-8-string resulting
+from encoding
+*cp*,
+even if
+*len*
+is not large enough or
+*str*
+is
+`NULL`.
+
+# EXAMPLES
+
+ /* cc (-static) -o example example.c -lgrapheme */
+ #include <grapheme.h>
+ #include <stddef.h>
+ #include <stdlib.h>
+
+ size_t
+ cps_to_utf8(const uint_least32_t *cp, size_t cplen, char *str, size_t len)
+ {
+ size_t i, off, ret;
+
+ for (i = 0, off = 0; i < cplen; i++, off += ret) {
+ if ((ret = grapheme_encode_utf8(cp[i], str + off,
+ len - off)) > (len - off)) {
+ /* buffer too small */
+ break;
+ }
+ }
+
+ return off;
+ }
+
+ size_t
+ cps_bytelen(const uint_least32_t *cp, size_t cplen)
+ {
+ size_t i, len;
+
+ for (i = 0, len = 0; i < cplen; i++) {
+ len += grapheme_encode_utf8(cp[i], NULL, 0);
+ }
+
+ return len;
+ }
+
+ char *
+ cps_to_utf8_alloc(const uint_least32_t *cp, size_t cplen)
+ {
+ char *str;
+ size_t len, i, ret, off;
+
+ len = cps_bytelen(cp, cplen);
+
+ if (!(str = malloc(len))) {
+ return NULL;
+ }
+
+ for (i = 0, off = 0; i < cplen; i++, off += ret) {
+ if ((ret = grapheme_encode_utf8(cp[i], str + off,
+ len - off)) > (len - off)) {
+ /* buffer too small */
+ break;
+ }
+ }
+ str[off] = '\0';
+
+ return str;
+ }
+
+# SEE ALSO
+
+grapheme\_decode\_utf8(3),
+libgrapheme(7)
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_is_character_break(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_is_character_break(3)/index.md
@@ -1,69 +1,95 @@
- GRAPHEME_IS_CHARACTER_BREAK(3) Library Functions Manual
+GRAPHEME\_IS\_CHARACTER\_BREAK(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_is\_character\_break** - test for a grapheme cluster break between two codepoints
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_is\_character\_break**(*uint\_least32\_t cp1*, *uint\_least32\_t cp2*, *uint\_least16\_t \*state*);
+
+# DESCRIPTION
+
+The
+**grapheme\_is\_character\_break**()
+function determines if there is a grapheme cluster break (see
+libgrapheme(7))
+between the two codepoints
+*cp1*
+and
+*cp2*.
+By specification this decision depends on a
+*state*
+that can at most be completely reset after detecting a break and must
+be reset every time one deviates from sequential processing.
+
+If
+*state*
+is
+`NULL`
+**grapheme\_is\_character\_break**()
+behaves as if it was called with a fully reset state.
+
+# RETURN VALUES
+
+The
+**grapheme\_is\_character\_break**()
+function returns
+*true*
+if there is a grapheme cluster break between the codepoints
+*cp1*
+and
+*cp2*
+and
+*false*
+if there is not.
+
+# EXAMPLES
+
+ /* cc (-static) -o example example.c -lgrapheme */
+ #include <grapheme.h>
+ #include <stdint.h>
+ #include <stdio.h>
+ #include <stdlib.h>
- NAME
- grapheme_is_character_break – test for a grapheme cluster break between
- two codepoints
+ int
+ main(void)
+ {
+ uint_least16_t state = 0;
+ uint_least32_t s1[] = ..., s2[] = ...; /* two input arrays */
+ size_t i;
- SYNOPSIS
- #include <grapheme.h>
+ for (i = 0; i + 1 < sizeof(s1) / sizeof(*s1); i++) {
+ if (grapheme_is_character_break(s1[i], s1[i + 1], &state)) {
+ printf("break in s1 at offset %zu\n", i);
+ }
+ }
+ memset(&state, 0, sizeof(state)); /* reset state */
+ for (i = 0; i + 1 < sizeof(s2) / sizeof(*s2); i++) {
+ if (grapheme_is_character_break(s2[i], s2[i + 1], &state)) {
+ printf("break in s2 at offset %zu\n", i);
+ }
+ }
- size_t
- grapheme_is_character_break(uint_least32_t cp1, uint_least32_t cp2,
- uint_least16_t *state);
-
- DESCRIPTION
- The grapheme_is_character_break() function determines if there is a
- grapheme cluster break (see libgrapheme(7)) between the two codepoints
- cp1 and cp2. By specification this decision depends on a state that can
- at most be completely reset after detecting a break and must be reset
- every time one deviates from sequential processing.
-
- If state is NULL grapheme_is_character_break() behaves as if it was
- called with a fully reset state.
-
- RETURN VALUES
- The grapheme_is_character_break() function returns true if there is a
- grapheme cluster break between the codepoints cp1 and cp2 and false if
- there is not.
-
- EXAMPLES
- /* cc (-static) -o example example.c -lgrapheme */
- #include <grapheme.h>
- #include <stdint.h>
- #include <stdio.h>
- #include <stdlib.h>
-
- int
- main(void)
- {
- uint_least16_t state = 0;
- uint_least32_t s1[] = ..., s2[] = ...; /* two input arrays */
- size_t i;
-
- for (i = 0; i + 1 < sizeof(s1) / sizeof(*s1); i++) {
- if (grapheme_is_character_break(s[i], s[i + 1], &state)) {
- printf("break in s1 at offset %zu0, i);
- }
- }
- memset(&state, 0, sizeof(state)); /* reset state */
- for (i = 0; i + 1 < sizeof(s2) / sizeof(*s2); i++) {
- if (grapheme_is_character_break(s[i], s[i + 1], &state)) {
- printf("break in s2 at offset %zu0, i);
- }
- }
-
- return 0;
- }
-
- SEE ALSO
- grapheme_next_character_break(3), grapheme_next_character_break_utf8(3),
- libgrapheme(7)
-
- STANDARDS
- grapheme_is_character_break() is compliant with the Unicode 15.0.0
- specification.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+ return 0;
+ }
+
+# SEE ALSO
+
+grapheme\_next\_character\_break(3),
+grapheme\_next\_character\_break\_utf8(3),
+libgrapheme(7)
+
+# STANDARDS
+
+**grapheme\_is\_character\_break**()
+is compliant with the Unicode 15.0.0 specification.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_is_lowercase(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_is_lowercase(3)/index.md
@@ -1,39 +1,65 @@
- GRAPHEME_IS_LOWERCASE(3) Library Functions Manual GRAPHEME_IS_LOWERCASE(3)
-
- NAME
- grapheme_is_lowercase – check if codepoint array is lowercase
-
- SYNOPSIS
- #include <grapheme.h>
-
- size_t
- grapheme_is_lowercase(const uint_least32_t *str, size_t len,
- size_t *caselen);
-
- DESCRIPTION
- The grapheme_is_lowercase() function checks if the codepoint array str is
- lowercase and writes the length of the matching lowercase-sequence to the
- integer pointed to by caselen, unless caselen is set to NULL.
-
- If len is set to SIZE_MAX (stdint.h is already included by grapheme.h)
- the codepoint array src is interpreted to be NUL-terminated and
- processing stops when a NUL-byte is encountered.
-
- For UTF-8-encoded input data grapheme_is_lowercase_utf8(3) can be used
- instead.
-
- RETURN VALUES
- The grapheme_is_lowercase() function returns true if the codepoint array
- str is lowercase, otherwise false.
-
- SEE ALSO
- grapheme_is_lowercase_utf8(3), libgrapheme(7)
-
- STANDARDS
- grapheme_is_lowercase() is compliant with the Unicode 15.0.0
- specification.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+GRAPHEME\_IS\_LOWERCASE(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_is\_lowercase** - check if codepoint array is lowercase
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_is\_lowercase**(*const uint\_least32\_t \*str*, *size\_t len*, *size\_t \*caselen*);
+
+# DESCRIPTION
+
+The
+**grapheme\_is\_lowercase**()
+function checks if the codepoint array
+*str*
+is lowercase and writes the length of the matching lowercase-sequence to the integer pointed to by
+*caselen*,
+unless
+*caselen*
+is set to
+`NULL`.
+
+If
+*len*
+is set to
+`SIZE_MAX`
+(stdint.h is already included by grapheme.h) the codepoint array
+*str*
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+
+For UTF-8-encoded input data
+grapheme\_is\_lowercase\_utf8(3)
+can be used instead.
+
+# RETURN VALUES
+
+The
+**grapheme\_is\_lowercase**()
+function returns
+`true`
+if the codepoint array
+*str*
+is lowercase, otherwise
+`false`.
+
+# SEE ALSO
+
+grapheme\_is\_lowercase\_utf8(3),
+libgrapheme(7)
+
+# STANDARDS
+
+**grapheme\_is\_lowercase**()
+is compliant with the Unicode 15.0.0 specification.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_is_lowercase_utf8(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_is_lowercase_utf8(3)/index.md
@@ -1,38 +1,65 @@
- GRAPHEME_IS_LOWERCASE_UTF8(3) Library Functions Manual
-
- NAME
- grapheme_is_lowercase_utf8 – check if UTF-8-encoded string is lowercase
-
- SYNOPSIS
- #include <grapheme.h>
-
- size_t
- grapheme_is_lowercase_utf8(const char *str, size_t len, size_t *caselen);
-
- DESCRIPTION
- The grapheme_is_lowercase_utf8() function checks if the UTF-8-encoded
- string str is lowercase and writes the length of the matching lowercase-
- sequence to the integer pointed to by caselen, unless caselen is set to
- NULL.
-
- If len is set to SIZE_MAX (stdint.h is already included by grapheme.h)
- the UTF-8-encoded string src is interpreted to be NUL-terminated and
- processing stops when a NUL-byte is encountered.
-
- For non-UTF-8 input data grapheme_is_lowercase(3) can be used instead.
-
- RETURN VALUES
- The grapheme_is_lowercase_utf8() function returns true if the
- UTF-8-encoded string str is lowercase, otherwise false.
-
- SEE ALSO
- grapheme_is_lowercase(3), libgrapheme(7)
-
- STANDARDS
- grapheme_is_lowercase_utf8() is compliant with the Unicode 15.0.0
- specification.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+GRAPHEME\_IS\_LOWERCASE\_UTF8(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_is\_lowercase\_utf8** - check if UTF-8-encoded string is lowercase
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_is\_lowercase\_utf8**(*const char \*str*, *size\_t len*, *size\_t \*caselen*);
+
+# DESCRIPTION
+
+The
+**grapheme\_is\_lowercase\_utf8**()
+function checks if the UTF-8-encoded string
+*str*
+is lowercase and writes the length of the matching lowercase-sequence to the integer pointed to by
+*caselen*,
+unless
+*caselen*
+is set to
+`NULL`.
+
+If
+*len*
+is set to
+`SIZE_MAX`
+(stdint.h is already included by grapheme.h) the UTF-8-encoded string
+*str*
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+
+For non-UTF-8 input data
+grapheme\_is\_lowercase(3)
+can be used instead.
+
+# RETURN VALUES
+
+The
+**grapheme\_is\_lowercase\_utf8**()
+function returns
+`true`
+if the UTF-8-encoded string
+*str*
+is lowercase, otherwise
+`false`.
+
+# SEE ALSO
+
+grapheme\_is\_lowercase(3),
+libgrapheme(7)
+
+# STANDARDS
+
+**grapheme\_is\_lowercase\_utf8**()
+is compliant with the Unicode 15.0.0 specification.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_is_titlecase(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_is_titlecase(3)/index.md
@@ -1,39 +1,65 @@
- GRAPHEME_IS_TITLECASE(3) Library Functions Manual GRAPHEME_IS_TITLECASE(3)
-
- NAME
- grapheme_is_titlecase – check if codepoint array is titlecase
-
- SYNOPSIS
- #include <grapheme.h>
-
- size_t
- grapheme_is_titlecase(const uint_least32_t *str, size_t len,
- size_t *caselen);
-
- DESCRIPTION
- The grapheme_is_titlecase() function checks if the codepoint array str is
- titlecase and writes the length of the matching titlecase-sequence to the
- integer pointed to by caselen, unless caselen is set to NULL.
-
- If len is set to SIZE_MAX (stdint.h is already included by grapheme.h)
- the codepoint array src is interpreted to be NUL-terminated and
- processing stops when a NUL-byte is encountered.
-
- For UTF-8-encoded input data grapheme_is_titlecase_utf8(3) can be used
- instead.
-
- RETURN VALUES
- The grapheme_is_titlecase() function returns true if the codepoint array
- str is titlecase, otherwise false.
-
- SEE ALSO
- grapheme_is_titlecase_utf8(3), libgrapheme(7)
-
- STANDARDS
- grapheme_is_titlecase() is compliant with the Unicode 15.0.0
- specification.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+GRAPHEME\_IS\_TITLECASE(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_is\_titlecase** - check if codepoint array is titlecase
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_is\_titlecase**(*const uint\_least32\_t \*str*, *size\_t len*, *size\_t \*caselen*);
+
+# DESCRIPTION
+
+The
+**grapheme\_is\_titlecase**()
+function checks if the codepoint array
+*str*
+is titlecase and writes the length of the matching titlecase-sequence to the integer pointed to by
+*caselen*,
+unless
+*caselen*
+is set to
+`NULL`.
+
+If
+*len*
+is set to
+`SIZE_MAX`
+(stdint.h is already included by grapheme.h) the codepoint array
+*str*
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+
+For UTF-8-encoded input data
+grapheme\_is\_titlecase\_utf8(3)
+can be used instead.
+
+# RETURN VALUES
+
+The
+**grapheme\_is\_titlecase**()
+function returns
+`true`
+if the codepoint array
+*str*
+is titlecase, otherwise
+`false`.
+
+# SEE ALSO
+
+grapheme\_is\_titlecase\_utf8(3),
+libgrapheme(7)
+
+# STANDARDS
+
+**grapheme\_is\_titlecase**()
+is compliant with the Unicode 15.0.0 specification.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_is_titlecase_utf8(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_is_titlecase_utf8(3)/index.md
@@ -1,38 +1,65 @@
- GRAPHEME_IS_TITLECASE_UTF8(3) Library Functions Manual
-
- NAME
- grapheme_is_titlecase_utf8 – check if UTF-8-encoded string is titlecase
-
- SYNOPSIS
- #include <grapheme.h>
-
- size_t
- grapheme_is_titlecase_utf8(const char *str, size_t len, size_t *caselen);
-
- DESCRIPTION
- The grapheme_is_titlecase_utf8() function checks if the UTF-8-encoded
- string str is titlecase and writes the length of the matching titlecase-
- sequence to the integer pointed to by caselen, unless caselen is set to
- NULL.
-
- If len is set to SIZE_MAX (stdint.h is already included by grapheme.h)
- the UTF-8-encoded string src is interpreted to be NUL-terminated and
- processing stops when a NUL-byte is encountered.
-
- For non-UTF-8 input data grapheme_is_titlecase(3) can be used instead.
-
- RETURN VALUES
- The grapheme_is_titlecase_utf8() function returns true if the
- UTF-8-encoded string str is titlecase, otherwise false.
-
- SEE ALSO
- grapheme_is_titlecase(3), libgrapheme(7)
-
- STANDARDS
- grapheme_is_titlecase_utf8() is compliant with the Unicode 15.0.0
- specification.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+GRAPHEME\_IS\_TITLECASE\_UTF8(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_is\_titlecase\_utf8** - check if UTF-8-encoded string is titlecase
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_is\_titlecase\_utf8**(*const char \*str*, *size\_t len*, *size\_t \*caselen*);
+
+# DESCRIPTION
+
+The
+**grapheme\_is\_titlecase\_utf8**()
+function checks if the UTF-8-encoded string
+*str*
+is titlecase and writes the length of the matching titlecase-sequence to the integer pointed to by
+*caselen*,
+unless
+*caselen*
+is set to
+`NULL`.
+
+If
+*len*
+is set to
+`SIZE_MAX`
+(stdint.h is already included by grapheme.h) the UTF-8-encoded string
+*str*
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+
+For non-UTF-8 input data
+grapheme\_is\_titlecase(3)
+can be used instead.
+
+# RETURN VALUES
+
+The
+**grapheme\_is\_titlecase\_utf8**()
+function returns
+`true`
+if the UTF-8-encoded string
+*str*
+is titlecase, otherwise
+`false`.
+
+# SEE ALSO
+
+grapheme\_is\_titlecase(3),
+libgrapheme(7)
+
+# STANDARDS
+
+**grapheme\_is\_titlecase\_utf8**()
+is compliant with the Unicode 15.0.0 specification.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_is_uppercase(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_is_uppercase(3)/index.md
@@ -1,39 +1,65 @@
- GRAPHEME_IS_UPPERCASE(3) Library Functions Manual GRAPHEME_IS_UPPERCASE(3)
-
- NAME
- grapheme_is_uppercase – check if codepoint array is uppercase
-
- SYNOPSIS
- #include <grapheme.h>
-
- size_t
- grapheme_is_uppercase(const uint_least32_t *str, size_t len,
- size_t *caselen);
-
- DESCRIPTION
- The grapheme_is_uppercase() function checks if the codepoint array str is
- uppercase and writes the length of the matching uppercase-sequence to the
- integer pointed to by caselen, unless caselen is set to NULL.
-
- If len is set to SIZE_MAX (stdint.h is already included by grapheme.h)
- the codepoint array src is interpreted to be NUL-terminated and
- processing stops when a NUL-byte is encountered.
-
- For UTF-8-encoded input data grapheme_is_uppercase_utf8(3) can be used
- instead.
-
- RETURN VALUES
- The grapheme_is_uppercase() function returns true if the codepoint array
- str is uppercase, otherwise false.
-
- SEE ALSO
- grapheme_is_uppercase_utf8(3), libgrapheme(7)
-
- STANDARDS
- grapheme_is_uppercase() is compliant with the Unicode 15.0.0
- specification.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+GRAPHEME\_IS\_UPPERCASE(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_is\_uppercase** - check if codepoint array is uppercase
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_is\_uppercase**(*const uint\_least32\_t \*str*, *size\_t len*, *size\_t \*caselen*);
+
+# DESCRIPTION
+
+The
+**grapheme\_is\_uppercase**()
+function checks if the codepoint array
+*str*
+is uppercase and writes the length of the matching uppercase-sequence to the integer pointed to by
+*caselen*,
+unless
+*caselen*
+is set to
+`NULL`.
+
+If
+*len*
+is set to
+`SIZE_MAX`
+(stdint.h is already included by grapheme.h) the codepoint array
+*str*
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+
+For UTF-8-encoded input data
+grapheme\_is\_uppercase\_utf8(3)
+can be used instead.
+
+# RETURN VALUES
+
+The
+**grapheme\_is\_uppercase**()
+function returns
+`true`
+if the codepoint array
+*str*
+is uppercase, otherwise
+`false`.
+
+# SEE ALSO
+
+grapheme\_is\_uppercase\_utf8(3),
+libgrapheme(7)
+
+# STANDARDS
+
+**grapheme\_is\_uppercase**()
+is compliant with the Unicode 15.0.0 specification.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_is_uppercase_utf8(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_is_uppercase_utf8(3)/index.md
@@ -1,38 +1,65 @@
- GRAPHEME_IS_LOWERCASE_UTF8(3) Library Functions Manual
-
- NAME
- grapheme_is_lowercase_utf8 – check if UTF-8-encoded string is lowercase
-
- SYNOPSIS
- #include <grapheme.h>
-
- size_t
- grapheme_is_lowercase_utf8(const char *str, size_t len, size_t *caselen);
-
- DESCRIPTION
- The grapheme_is_lowercase_utf8() function checks if the UTF-8-encoded
- string str is lowercase and writes the length of the matching lowercase-
- sequence to the integer pointed to by caselen, unless caselen is set to
- NULL.
-
- If len is set to SIZE_MAX (stdint.h is already included by grapheme.h)
- the UTF-8-encoded string src is interpreted to be NUL-terminated and
- processing stops when a NUL-byte is encountered.
-
- For non-UTF-8 input data grapheme_is_lowercase(3) can be used instead.
-
- RETURN VALUES
- The grapheme_is_lowercase_utf8() function returns true if the
- UTF-8-encoded string str is lowercase, otherwise false.
-
- SEE ALSO
- grapheme_is_lowercase(3), libgrapheme(7)
-
- STANDARDS
- grapheme_is_lowercase_utf8() is compliant with the Unicode 15.0.0
- specification.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+GRAPHEME\_IS\_UPPERCASE\_UTF8(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_is\_uppercase\_utf8** - check if UTF-8-encoded string is uppercase
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_is\_uppercase\_utf8**(*const char \*str*, *size\_t len*, *size\_t \*caselen*);
+
+# DESCRIPTION
+
+The
+**grapheme\_is\_uppercase\_utf8**()
+function checks if the UTF-8-encoded string
+*str*
+is uppercase and writes the length of the matching uppercase-sequence to the integer pointed to by
+*caselen*,
+unless
+*caselen*
+is set to
+`NULL`.
+
+If
+*len*
+is set to
+`SIZE_MAX`
+(stdint.h is already included by grapheme.h) the UTF-8-encoded string
+*str*
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+
+For non-UTF-8 input data
+grapheme\_is\_uppercase(3)
+can be used instead.
+
+# RETURN VALUES
+
+The
+**grapheme\_is\_uppercase\_utf8**()
+function returns
+`true`
+if the UTF-8-encoded string
+*str*
+is uppercase, otherwise
+`false`.
+
+# SEE ALSO
+
+grapheme\_is\_uppercase(3),
+libgrapheme(7)
+
+# STANDARDS
+
+**grapheme\_is\_uppercase\_utf8**()
+is compliant with the Unicode 15.0.0 specification.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_next_character_break(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_next_character_break(3)/index.md
@@ -1,42 +1,70 @@
- GRAPHEME_NEXT_CHARACTER_BREAK(3) Library Functions Manual
-
- NAME
- grapheme_next_character_break – determine codepoint-offset to next
- grapheme cluster break
-
- SYNOPSIS
- #include <grapheme.h>
-
- size_t
- grapheme_next_character_break(const uint_least32_t *str, size_t len);
-
- DESCRIPTION
- The grapheme_next_character_break() function computes the offset (in
- codepoints) to the next grapheme cluster break (see libgrapheme(7)) in
- the codepoint array str of length len. If a grapheme cluster begins at
- str this offset is equal to the length of said grapheme cluster.
-
- If len is set to SIZE_MAX (stdint.h is already included by grapheme.h)
- the string str is interpreted to be NUL-terminated and processing stops
- when a codepoint with the value 0 is encountered.
-
- For UTF-8-encoded input data grapheme_next_character_break_utf8(3) can be
- used instead.
-
- RETURN VALUES
- The grapheme_next_character_break() function returns the offset (in
- codepoints) to the next grapheme cluster break in str or 0 if str is
- NULL.
-
- SEE ALSO
- grapheme_is_character_break(3), grapheme_next_character_break_utf8(3),
- libgrapheme(7)
-
- STANDARDS
- grapheme_next_character_break() is compliant with the Unicode 15.0.0
- specification.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+GRAPHEME\_NEXT\_CHARACTER\_BREAK(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_next\_character\_break** - determine codepoint-offset to next grapheme cluster break
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_next\_character\_break**(*const uint\_least32\_t \*str*, *size\_t len*);
+
+# DESCRIPTION
+
+The
+**grapheme\_next\_character\_break**()
+function computes the offset (in codepoints) to the next grapheme cluster
+break (see
+libgrapheme(7))
+in the codepoint array
+*str*
+of length
+*len*.
+If a grapheme cluster begins at
+*str*
+this offset is equal to the length of said grapheme cluster.
+
+If
+*len*
+is set to
+`SIZE_MAX`
+(stdint.h is already included by grapheme.h) the string
+*str*
+is interpreted to be NUL-terminated and processing stops when
+a codepoint with the value 0 is encountered.
+
+For UTF-8-encoded input
+data
+grapheme\_next\_character\_break\_utf8(3)
+can be used instead.
+
+# RETURN VALUES
+
+The
+**grapheme\_next\_character\_break**()
+function returns the offset (in codepoints) to the next grapheme cluster
+break in
+*str*
+or 0 if
+*str*
+is
+`NULL`.
+
+# SEE ALSO
+
+grapheme\_is\_character\_break(3),
+grapheme\_next\_character\_break\_utf8(3),
+libgrapheme(7)
+
+# STANDARDS
+
+**grapheme\_next\_character\_break**()
+is compliant with the Unicode 15.0.0 specification.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_next_character_break_utf8(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_next_character_break_utf8(3)/index.md
@@ -1,77 +1,108 @@
- GRAPHEME_NEXT_CHARACTER_BREAK_UTF8(3) Library Functions Manual
-
- NAME
- grapheme_next_character_break_utf8 – determine byte-offset to next
- grapheme cluster break
-
- SYNOPSIS
- #include <grapheme.h>
-
- size_t
- grapheme_next_character_break_utf8(const char *str, size_t len);
-
- DESCRIPTION
- The grapheme_next_character_break_utf8() function computes the offset (in
- bytes) to the next grapheme cluster break (see libgrapheme(7)) in the
- UTF-8-encoded string str of length len. If a grapheme cluster begins at
- str this offset is equal to the length of said grapheme cluster.
-
- If len is set to SIZE_MAX (stdint.h is already included by grapheme.h)
- the string str is interpreted to be NUL-terminated and processing stops
- when a NUL-byte is encountered.
-
- For non-UTF-8 input data grapheme_is_character_break(3) and
- grapheme_next_character_break(3) can be used instead.
-
- RETURN VALUES
- The grapheme_next_character_break_utf8() function returns the offset (in
- bytes) to the next grapheme cluster break in str or 0 if str is NULL.
-
- EXAMPLES
- /* cc (-static) -o example example.c -lgrapheme */
- #include <grapheme.h>
- #include <stdint.h>
- #include <stdio.h>
-
- int
- main(void)
- {
- /* UTF-8 encoded input */
- char *s = "T\xC3\xABst \xF0\x9F\x91\xA8\xE2\x80\x8D\xF0"
- "\x9F\x91\xA9\xE2\x80\x8D\xF0\x9F\x91\xA6 \xF0"
- "\x9F\x87\xBA\xF0\x9F\x87\xB8 \xE0\xA4\xA8\xE0"
- "\xA5\x80 \xE0\xAE\xA8\xE0\xAE\xBF!";
- size_t ret, len, off;
-
- printf("Input: \"%s\"\n", s);
-
- /* print each grapheme cluster with byte-length */
- printf("grapheme clusters in NUL-delimited input:\n");
- for (off = 0; s[off] != '\0'; off += ret) {
- ret = grapheme_next_character_break_utf8(s + off, SIZE_MAX);
- printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off, ret);
- }
- printf("\n");
-
- /* do the same, but this time string is length-delimited */
- len = 17;
- printf("grapheme clusters in input delimited to %zu bytes:\n", len);
- for (off = 0; off < len; off += ret) {
- ret = grapheme_next_character_break_utf8(s + off, len - off);
- printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off, ret);
- }
-
- return 0;
- }
-
- SEE ALSO
- grapheme_next_character_break(3), libgrapheme(7)
-
- STANDARDS
- grapheme_next_character_break_utf8() is compliant with the Unicode 15.0.0
- specification.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+GRAPHEME\_NEXT\_CHARACTER\_BREAK\_UTF8(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_next\_character\_break\_utf8** - determine byte-offset to next grapheme cluster break
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_next\_character\_break\_utf8**(*const char \*str*, *size\_t len*);
+
+# DESCRIPTION
+
+The
+**grapheme\_next\_character\_break\_utf8**()
+function computes the offset (in bytes) to the next grapheme cluster
+break (see
+libgrapheme(7))
+in the UTF-8-encoded string
+*str*
+of length
+*len*.
+If a grapheme cluster begins at
+*str*
+this offset is equal to the length of said grapheme cluster.
+
+If
+*len*
+is set to
+`SIZE_MAX`
+(stdint.h is already included by grapheme.h) the string
+*str*
+is interpreted to be NUL-terminated and processing stops when
+a NUL-byte is encountered.
+
+For non-UTF-8 input
+data
+grapheme\_is\_character\_break(3) and
+grapheme\_next\_character\_break(3)
+can be used instead.
+
+# RETURN VALUES
+
+The
+**grapheme\_next\_character\_break\_utf8**()
+function returns the offset (in bytes) to the next grapheme cluster
+break in
+*str*
+or 0 if
+*str*
+is
+`NULL`.
+
+# EXAMPLES
+
+ /* cc (-static) -o example example.c -lgrapheme */
+ #include <grapheme.h>
+ #include <stdint.h>
+ #include <stdio.h>
+
+ int
+ main(void)
+ {
+ /* UTF-8 encoded input */
+ char *s = "T\xC3\xABst \xF0\x9F\x91\xA8\xE2\x80\x8D\xF0"
+ "\x9F\x91\xA9\xE2\x80\x8D\xF0\x9F\x91\xA6 \xF0"
+ "\x9F\x87\xBA\xF0\x9F\x87\xB8 \xE0\xA4\xA8\xE0"
+ "\xA5\x80 \xE0\xAE\xA8\xE0\xAE\xBF!";
+ size_t ret, len, off;
+
+ printf("Input: \"%s\"\n", s);
+
+ /* print each grapheme cluster with byte-length */
+ printf("grapheme clusters in NUL-delimited input:\n");
+ for (off = 0; s[off] != '\0'; off += ret) {
+ ret = grapheme_next_character_break_utf8(s + off, SIZE_MAX);
+ printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off, ret);
+ }
+ printf("\n");
+
+ /* do the same, but this time string is length-delimited */
+ len = 17;
+ printf("grapheme clusters in input delimited to %zu bytes:\n", len);
+ for (off = 0; off < len; off += ret) {
+ ret = grapheme_next_character_break_utf8(s + off, len - off);
+ printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off, ret);
+ }
+
+ return 0;
+ }
+
+# SEE ALSO
+
+grapheme\_next\_character\_break(3),
+libgrapheme(7)
+
+# STANDARDS
+
+**grapheme\_next\_character\_break\_utf8**()
+is compliant with the Unicode 15.0.0 specification.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_next_line_break(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_next_line_break(3)/index.md
@@ -1,39 +1,66 @@
- GRAPHEME_NEXT_LINE_BREAK(3) Library Functions Manual
-
- NAME
- grapheme_next_line_break – determine codepoint-offset to next possible
- line break
-
- SYNOPSIS
- #include <grapheme.h>
-
- size_t
- grapheme_next_line_break(const uint_least32_t *str, size_t len);
-
- DESCRIPTION
- The grapheme_next_line_break() function computes the offset (in
- codepoints) to the next possible line break (see libgrapheme(7)) in the
- codepoint array str of length len.
-
- If len is set to SIZE_MAX (stdint.h is already included by grapheme.h)
- the string str is interpreted to be NUL-terminated and processing stops
- when a codepoint with the value 0 is encountered.
-
- For UTF-8-encoded input data grapheme_next_line_break_utf8(3) can be used
- instead.
-
- RETURN VALUES
- The grapheme_next_line_break() function returns the offset (in
- codepoints) to the next possible line break in str or 0 if str is NULL.
-
- SEE ALSO
- grapheme_next_line_break_utf8(3), libgrapheme(7)
-
- STANDARDS
- grapheme_next_line_break() is compliant with the Unicode 15.0.0
- specification.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+GRAPHEME\_NEXT\_LINE\_BREAK(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_next\_line\_break** - determine codepoint-offset to next possible line break
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_next\_line\_break**(*const uint\_least32\_t \*str*, *size\_t len*);
+
+# DESCRIPTION
+
+The
+**grapheme\_next\_line\_break**()
+function computes the offset (in codepoints) to the next possible line
+break (see
+libgrapheme(7))
+in the codepoint array
+*str*
+of length
+*len*.
+
+If
+*len*
+is set to
+`SIZE_MAX`
+(stdint.h is already included by grapheme.h) the string
+*str*
+is interpreted to be NUL-terminated and processing stops when
+a codepoint with the value 0 is encountered.
+
+For UTF-8-encoded input
+data
+grapheme\_next\_line\_break\_utf8(3)
+can be used instead.
+
+# RETURN VALUES
+
+The
+**grapheme\_next\_line\_break**()
+function returns the offset (in codepoints) to the next possible line
+break in
+*str*
+or 0 if
+*str*
+is
+`NULL`.
+
+# SEE ALSO
+
+grapheme\_next\_line\_break\_utf8(3),
+libgrapheme(7)
+
+# STANDARDS
+
+**grapheme\_next\_line\_break**()
+is compliant with the Unicode 15.0.0 specification.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_next_line_break_utf8(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_next_line_break_utf8(3)/index.md
@@ -1,75 +1,104 @@
- GRAPHEME_NEXT_LINE_BREAK_UTF8(3) Library Functions Manual
-
- NAME
- grapheme_next_line_break_utf8 – determine byte-offset to next possible
- line break
-
- SYNOPSIS
- #include <grapheme.h>
-
- size_t
- grapheme_next_line_break_utf8(const char *str, size_t len);
-
- DESCRIPTION
- The grapheme_next_line_break_utf8() function computes the offset (in
- bytes) to the next possible line break (see libgrapheme(7)) in the
- UTF-8-encoded string str of length len.
-
- If len is set to SIZE_MAX (stdint.h is already included by grapheme.h)
- the string str is interpreted to be NUL-terminated and processing stops
- when a NUL-byte is encountered.
-
- For non-UTF-8 input data grapheme_next_line_break(3) can be used instead.
-
- RETURN VALUES
- The grapheme_next_line_break_utf8() function returns the offset (in
- bytes) to the next possible line break in str or 0 if str is NULL.
-
- EXAMPLES
- /* cc (-static) -o example example.c -lgrapheme */
- #include <grapheme.h>
- #include <stdint.h>
- #include <stdio.h>
-
- int
- main(void)
- {
- /* UTF-8 encoded input */
- char *s = "T\xC3\xABst \xF0\x9F\x91\xA8\xE2\x80\x8D\xF0"
- "\x9F\x91\xA9\xE2\x80\x8D\xF0\x9F\x91\xA6 \xF0"
- "\x9F\x87\xBA\xF0\x9F\x87\xB8 \xE0\xA4\xA8\xE0"
- "\xA5\x80 \xE0\xAE\xA8\xE0\xAE\xBF!";
- size_t ret, len, off;
-
- printf("Input: \"%s\"\n", s);
-
- /* print each possible line with byte-length */
- printf("possible lines in NUL-delimited input:\n");
- for (off = 0; s[off] != '\0'; off += ret) {
- ret = grapheme_next_line_break_utf8(s + off, SIZE_MAX);
- printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off, ret);
- }
- printf("\n");
-
- /* do the same, but this time string is length-delimited */
- len = 17;
- printf("possible lines in input delimited to %zu bytes:\n", len);
- for (off = 0; off < len; off += ret) {
- ret = grapheme_next_line_break_utf8(s + off, len - off);
- printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off, ret);
- }
-
- return 0;
- }
-
- SEE ALSO
- grapheme_next_line_break(3), libgrapheme(7)
-
- STANDARDS
- grapheme_next_line_break_utf8() is compliant with the Unicode 15.0.0
- specification.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+GRAPHEME\_NEXT\_LINE\_BREAK\_UTF8(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_next\_line\_break\_utf8** - determine byte-offset to next possible line break
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_next\_line\_break\_utf8**(*const char \*str*, *size\_t len*);
+
+# DESCRIPTION
+
+The
+**grapheme\_next\_line\_break\_utf8**()
+function computes the offset (in bytes) to the next possible line
+break (see
+libgrapheme(7))
+in the UTF-8-encoded string
+*str*
+of length
+*len*.
+
+If
+*len*
+is set to
+`SIZE_MAX`
+(stdint.h is already included by grapheme.h) the string
+*str*
+is interpreted to be NUL-terminated and processing stops when
+a NUL-byte is encountered.
+
+For non-UTF-8 input
+data
+grapheme\_next\_line\_break(3)
+can be used instead.
+
+# RETURN VALUES
+
+The
+**grapheme\_next\_line\_break\_utf8**()
+function returns the offset (in bytes) to the next possible line
+break in
+*str*
+or 0 if
+*str*
+is
+`NULL`.
+
+# EXAMPLES
+
+ /* cc (-static) -o example example.c -lgrapheme */
+ #include <grapheme.h>
+ #include <stdint.h>
+ #include <stdio.h>
+
+ int
+ main(void)
+ {
+ /* UTF-8 encoded input */
+ char *s = "T\xC3\xABst \xF0\x9F\x91\xA8\xE2\x80\x8D\xF0"
+ "\x9F\x91\xA9\xE2\x80\x8D\xF0\x9F\x91\xA6 \xF0"
+ "\x9F\x87\xBA\xF0\x9F\x87\xB8 \xE0\xA4\xA8\xE0"
+ "\xA5\x80 \xE0\xAE\xA8\xE0\xAE\xBF!";
+ size_t ret, len, off;
+
+ printf("Input: \"%s\"\n", s);
+
+ /* print each possible line with byte-length */
+ printf("possible lines in NUL-delimited input:\n");
+ for (off = 0; s[off] != '\0'; off += ret) {
+ ret = grapheme_next_line_break_utf8(s + off, SIZE_MAX);
+ printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off, ret);
+ }
+ printf("\n");
+
+ /* do the same, but this time string is length-delimited */
+ len = 17;
+ printf("possible lines in input delimited to %zu bytes:\n", len);
+ for (off = 0; off < len; off += ret) {
+ ret = grapheme_next_line_break_utf8(s + off, len - off);
+ printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off, ret);
+ }
+
+ return 0;
+ }
+
+# SEE ALSO
+
+grapheme\_next\_line\_break(3),
+libgrapheme(7)
+
+# STANDARDS
+
+**grapheme\_next\_line\_break\_utf8**()
+is compliant with the Unicode 15.0.0 specification.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_next_sentence_break(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_next_sentence_break(3)/index.md
@@ -1,40 +1,69 @@
- GRAPHEME_NEXT_SENTENCE_BREAK(3) Library Functions Manual
-
- NAME
- grapheme_next_sentence_break – determine codepoint-offset to next
- sentence break
-
- SYNOPSIS
- #include <grapheme.h>
-
- size_t
- grapheme_next_sentence_break(const uint_least32_t *str, size_t len);
-
- DESCRIPTION
- The grapheme_next_sentence_break() function computes the offset (in
- codepoints) to the next sentence break (see libgrapheme(7)) in the
- codepoint array str of length len. If a sentence begins at str this
- offset is equal to the length of said sentence.
-
- If len is set to SIZE_MAX (stdint.h is already included by grapheme.h)
- the string str is interpreted to be NUL-terminated and processing stops
- when a codepoint with the value 0 is encountered.
-
- For UTF-8-encoded input data grapheme_next_sentence_break_utf8(3) can be
- used instead.
-
- RETURN VALUES
- The grapheme_next_sentence_break() function returns the offset (in
- codepoints) to the next sentence break in str or 0 if str is NULL.
-
- SEE ALSO
- grapheme_next_sentence_break_utf8(3), libgrapheme(7)
-
- STANDARDS
- grapheme_next_sentence_break() is compliant with the Unicode 15.0.0
- specification.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+GRAPHEME\_NEXT\_SENTENCE\_BREAK(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_next\_sentence\_break** - determine codepoint-offset to next sentence break
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_next\_sentence\_break**(*const uint\_least32\_t \*str*, *size\_t len*);
+
+# DESCRIPTION
+
+The
+**grapheme\_next\_sentence\_break**()
+function computes the offset (in codepoints) to the next sentence
+break (see
+libgrapheme(7))
+in the codepoint array
+*str*
+of length
+*len*.
+If a sentence begins at
+*str*
+this offset is equal to the length of said sentence.
+
+If
+*len*
+is set to
+`SIZE_MAX`
+(stdint.h is already included by grapheme.h) the string
+*str*
+is interpreted to be NUL-terminated and processing stops when
+a codepoint with the value 0 is encountered.
+
+For UTF-8-encoded input
+data
+grapheme\_next\_sentence\_break\_utf8(3)
+can be used instead.
+
+# RETURN VALUES
+
+The
+**grapheme\_next\_sentence\_break**()
+function returns the offset (in codepoints) to the next sentence
+break in
+*str*
+or 0 if
+*str*
+is
+`NULL`.
+
+# SEE ALSO
+
+grapheme\_next\_sentence\_break\_utf8(3),
+libgrapheme(7)
+
+# STANDARDS
+
+**grapheme\_next\_sentence\_break**()
+is compliant with the Unicode 15.0.0 specification.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_next_sentence_break_utf8(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_next_sentence_break_utf8(3)/index.md
@@ -1,77 +1,107 @@
- GRAPHEME_NEXT_SENTENCE_BREAK_UTF8(3) Library Functions Manual
-
- NAME
- grapheme_next_sentence_break_utf8 – determine byte-offset to next
- sentence break
-
- SYNOPSIS
- #include <grapheme.h>
-
- size_t
- grapheme_next_sentence_break_utf8(const char *str, size_t len);
-
- DESCRIPTION
- The grapheme_next_sentence_break_utf8() function computes the offset (in
- bytes) to the next sentence break (see libgrapheme(7)) in the
- UTF-8-encoded string str of length len. If a sentence begins at str this
- offset is equal to the length of said sentence.
-
- If len is set to SIZE_MAX (stdint.h is already included by grapheme.h)
- the string str is interpreted to be NUL-terminated and processing stops
- when a NUL-byte is encountered.
-
- For non-UTF-8 input data grapheme_next_sentence_break(3) can be used
- instead.
-
- RETURN VALUES
- The grapheme_next_sentence_break_utf8() function returns the offset (in
- bytes) to the next sentence break in str or 0 if str is NULL.
-
- EXAMPLES
- /* cc (-static) -o example example.c -lgrapheme */
- #include <grapheme.h>
- #include <stdint.h>
- #include <stdio.h>
-
- int
- main(void)
- {
- /* UTF-8 encoded input */
- char *s = "T\xC3\xABst \xF0\x9F\x91\xA8\xE2\x80\x8D\xF0"
- "\x9F\x91\xA9\xE2\x80\x8D\xF0\x9F\x91\xA6 \xF0"
- "\x9F\x87\xBA\xF0\x9F\x87\xB8 \xE0\xA4\xA8\xE0"
- "\xA5\x80 \xE0\xAE\xA8\xE0\xAE\xBF!";
- size_t ret, len, off;
-
- printf("Input: \"%s\"\n", s);
-
- /* print each sentence with byte-length */
- printf("sentences in NUL-delimited input:\n");
- for (off = 0; s[off] != '\0'; off += ret) {
- ret = grapheme_next_sentence_break_utf8(s + off, SIZE_MAX);
- printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off, ret);
- }
- printf("\n");
-
- /* do the same, but this time string is length-delimited */
- len = 17;
- printf("sentences in input delimited to %zu bytes:\n", len);
- for (off = 0; off < len; off += ret) {
- ret = grapheme_next_sentence_break_utf8(s + off, len - off);
- printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off, ret);
- }
-
- return 0;
- }
-
- SEE ALSO
- grapheme_next_sentence_break(3), libgrapheme(7)
-
- STANDARDS
- grapheme_next_sentence_break_utf8() is compliant with the Unicode 15.0.0
- specification.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+GRAPHEME\_NEXT\_SENTENCE\_BREAK\_UTF8(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_next\_sentence\_break\_utf8** - determine byte-offset to next sentence break
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_next\_sentence\_break\_utf8**(*const char \*str*, *size\_t len*);
+
+# DESCRIPTION
+
+The
+**grapheme\_next\_sentence\_break\_utf8**()
+function computes the offset (in bytes) to the next sentence
+break (see
+libgrapheme(7))
+in the UTF-8-encoded string
+*str*
+of length
+*len*.
+If a sentence begins at
+*str*
+this offset is equal to the length of said sentence.
+
+If
+*len*
+is set to
+`SIZE_MAX`
+(stdint.h is already included by grapheme.h) the string
+*str*
+is interpreted to be NUL-terminated and processing stops when
+a NUL-byte is encountered.
+
+For non-UTF-8 input
+data
+grapheme\_next\_sentence\_break(3)
+can be used instead.
+
+# RETURN VALUES
+
+The
+**grapheme\_next\_sentence\_break\_utf8**()
+function returns the offset (in bytes) to the next sentence
+break in
+*str*
+or 0 if
+*str*
+is
+`NULL`.
+
+# EXAMPLES
+
+ /* cc (-static) -o example example.c -lgrapheme */
+ #include <grapheme.h>
+ #include <stdint.h>
+ #include <stdio.h>
+
+ int
+ main(void)
+ {
+ /* UTF-8 encoded input */
+ char *s = "T\xC3\xABst \xF0\x9F\x91\xA8\xE2\x80\x8D\xF0"
+ "\x9F\x91\xA9\xE2\x80\x8D\xF0\x9F\x91\xA6 \xF0"
+ "\x9F\x87\xBA\xF0\x9F\x87\xB8 \xE0\xA4\xA8\xE0"
+ "\xA5\x80 \xE0\xAE\xA8\xE0\xAE\xBF!";
+ size_t ret, len, off;
+
+ printf("Input: \"%s\"\n", s);
+
+ /* print each sentence with byte-length */
+ printf("sentences in NUL-delimited input:\n");
+ for (off = 0; s[off] != '\0'; off += ret) {
+ ret = grapheme_next_sentence_break_utf8(s + off, SIZE_MAX);
+ printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off, ret);
+ }
+ printf("\n");
+
+ /* do the same, but this time string is length-delimited */
+ len = 17;
+ printf("sentences in input delimited to %zu bytes:\n", len);
+ for (off = 0; off < len; off += ret) {
+ ret = grapheme_next_sentence_break_utf8(s + off, len - off);
+ printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off, ret);
+ }
+
+ return 0;
+ }
+
+# SEE ALSO
+
+grapheme\_next\_sentence\_break(3),
+libgrapheme(7)
+
+# STANDARDS
+
+**grapheme\_next\_sentence\_break\_utf8**()
+is compliant with the Unicode 15.0.0 specification.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_next_word_break(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_next_word_break(3)/index.md
@@ -1,39 +1,69 @@
- GRAPHEME_NEXT_WORD_BREAK(3) Library Functions Manual
-
- NAME
- grapheme_next_word_break – determine codepoint-offset to next word break
-
- SYNOPSIS
- #include <grapheme.h>
-
- size_t
- grapheme_next_word_break(const uint_least32_t *str, size_t len);
-
- DESCRIPTION
- The grapheme_next_word_break() function computes the offset (in
- codepoints) to the next word break (see libgrapheme(7)) in the codepoint
- array str of length len. If a word begins at str this offset is equal to
- the length of said word.
-
- If len is set to SIZE_MAX (stdint.h is already included by grapheme.h)
- the string str is interpreted to be NUL-terminated and processing stops
- when a codepoint with the value 0 is encountered.
-
- For UTF-8-encoded input data grapheme_next_word_break_utf8(3) can be used
- instead.
-
- RETURN VALUES
- The grapheme_next_word_break() function returns the offset (in
- codepoints) to the next word break in str or 0 if str is NULL.
-
- SEE ALSO
- grapheme_next_word_break_utf8(3), libgrapheme(7)
-
- STANDARDS
- grapheme_next_word_break() is compliant with the Unicode 15.0.0
- specification.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+GRAPHEME\_NEXT\_WORD\_BREAK(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_next\_word\_break** - determine codepoint-offset to next word break
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_next\_word\_break**(*const uint\_least32\_t \*str*, *size\_t len*);
+
+# DESCRIPTION
+
+The
+**grapheme\_next\_word\_break**()
+function computes the offset (in codepoints) to the next word
+break (see
+libgrapheme(7))
+in the codepoint array
+*str*
+of length
+*len*.
+If a word begins at
+*str*
+this offset is equal to the length of said word.
+
+If
+*len*
+is set to
+`SIZE_MAX`
+(stdint.h is already included by grapheme.h) the string
+*str*
+is interpreted to be NUL-terminated and processing stops when
+a codepoint with the value 0 is encountered.
+
+For UTF-8-encoded input
+data
+grapheme\_next\_word\_break\_utf8(3)
+can be used instead.
+
+# RETURN VALUES
+
+The
+**grapheme\_next\_word\_break**()
+function returns the offset (in codepoints) to the next word
+break in
+*str*
+or 0 if
+*str*
+is
+`NULL`.
+
+# SEE ALSO
+
+grapheme\_next\_word\_break\_utf8(3),
+libgrapheme(7)
+
+# STANDARDS
+
+**grapheme\_next\_word\_break**()
+is compliant with the Unicode 15.0.0 specification.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_next_word_break_utf8(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_next_word_break_utf8(3)/index.md
@@ -1,75 +1,107 @@
- GRAPHEME_NEXT_WORD_BREAK_UTF8(3) Library Functions Manual
-
- NAME
- grapheme_next_word_break_utf8 – determine byte-offset to next word break
-
- SYNOPSIS
- #include <grapheme.h>
-
- size_t
- grapheme_next_word_break_utf8(const char *str, size_t len);
-
- DESCRIPTION
- The grapheme_next_word_break_utf8() function computes the offset (in
- bytes) to the next word break (see libgrapheme(7)) in the UTF-8-encoded
- string str of length len. If a word begins at str this offset is equal
- to the length of said word.
-
- If len is set to SIZE_MAX (stdint.h is already included by grapheme.h)
- the string str is interpreted to be NUL-terminated and processing stops
- when a NUL-byte is encountered.
-
- For non-UTF-8 input data grapheme_next_word_break(3) can be used instead.
-
- RETURN VALUES
- The grapheme_next_word_break_utf8() function returns the offset (in
- bytes) to the next word break in str or 0 if str is NULL.
-
- EXAMPLES
- /* cc (-static) -o example example.c -lgrapheme */
- #include <grapheme.h>
- #include <stdint.h>
- #include <stdio.h>
-
- int
- main(void)
- {
- /* UTF-8 encoded input */
- char *s = "T\xC3\xABst \xF0\x9F\x91\xA8\xE2\x80\x8D\xF0"
- "\x9F\x91\xA9\xE2\x80\x8D\xF0\x9F\x91\xA6 \xF0"
- "\x9F\x87\xBA\xF0\x9F\x87\xB8 \xE0\xA4\xA8\xE0"
- "\xA5\x80 \xE0\xAE\xA8\xE0\xAE\xBF!";
- size_t ret, len, off;
-
- printf("Input: \"%s\"\n", s);
-
- /* print each word with byte-length */
- printf("words in NUL-delimited input:\n");
- for (off = 0; s[off] != '\0'; off += ret) {
- ret = grapheme_next_word_break_utf8(s + off, SIZE_MAX);
- printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off, ret);
- }
- printf("\n");
-
- /* do the same, but this time string is length-delimited */
- len = 17;
- printf("words in input delimited to %zu bytes:\n", len);
- for (off = 0; off < len; off += ret) {
- ret = grapheme_next_word_break_utf8(s + off, len - off);
- printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off, ret);
- }
-
- return 0;
- }
-
- SEE ALSO
- grapheme_next_word_break(3), libgrapheme(7)
-
- STANDARDS
- grapheme_next_word_break_utf8() is compliant with the Unicode 15.0.0
- specification.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+GRAPHEME\_NEXT\_WORD\_BREAK\_UTF8(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_next\_word\_break\_utf8** - determine byte-offset to next word break
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_next\_word\_break\_utf8**(*const char \*str*, *size\_t len*);
+
+# DESCRIPTION
+
+The
+**grapheme\_next\_word\_break\_utf8**()
+function computes the offset (in bytes) to the next word
+break (see
+libgrapheme(7))
+in the UTF-8-encoded string
+*str*
+of length
+*len*.
+If a word begins at
+*str*
+this offset is equal to the length of said word.
+
+If
+*len*
+is set to
+`SIZE_MAX`
+(stdint.h is already included by grapheme.h) the string
+*str*
+is interpreted to be NUL-terminated and processing stops when
+a NUL-byte is encountered.
+
+For non-UTF-8 input
+data
+grapheme\_next\_word\_break(3)
+can be used instead.
+
+# RETURN VALUES
+
+The
+**grapheme\_next\_word\_break\_utf8**()
+function returns the offset (in bytes) to the next word
+break in
+*str*
+or 0 if
+*str*
+is
+`NULL`.
+
+# EXAMPLES
+
+ /* cc (-static) -o example example.c -lgrapheme */
+ #include <grapheme.h>
+ #include <stdint.h>
+ #include <stdio.h>
+
+ int
+ main(void)
+ {
+ /* UTF-8 encoded input */
+ char *s = "T\xC3\xABst \xF0\x9F\x91\xA8\xE2\x80\x8D\xF0"
+ "\x9F\x91\xA9\xE2\x80\x8D\xF0\x9F\x91\xA6 \xF0"
+ "\x9F\x87\xBA\xF0\x9F\x87\xB8 \xE0\xA4\xA8\xE0"
+ "\xA5\x80 \xE0\xAE\xA8\xE0\xAE\xBF!";
+ size_t ret, len, off;
+
+ printf("Input: \"%s\"\n", s);
+
+ /* print each word with byte-length */
+ printf("words in NUL-delimited input:\n");
+ for (off = 0; s[off] != '\0'; off += ret) {
+ ret = grapheme_next_word_break_utf8(s + off, SIZE_MAX);
+ printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off, ret);
+ }
+ printf("\n");
+
+ /* do the same, but this time string is length-delimited */
+ len = 17;
+ printf("words in input delimited to %zu bytes:\n", len);
+ for (off = 0; off < len; off += ret) {
+ ret = grapheme_next_word_break_utf8(s + off, len - off);
+ printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off, ret);
+ }
+
+ return 0;
+ }
+
+# SEE ALSO
+
+grapheme\_next\_word\_break(3),
+libgrapheme(7)
+
+# STANDARDS
+
+**grapheme\_next\_word\_break\_utf8**()
+is compliant with the Unicode 15.0.0 specification.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_to_lowercase(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_to_lowercase(3)/index.md
@@ -1,40 +1,70 @@
- GRAPHEME_TO_LOWERCASE(3) Library Functions Manual GRAPHEME_TO_LOWERCASE(3)
-
- NAME
- grapheme_to_lowercase – convert codepoint array to lowercase
-
- SYNOPSIS
- #include <grapheme.h>
-
- size_t
- grapheme_to_lowercase(const uint_least32_t *src, size_t srclen,
- uint_least32_t *dest, size_t destlen);
-
- DESCRIPTION
- The grapheme_to_lowercase() function converts the codepoint array str to
- lowercase and writes the result to dest up to destlen, unless dest is set
- to NULL.
-
- If srclen is set to SIZE_MAX (stdint.h is already included by grapheme.h)
- the codepoint array src is interpreted to be NUL-terminated and
- processing stops when a NUL-byte is encountered.
-
- For UTF-8-encoded input data grapheme_to_lowercase_utf8(3) can be used
- instead.
-
- RETURN VALUES
- The grapheme_to_lowercase() function returns the number of codepoints in
- the array resulting from converting src to lowercase, even if destlen is
- not large enough or dest is NULL.
-
- SEE ALSO
- grapheme_to_lowercase_utf8(3), libgrapheme(7)
-
- STANDARDS
- grapheme_to_lowercase() is compliant with the Unicode 15.0.0
- specification.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+GRAPHEME\_TO\_LOWERCASE(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_to\_lowercase** - convert codepoint array to lowercase
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_to\_lowercase**(*const uint\_least32\_t \*src*, *size\_t srclen*, *uint\_least32\_t \*dest*, *size\_t destlen*);
+
+# DESCRIPTION
+
+The
+**grapheme\_to\_lowercase**()
+function converts the codepoint array
+*src*
+to lowercase and writes the result to
+*dest*
+up to
+*destlen*,
+unless
+*dest*
+is set to
+`NULL`.
+
+If
+*srclen*
+is set to
+`SIZE_MAX`
+(stdint.h is already included by grapheme.h) the codepoint array
+*src*
+is interpreted to be NUL-terminated and processing stops when a
+codepoint with the value 0 is encountered.
+
+For UTF-8-encoded input data
+grapheme\_to\_lowercase\_utf8(3)
+can be used instead.
+
+# RETURN VALUES
+
+The
+**grapheme\_to\_lowercase**()
+function returns the number of codepoints in the array resulting
+from converting
+*src*
+to lowercase, even if
+*destlen*
+is not large enough or
+*dest*
+is
+`NULL`.
+
+# SEE ALSO
+
+grapheme\_to\_lowercase\_utf8(3),
+libgrapheme(7)
+
+# STANDARDS
+
+**grapheme\_to\_lowercase**()
+is compliant with the Unicode 15.0.0 specification.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_to_lowercase_utf8(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_to_lowercase_utf8(3)/index.md
@@ -1,39 +1,70 @@
- GRAPHEME_TO_LOWERCASE_UTF8(3) Library Functions Manual
-
- NAME
- grapheme_to_lowercase_utf8 – convert UTF-8-encoded string to lowercase
-
- SYNOPSIS
- #include <grapheme.h>
-
- size_t
- grapheme_to_lowercase_utf8(const char *src, size_t srclen, char *dest,
- size_t destlen);
-
- DESCRIPTION
- The grapheme_to_lowercase_utf8() function converts the UTF-8-encoded
- string str to lowercase and writes the result to dest up to destlen,
- unless dest is set to NULL.
-
- If srclen is set to SIZE_MAX (stdint.h is already included by grapheme.h)
- the UTF-8-encoded string src is interpreted to be NUL-terminated and
- processing stops when a NUL-byte is encountered.
-
- For non-UTF-8 input data grapheme_to_lowercase(3) can be used instead.
-
- RETURN VALUES
- The grapheme_to_lowercase_utf8() function returns the number of bytes in
- the array resulting from converting src to lowercase, even if destlen is
- not large enough or dest is NULL.
-
- SEE ALSO
- grapheme_to_lowercase(3), libgrapheme(7)
-
- STANDARDS
- grapheme_to_lowercase_utf8() is compliant with the Unicode 15.0.0
- specification.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+GRAPHEME\_TO\_LOWERCASE\_UTF8(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_to\_lowercase\_utf8** - convert UTF-8-encoded string to lowercase
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_to\_lowercase\_utf8**(*const char \*src*, *size\_t srclen*, *char \*dest*, *size\_t destlen*);
+
+# DESCRIPTION
+
+The
+**grapheme\_to\_lowercase\_utf8**()
+function converts the UTF-8-encoded string
+*src*
+to lowercase and writes the result to
+*dest*
+up to
+*destlen*,
+unless
+*dest*
+is set to
+`NULL`.
+
+If
+*srclen*
+is set to
+`SIZE_MAX`
+(stdint.h is already included by grapheme.h) the UTF-8-encoded string
+*src*
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+
+For non-UTF-8 input data
+grapheme\_to\_lowercase(3)
+can be used instead.
+
+# RETURN VALUES
+
+The
+**grapheme\_to\_lowercase\_utf8**()
+function returns the number of bytes in the array resulting
+from converting
+*src*
+to lowercase, even if
+*destlen*
+is not large enough or
+*dest*
+is
+`NULL`.
+
+# SEE ALSO
+
+grapheme\_to\_lowercase(3),
+libgrapheme(7)
+
+# STANDARDS
+
+**grapheme\_to\_lowercase\_utf8**()
+is compliant with the Unicode 15.0.0 specification.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_to_titlecase(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_to_titlecase(3)/index.md
@@ -1,40 +1,70 @@
- GRAPHEME_TO_TITLECASE(3) Library Functions Manual GRAPHEME_TO_TITLECASE(3)
-
- NAME
- grapheme_to_titlecase – convert codepoint array to titlecase
-
- SYNOPSIS
- #include <grapheme.h>
-
- size_t
- grapheme_to_titlecase(const uint_least32_t *src, size_t srclen,
- uint_least32_t *dest, size_t destlen);
-
- DESCRIPTION
- The grapheme_to_titlecase() function converts the codepoint array str to
- titlecase and writes the result to dest up to destlen, unless dest is set
- to NULL.
-
- If srclen is set to SIZE_MAX (stdint.h is already included by grapheme.h)
- the codepoint array src is interpreted to be NUL-terminated and
- processing stops when a NUL-byte is encountered.
-
- For UTF-8-encoded input data grapheme_to_titlecase_utf8(3) can be used
- instead.
-
- RETURN VALUES
- The grapheme_to_titlecase() function returns the number of codepoints in
- the array resulting from converting src to titlecase, even if destlen is
- not large enough or dest is NULL.
-
- SEE ALSO
- grapheme_to_titlecase_utf8(3), libgrapheme(7)
-
- STANDARDS
- grapheme_to_titlecase() is compliant with the Unicode 15.0.0
- specification.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+GRAPHEME\_TO\_TITLECASE(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_to\_titlecase** - convert codepoint array to titlecase
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_to\_titlecase**(*const uint\_least32\_t \*src*, *size\_t srclen*, *uint\_least32\_t \*dest*, *size\_t destlen*);
+
+# DESCRIPTION
+
+The
+**grapheme\_to\_titlecase**()
+function converts the codepoint array
+*src*
+to titlecase and writes the result to
+*dest*
+up to
+*destlen*,
+unless
+*dest*
+is set to
+`NULL`.
+
+If
+*srclen*
+is set to
+`SIZE_MAX`
+(stdint.h is already included by grapheme.h) the codepoint array
+*src*
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+
+For UTF-8-encoded input data
+grapheme\_to\_titlecase\_utf8(3)
+can be used instead.
+
+# RETURN VALUES
+
+The
+**grapheme\_to\_titlecase**()
+function returns the number of codepoints in the array resulting
+from converting
+*src*
+to titlecase, even if
+*destlen*
+is not large enough or
+*dest*
+is
+`NULL`.
+
+# SEE ALSO
+
+grapheme\_to\_titlecase\_utf8(3),
+libgrapheme(7)
+
+# STANDARDS
+
+**grapheme\_to\_titlecase**()
+is compliant with the Unicode 15.0.0 specification.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_to_titlecase_utf8(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_to_titlecase_utf8(3)/index.md
@@ -1,39 +1,70 @@
- GRAPHEME_TO_TITLECASE_UTF8(3) Library Functions Manual
-
- NAME
- grapheme_to_titlecase_utf8 – convert UTF-8-encoded string to titlecase
-
- SYNOPSIS
- #include <grapheme.h>
-
- size_t
- grapheme_to_titlecase_utf8(const char *src, size_t srclen, char *dest,
- size_t destlen);
-
- DESCRIPTION
- The grapheme_to_titlecase_utf8() function converts the UTF-8-encoded
- string str to titlecase and writes the result to dest up to destlen,
- unless dest is set to NULL.
-
- If srclen is set to SIZE_MAX (stdint.h is already included by grapheme.h)
- the UTF-8-encoded string src is interpreted to be NUL-terminated and
- processing stops when a NUL-byte is encountered.
-
- For non-UTF-8 input data grapheme_to_titlecase(3) can be used instead.
-
- RETURN VALUES
- The grapheme_to_titlecase_utf8() function returns the number of bytes in
- the array resulting from converting src to titlecase, even if destlen is
- not large enough or dest is NULL.
-
- SEE ALSO
- grapheme_to_titlecase(3), libgrapheme(7)
-
- STANDARDS
- grapheme_to_titlecase_utf8() is compliant with the Unicode 15.0.0
- specification.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+GRAPHEME\_TO\_TITLECASE\_UTF8(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_to\_titlecase\_utf8** - convert UTF-8-encoded string to titlecase
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_to\_titlecase\_utf8**(*const char \*src*, *size\_t srclen*, *char \*dest*, *size\_t destlen*);
+
+# DESCRIPTION
+
+The
+**grapheme\_to\_titlecase\_utf8**()
+function converts the UTF-8-encoded string
+*src*
+to titlecase and writes the result to
+*dest*
+up to
+*destlen*,
+unless
+*dest*
+is set to
+`NULL`.
+
+If
+*srclen*
+is set to
+`SIZE_MAX`
+(stdint.h is already included by grapheme.h) the UTF-8-encoded string
+*src*
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+
+For non-UTF-8 input data
+grapheme\_to\_titlecase(3)
+can be used instead.
+
+# RETURN VALUES
+
+The
+**grapheme\_to\_titlecase\_utf8**()
+function returns the number of bytes in the array resulting
+from converting
+*src*
+to titlecase, even if
+*destlen*
+is not large enough or
+*dest*
+is
+`NULL`.
+
+# SEE ALSO
+
+grapheme\_to\_titlecase(3),
+libgrapheme(7)
+
+# STANDARDS
+
+**grapheme\_to\_titlecase\_utf8**()
+is compliant with the Unicode 15.0.0 specification.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_to_uppercase(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_to_uppercase(3)/index.md
@@ -1,40 +1,70 @@
- GRAPHEME_TO_UPPERCASE(3) Library Functions Manual GRAPHEME_TO_UPPERCASE(3)
-
- NAME
- grapheme_to_uppercase – convert codepoint array to uppercase
-
- SYNOPSIS
- #include <grapheme.h>
-
- size_t
- grapheme_to_uppercase(const uint_least32_t *src, size_t srclen,
- uint_least32_t *dest, size_t destlen);
-
- DESCRIPTION
- The grapheme_to_uppercase() function converts the codepoint array str to
- uppercase and writes the result to dest up to destlen, unless dest is set
- to NULL.
-
- If srclen is set to SIZE_MAX (stdint.h is already included by grapheme.h)
- the codepoint array src is interpreted to be NUL-terminated and
- processing stops when a NUL-byte is encountered.
-
- For UTF-8-encoded input data grapheme_to_uppercase_utf8(3) can be used
- instead.
-
- RETURN VALUES
- The grapheme_to_uppercase() function returns the number of codepoints in
- the array resulting from converting src to uppercase, even if destlen is
- not large enough or dest is NULL.
-
- SEE ALSO
- grapheme_to_uppercase_utf8(3), libgrapheme(7)
-
- STANDARDS
- grapheme_to_uppercase() is compliant with the Unicode 15.0.0
- specification.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+GRAPHEME\_TO\_UPPERCASE(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_to\_uppercase** - convert codepoint array to uppercase
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_to\_uppercase**(*const uint\_least32\_t \*src*, *size\_t srclen*, *uint\_least32\_t \*dest*, *size\_t destlen*);
+
+# DESCRIPTION
+
+The
+**grapheme\_to\_uppercase**()
+function converts the codepoint array
+*src*
+to uppercase and writes the result to
+*dest*
+up to
+*destlen*,
+unless
+*dest*
+is set to
+`NULL`.
+
+If
+*srclen*
+is set to
+`SIZE_MAX`
+(stdint.h is already included by grapheme.h) the codepoint array
+*src*
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+
+For UTF-8-encoded input data
+grapheme\_to\_uppercase\_utf8(3)
+can be used instead.
+
+# RETURN VALUES
+
+The
+**grapheme\_to\_uppercase**()
+function returns the number of codepoints in the array resulting
+from converting
+*src*
+to uppercase, even if
+*destlen*
+is not large enough or
+*dest*
+is
+`NULL`.
+
+# SEE ALSO
+
+grapheme\_to\_uppercase\_utf8(3),
+libgrapheme(7)
+
+# STANDARDS
+
+**grapheme\_to\_uppercase**()
+is compliant with the Unicode 15.0.0 specification.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/grapheme_to_uppercase_utf8(3)/index.md b/libs.suckless.org/libgrapheme/man/grapheme_to_uppercase_utf8(3)/index.md
@@ -1,39 +1,70 @@
- GRAPHEME_TO_LOWERCASE_UTF8(3) Library Functions Manual
-
- NAME
- grapheme_to_lowercase_utf8 – convert UTF-8-encoded string to lowercase
-
- SYNOPSIS
- #include <grapheme.h>
-
- size_t
- grapheme_to_lowercase_utf8(const char *src, size_t srclen, char *dest,
- size_t destlen);
-
- DESCRIPTION
- The grapheme_to_lowercase_utf8() function converts the UTF-8-encoded
- string str to lowercase and writes the result to dest up to destlen,
- unless dest is set to NULL.
-
- If srclen is set to SIZE_MAX (stdint.h is already included by grapheme.h)
- the UTF-8-encoded string src is interpreted to be NUL-terminated and
- processing stops when a NUL-byte is encountered.
-
- For non-UTF-8 input data grapheme_to_lowercase(3) can be used instead.
-
- RETURN VALUES
- The grapheme_to_lowercase_utf8() function returns the number of bytes in
- the array resulting from converting src to lowercase, even if destlen is
- not large enough or dest is NULL.
-
- SEE ALSO
- grapheme_to_lowercase(3), libgrapheme(7)
-
- STANDARDS
- grapheme_to_lowercase_utf8() is compliant with the Unicode 15.0.0
- specification.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+GRAPHEME\_TO\_UPPERCASE\_UTF8(3) - Library Functions Manual
+
+# NAME
+
+**grapheme\_to\_uppercase\_utf8** - convert UTF-8-encoded string to uppercase
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+*size\_t*
+**grapheme\_to\_uppercase\_utf8**(*const char \*src*, *size\_t srclen*, *char \*dest*, *size\_t destlen*);
+
+# DESCRIPTION
+
+The
+**grapheme\_to\_uppercase\_utf8**()
+function converts the UTF-8-encoded string
+*src*
+to uppercase and writes the result to
+*dest*
+up to
+*destlen*,
+unless
+*dest*
+is set to
+`NULL`.
+
+If
+*srclen*
+is set to
+`SIZE_MAX`
+(stdint.h is already included by grapheme.h) the UTF-8-encoded string
+*src*
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+
+For non-UTF-8 input data
+grapheme\_to\_uppercase(3)
+can be used instead.
+
+# RETURN VALUES
+
+The
+**grapheme\_to\_uppercase\_utf8**()
+function returns the number of bytes in the array resulting
+from converting
+*src*
+to uppercase, even if
+*destlen*
+is not large enough or
+*dest*
+is
+`NULL`.
+
+# SEE ALSO
+
+grapheme\_to\_uppercase(3),
+libgrapheme(7)
+
+# STANDARDS
+
+**grapheme\_to\_uppercase\_utf8**()
+is compliant with the Unicode 15.0.0 specification.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06
diff --git a/libs.suckless.org/libgrapheme/man/libgrapheme(7)/index.md b/libs.suckless.org/libgrapheme/man/libgrapheme(7)/index.md
@@ -1,122 +1,178 @@
- LIBGRAPHEME(7) Miscellaneous Information Manual LIBGRAPHEME(7)
-
- NAME
- libgrapheme – unicode string library
-
- SYNOPSIS
- #include <grapheme.h>
-
- DESCRIPTION
- The libgrapheme library provides functions to properly handle Unicode
- strings according to the Unicode specification in regard to character,
- word, sentence and line segmentation and case detection and conversion.
-
- Unicode strings are made up of user-perceived characters (so-called
- “grapheme clusters”, see MOTIVATION) that are composed of one or more
- Unicode codepoints, which in turn are encoded in one or more bytes in an
- encoding like UTF-8.
-
- There is a widespread misconception that it was enough to simply
- determine codepoints in a string and treat them as user-perceived
- characters to be Unicode compliant. While this may work in some cases,
- this assumption quickly breaks, especially for non-Western languages and
- decomposed Unicode strings where user-perceived characters are usually
- represented using multiple codepoints.
-
- Despite this complicated multilevel structure of Unicode strings,
- libgrapheme provides methods to work with them at the byte-level (i.e.
- UTF-8 ‘char’ arrays) while also offering codepoint-level methods.
- Additionally, it is a “freestanding” library (see ISO/IEC 9899:1999
- section 4.6) and thus does not depend on a standard library. This makes
- it easy to use in bare metal environments.
-
- Every documented function's manual page provides a self-contained example
- illustrating the possible usage.
-
- SEE ALSO
- grapheme_decode_utf8(3), grapheme_encode_utf8(3),
- grapheme_is_character_break(3), grapheme_is_lowercase(3),
- grapheme_is_lowercase_utf8(3), grapheme_is_titlecase(3),
- grapheme_is_titlecase_utf8(3), grapheme_is_uppercase(3),
- grapheme_is_uppercase_utf8(3), grapheme_next_character_break(3),
- grapheme_next_character_break_utf8(3), grapheme_next_line_break(3),
- grapheme_next_line_break_utf8(3), grapheme_next_sentence_break(3),
- grapheme_next_sentence_break_utf8(3), grapheme_next_word_break(3),
- grapheme_next_word_break_utf8(3), grapheme_to_lowercase(3),
- grapheme_to_lowercase_utf8(3), grapheme_to_titlecase(3),
- grapheme_to_titlecase_utf8(3) grapheme_to_uppercase(3),
- grapheme_to_uppercase_utf8(3),
-
- STANDARDS
- libgrapheme is compliant with the Unicode 15.0.0 specification.
-
- MOTIVATION
- The idea behind every character encoding scheme like ASCII or Unicode is
- to express abstract characters (which can be thought of as shapes making
- up a written language). ASCII for instance, which comprises the range 0
- to 127, assigns the number 65 (0x41) to the abstract character ‘A’. This
- number is called a “codepoint”, and all codepoints of an encoding make up
- its so-called “code space”.
-
- Unicode's code space is much larger, ranging from 0 to 0x10FFFF, but its
- first 128 codepoints are identical to ASCII's. The additional code points
- are needed as Unicode's goal is to express all writing systems of the
- world. To give an example, the abstract character ‘Ä’ is not expressable
- in ASCII, given no ASCII codepoint has been assigned to it. It can be
- expressed in Unicode, though, with the codepoint 196 (0xC4).
-
- One may assume that this process is straightfoward, but as more and more
- codepoints were assigned to abstract characters, the Unicode Consortium
- (that defines the Unicode standard) was facing a problem: Many (mostly
- non-European) languages have such a large amount of abstract characters
- that it would exhaust the available Unicode code space if one tried to
- assign a codepoint to each abstract character. The solution to that
- problem is best introduced with an example: Consider the abstract
- character ‘Ǟ’, which is ‘A’ with an umlaut and a macron added to it. In
- this sense, one can consider ‘Ǟ’ as a two-fold modification (namely “add
- umlaut” and “add macron”) of the “base character” ‘A’.
-
- The Unicode Consortium adapted this idea by assigning codepoints to
- modifications. For example, the codepoint 0x308 represents adding an
- umlaut and 0x304 represents adding a macron, and thus, the codepoint
- sequence “0x41 0x308 0x304”, namely the base character ‘A’ followed by
- the umlaut and macron modifiers, represents the abstract character ‘Ǟ’.
- As a side-note, the single codepoint 0x1DE was also assigned to ‘Ǟ’,
- which is a good example for the fact that there can be multiple
- representations of a single abstract character in Unicode.
-
- Expressing a single abstract character with multiple codepoints solved
- the code space exhaustion-problem, and the concept has been greatly
- expanded since its first introduction (emojis, joiners, etc.). A sequence
- (which can also have the length 1) of codepoints that belong together
- this way and represents an abstract character is called a “grapheme
- cluster”.
-
- In many applications it is necessary to count the number of user-
- perceived characters, i.e. grapheme clusters, in a string. A good
- example for this is a terminal text editor, which needs to properly align
- characters on a grid. This is pretty simple with ASCII-strings, where
- you just count the number of bytes (as each byte is a codepoint and each
- codepoint is a grapheme cluster). With Unicode-strings, it is a common
- mistake to simply adapt the ASCII-approach and count the number of code
- points. This is wrong, as, for example, the sequence “0x41 0x308 0x304”,
- while made up of 3 codepoints, is a single grapheme cluster and
- represents the user-perceived character ‘Ǟ’.
-
- The proper way to segment a string into user-perceived characters is to
- segment it into its grapheme clusters by applying the Unicode grapheme
- cluster breaking algorithm (UAX #29). It is based on a complex ruleset
- and lookup-tables and determines if a grapheme cluster ends or is
- continued between two codepoints. Libraries like ICU and libunistring,
- which also offer this functionality, are often bloated, not correct,
- difficult to use or not reasonably statically linkable.
-
- Analogously, the standard provides algorithms to separate strings by
- words, sentences and lines, convert cases and compare strings. The
- motivation behind libgrapheme is to make unicode handling suck less and
- abide by the UNIX philosophy.
-
- AUTHORS
- Laslo Hunhold <dev@frign.de>
-
- suckless.org 2022-10-06 suckless.org
+LIBGRAPHEME(7) - Miscellaneous Information Manual
+
+# NAME
+
+**libgrapheme** - unicode string library
+
+# SYNOPSIS
+
+**#include <grapheme.h>**
+
+# DESCRIPTION
+
+The
+**libgrapheme**
+library provides functions to properly handle Unicode strings according
+to the Unicode specification in regard to character, word, sentence and
+line segmentation and case detection and conversion.
+
+Unicode strings are made up of user-perceived characters (so-called
+"grapheme clusters",
+see
+*MOTIVATION*)
+that are composed of one or more Unicode codepoints, which in turn
+are encoded in one or more bytes in an encoding like UTF-8.
+
+There is a widespread misconception that it is enough to simply
+determine codepoints in a string and treat them as user-perceived
+characters to be Unicode compliant.
+While this may work in some cases, this assumption quickly breaks,
+especially for non-Western languages and decomposed Unicode strings
+where user-perceived characters are usually represented using multiple
+codepoints.
+
+Despite this complicated multilevel structure of Unicode strings,
+**libgrapheme**
+provides methods to work with them at the byte-level (i.e. UTF-8
+'char'
+arrays) while also offering codepoint-level methods.
+Additionally, it is a
+"freestanding"
+library (see ISO/IEC 9899:1999 section 4.6) and thus does not depend on
+a standard library. This makes it easy to use in bare metal environments.
+
+Every documented function's manual page provides a self-contained
+example illustrating the possible usage.
+
+# SEE ALSO
+
+grapheme\_decode\_utf8(3),
+grapheme\_encode\_utf8(3),
+grapheme\_is\_character\_break(3),
+grapheme\_is\_lowercase(3),
+grapheme\_is\_lowercase\_utf8(3),
+grapheme\_is\_titlecase(3),
+grapheme\_is\_titlecase\_utf8(3),
+grapheme\_is\_uppercase(3),
+grapheme\_is\_uppercase\_utf8(3),
+grapheme\_next\_character\_break(3),
+grapheme\_next\_character\_break\_utf8(3),
+grapheme\_next\_line\_break(3),
+grapheme\_next\_line\_break\_utf8(3),
+grapheme\_next\_sentence\_break(3),
+grapheme\_next\_sentence\_break\_utf8(3),
+grapheme\_next\_word\_break(3),
+grapheme\_next\_word\_break\_utf8(3),
+grapheme\_to\_lowercase(3),
+grapheme\_to\_lowercase\_utf8(3),
+grapheme\_to\_titlecase(3),
+grapheme\_to\_titlecase\_utf8(3),
+grapheme\_to\_uppercase(3),
+grapheme\_to\_uppercase\_utf8(3)
+
+# STANDARDS
+
+**libgrapheme**
+is compliant with the Unicode 15.0.0 specification.
+
+# MOTIVATION
+
+The idea behind every character encoding scheme like ASCII or Unicode
+is to express abstract characters (which can be thought of as shapes
+making up a written language). ASCII for instance, which comprises the
+range 0 to 127, assigns the number 65 (0x41) to the abstract character
+'A'.
+This number is called a
+"codepoint",
+and all codepoints of an encoding make up its so-called
+"code space".
+
+Unicode's code space is much larger, ranging from 0 to 0x10FFFF, but its
+first 128 codepoints are identical to ASCII's. The additional code
+points are needed as Unicode's goal is to express all writing systems
+of the world.
+To give an example, the abstract character
+'Ä'
+is not expressible in ASCII, given no ASCII codepoint has been assigned
+to it.
+It can be expressed in Unicode, though, with the codepoint 196 (0xC4).
+
+One may assume that this process is straightforward, but as more and
+more codepoints were assigned to abstract characters, the Unicode
+Consortium (that defines the Unicode standard) was facing a problem:
+Many (mostly non-European) languages have such a large number of
+abstract characters that it would exhaust the available Unicode code
+space if one tried to assign a codepoint to each abstract character.
+The solution to that problem is best introduced with an example: Consider
+the abstract character
+'Ǟ',
+which is
+'A'
+with an umlaut and a macron added to it.
+In this sense, one can consider
+'Ǟ'
+as a two-fold modification (namely
+"add umlaut"
+and
+"add macron")
+of the
+"base character"
+'A'.
+
+The Unicode Consortium adopted this idea by assigning codepoints to
+modifications.
+For example, the codepoint 0x308 represents adding an umlaut and 0x304
+represents adding a macron, and thus, the codepoint sequence
+"0x41 0x308 0x304",
+namely the base character
+'A'
+followed by the umlaut and macron modifiers, represents the abstract
+character
+'Ǟ'.
+As a side-note, the single codepoint 0x1DE was also assigned to
+'Ǟ',
+which is a good example for the fact that there can be multiple
+representations of a single abstract character in Unicode.
+
+Expressing a single abstract character with multiple codepoints solved
+the code space exhaustion-problem, and the concept has been greatly
+expanded since its first introduction (emojis, joiners, etc.). A sequence
+(which can also have the length 1) of codepoints that belong together
+this way and represents an abstract character is called a
+"grapheme cluster".
+
+In many applications it is necessary to count the number of
+user-perceived characters, i.e. grapheme clusters, in a string.
+A good example for this is a terminal text editor, which needs to
+properly align characters on a grid.
+This is pretty simple with ASCII-strings, where you just count the number
+of bytes (as each byte is a codepoint and each codepoint is a grapheme
+cluster).
+With Unicode-strings, it is a common mistake to simply adapt the
+ASCII-approach and count the number of code points.
+This is wrong, as, for example, the sequence
+"0x41 0x308 0x304",
+while made up of 3 codepoints, is a single grapheme cluster and
+represents the user-perceived character
+'Ǟ'.
+
+The proper way to segment a string into user-perceived characters
+is to segment it into its grapheme clusters by applying the Unicode
+grapheme cluster breaking algorithm (UAX #29).
+It is based on a complex ruleset and lookup-tables and determines if a
+grapheme cluster ends or is continued between two codepoints.
+Libraries like ICU and libunistring, which also offer this functionality,
+are often bloated, not correct, difficult to use or not reasonably
+statically linkable.
+
+Analogously, the standard provides algorithms to separate strings by
+words, sentences and lines, convert cases and compare strings.
+The motivation behind
+**libgrapheme**
+is to make Unicode handling suck less and abide by the UNIX philosophy.
+
+# AUTHORS
+
+Laslo Hunhold <[dev@frign.de](mailto:dev@frign.de)>
+
+suckless.org - 2022-10-06