libgrapheme

grapheme cluster utility library
git clone git://git.suckless.org/libgrapheme
Log | Files | Refs | LICENSE

grapheme_bytelen.3 (2047B)


      1 .Dd 2020-10-12
      2 .Dt GRAPHEME_BYTELEN 3
      3 .Os suckless.org
      4 .Sh NAME
      5 .Nm grapheme_bytelen
      6 .Nd compute grapheme cluster byte-length
      7 .Sh SYNOPSIS
      8 .In grapheme.h
      9 .Ft size_t
     10 .Fn grapheme_bytelen "const char *str"
     11 .Sh DESCRIPTION
     12 The
     13 .Fn grapheme_bytelen
     14 function computes the length (in bytes) of the grapheme cluster
     15 (see
     16 .Xr libgrapheme 7 )
     17 beginning at the UTF-8-encoded NUL-terminated string
     18 .Va str .
     19 .Sh RETURN VALUES
     20 The
     21 .Fn grapheme_bytelen
     22 function returns the length (in bytes) of the grapheme cluster beginning
     23 at
     24 .Va str
     25 or 0 if
     26 .Va str
     27 is
     28 .Dv NULL .
     29 .Sh EXAMPLES
     30 .Bd -literal
     31 /* cc (-static) -o example example.c -lgrapheme */
     32 #include <grapheme.h>
     33 #include <stdio.h>
     34 
     35 int
     36 main(void)
     37 {
     38 	/* UTF-8 encoded input (never modified, hence const) */
     39 	const char *s =
     40 		"T"
     41 		"\\xC3\\xAB"         /* U+000EB LATIN SMALL LETTER E
     42 		                              WITH DIAERESIS */
     43 		"s"
     44 		"t"
     45 		" "
     46 		"\\xF0\\x9F\\x91\\xA8" /* U+1F468 MAN */
     47 		"\\xE2\\x80\\x8D"     /* U+0200D ZERO WIDTH JOINER */
     48 		"\\xF0\\x9F\\x91\\xA9" /* U+1F469 WOMAN */
     49 		"\\xE2\\x80\\x8D"     /* U+0200D ZERO WIDTH JOINER */
     50 		"\\xF0\\x9F\\x91\\xA6" /* U+1F466 BOY */
     51 		" "
     52 		"\\xF0\\x9F\\x87\\xBA" /* U+1F1FA REGIONAL INDICATOR
     53 		                              SYMBOL LETTER U */
     54 		"\\xF0\\x9F\\x87\\xB8" /* U+1F1F8 REGIONAL INDICATOR
     55 		                              SYMBOL LETTER S */
     56 		" "
     57 		"\\xE0\\xA4\\xA8"     /* U+00928 DEVANAGARI LETTER NA */
     58 		"\\xE0\\xA5\\x80"     /* U+00940 DEVANAGARI VOWEL
     59 		                              SIGN II */
     60 		" "
     61 		"\\xE0\\xAE\\xA8"     /* U+00BA8 TAMIL LETTER NA */
     62 		"\\xE0\\xAE\\xBF"     /* U+00BBF TAMIL VOWEL SIGN I */
     63 		"!";
     64 	size_t len; /* byte-length of the current cluster */
     65 
     66 	/* print input string */
     67 	printf("Input: %s\\n", s);
     68 
     69 	/* print each grapheme cluster with accompanying byte-length */
     70 	while (*s != '\\0') {
     71 		len = grapheme_bytelen(s);
     72 		printf("%2zu byte(s) | %.*s\\n", len, (int)len, s);
     73 		s += len;
     74 	}
     75 
     76 	return 0;
     77 }
     78 .Ed
     79 .Sh SEE ALSO
     80 .Xr libgrapheme 7
     81 .Sh STANDARDS
     82 .Fn grapheme_bytelen
     83 is compliant with the Unicode 13.0.0 specification.
     84 .Sh AUTHORS
     85 .An Laslo Hunhold Aq Mt dev@frign.de