libgrapheme

grapheme cluster utility library
git clone git://git.suckless.org/libgrapheme
Log | Files | Refs | LICENSE

lg_utf8_encode.3 (1869B)


      1 .Dd 2021-12-17
      2 .Dt LG_UTF8_ENCODE 3
      3 .Os suckless.org
      4 .Sh NAME
      5 .Nm lg_utf8_encode
      6 .Nd encode code point into UTF-8 string
      7 .Sh SYNOPSIS
      8 .In grapheme.h
      9 .Ft size_t
     10 .Fn lg_utf8_encode "uint_least32_t cp" "char *str" "size_t len"
     11 .Sh DESCRIPTION
     12 The
     13 .Fn lg_utf8_encode
     14 function encodes the code point
     15 .Va cp
     16 into a UTF-8-string.
     17 If
     18 .Va str
     19 is not
     20 .Dv NULL
     21 and
     22 .Va len
     23 is large enough it writes the UTF-8-string to the memory pointed to by
     24 .Va str .
     25 .Sh RETURN VALUES
     26 The
     27 .Fn lg_utf8_encode
     28 function returns the length (in bytes) of the UTF-8-string resulting
     29 from encoding
     30 .Va cp .
     31 If the returned value is larger than
     32 .Va len ,
     33 the output buffer is too small and no data has been
     34 written.
     35 .Sh EXAMPLES
     36 .Bd -literal
     37 /* cc (-static) -o example example.c -lgrapheme */
     38 #include <grapheme.h>
     39 #include <stddef.h>
     40 #include <stdlib.h>
     41 
     42 size_t
     43 cps_to_utf8(const uint_least32_t *cp, size_t cplen, char *str, size_t len)
     44 {
     45 	size_t i, off, ret;
     46 
     47 	for (i = 0, off = 0; i < cplen; i++, off += ret) {
     48 		if ((ret = lg_utf8_encode(cp[i], str + off,
     49 		                          len - off)) > (len - off)) {
     50 			/* buffer too small */
     51 			break;
     52 		}
     53 	}
     54 	
     55 	return off;
     56 }
     57 
     58 size_t
     59 cps_bytelen(const uint_least32_t *cp, size_t cplen)
     60 {
     61 	size_t i, len;
     62 
     63 	for (i = 0, len = 0; i < cplen; i++) {
     64 		len += lg_utf8_encode(cp[i], NULL, 0);
     65 	}
     66 
     67 	return len;
     68 }
     69 
     70 char *
     71 cps_to_utf8_alloc(const uint_least32_t *cp, size_t cplen)
     72 {
     73 	char *str;
     74 	size_t len, i, ret, off;
     75 
     76 	len = cps_bytelen(cp, cplen);
     77 
     78 	if (!(str = malloc(len))) {
     79 		return NULL;
     80 	}
     81 
     82 	for (i = 0, off = 0; i < cplen; i++, off += ret) {
     83 		if ((ret = lg_utf8_encode(cp[i], str + off,
     84 		                          len - off)) > (len - off)) {
     85 			/* buffer too small */
     86 			break;
     87 		}
     88 	}
     89 	str[off] = '\\0';
     90 
     91 	return str;
     92 }
     93 .Ed
     94 .Sh SEE ALSO
     95 .Xr lg_grapheme_decode 3 ,
     96 .Xr libgrapheme 7
     97 .Sh AUTHORS
     98 .An Laslo Hunhold Aq Mt dev@frign.de