libgrapheme

unicode string library
git clone git://git.suckless.org/libgrapheme
Log | Files | Refs | README | LICENSE

grapheme_encode_utf8.3 (1897B)


      1 .Dd 2022-08-26
      2 .Dt GRAPHEME_ENCODE_UTF8 3
      3 .Os suckless.org
      4 .Sh NAME
      5 .Nm grapheme_encode_utf8
      6 .Nd encode codepoint into UTF-8 string
      7 .Sh SYNOPSIS
      8 .In grapheme.h
      9 .Ft size_t
     10 .Fn grapheme_encode_utf8 "uint_least32_t cp" "char *str" "size_t len"
     11 .Sh DESCRIPTION
     12 The
     13 .Fn grapheme_encode_utf8
     14 function encodes the codepoint
     15 .Va cp
     16 into a UTF-8-string.
     17 If
     18 .Va str
     19 is not
     20 .Dv NULL
     21 and
     22 .Va len
     23 is large enough, it writes the UTF-8-string to the memory pointed to by
     24 .Va str .
     25 Otherwise no data is written.
     26 .Sh RETURN VALUES
     27 The
     28 .Fn grapheme_encode_utf8
     29 function returns the length (in bytes) of the UTF-8-string resulting
     30 from encoding
     31 .Va cp ,
     32 even if
     33 .Va len
     34 is not large enough or
     35 .Va str
     36 is
     37 .Dv NULL .
     38 .Sh EXAMPLES
     39 .Bd -literal
     40 /* cc (-static) -o example example.c -lgrapheme */
     41 #include <grapheme.h>
     42 #include <stddef.h>
     43 #include <stdlib.h>
     44 
     45 size_t
     46 cps_to_utf8(const uint_least32_t *cp, size_t cplen, char *str, size_t len)
     47 {
     48 	size_t i, off, ret;
     49 
     50 	for (i = 0, off = 0; i < cplen; i++, off += ret) {
     51 		if ((ret = grapheme_encode_utf8(cp[i], str + off,
     52 		                                len - off)) > (len - off)) {
     53 			/* buffer too small */
     54 			break;
     55 		}
     56 	}
     57 
     58 	return off;
     59 }
     60 
     61 size_t
     62 cps_bytelen(const uint_least32_t *cp, size_t cplen)
     63 {
     64 	size_t i, len;
     65 
     66 	for (i = 0, len = 0; i < cplen; i++) {
     67 		len += grapheme_encode_utf8(cp[i], NULL, 0);
     68 	}
     69 
     70 	return len;
     71 }
     72 
     73 char *
     74 cps_to_utf8_alloc(const uint_least32_t *cp, size_t cplen)
     75 {
     76 	char *str;
     77 	size_t len, i, ret, off;
     78 
     79 	len = cps_bytelen(cp, cplen);
     80 
     81 	if (!(str = malloc(len))) {
     82 		return NULL;
     83 	}
     84 
     85 	for (i = 0, off = 0; i < cplen; i++, off += ret) {
     86 		if ((ret = grapheme_encode_utf8(cp[i], str + off,
     87 		                                len - off)) > (len - off)) {
     88 			/* buffer too small */
     89 			break;
     90 		}
     91 	}
     92 	str[off] = '\\0';
     93 
     94 	return str;
     95 }
     96 .Ed
     97 .Sh SEE ALSO
     98 .Xr grapheme_decode_utf8 3 ,
     99 .Xr libgrapheme 7
    100 .Sh AUTHORS
    101 .An Laslo Hunhold Aq Mt dev@frign.de