grapheme_decode_utf8.3 (2302B)
1 .Dd 2022-08-26 2 .Dt GRAPHEME_DECODE_UTF8 3 3 .Os suckless.org 4 .Sh NAME 5 .Nm grapheme_decode_utf8 6 .Nd decode first codepoint in UTF-8-encoded string 7 .Sh SYNOPSIS 8 .In grapheme.h 9 .Ft size_t 10 .Fn grapheme_decode_utf8 "const char *str" "size_t len" "uint_least32_t *cp" 11 .Sh DESCRIPTION 12 The 13 .Fn grapheme_decode_utf8 14 function decodes the first codepoint in the UTF-8-encoded string 15 .Va str 16 of length 17 .Va len . 18 If the UTF-8 sequence is invalid (overlong encoding, unexpected byte, 19 string ends unexpectedly, empty string, etc.) the decoding is stopped 20 at the last processed byte and the decoded codepoint is set to 21 .Dv GRAPHEME_INVALID_CODEPOINT . 22 .Pp 23 If 24 .Va cp 25 is not 26 .Dv NULL 27 the decoded codepoint is stored in the memory pointed to by 28 .Va cp . 29 .Pp 30 Given NUL has a unique 1-byte representation, it is safe to operate on 31 NUL-terminated strings by setting 32 .Va len 33 to 34 .Dv SIZE_MAX 35 (stdint.h is already included by grapheme.h) and terminating when 36 .Va cp 37 is 0 (see 38 .Sx EXAMPLES 39 for an example). 40 .Sh RETURN VALUES 41 The 42 .Fn grapheme_decode_utf8 43 function returns the number of processed bytes, or 0 if 44 .Va str 45 is 46 .Dv NULL 47 or 48 .Va len 49 is 0. 50 If the string ends unexpectedly in a multibyte sequence, the desired 51 length (which is larger than 52 .Va len ) 53 is returned. 
54 .Sh EXAMPLES 55 .Bd -literal 56 /* cc (-static) -o example example.c -lgrapheme */ 57 #include <grapheme.h> 58 #include <inttypes.h> 59 #include <stdio.h> 60 61 void 62 print_cps(const char *str, size_t len) 63 { 64 size_t ret, off; 65 uint_least32_t cp; 66 67 for (off = 0; off < len; off += ret) { 68 if ((ret = grapheme_decode_utf8(str + off, 69 len - off, &cp)) > (len - off)) { 70 /* 71 * string ended unexpectedly in the middle of a 72 * multibyte sequence and we have the choice 73 * here to possibly expand str by ret - len + off 74 * bytes to get a full sequence, but we just 75 * bail out in this case. 76 */ 77 break; 78 } 79 printf("%"PRIxLEAST32"\\n", cp); 80 } 81 } 82 83 void 84 print_cps_nul_terminated(const char *str) 85 { 86 size_t ret, off; 87 uint_least32_t cp; 88 89 for (off = 0; (ret = grapheme_decode_utf8(str + off, 90 SIZE_MAX, &cp)) > 0 && 91 cp != 0; off += ret) { 92 printf("%"PRIxLEAST32"\\n", cp); 93 } 94 } 95 .Ed 96 .Sh SEE ALSO 97 .Xr grapheme_encode_utf8 3 , 98 .Xr libgrapheme 7 99 .Sh AUTHORS 100 .An Laslo Hunhold Aq Mt dev@frign.de