grapheme_decode_utf8.sh (2322B)
# Emit the mdoc(7) source of the grapheme_decode_utf8(3) manual page on
# standard output.
#
# The here-document delimiter is unquoted, so the shell expands the body
# before printing it:
#   - ${MAN_DATE} is substituted into the .Dd line; it is not assigned in
#     this script and is expected to be set by the caller.
#   - every pair of backslashes in the body collapses to a single
#     backslash in the output (see the printf() calls in the example;
#     presumably the remaining pair is for the roff renderer, so that the
#     reader finally sees a plain newline escape - confirm when editing).
cat << EOF
.Dd ${MAN_DATE}
.Dt GRAPHEME_DECODE_UTF8 3
.Os suckless.org
.Sh NAME
.Nm grapheme_decode_utf8
.Nd decode first codepoint in UTF-8-encoded string
.Sh SYNOPSIS
.In grapheme.h
.Ft size_t
.Fn grapheme_decode_utf8 "const char *str" "size_t len" "uint_least32_t *cp"
.Sh DESCRIPTION
The
.Fn grapheme_decode_utf8
function decodes the first codepoint in the UTF-8-encoded string
.Va str
of length
.Va len .
If the UTF-8-sequence is invalid (overlong encoding, unexpected byte,
string ends unexpectedly, empty string, etc.) the decoding is stopped
at the last processed byte and the decoded codepoint is set to
.Dv GRAPHEME_INVALID_CODEPOINT .
.Pp
If
.Va cp
is not
.Dv NULL
the decoded codepoint is stored in the memory pointed to by
.Va cp .
.Pp
Given NUL has a unique 1-byte representation, it is safe to operate on
NUL-terminated strings by setting
.Va len
to
.Dv SIZE_MAX
(stdint.h is already included by grapheme.h) and terminating when
.Va cp
is 0 (see
.Sx EXAMPLES
for an example).
.Sh RETURN VALUES
The
.Fn grapheme_decode_utf8
function returns the number of processed bytes, or 0 if
.Va str
is
.Dv NULL
or
.Va len
is 0.
If the string ends unexpectedly in a multibyte sequence, the desired
length (that is larger than
.Va len )
is returned.
.Sh EXAMPLES
.Bd -literal
/* cc (-static) -o example example.c -lgrapheme */
#include <grapheme.h>
#include <inttypes.h>
#include <stdio.h>

void
print_cps(const char *str, size_t len)
{
	size_t ret, off;
	uint_least32_t cp;

	for (off = 0; off < len; off += ret) {
		ret = grapheme_decode_utf8(str + off, len - off, &cp);
		if (ret == 0) {
			/*
			 * nothing was processed (str is NULL), so off
			 * would never advance; bail out instead of
			 * looping forever.
			 */
			break;
		}
		if (ret > (len - off)) {
			/*
			 * string ended unexpectedly in the middle of a
			 * multibyte sequence and we have the choice
			 * here to possibly expand str by ret - len + off
			 * bytes to get a full sequence, but we just
			 * bail out in this case.
			 */
			break;
		}
		printf("%"PRIxLEAST32"\\\\n", cp);
	}
}

void
print_cps_nul_terminated(const char *str)
{
	size_t ret, off;
	uint_least32_t cp;

	for (off = 0; (ret = grapheme_decode_utf8(str + off,
	                    SIZE_MAX, &cp)) > 0 &&
	     cp != 0; off += ret) {
		printf("%"PRIxLEAST32"\\\\n", cp);
	}
}
.Ed
.Sh SEE ALSO
.Xr grapheme_encode_utf8 3 ,
.Xr libgrapheme 7
.Sh AUTHORS
.An Laslo Hunhold Aq Mt dev@frign.de
EOF