sites

public wiki contents of suckless.org
git clone git://git.suckless.org/sites
Log | Files | Refs

index.md (2347B)


      1 GRAPHEME\_DECODE\_UTF8(3) - Library Functions Manual
      2 
      3 # NAME
      4 
      5 **grapheme\_decode\_utf8** - decode first codepoint in UTF-8-encoded string
      6 
      7 # SYNOPSIS
      8 
      9 **#include <grapheme.h>**
     10 
     11 *size\_t*  
     12 **grapheme\_decode\_utf8**(*const char \*str*, *size\_t len*, *uint\_least32\_t \*cp*);
     13 
     14 # DESCRIPTION
     15 
     16 The
     17 **grapheme\_decode\_utf8**()
     18 function decodes the first codepoint in the UTF-8-encoded string
     19 *str*
     20 of length
     21 *len*.
     22 If the UTF-8 sequence is invalid (overlong encoding, unexpected byte,
     23 string ends unexpectedly, empty string, etc.) the decoding is stopped
     24 at the last processed byte and the decoded codepoint is set to
     25 `GRAPHEME_INVALID_CODEPOINT`.
     26 
     27 If
     28 *cp*
     29 is not
     30 `NULL`
     31 the decoded codepoint is stored in the memory pointed to by
     32 *cp*.
     33 
     34 Given that NUL has a unique 1-byte representation, it is safe to operate on
     35 NUL-terminated strings by setting
     36 *len*
     37 to
     38 `SIZE_MAX`
     39 (stdint.h is already included by grapheme.h) and terminating when the codepoint stored in
     40 *cp*
     41 is 0 (see
     42 *EXAMPLES*
     43 for an example).
     44 
     45 # RETURN VALUES
     46 
     47 The
     48 **grapheme\_decode\_utf8**()
     49 function returns the number of processed bytes, or 0 if
     50 *str*
     51 is
     52 `NULL`
     53 or
     54 *len*
     55 is 0.
     56 If the string ends unexpectedly in a multibyte sequence, the desired
     57 length (that is larger than
     58 *len*)
     59 is returned.
     60 
     61 # EXAMPLES
     62 
     63 	/* cc (-static) -o example example.c -lgrapheme */
     64 	#include <grapheme.h>
     65 	#include <inttypes.h>
     66 	#include <stdio.h>
     67 	
     68 	void
     69 	print_cps(const char *str, size_t len) /* print each codepoint of str (len bytes) in hex, one per line */
     70 	{
     71 		size_t ret, off; /* ret: decoder return value, off: current byte offset into str */
     72 		uint_least32_t cp;
     73 	
     74 		for (off = 0; off < len; off += ret) {
     75 			if ((ret = grapheme_decode_utf8(str + off,
     76 			                                len - off, &cp)) > (len - off)) {
     77 				/*
     78 				 * The string ended in the middle of a multibyte
     79 				 * sequence; ret is the length the full sequence
     80 				 * needs, so str could be extended by
     81 				 * ret - (len - off) bytes to complete it.
     82 				 * This example simply stops decoding instead.
     83 				 */
     84 				break;
     85 			}
     86 			printf("%"PRIxLEAST32"\n", cp); /* invalid input prints GRAPHEME_INVALID_CODEPOINT */
     87 		}
     88 	}
     89 	
     90 	void
     91 	print_cps_nul_terminated(const char *str) /* print each codepoint of the NUL-terminated string str in hex */
     92 	{
     93 		size_t ret, off; /* ret: decoder return value, off: current byte offset into str */
     94 		uint_least32_t cp;
     95 	
     96 		for (off = 0; (ret = grapheme_decode_utf8(str + off,
     97 		                                          SIZE_MAX, &cp)) > 0 && /* len is SIZE_MAX, so ret is 0 only if str is NULL */
     98 		     cp != 0; off += ret) { /* a decoded codepoint of 0 is the terminating NUL */
     99 			printf("%"PRIxLEAST32"\n", cp);
    100 		}
    101 	}
    102 
    103 # SEE ALSO
    104 
    105 grapheme\_encode\_utf8(3),
    106 libgrapheme(7)
    107 
    108 # AUTHORS
    109 
    110 Laslo Hunhold ([dev@frign.de](mailto:dev@frign.de))
    111 
    112 suckless.org - 2022-10-06