sites

public wiki contents of suckless.org
git clone git://git.suckless.org/sites
Log | Files | Refs

index.md (2461B)


      1 GRAPHEME\_NEXT\_CHARACTER\_BREAK\_UTF8(3) - Library Functions Manual
      2 
      3 # NAME
      4 
      5 **grapheme\_next\_character\_break\_utf8** - determine byte-offset to next grapheme cluster break
      6 
      7 # SYNOPSIS
      8 
      9 **#include <grapheme.h>**
     10 
     11 *size\_t*  
     12 **grapheme\_next\_character\_break\_utf8**(*const char \*str*, *size\_t len*);
     13 
     14 # DESCRIPTION
     15 
     16 The
     17 **grapheme\_next\_character\_break\_utf8**()
     18 function computes the offset (in bytes) to the next grapheme cluster
     19 break (see
     20 libgrapheme(7))
     21 in the UTF-8-encoded string
     22 *str*
     23 of length
     24 *len*.
     25 If a grapheme cluster begins at
     26 *str*,
     27 this offset is equal to the length (in bytes) of said grapheme cluster.
     28 
     29 If
     30 *len*
     31 is set to
     32 `SIZE_MAX`
     33 (stdint.h is already included by grapheme.h), the string
     34 *str*
     35 is interpreted as NUL-terminated and processing stops when
     36 a NUL-byte is encountered.
     37 
     38 For non-UTF-8 input
     39 data,
     40 grapheme\_is\_character\_break(3) and
     41 grapheme\_next\_character\_break(3)
     42 can be used instead.
     43 
     44 # RETURN VALUES
     45 
     46 The
     47 **grapheme\_next\_character\_break\_utf8**()
     48 function returns the offset (in bytes) to the next grapheme cluster
     49 break in
     50 *str*
     51 or 0 if
     52 *str*
     53 is
     54 `NULL`.
     55 
     56 # EXAMPLES
     57 
     58 	/* cc (-static) -o example example.c -lgrapheme */
     59 	#include <grapheme.h>
     60 	#include <stdint.h>
     61 	#include <stdio.h>
     62 	
     63 	int
     64 	main(void)
     65 	{
     66 		/* UTF-8 encoded input (non-ASCII bytes written as \x escapes) */
     67 		char *s = "T\xC3\xABst \xF0\x9F\x91\xA8\xE2\x80\x8D\xF0"
     68 		          "\x9F\x91\xA9\xE2\x80\x8D\xF0\x9F\x91\xA6 \xF0"
     69 		          "\x9F\x87\xBA\xF0\x9F\x87\xB8 \xE0\xA4\xA8\xE0"
     70 		          "\xA5\x80 \xE0\xAE\xA8\xE0\xAE\xBF!";
     71 		size_t ret, len, off;
     72 	
     73 		printf("Input: \"%s\"\n", s);
     74 	
     75 		/* print each grapheme cluster with byte-length; SIZE_MAX selects NUL-terminated mode */
     76 		printf("grapheme clusters in NUL-delimited input:\n");
     77 		for (off = 0; s[off] != '\0'; off += ret) { /* advance by the returned offset */
     78 			ret = grapheme_next_character_break_utf8(s + off, SIZE_MAX);
     79 			printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off);
     80 		}
     81 		printf("\n");
     82 	
     83 		/* do the same, but length-delimited: 17 bytes end the input right after \xF0\x9F\x91\xA9 */
     84 		len = 17;
     85 		printf("grapheme clusters in input delimited to %zu bytes:\n", len);
     86 		for (off = 0; off < len; off += ret) {
     87 			ret = grapheme_next_character_break_utf8(s + off, len - off); /* len - off: bytes left */
     88 			printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off);
     89 		}
     90 	
     91 		return 0;
     92 	}
     93 
     94 # SEE ALSO
     95 
     96 grapheme\_is\_character\_break(3), grapheme\_next\_character\_break(3),
     97 libgrapheme(7)
     98 
     99 # STANDARDS
    100 
    101 **grapheme\_next\_character\_break\_utf8**()
    102 is compliant with the Unicode 15.0.0 specification.
    103 
    104 # AUTHORS
    105 
    106 Laslo Hunhold ([dev@frign.de](mailto:dev@frign.de))
    107 
    108 suckless.org - 2022-10-06