sites

public wiki contents of suckless.org
git clone git://git.suckless.org/sites
Log | Files | Refs

index.md (2277B)


      1 GRAPHEME\_NEXT\_WORD\_BREAK\_UTF8(3) - Library Functions Manual
      2 
      3 # NAME
      4 
      5 **grapheme\_next\_word\_break\_utf8** - determine byte-offset to next word break
      6 
      7 # SYNOPSIS
      8 
      9 **#include <grapheme.h>**
     10 
     11 *size\_t*  
     12 **grapheme\_next\_word\_break\_utf8**(*const char \*str*, *size\_t len*);
     13 
     14 # DESCRIPTION
     15 
     16 The
     17 **grapheme\_next\_word\_break\_utf8**()
     18 function computes the offset (in bytes) to the next word
     19 break (see
     20 libgrapheme(7))
     21 in the UTF-8-encoded string
     22 *str*
     23 of length
     24 *len*.
     25 If a word begins at
     26 *str*,
     27 this offset is equal to the length of said word.
     28 
     29 If
     30 *len*
     31 is set to
     32 `SIZE_MAX`
     33 (stdint.h is already included by grapheme.h), the string
     34 *str*
     35 is interpreted to be NUL-terminated and processing stops when
     36 a NUL-byte is encountered.
     37 
     38 For non-UTF-8 input
     39 data,
     40 grapheme\_next\_word\_break(3)
     41 can be used instead.
     42 
     43 # RETURN VALUES
     44 
     45 The
     46 **grapheme\_next\_word\_break\_utf8**()
     47 function returns the offset (in bytes) to the next word
     48 break in
     49 *str*
     50 or 0 if
     51 *str*
     52 is
     53 `NULL`.
     54 
     55 # EXAMPLES
     56 
     57 	/* cc (-static) -o example example.c -lgrapheme */
     58 	#include <grapheme.h>
     59 	#include <stdint.h>
     60 	#include <stdio.h>
     61 	
     62 	int
     63 	main(void) /* demo: split a UTF-8 string into words, first NUL-delimited, then length-delimited */
     64 	{
     65 		/* UTF-8 encoded input: "Tëst", a ZWJ-joined emoji sequence, a regional-indicator pair, a Devanagari and a Tamil syllable, "!" */
     66 		char *s = "T\xC3\xABst \xF0\x9F\x91\xA8\xE2\x80\x8D\xF0"
     67 		          "\x9F\x91\xA9\xE2\x80\x8D\xF0\x9F\x91\xA6 \xF0"
     68 		          "\x9F\x87\xBA\xF0\x9F\x87\xB8 \xE0\xA4\xA8\xE0"
     69 		          "\xA5\x80 \xE0\xAE\xA8\xE0\xAE\xBF!";
     70 		size_t ret, len, off;
     71 	
     72 		printf("Input: \"%s\"\n", s);
     73 	
     74 		/* print each word with its byte-length; passing SIZE_MAX as len makes the string be treated as NUL-terminated */
     75 		printf("words in NUL-delimited input:\n");
     76 		for (off = 0; s[off] != '\0'; off += ret) {
     77 			ret = grapheme_next_word_break_utf8(s + off, SIZE_MAX);
     78 			printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off); /* the '*' precision expects an int, hence the cast */
     79 		}
     80 		printf("\n");
     81 	
     82 		/* do the same, but this time the string is length-delimited */
     83 		len = 17; /* byte 17 falls inside the ZWJ emoji sequence, right after its second emoji (6 + 4 + 3 + 4 bytes) */
     84 		printf("words in input delimited to %zu bytes:\n", len);
     85 		for (off = 0; off < len; off += ret) {
     86 			ret = grapheme_next_word_break_utf8(s + off, len - off); /* len - off: bytes left in the delimited input */
     87 			printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off);
     88 		}
     89 	
     90 		return 0;
     91 	}
     92 
     93 # SEE ALSO
     94 
     95 grapheme\_next\_word\_break(3),
     96 libgrapheme(7)
     97 
     98 # STANDARDS
     99 
    100 **grapheme\_next\_word\_break\_utf8**()
    101 is compliant with the Unicode 15.0.0 specification.
    102 
    103 # AUTHORS
    104 
    105 Laslo Hunhold ([dev@frign.de](mailto:dev@frign.de))
    106 
    107 suckless.org - 2022-10-06