libgrapheme

unicode string library
git clone git://git.suckless.org/libgrapheme
Log | Files | Refs | README | LICENSE

character.c (3173B)


      1 /* See LICENSE file for copyright and license details. */
      2 #include <errno.h>
      3 #include <stddef.h>
      4 #include <stdint.h>
      5 #include <stdio.h>
      6 #include <stdlib.h>
      7 #include <string.h>
      8 
      9 #include "character.h"
     10 #include "util.h"
     11 
     12 int
     13 main(int argc, char *argv[])
     14 {
     15 	const struct codepoint_property *match;
     16 	struct codepoint_property_set *properties_dcp, *properties_emoji,
     17 	                              *properties_grapheme;
     18 	uint_least64_t *properties;
     19 	uint_least32_t cp;
     20 
     21 	(void)argc;
     22 	
     23 	/* parse properties from the Unicode data files */
     24 	properties_dcp = parse_property_file("data/DerivedCoreProperties.txt");
     25 	properties_emoji = parse_property_file("data/emoji-data.txt");
     26 	properties_grapheme = parse_property_file("data/GraphemeBreakProperty.txt");
     27 
     28 	/* allocate property array and initialise to zero */
     29 	if (!(properties = calloc(UINT32_C(0x110000), sizeof(*properties)))) {	
     30 		fprintf(stderr, "%s: malloc: %s\n", argv[0], strerror(errno));
     31 		exit(1);
     32 	}
     33 
     34 	for (cp = 0; cp <= UINT32_C(0x10FFFF); cp++) {
     35 		if (match_in_codepoint_property_set(
     36 			&(properties_grapheme[cp]), "Control", 0)) {
     37 			properties[cp] |= CHAR_PROP_CONTROL;
     38 		}
     39 
     40 		if (match_in_codepoint_property_set(
     41 			&(properties_grapheme[cp]), "Extend", 0)) {
     42 			properties[cp] |= CHAR_PROP_EXTEND;
     43 		}
     44 
     45 		if (match_in_codepoint_property_set(
     46 			&(properties_emoji[cp]), "Extended_Pictographic", 0)) {
     47 			properties[cp] |= CHAR_PROP_EXTENDED_PICTOGRAPHIC;
     48 		}
     49 
     50 		if (match_in_codepoint_property_set(
     51 			&(properties_grapheme[cp]), "L", 0)) {
     52 			properties[cp] |= CHAR_PROP_HANGUL_L;
     53 		}
     54 
     55 		if (match_in_codepoint_property_set(
     56 			&(properties_grapheme[cp]), "V", 0)) {
     57 			properties[cp] |= CHAR_PROP_HANGUL_V;
     58 		}
     59 
     60 		if (match_in_codepoint_property_set(
     61 			&(properties_grapheme[cp]), "T", 0)) {
     62 			properties[cp] |= CHAR_PROP_HANGUL_T;
     63 		}
     64 
     65 		if (match_in_codepoint_property_set(
     66 			&(properties_grapheme[cp]), "LV", 0)) {
     67 			properties[cp] |= CHAR_PROP_HANGUL_LV;
     68 		}
     69 
     70 		if (match_in_codepoint_property_set(
     71 			&(properties_grapheme[cp]), "LVT", 0)) {
     72 			properties[cp] |= CHAR_PROP_HANGUL_LVT;
     73 		}
     74 
     75 		if ((match = match_in_codepoint_property_set(
     76 			&(properties_dcp[cp]), "InCB", 0))) {
     77 			if (strcmp(match->fields[1], "Consonant") == 0) {
     78 				properties[cp] |= CHAR_PROP_ICB_CONSONANT;
     79 			} else if (strcmp(match->fields[1], "Extend") == 0) {
     80 				properties[cp] |= CHAR_PROP_ICB_EXTEND;
     81 			} else if (strcmp(match->fields[1], "Linker") == 0) {
     82 				properties[cp] |= CHAR_PROP_ICB_LINKER;
     83 			}
     84 		}
     85 
     86 		if (match_in_codepoint_property_set(
     87 			&(properties_grapheme[cp]), "Prepend", 0)) {
     88 			properties[cp] |= CHAR_PROP_PREPEND;
     89 		}
     90 
     91 		if (match_in_codepoint_property_set(
     92 			&(properties_grapheme[cp]), "Regional_Indicator", 0)) {
     93 			properties[cp] |= CHAR_PROP_REGIONAL_INDICATOR;
     94 		}
     95 
     96 		if (match_in_codepoint_property_set(
     97 			&(properties_grapheme[cp]), "SpacingMark", 0)) {
     98 			properties[cp] |= CHAR_PROP_SPACINGMARK;
     99 		}
    100 	}
    101 
    102 	/* generate code */
    103 	compress_and_output(properties, "character");
    104 
    105 	/* cleanup */
    106 	free_codepoint_property_set_array(properties_dcp);
    107 	free_codepoint_property_set_array(properties_emoji);
    108 	free_codepoint_property_set_array(properties_grapheme);
    109 	free(properties);
    110 
    111 	return 0;
    112 }