character.c (3173B)
1 /* See LICENSE file for copyright and license details. */ 2 #include <errno.h> 3 #include <stddef.h> 4 #include <stdint.h> 5 #include <stdio.h> 6 #include <stdlib.h> 7 #include <string.h> 8 9 #include "character.h" 10 #include "util.h" 11 12 int 13 main(int argc, char *argv[]) 14 { 15 const struct codepoint_property *match; 16 struct codepoint_property_set *properties_dcp, *properties_emoji, 17 *properties_grapheme; 18 uint_least64_t *properties; 19 uint_least32_t cp; 20 21 (void)argc; 22 23 /* parse properties from the Unicode data files */ 24 properties_dcp = parse_property_file("data/DerivedCoreProperties.txt"); 25 properties_emoji = parse_property_file("data/emoji-data.txt"); 26 properties_grapheme = parse_property_file("data/GraphemeBreakProperty.txt"); 27 28 /* allocate property array and initialise to zero */ 29 if (!(properties = calloc(UINT32_C(0x110000), sizeof(*properties)))) { 30 fprintf(stderr, "%s: malloc: %s\n", argv[0], strerror(errno)); 31 exit(1); 32 } 33 34 for (cp = 0; cp <= UINT32_C(0x10FFFF); cp++) { 35 if (match_in_codepoint_property_set( 36 &(properties_grapheme[cp]), "Control", 0)) { 37 properties[cp] |= CHAR_PROP_CONTROL; 38 } 39 40 if (match_in_codepoint_property_set( 41 &(properties_grapheme[cp]), "Extend", 0)) { 42 properties[cp] |= CHAR_PROP_EXTEND; 43 } 44 45 if (match_in_codepoint_property_set( 46 &(properties_emoji[cp]), "Extended_Pictographic", 0)) { 47 properties[cp] |= CHAR_PROP_EXTENDED_PICTOGRAPHIC; 48 } 49 50 if (match_in_codepoint_property_set( 51 &(properties_grapheme[cp]), "L", 0)) { 52 properties[cp] |= CHAR_PROP_HANGUL_L; 53 } 54 55 if (match_in_codepoint_property_set( 56 &(properties_grapheme[cp]), "V", 0)) { 57 properties[cp] |= CHAR_PROP_HANGUL_V; 58 } 59 60 if (match_in_codepoint_property_set( 61 &(properties_grapheme[cp]), "T", 0)) { 62 properties[cp] |= CHAR_PROP_HANGUL_T; 63 } 64 65 if (match_in_codepoint_property_set( 66 &(properties_grapheme[cp]), "LV", 0)) { 67 properties[cp] |= CHAR_PROP_HANGUL_LV; 68 } 69 70 if (match_in_codepoint_property_set( 71 &(properties_grapheme[cp]), "LVT", 0)) { 72 properties[cp] |= CHAR_PROP_HANGUL_LVT; 73 } 74 75 if ((match = match_in_codepoint_property_set( 76 &(properties_dcp[cp]), "InCB", 0))) { 77 if (strcmp(match->fields[1], "Consonant") == 0) { 78 properties[cp] |= CHAR_PROP_ICB_CONSONANT; 79 } else if (strcmp(match->fields[1], "Extend") == 0) { 80 properties[cp] |= CHAR_PROP_ICB_EXTEND; 81 } else if (strcmp(match->fields[1], "Linker") == 0) { 82 properties[cp] |= CHAR_PROP_ICB_LINKER; 83 } 84 } 85 86 if (match_in_codepoint_property_set( 87 &(properties_grapheme[cp]), "Prepend", 0)) { 88 properties[cp] |= CHAR_PROP_PREPEND; 89 } 90 91 if (match_in_codepoint_property_set( 92 &(properties_grapheme[cp]), "Regional_Indicator", 0)) { 93 properties[cp] |= CHAR_PROP_REGIONAL_INDICATOR; 94 } 95 96 if (match_in_codepoint_property_set( 97 &(properties_grapheme[cp]), "SpacingMark", 0)) { 98 properties[cp] |= CHAR_PROP_SPACINGMARK; 99 } 100 } 101 102 /* generate code */ 103 compress_and_output(properties, "character"); 104 105 /* cleanup */ 106 free_codepoint_property_set_array(properties_dcp); 107 free_codepoint_property_set_array(properties_emoji); 108 free_codepoint_property_set_array(properties_grapheme); 109 free(properties); 110 111 return 0; 112 }