libgrapheme

unicode string library
git clone git://git.suckless.org/libgrapheme
Log | Files | Refs | README | LICENSE

character.c (4417B)


      1 /* See LICENSE file for copyright and license details. */
      2 #include <stddef.h>
      3 #include <stdio.h>
      4 #include <stdlib.h>
      5 #include <string.h>
      6 
      7 #include "util.h"
      8 
      9 #define FILE_DCP      "data/DerivedCoreProperties.txt"
     10 #define FILE_EMOJI    "data/emoji-data.txt"
     11 #define FILE_GRAPHEME "data/GraphemeBreakProperty.txt"
     12 
     13 static const struct property_spec char_break_property[] = {
     14 	{
     15 		.enumname = "OTHER",
     16 		.file = NULL,
     17 		.ucdname = NULL,
     18 	},
     19 	{
     20 		.enumname = "BOTH_EXTEND_ICB_EXTEND",
     21 		.file = NULL,
     22 		.ucdname = NULL,
     23 	},
     24 	{
     25 		.enumname = "BOTH_EXTEND_ICB_LINKER",
     26 		.file = NULL,
     27 		.ucdname = NULL,
     28 	},
     29 	{
     30 		.enumname = "BOTH_ZWJ_ICB_EXTEND",
     31 		.file = NULL,
     32 		.ucdname = NULL,
     33 	},
     34 	{
     35 		.enumname = "CONTROL",
     36 		.file = FILE_GRAPHEME,
     37 		.ucdname = "Control",
     38 	},
     39 	{
     40 		.enumname = "CR",
     41 		.file = FILE_GRAPHEME,
     42 		.ucdname = "CR",
     43 	},
     44 	{
     45 		.enumname = "EXTEND",
     46 		.file = FILE_GRAPHEME,
     47 		.ucdname = "Extend",
     48 	},
     49 	{
     50 		.enumname = "EXTENDED_PICTOGRAPHIC",
     51 		.file = FILE_EMOJI,
     52 		.ucdname = "Extended_Pictographic",
     53 	},
     54 	{
     55 		.enumname = "HANGUL_L",
     56 		.file = FILE_GRAPHEME,
     57 		.ucdname = "L",
     58 	},
     59 	{
     60 		.enumname = "HANGUL_V",
     61 		.file = FILE_GRAPHEME,
     62 		.ucdname = "V",
     63 	},
     64 	{
     65 		.enumname = "HANGUL_T",
     66 		.file = FILE_GRAPHEME,
     67 		.ucdname = "T",
     68 	},
     69 	{
     70 		.enumname = "HANGUL_LV",
     71 		.file = FILE_GRAPHEME,
     72 		.ucdname = "LV",
     73 	},
     74 	{
     75 		.enumname = "HANGUL_LVT",
     76 		.file = FILE_GRAPHEME,
     77 		.ucdname = "LVT",
     78 	},
     79 	{
     80 		.enumname = "ICB_CONSONANT",
     81 		.file = FILE_DCP,
     82 		.ucdname = "InCB",
     83 		.ucdsubname = "Consonant",
     84 	},
     85 	{
     86 		.enumname = "ICB_EXTEND",
     87 		.file = FILE_DCP,
     88 		.ucdname = "InCB",
     89 		.ucdsubname = "Extend",
     90 	},
     91 	{
     92 		.enumname = "ICB_LINKER",
     93 		.file = FILE_DCP,
     94 		.ucdname = "InCB",
     95 		.ucdsubname = "Linker",
     96 	},
     97 	{
     98 		.enumname = "LF",
     99 		.file = FILE_GRAPHEME,
    100 		.ucdname = "LF",
    101 	},
    102 	{
    103 		.enumname = "PREPEND",
    104 		.file = FILE_GRAPHEME,
    105 		.ucdname = "Prepend",
    106 	},
    107 	{
    108 		.enumname = "REGIONAL_INDICATOR",
    109 		.file = FILE_GRAPHEME,
    110 		.ucdname = "Regional_Indicator",
    111 	},
    112 	{
    113 		.enumname = "SPACINGMARK",
    114 		.file = FILE_GRAPHEME,
    115 		.ucdname = "SpacingMark",
    116 	},
    117 	{
    118 		.enumname = "ZWJ",
    119 		.file = FILE_GRAPHEME,
    120 		.ucdname = "ZWJ",
    121 	},
    122 };
    123 
    124 static uint_least8_t
    125 handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2)
    126 {
    127 	uint_least8_t result;
    128 
    129 	(void)cp;
    130 
    131 	if ((!strcmp(char_break_property[prop1].enumname, "EXTEND") &&
    132 	     !strcmp(char_break_property[prop2].enumname, "ICB_EXTEND")) ||
    133 	    (!strcmp(char_break_property[prop1].enumname, "ICB_EXTEND") &&
    134 	     !strcmp(char_break_property[prop2].enumname, "EXTEND"))) {
    135 		for (result = 0; result < LEN(char_break_property); result++) {
    136 			if (!strcmp(char_break_property[result].enumname,
    137 			            "BOTH_EXTEND_ICB_EXTEND")) {
    138 				break;
    139 			}
    140 		}
    141 		if (result == LEN(char_break_property)) {
    142 			fprintf(stderr, "handle_conflict: Internal error.\n");
    143 			exit(1);
    144 		}
    145 	} else if ((!strcmp(char_break_property[prop1].enumname, "EXTEND") &&
    146 	            !strcmp(char_break_property[prop2].enumname,
    147 	                    "ICB_LINKER")) ||
    148 	           (!strcmp(char_break_property[prop1].enumname,
    149 	                    "ICB_LINKER") &&
    150 	            !strcmp(char_break_property[prop2].enumname, "EXTEND"))) {
    151 		for (result = 0; result < LEN(char_break_property); result++) {
    152 			if (!strcmp(char_break_property[result].enumname,
    153 			            "BOTH_EXTEND_ICB_LINKER")) {
    154 				break;
    155 			}
    156 		}
    157 		if (result == LEN(char_break_property)) {
    158 			fprintf(stderr, "handle_conflict: Internal error.\n");
    159 			exit(1);
    160 		}
    161 	} else if ((!strcmp(char_break_property[prop1].enumname, "ZWJ") &&
    162 	            !strcmp(char_break_property[prop2].enumname,
    163 	                    "ICB_EXTEND")) ||
    164 	           (!strcmp(char_break_property[prop1].enumname,
    165 	                    "ICB_EXTEND") &&
    166 	            !strcmp(char_break_property[prop2].enumname, "ZWJ"))) {
    167 		for (result = 0; result < LEN(char_break_property); result++) {
    168 			if (!strcmp(char_break_property[result].enumname,
    169 			            "BOTH_ZWJ_ICB_EXTEND")) {
    170 				break;
    171 			}
    172 		}
    173 		if (result == LEN(char_break_property)) {
    174 			fprintf(stderr, "handle_conflict: Internal error.\n");
    175 			exit(1);
    176 		}
    177 	} else {
    178 		fprintf(stderr, "handle_conflict: Cannot handle conflict.\n");
    179 		exit(1);
    180 	}
    181 
    182 	return result;
    183 }
    184 
    185 int
    186 main(int argc, char *argv[])
    187 {
    188 	(void)argc;
    189 
    190 	properties_generate_break_property(
    191 		char_break_property, LEN(char_break_property), NULL,
    192 		handle_conflict, NULL, "char_break", argv[0]);
    193 
    194 	return 0;
    195 }