libgrapheme

unicode string library
git clone git://git.suckless.org/libgrapheme
Log | Files | Refs | README | LICENSE

word.c (3212B)


      1 /* See LICENSE file for copyright and license details. */
      2 #include <stdio.h>
      3 #include <stdlib.h>
      4 #include <string.h>
      5 
      6 #include "util.h"
      7 
      8 #define FILE_EMOJI "data/emoji-data.txt"
      9 #define FILE_WORD  "data/WordBreakProperty.txt"
     10 
     11 static const struct property_spec word_break_property[] = {
     12 	{
     13 		.enumname = "OTHER",
     14 		.file     = NULL,
     15 		.ucdname  = NULL,
     16 	},
     17 	{
     18 		.enumname = "ALETTER",
     19 		.file     = FILE_WORD,
     20 		.ucdname  = "ALetter",
     21 	},
     22 	{
     23 		.enumname = "BOTH_ALETTER_EXTPICT",
     24 		.file     = NULL,
     25 		.ucdname  = NULL,
     26 	},
     27 	{
     28 		.enumname = "CR",
     29 		.file     = FILE_WORD,
     30 		.ucdname  = "CR",
     31 	},
     32 	{
     33 		.enumname = "DOUBLE_QUOTE",
     34 		.file     = FILE_WORD,
     35 		.ucdname  = "Double_Quote",
     36 	},
     37 	{
     38 		.enumname = "EXTEND",
     39 		.file     = FILE_WORD,
     40 		.ucdname  = "Extend",
     41 	},
     42 	{
     43 		.enumname = "EXTENDED_PICTOGRAPHIC",
     44 		.file     = FILE_EMOJI,
     45 		.ucdname  = "Extended_Pictographic",
     46 	},
     47 	{
     48 		.enumname = "EXTENDNUMLET",
     49 		.file     = FILE_WORD,
     50 		.ucdname  = "ExtendNumLet",
     51 	},
     52 	{
     53 		.enumname = "FORMAT",
     54 		.file     = FILE_WORD,
     55 		.ucdname  = "Format",
     56 	},
     57 	{
     58 		.enumname = "HEBREW_LETTER",
     59 		.file     = FILE_WORD,
     60 		.ucdname  = "Hebrew_Letter",
     61 	},
     62 	{
     63 		.enumname = "KATAKANA",
     64 		.file     = FILE_WORD,
     65 		.ucdname  = "Katakana",
     66 	},
     67 	{
     68 		.enumname = "LF",
     69 		.file     = FILE_WORD,
     70 		.ucdname  = "LF",
     71 	},
     72 	{
     73 		.enumname = "MIDLETTER",
     74 		.file     = FILE_WORD,
     75 		.ucdname  = "MidLetter",
     76 	},
     77 	{
     78 		.enumname = "MIDNUM",
     79 		.file     = FILE_WORD,
     80 		.ucdname  = "MidNum",
     81 	},
     82 	{
     83 		.enumname = "MIDNUMLET",
     84 		.file     = FILE_WORD,
     85 		.ucdname  = "MidNumLet",
     86 	},
     87 	{
     88 		.enumname = "NEWLINE",
     89 		.file     = FILE_WORD,
     90 		.ucdname  = "Newline",
     91 	},
     92 	{
     93 		.enumname = "NUMERIC",
     94 		.file     = FILE_WORD,
     95 		.ucdname  = "Numeric",
     96 	},
     97 	{
     98 		.enumname = "REGIONAL_INDICATOR",
     99 		.file     = FILE_WORD,
    100 		.ucdname  = "Regional_Indicator",
    101 	},
    102 	{
    103 		.enumname = "SINGLE_QUOTE",
    104 		.file     = FILE_WORD,
    105 		.ucdname  = "Single_Quote",
    106 	},
    107 	{
    108 		.enumname = "WSEGSPACE",
    109 		.file     = FILE_WORD,
    110 		.ucdname  = "WSegSpace",
    111 	},
    112 	{
    113 		.enumname = "ZWJ",
    114 		.file     = FILE_WORD,
    115 		.ucdname  = "ZWJ",
    116 	},
    117 };
    118 
    119 static uint_least8_t
    120 handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2)
    121 {
    122 	uint_least8_t result;
    123 
    124 	(void)cp;
    125 
    126 	if ((!strcmp(word_break_property[prop1].enumname, "ALETTER") &&
    127 	     !strcmp(word_break_property[prop2].enumname, "EXTENDED_PICTOGRAPHIC")) ||
    128 	    (!strcmp(word_break_property[prop1].enumname, "EXTENDED_PICTOGRAPHIC") &&
    129 	     !strcmp(word_break_property[prop2].enumname, "ALETTER"))) {
    130 		for (result = 0; result < LEN(word_break_property); result++) {
    131 			if (!strcmp(word_break_property[result].enumname,
    132 			            "BOTH_ALETTER_EXTPICT")) {
    133 				break;
    134 			}
    135 		}
    136 		if (result == LEN(word_break_property)) {
    137 			fprintf(stderr, "handle_conflict: Internal error.\n");
    138 			exit(1);
    139 		}
    140 	} else {
    141 		fprintf(stderr, "handle_conflict: Cannot handle conflict.\n");
    142 		exit(1);
    143 	}
    144 
    145 	return result;
    146 }
    147 
    148 int
    149 main(int argc, char *argv[])
    150 {
    151 	(void)argc;
    152 
    153 	properties_generate_break_property(word_break_property,
    154 	                                   LEN(word_break_property),
    155 	                                   handle_conflict, NULL, "word_break",
    156 	                                   argv[0]);
    157 
    158 	return 0;
    159 }