Refactor case-checking-functions with Herodotus and add unit tests - libgrapheme

commit 5dec22a7143e1105f25c7a7626fa166d882367d0
parent 8a7e2ee85f0a2824e48e85e57534c5b18113cf07
Author: Laslo Hunhold <dev@frign.de>
Date:   Sat, 24 Sep 2022 10:36:15 +0200

Refactor case-checking-functions with Herodotus and add unit tests

Additionally, expand the unit tests with special-casing cases.

Signed-off-by: Laslo Hunhold <dev@frign.de>

Diffstat:
M src/case.c  | 213 +++++++++++++++++++++++++++++++++++++------------------------------------------
M src/util.h  | 3 ++-
M test/case.c  | 312 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------

3 files changed, 382 insertions(+), 146 deletions(-)
diff --git a/src/case.c b/src/case.c
@@ -1,4 +1,5 @@
 /* See LICENSE file for copyright and license details. */
+#include <stddef.h>
 #include <stdint.h>
 
 #include "../grapheme.h"
@@ -208,6 +209,7 @@ to_titlecase(HERODOTUS_READER *r, HERODOTUS_WRITER *w)
 		/* cast the rest of the codepoints in the word to lowercase */
 		to_case(r, w, 1, lower_major, lower_minor, lower_special);
 
+		/* remove the limit on the word before the next iteration */
 		herodotus_reader_pop_limit(r);
 	}
 
@@ -289,20 +291,16 @@ grapheme_to_titlecase_utf8(const char *src, size_t srclen, char *dest, size_t de
 }
 
 static inline bool
-is_case(const void *src, size_t srclen,
-        size_t srcnumprocess,
-        size_t (*get_codepoint)(const void *, size_t, size_t, uint_least32_t *),
-        const uint_least16_t *major, const int_least32_t *minor,
-        const struct special_case *sc, size_t *output)
+is_case(HERODOTUS_READER *r, const uint_least16_t *major,
+        const int_least32_t *minor, const struct special_case *sc,
+        size_t *output)
 {
-	size_t srcoff, new_srcoff, tmp, res, off, i;
-	uint_least32_t cp, tmp_cp;
+	size_t off, i;
+	bool ret = true;
+	uint_least32_t cp;
 	int_least32_t map;
 
-	for (srcoff = 0; srcoff < srcnumprocess; srcoff = new_srcoff) {
-		/* read in next source codepoint */
-		new_srcoff = srcoff + get_codepoint(src, srclen, srcoff, &cp);
-
+	for (; herodotus_read_codepoint(r, false, &cp) == HERODOTUS_STATUS_SUCCESS;) {
 		/* get and handle case mapping */
 		if (unlikely((map = get_case_offset(cp, major, minor)) >=
 		             INT32_C(0x110000))) {
@@ -310,173 +308,162 @@ is_case(const void *src, size_t srclen,
 			 * is the difference to 0x110000*/
 			off = (uint_least32_t)map - UINT32_C(0x110000);
 
-			for (i = 0, tmp = srcoff; i < sc[off].cplen; i++, tmp += res) {
-				res = get_codepoint(src, srclen, srcoff, &tmp_cp);
-				if (tmp_cp != sc[off].cp[i]) {
-					/* we have a difference */
-					if (output) {
-						*output = tmp;
+			for (i = 0; i < sc[off].cplen; i++) {
+				if (herodotus_read_codepoint(r, false, &cp) ==
+				    HERODOTUS_STATUS_SUCCESS) {
+					if (cp != sc[off].cp[i]) {
+						ret = false;
+						goto done;
+					} else {
+						/* move forward */
+						herodotus_read_codepoint(r, true, &cp);
 					}
-					return false;
+				} else {
+					/*
+					 * input ended and we didn't see
+					 * any difference so far, so this
+					 * string is in fact okay
+					 */
+					ret = true;
+					goto done;
 				}
 			}
-			new_srcoff = tmp;
 		} else {
 			/* we have a simple mapping */
 			if (cp != (uint_least32_t)((int_least32_t)cp + map)) {
 				/* we have a difference */
-				if (output) {
-					*output = srcoff;
-				}
-				return false;
+				ret = false;
+				goto done;
+			} else {
+				/* move forward */
+				herodotus_read_codepoint(r, true, &cp);
 			}
 		}
 	}
-
+done:
 	if (output) {
-		*output = srcoff;
+		*output = herodotus_reader_number_read(r);
 	}
-	return true;
+	return ret;
 }
 
 static inline bool
-is_titlecase(const void *src, size_t srclen,
-             size_t (*get_codepoint)(const void *, size_t, size_t, uint_least32_t *),
-	     size_t *output)
+is_titlecase(HERODOTUS_READER *r, size_t *output)
 {
 	enum case_property prop;
-	size_t next_wb, srcoff, res, tmp_output;
+	enum herodotus_status s;
+	bool ret = true;
 	uint_least32_t cp;
 
-	for (srcoff = 0; ; ) {
-		if (get_codepoint == get_codepoint_utf8) {
-			if ((next_wb = grapheme_next_word_break_utf8((const char *)src + srcoff,
-			                                             srclen - srcoff)) == 0) {
-				/* we consumed all of the string */
-				break;
-			}
-		} else {
-			if ((next_wb = grapheme_next_word_break((const uint_least32_t *)src + srcoff,
-			                                        srclen - srcoff)) == 0) {
-				/* we consumed all of the string */
-				break;
-			}
-		}
-
-		for (; next_wb > 0 && srcoff < srclen; next_wb -= res, srcoff += res) {
+	for (;;) {
+		herodotus_reader_push_advance_limit(r, herodotus_next_word_break(r));
+		for (; (s = herodotus_read_codepoint(r, false, &cp)) == HERODOTUS_STATUS_SUCCESS;) {
 			/* check if we have a cased character */
-			res = get_codepoint(src, srclen, srcoff, &cp);
 			prop = get_case_property(cp);
 			if (prop == CASE_PROP_CASED ||
 			    prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE) {
 				break;
-			}
-		}
-
-		if (next_wb > 0) {
-			/* get character length */
-			res = get_codepoint(src, srclen, srcoff, &cp);
-
-			/* we have a cased character at srcoff, check if it's titlecase */
-			if (get_codepoint == get_codepoint_utf8) {
-				if (!is_case((const char *)src + srcoff,
-				              srclen - srcoff, res,
-					      get_codepoint_utf8, title_major,
-			                      title_minor, title_special, &tmp_output)) {
-					if (output) {
-						*output = srcoff + tmp_output;
-					}
-					return false;
-				}
 			} else {
-				if (!is_case((const uint_least32_t *)src + srcoff,
-				              srclen - srcoff, res,
-					      get_codepoint, title_major,
-			                      title_minor, title_special, &tmp_output)) {
-					if (output) {
-						*output = srcoff + tmp_output;
-					}
-					return false;
-				}
+				/* increment reader */
+				herodotus_read_codepoint(r, true, &cp);
 			}
+		}
 
+		if (s == HERODOTUS_STATUS_END_OF_BUFFER) {
+			/* we are done */
+			break;
+		} else if (s == HERODOTUS_STATUS_SOFT_LIMIT_REACHED) {
+			/*
+			 * we did not encounter any cased character
+			 * up to the word break
+			 */
+			continue;
+		} else {
 			/*
-			 * we consumed a character (make sure to never
-			 * underflow next_wb; this should not happen,
-			 * but it's better to be sure)
+			 * we encountered a cased character before the word
+			 * break, check if it's titlecase
 			 */
-			srcoff += res;
-			next_wb -= (res <= next_wb) ? res : next_wb;
+			herodotus_reader_push_advance_limit(r,
+				herodotus_reader_next_codepoint_break(r));
+			if (!is_case(r, title_major, title_minor, title_special, NULL)) {
+				ret = false;
+				goto done;
+			}
+			herodotus_reader_pop_limit(r);
 		}
 
 		/* check if the rest of the codepoints in the word are lowercase */
-		if (get_codepoint == get_codepoint_utf8) {
-			if (!is_case((const char *)src + srcoff,
-			              srclen - srcoff, next_wb,
-				      get_codepoint_utf8, lower_major,
-		                      lower_minor, lower_special, &tmp_output)) {
-				if (output) {
-					*output = srcoff + tmp_output;
-				}
-				return false;
-			}
-		} else {
-			if (!is_case((const uint_least32_t *)src + srcoff,
-			              srclen - srcoff, next_wb,
-				      get_codepoint, lower_major,
-		                      lower_minor, lower_special, &tmp_output)) {
-				if (output) {
-					*output = srcoff + tmp_output;
-				}
-				return false;
-			}
+		if (!is_case(r, lower_major, lower_minor, lower_special, NULL)) {
+			ret = false;
+			goto done;
 		}
-		srcoff += next_wb;
-	}
 
+		/* remove the limit on the word before the next iteration */
+		herodotus_reader_pop_limit(r);
+	}
+done:
 	if (output) {
-		*output = srcoff;
+		*output = herodotus_reader_number_read(r);
 	}
-	return true;
+	return ret;
 }
 
 bool
 grapheme_is_uppercase(const uint_least32_t *src, size_t srclen, size_t *caselen)
 {
-	return is_case(src, srclen, srclen, get_codepoint,
-	               upper_major, upper_minor, upper_special, caselen);
+	HERODOTUS_READER r;
+
+	herodotus_reader_init(&r, HERODOTUS_TYPE_CODEPOINT, src, srclen);
+
+	return is_case(&r, upper_major, upper_minor, upper_special, caselen);
 }
 
 bool
 grapheme_is_lowercase(const uint_least32_t *src, size_t srclen, size_t *caselen)
 {
-	return is_case(src, srclen, srclen, get_codepoint,
-	               lower_major, lower_minor, lower_special, caselen);
+	HERODOTUS_READER r;
+
+	herodotus_reader_init(&r, HERODOTUS_TYPE_CODEPOINT, src, srclen);
+
+	return is_case(&r, lower_major, lower_minor, lower_special, caselen);
 }
 
 bool
 grapheme_is_titlecase(const uint_least32_t *src, size_t srclen, size_t *caselen)
 {
-	return is_titlecase(src, srclen, get_codepoint, caselen);
+	HERODOTUS_READER r;
+
+	herodotus_reader_init(&r, HERODOTUS_TYPE_CODEPOINT, src, srclen);
+
+	return is_titlecase(&r, caselen);
 }
 
 bool
 grapheme_is_uppercase_utf8(const char *src, size_t srclen, size_t *caselen)
 {
-	return is_case(src, srclen, srclen, get_codepoint_utf8,
-	               upper_major, upper_minor, upper_special, caselen);
+	HERODOTUS_READER r;
+
+	herodotus_reader_init(&r, HERODOTUS_TYPE_UTF8, src, srclen);
+
+	return is_case(&r, upper_major, upper_minor, upper_special, caselen);
 }
 
 bool
 grapheme_is_lowercase_utf8(const char *src, size_t srclen, size_t *caselen)
 {
-	return is_case(src, srclen, srclen, get_codepoint_utf8,
-	               lower_major, lower_minor, lower_special, caselen);
+	HERODOTUS_READER r;
+
+	herodotus_reader_init(&r, HERODOTUS_TYPE_UTF8, src, srclen);
 
+	return is_case(&r, lower_major, lower_minor, lower_special, caselen);
 }
 
 bool
 grapheme_is_titlecase_utf8(const char *src, size_t srclen, size_t *caselen)
 {
-	return is_titlecase(src, srclen, get_codepoint_utf8, caselen);
+	HERODOTUS_READER r;
+
+	herodotus_reader_init(&r, HERODOTUS_TYPE_UTF8, src, srclen);
+
+	return is_titlecase(&r, caselen);
 }
diff --git a/src/util.h b/src/util.h
@@ -79,6 +79,7 @@ void herodotus_reader_init(HERODOTUS_READER *, enum herodotus_type,
 void herodotus_reader_copy(const HERODOTUS_READER *, HERODOTUS_READER *);
 void herodotus_reader_push_advance_limit(HERODOTUS_READER *, size_t);
 void herodotus_reader_pop_limit(HERODOTUS_READER *);
+size_t herodotus_reader_number_read(const HERODOTUS_READER *);
 size_t herodotus_reader_next_word_break(const HERODOTUS_READER *);
 size_t herodotus_reader_next_codepoint_break(const HERODOTUS_READER *);
 enum herodotus_status herodotus_read_codepoint(HERODOTUS_READER *, bool, uint_least32_t *);
@@ -86,7 +87,7 @@ enum herodotus_status herodotus_read_codepoint(HERODOTUS_READER *, bool, uint_le
 void herodotus_writer_init(HERODOTUS_WRITER *, enum herodotus_type, void *,
                            size_t);
 void herodotus_writer_nul_terminate(HERODOTUS_WRITER *);
-size_t herodotus_writer_number_written(HERODOTUS_WRITER *);
+size_t herodotus_writer_number_written(const HERODOTUS_WRITER *);
 void herodotus_write_codepoint(HERODOTUS_WRITER *, uint_least32_t);
 
 size_t get_codepoint(const void *, size_t, size_t, uint_least32_t *);
diff --git a/test/case.c b/test/case.c
@@ -7,6 +7,18 @@
 #include "../grapheme.h"
 #include "util.h"
 
+struct unit_test_is_case_utf8 {
+	const char *description;
+	struct {
+		const char *src;
+		size_t srclen;
+	} input;
+	struct {
+		bool ret;
+		size_t caselen;
+	} output;
+};
+
 struct unit_test_to_case_utf8 {
 	const char *description;
 	struct {
@@ -20,7 +32,201 @@ struct unit_test_to_case_utf8 {
 	} output;
 };
 
-static struct unit_test_to_case_utf8 lowercase_utf8[] = {
+static struct unit_test_is_case_utf8 is_lowercase_utf8[] = {
+	{
+		.description = "empty input",
+		.input =  { "", 0 },
+		.output = { true, 0 },
+	},
+	{
+		.description = "one character, violation",
+		.input =  { "A", 1 },
+		.output = { false, 0 },
+	},
+	{
+		.description = "one character, confirmation",
+		.input =  { "\xc3\x9f", 2 },
+		.output = { true, 2 },
+	},
+	{
+		.description = "one character, violation, NUL-terminated",
+		.input =  { "A", SIZE_MAX },
+		.output = { false, 0 },
+	},
+	{
+		.description = "one character, confirmation, NUL-terminated",
+		.input =  { "\xc3\x9f", SIZE_MAX },
+		.output = { true, 2 },
+	},
+	{
+		.description = "one word, violation",
+		.input =  { "Hello", 5 },
+		.output = { false, 0 },
+	},
+	{
+		.description = "one word, partial confirmation",
+		.input =  { "gru" "\xc3\x9f" "fOrmel", 11 },
+		.output = { false, 6 },
+	},
+	{
+		.description = "one word, full confirmation",
+		.input =  { "gru" "\xc3\x9f" "formel", 11 },
+		.output = { true, 11 },
+	},
+	{
+		.description = "one word, violation, NUL-terminated",
+		.input =  { "Hello", SIZE_MAX },
+		.output = { false, 0 },
+	},
+	{
+		.description = "one word, partial confirmation, NUL-terminated",
+		.input =  { "gru" "\xc3\x9f" "fOrmel", SIZE_MAX },
+		.output = { false, 6 },
+	},
+	{
+		.description = "one word, full confirmation, NUL-terminated",
+		.input =  { "gru" "\xc3\x9f" "formel", SIZE_MAX },
+		.output = { true, 11 },
+	},
+};
+
+static struct unit_test_is_case_utf8 is_uppercase_utf8[] = {
+	{
+		.description = "empty input",
+		.input =  { "", 0 },
+		.output = { true, 0 },
+	},
+	{
+		.description = "one character, violation",
+		.input =  { "\xc3\x9f", 2 },
+		.output = { false, 0 },
+	},
+	{
+		.description = "one character, confirmation",
+		.input =  { "A", 1 },
+		.output = { true, 1 },
+	},
+	{
+		.description = "one character, violation, NUL-terminated",
+		.input =  { "\xc3\x9f", SIZE_MAX },
+		.output = { false, 0 },
+	},
+	{
+		.description = "one character, confirmation, NUL-terminated",
+		.input =  { "A", SIZE_MAX },
+		.output = { true, 1 },
+	},
+	{
+		.description = "one word, violation",
+		.input =  { "hello", 5 },
+		.output = { false, 0 },
+	},
+	{
+		.description = "one word, partial confirmation",
+		.input =  { "GRU" "\xc3\x9f" "formel", 11 },
+		.output = { false, 3 },
+	},
+	{
+		.description = "one word, full confirmation",
+		.input =  { "HELLO", 5 },
+		.output = { true, 5 },
+	},
+	{
+		.description = "one word, violation, NUL-terminated",
+		.input =  { "hello", SIZE_MAX },
+		.output = { false, 0 },
+	},
+	{
+		.description = "one word, partial confirmation, NUL-terminated",
+		.input =  { "GRU" "\xc3\x9f" "formel", SIZE_MAX },
+		.output = { false, 3 },
+	},
+	{
+		.description = "one word, full confirmation, NUL-terminated",
+		.input =  { "HELLO", SIZE_MAX },
+		.output = { true, 5 },
+	},
+};
+
+static struct unit_test_is_case_utf8 is_titlecase_utf8[] = {
+	{
+		.description = "empty input",
+		.input =  { "", 0 },
+		.output = { true, 0 },
+	},
+	{
+		.description = "one character, violation",
+		.input =  { "\xc3\x9f", 2 },
+		.output = { false, 0 },
+	},
+	{
+		.description = "one character, confirmation",
+		.input =  { "A", 1 },
+		.output = { true, 1 },
+	},
+	{
+		.description = "one character, violation, NUL-terminated",
+		.input =  { "\xc3\x9f", SIZE_MAX },
+		.output = { false, 0 },
+	},
+	{
+		.description = "one character, confirmation, NUL-terminated",
+		.input =  { "A", SIZE_MAX },
+		.output = { true, 1 },
+	},
+	{
+		.description = "one word, violation",
+		.input =  { "hello", 5 },
+		.output = { false, 0 },
+	},
+	{
+		.description = "one word, partial confirmation",
+		.input =  { "Gru" "\xc3\x9f" "fOrmel", 11 },
+		.output = { false, 6 },
+	},
+	{
+		.description = "one word, full confirmation",
+		.input =  { "Gru" "\xc3\x9f" "formel", 11 },
+		.output = { true, 11 },
+	},
+	{
+		.description = "one word, violation, NUL-terminated",
+		.input =  { "hello", SIZE_MAX },
+		.output = { false, 0 },
+	},
+	{
+		.description = "one word, partial confirmation, NUL-terminated",
+		.input =  { "Gru" "\xc3\x9f" "fOrmel", SIZE_MAX },
+		.output = { false, 6 },
+	},
+	{
+		.description = "one word, full confirmation, NUL-terminated",
+		.input =  { "Gru" "\xc3\x9f" "formel", SIZE_MAX },
+		.output = { true, 11 },
+	},
+	{
+		.description = "multiple words, partial confirmation",
+		.input =  { "Hello Gru" "\xc3\x9f" "fOrmel!", 18 },
+		.output = { false, 12 },
+	},
+	{
+		.description = "multiple words, full confirmation",
+		.input =  { "Hello Gru" "\xc3\x9f" "formel!", 18 },
+		.output = { true, 18 },
+	},
+	{
+		.description = "multiple words, partial confirmation, NUL-terminated",
+		.input =  { "Hello Gru" "\xc3\x9f" "fOrmel!", SIZE_MAX },
+		.output = { false, 12 },
+	},
+	{
+		.description = "multiple words, full confirmation, NUL-terminated",
+		.input =  { "Hello Gru" "\xc3\x9f" "formel!", SIZE_MAX },
+		.output = { true, 18 },
+	},
+};
+
+static struct unit_test_to_case_utf8 to_lowercase_utf8[] = {
 	{
 		.description = "empty input",
 		.input =  { "", 0, 10 },
@@ -38,8 +244,8 @@ static struct unit_test_to_case_utf8 lowercase_utf8[] = {
 	},
 	{
 		.description = "one character, no conversion",
-		.input =  { "a", 1, 10 },
-		.output = { "a", 1 },
+		.input =  { "\xc3\x9f", 2, 10 },
+		.output = { "\xc3\x9f", 2 },
 	},
 	{
 		.description = "one character, conversion, truncation",
@@ -53,8 +259,8 @@ static struct unit_test_to_case_utf8 lowercase_utf8[] = {
 	},
 	{
 		.description = "one character, no conversion, NUL-terminated",
-		.input =  { "a", SIZE_MAX, 10 },
-		.output = { "a", 1 },
+		.input =  { "\xc3\x9f", SIZE_MAX, 10 },
+		.output = { "\xc3\x9f", 2 },
 	},
 	{
 		.description = "one character, conversion, NUL-terminated, truncation",
@@ -93,7 +299,7 @@ static struct unit_test_to_case_utf8 lowercase_utf8[] = {
 	},
 };
 
-static struct unit_test_to_case_utf8 uppercase_utf8[] = {
+static struct unit_test_to_case_utf8 to_uppercase_utf8[] = {
 	{
 		.description = "empty input",
 		.input =  { "", 0, 10 },
@@ -106,8 +312,8 @@ static struct unit_test_to_case_utf8 uppercase_utf8[] = {
 	},
 	{
 		.description = "one character, conversion",
-		.input =  { "a", 1, 10 },
-		.output = { "A", 1 },
+		.input =  { "\xc3\x9f", 2, 10 },
+		.output = { "SS", 2 },
 	},
 	{
 		.description = "one character, no conversion",
@@ -116,13 +322,13 @@ static struct unit_test_to_case_utf8 uppercase_utf8[] = {
 	},
 	{
 		.description = "one character, conversion, truncation",
-		.input =  { "a", 1, 0 },
-		.output = { "", 1 },
+		.input =  { "\xc3\x9f", 2, 0 },
+		.output = { "", 2 },
 	},
 	{
 		.description = "one character, conversion, NUL-terminated",
-		.input =  { "a", SIZE_MAX, 10 },
-		.output = { "A", 1 },
+		.input =  { "\xc3\x9f", SIZE_MAX, 10 },
+		.output = { "SS", 2 },
 	},
 	{
 		.description = "one character, no conversion, NUL-terminated",
@@ -131,13 +337,13 @@ static struct unit_test_to_case_utf8 uppercase_utf8[] = {
 	},
 	{
 		.description = "one character, conversion, NUL-terminated, truncation",
-		.input =  { "a", SIZE_MAX, 0 },
-		.output = { "", 1 },
+		.input =  { "\xc3\x9f", SIZE_MAX, 0 },
+		.output = { "", 2 },
 	},
 	{
 		.description = "one word, conversion",
-		.input =  { "wOrD", 4, 10 },
-		.output = { "WORD", 4 },
+		.input =  { "gRu" "\xc3\x9f" "fOrMel", 11, 15 },
+		.output = { "GRUSSFORMEL", 11 },
 	},
 	{
 		.description = "one word, no conversion",
@@ -146,13 +352,13 @@ static struct unit_test_to_case_utf8 uppercase_utf8[] = {
 	},
 	{
 		.description = "one word, conversion, truncation",
-		.input =  { "wOrD", 4, 3 },
-		.output = { "WO", 4 },
+		.input =  { "gRu" "\xc3\x9f" "formel", 11, 5 },
+		.output = { "GRUS", 11 },
 	},
 	{
 		.description = "one word, conversion, NUL-terminated",
-		.input =  { "wOrD", SIZE_MAX, 10 },
-		.output = { "WORD", 4 },
+		.input =  { "gRu" "\xc3\x9f" "formel", SIZE_MAX, 15 },
+		.output = { "GRUSSFORMEL", 11 },
 	},
 	{
 		.description = "one word, no conversion, NUL-terminated",
@@ -161,12 +367,12 @@ static struct unit_test_to_case_utf8 uppercase_utf8[] = {
 	},
 	{
 		.description = "one word, conversion, NUL-terminated, truncation",
-		.input =  { "wOrD", SIZE_MAX, 3 },
-		.output = { "WO", 4 },
+		.input =  { "gRu" "\xc3\x9f" "formel", SIZE_MAX, 5 },
+		.output = { "GRUS", 11 },
 	},
 };
 
-static struct unit_test_to_case_utf8 titlecase_utf8[] = {
+static struct unit_test_to_case_utf8 to_titlecase_utf8[] = {
 	{
 		.description = "empty input",
 		.input =  { "", 0, 10 },
@@ -270,6 +476,42 @@ static struct unit_test_to_case_utf8 titlecase_utf8[] = {
 };
 
 static int
+unit_test_callback_is_case_utf8(void *t, size_t off, const char *name, const char *argv0)
+{
+	struct unit_test_is_case_utf8 *test = (struct unit_test_is_case_utf8 *)t + off;
+	bool ret = false;
+	size_t caselen = 0x7f;
+
+	if (t == is_lowercase_utf8) {
+		ret = grapheme_is_lowercase_utf8(test->input.src, test->input.srclen,
+		                                 &caselen);
+	} else if (t == is_uppercase_utf8) {
+		ret = grapheme_is_uppercase_utf8(test->input.src, test->input.srclen,
+		                                 &caselen);
+	} else if (t == is_titlecase_utf8) {
+		ret = grapheme_is_titlecase_utf8(test->input.src, test->input.srclen,
+		                                 &caselen);
+
+	} else {
+		goto err;
+	}
+
+	/* check results */
+	if (ret != test->output.ret || caselen != test->output.caselen) {
+		goto err;
+	}
+
+	return 0;
+err:
+	fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
+	        "(returned (%s, %zu) instead of (%s, %zu)).\n", argv0,
+	        name, off, test->description, ret ? "true" : "false",
+		caselen, test->output.ret ? "true" : "false",
+	        test->output.caselen);
+	return 1;
+}
+
+static int
 unit_test_callback_to_case_utf8(void *t, size_t off, const char *name, const char *argv0)
 {
 	struct unit_test_to_case_utf8 *test = (struct unit_test_to_case_utf8 *)t + off;
@@ -279,13 +521,13 @@ unit_test_callback_to_case_utf8(void *t, size_t off, const char *name, const cha
 	/* fill the array with canary values */
 	memset(buf, 0x7f, LEN(buf));
 
-	if (t == lowercase_utf8) {
+	if (t == to_lowercase_utf8) {
 		ret = grapheme_to_lowercase_utf8(test->input.src, test->input.srclen,
 		                                 buf, test->input.destlen);
-	} else if (t == uppercase_utf8) {
+	} else if (t == to_uppercase_utf8) {
 		ret = grapheme_to_uppercase_utf8(test->input.src, test->input.srclen,
 		                                 buf, test->input.destlen);
-	} else if (t == titlecase_utf8) {
+	} else if (t == to_titlecase_utf8) {
 		ret = grapheme_to_titlecase_utf8(test->input.src, test->input.srclen,
 		                                 buf, test->input.destlen);
 	} else {
@@ -319,10 +561,16 @@ main(int argc, char *argv[])
 {
 	(void)argc;
 
-	return run_unit_tests(unit_test_callback_to_case_utf8, lowercase_utf8,
-	                      LEN(lowercase_utf8), "grapheme_to_lowercase_utf8", argv[0]) +
-	       run_unit_tests(unit_test_callback_to_case_utf8, uppercase_utf8,
-	                      LEN(uppercase_utf8), "grapheme_to_uppercase_utf8", argv[0]) +
-	       run_unit_tests(unit_test_callback_to_case_utf8, titlecase_utf8,
-	                      LEN(titlecase_utf8), "grapheme_to_titlecase_utf8", argv[0]);
+	return run_unit_tests(unit_test_callback_is_case_utf8, is_lowercase_utf8,
+	                      LEN(is_lowercase_utf8), "grapheme_is_lowercase_utf8", argv[0]) +
+	       run_unit_tests(unit_test_callback_is_case_utf8, is_uppercase_utf8,
+	                      LEN(is_uppercase_utf8), "grapheme_is_uppercase_utf8", argv[0]) +
+	       run_unit_tests(unit_test_callback_is_case_utf8, is_titlecase_utf8,
+	                      LEN(is_titlecase_utf8), "grapheme_is_titlecase_utf8", argv[0]) +
+	       run_unit_tests(unit_test_callback_to_case_utf8, to_lowercase_utf8,
+	                      LEN(to_lowercase_utf8), "grapheme_to_lowercase_utf8", argv[0]) +
+	       run_unit_tests(unit_test_callback_to_case_utf8, to_uppercase_utf8,
+	                      LEN(to_uppercase_utf8), "grapheme_to_uppercase_utf8", argv[0]) +
+	       run_unit_tests(unit_test_callback_to_case_utf8, to_titlecase_utf8,
+	                      LEN(to_titlecase_utf8), "grapheme_to_titlecase_utf8", argv[0]);
 }

	libgrapheme unicode string library
	git clone git://git.suckless.org/libgrapheme
	Log | Files | Refs | README | LICENSE

M	src/case.c	|	213	+++++++++++++++++++++++++++++++++++++------------------------------------------
M	src/util.h	|	3	++-
M	test/case.c	|	312	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------