Add case-conversion-unit-tests - libgrapheme

commit e63bcc42010176b300feea6a7412f814a6cc4191
parent 5332f7ee034081618617c2b0785733ccc9ec8753
Author: Laslo Hunhold <dev@frign.de>
Date:   Wed, 21 Sep 2022 20:18:12 +0200

Add case-conversion-unit-tests

To give even more assurance and catch any possible future regressions,
exhaustive unit tests are added for the case-conversion functions.

Signed-off-by: Laslo Hunhold <dev@frign.de>

Diffstat:
M Makefile  | 3 +++
A test/case.c  | 329 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M test/util.c  | 21 +++++++++++++++++++--
M test/util.h  | 5 +++++

4 files changed, 356 insertions(+), 2 deletions(-)
diff --git a/Makefile b/Makefile
@@ -53,6 +53,7 @@ SRC =\
 	src/word\
 
 TEST =\
+	test/case\
 	test/character\
 	test/line\
 	test/sentence\
@@ -160,6 +161,7 @@ src/sentence.o: src/sentence.c config.mk gen/sentence.h grapheme.h src/util.h
 src/utf8.o: src/utf8.c config.mk grapheme.h
 src/util.o: src/util.c config.mk gen/types.h grapheme.h src/util.h
 src/word.o: src/word.c config.mk gen/word.h grapheme.h src/util.h
+test/case.o: test/case.c config.mk grapheme.h test/util.h
 test/character.o: test/character.c config.mk gen/character-test.h grapheme.h test/util.h
 test/line.o: test/line.c config.mk gen/line-test.h grapheme.h test/util.h
 test/sentence.o: test/sentence.c config.mk gen/sentence-test.h grapheme.h test/util.h
@@ -183,6 +185,7 @@ gen/sentence: gen/sentence.o gen/util.o
 gen/sentence-test: gen/sentence-test.o gen/util.o
 gen/word: gen/word.o gen/util.o
 gen/word-test: gen/word-test.o gen/util.o
+test/case: test/case.o test/util.o libgrapheme.a
 test/character: test/character.o test/util.o libgrapheme.a
 test/line: test/line.o test/util.o libgrapheme.a
 test/sentence: test/sentence.o test/util.o libgrapheme.a
diff --git a/test/case.c b/test/case.c
@@ -0,0 +1,329 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "../grapheme.h"
+#include "util.h"
+
+/*
+ * One test case for the grapheme_to_*_utf8() case-conversion functions:
+ * the arguments of the call (input) and the result expected from it
+ * (output).
+ */
+struct unit_test_to_case_utf8 {
+	const char *description; /* printed when the test fails */
+	struct {
+		const char *src; /* source string argument */
+		size_t srclen;   /* source length argument; SIZE_MAX in the "NUL-terminated" cases */
+		size_t destlen;  /* destination length argument */
+	} input;
+	struct {
+		const char *dest; /* expected output, compared over MIN(destlen, ret) bytes */
+		size_t ret;       /* expected return value */
+	} output;
+};
+
+/*
+ * Test cases run against grapheme_to_lowercase_utf8(). The cases
+ * described as "NUL-terminated" pass SIZE_MAX as the source length,
+ * the "truncation" cases pass a destination length smaller than the
+ * expected return value.
+ */
+struct unit_test_to_case_utf8 lowercase_utf8[] = {
+	{
+		.description = "empty input",
+		.input  = { .src = "", .srclen = 0, .destlen = 10 },
+		.output = { .dest = "", .ret = 0 },
+	},
+	{
+		.description = "empty output",
+		.input  = { .src = "hello", .srclen = 5, .destlen = 0 },
+		.output = { .dest = "", .ret = 5 },
+	},
+	{
+		.description = "one character, conversion",
+		.input  = { .src = "A", .srclen = 1, .destlen = 10 },
+		.output = { .dest = "a", .ret = 1 },
+	},
+	{
+		.description = "one character, no conversion",
+		.input  = { .src = "a", .srclen = 1, .destlen = 10 },
+		.output = { .dest = "a", .ret = 1 },
+	},
+	{
+		.description = "one character, conversion, truncation",
+		.input  = { .src = "A", .srclen = 1, .destlen = 0 },
+		.output = { .dest = "", .ret = 1 },
+	},
+	{
+		.description = "one character, conversion, NUL-terminated",
+		.input  = { .src = "A", .srclen = SIZE_MAX, .destlen = 10 },
+		.output = { .dest = "a", .ret = 1 },
+	},
+	{
+		.description = "one character, no conversion, NUL-terminated",
+		.input  = { .src = "a", .srclen = SIZE_MAX, .destlen = 10 },
+		.output = { .dest = "a", .ret = 1 },
+	},
+	{
+		.description = "one character, conversion, NUL-terminated, truncation",
+		.input  = { .src = "A", .srclen = SIZE_MAX, .destlen = 0 },
+		.output = { .dest = "", .ret = 1 },
+	},
+	{
+		.description = "one word, conversion",
+		.input  = { .src = "wOrD", .srclen = 4, .destlen = 10 },
+		.output = { .dest = "word", .ret = 4 },
+	},
+	{
+		.description = "one word, no conversion",
+		.input  = { .src = "word", .srclen = 4, .destlen = 10 },
+		.output = { .dest = "word", .ret = 4 },
+	},
+	{
+		.description = "one word, conversion, truncation",
+		.input  = { .src = "wOrD", .srclen = 4, .destlen = 3 },
+		.output = { .dest = "wo", .ret = 4 },
+	},
+	{
+		.description = "one word, conversion, NUL-terminated",
+		.input  = { .src = "wOrD", .srclen = SIZE_MAX, .destlen = 10 },
+		.output = { .dest = "word", .ret = 4 },
+	},
+	{
+		.description = "one word, no conversion, NUL-terminated",
+		.input  = { .src = "word", .srclen = SIZE_MAX, .destlen = 10 },
+		.output = { .dest = "word", .ret = 4 },
+	},
+	{
+		.description = "one word, conversion, NUL-terminated, truncation",
+		.input  = { .src = "wOrD", .srclen = SIZE_MAX, .destlen = 3 },
+		.output = { .dest = "wo", .ret = 4 },
+	},
+};
+
+/*
+ * Test cases run against grapheme_to_uppercase_utf8(). The cases
+ * described as "NUL-terminated" pass SIZE_MAX as the source length,
+ * the "truncation" cases pass a destination length smaller than the
+ * expected return value.
+ */
+struct unit_test_to_case_utf8 uppercase_utf8[] = {
+	{
+		.description = "empty input",
+		.input  = { .src = "", .srclen = 0, .destlen = 10 },
+		.output = { .dest = "", .ret = 0 },
+	},
+	{
+		.description = "empty output",
+		.input  = { .src = "hello", .srclen = 5, .destlen = 0 },
+		.output = { .dest = "", .ret = 5 },
+	},
+	{
+		.description = "one character, conversion",
+		.input  = { .src = "a", .srclen = 1, .destlen = 10 },
+		.output = { .dest = "A", .ret = 1 },
+	},
+	{
+		.description = "one character, no conversion",
+		.input  = { .src = "A", .srclen = 1, .destlen = 10 },
+		.output = { .dest = "A", .ret = 1 },
+	},
+	{
+		.description = "one character, conversion, truncation",
+		.input  = { .src = "a", .srclen = 1, .destlen = 0 },
+		.output = { .dest = "", .ret = 1 },
+	},
+	{
+		.description = "one character, conversion, NUL-terminated",
+		.input  = { .src = "a", .srclen = SIZE_MAX, .destlen = 10 },
+		.output = { .dest = "A", .ret = 1 },
+	},
+	{
+		.description = "one character, no conversion, NUL-terminated",
+		.input  = { .src = "A", .srclen = SIZE_MAX, .destlen = 10 },
+		.output = { .dest = "A", .ret = 1 },
+	},
+	{
+		.description = "one character, conversion, NUL-terminated, truncation",
+		.input  = { .src = "a", .srclen = SIZE_MAX, .destlen = 0 },
+		.output = { .dest = "", .ret = 1 },
+	},
+	{
+		.description = "one word, conversion",
+		.input  = { .src = "wOrD", .srclen = 4, .destlen = 10 },
+		.output = { .dest = "WORD", .ret = 4 },
+	},
+	{
+		.description = "one word, no conversion",
+		.input  = { .src = "WORD", .srclen = 4, .destlen = 10 },
+		.output = { .dest = "WORD", .ret = 4 },
+	},
+	{
+		.description = "one word, conversion, truncation",
+		.input  = { .src = "wOrD", .srclen = 4, .destlen = 3 },
+		.output = { .dest = "WO", .ret = 4 },
+	},
+	{
+		.description = "one word, conversion, NUL-terminated",
+		.input  = { .src = "wOrD", .srclen = SIZE_MAX, .destlen = 10 },
+		.output = { .dest = "WORD", .ret = 4 },
+	},
+	{
+		.description = "one word, no conversion, NUL-terminated",
+		.input  = { .src = "WORD", .srclen = SIZE_MAX, .destlen = 10 },
+		.output = { .dest = "WORD", .ret = 4 },
+	},
+	{
+		.description = "one word, conversion, NUL-terminated, truncation",
+		.input  = { .src = "wOrD", .srclen = SIZE_MAX, .destlen = 3 },
+		.output = { .dest = "WO", .ret = 4 },
+	},
+};
+
+/*
+ * Test cases run against grapheme_to_titlecase_utf8(). The cases
+ * described as "NUL-terminated" pass SIZE_MAX as the source length,
+ * the "truncation" cases pass a destination length smaller than the
+ * expected return value.
+ */
+struct unit_test_to_case_utf8 titlecase_utf8[] = {
+	{
+		.description = "empty input",
+		.input  = { .src = "", .srclen = 0, .destlen = 10 },
+		.output = { .dest = "", .ret = 0 },
+	},
+	{
+		.description = "empty output",
+		.input  = { .src = "hello", .srclen = 5, .destlen = 0 },
+		.output = { .dest = "", .ret = 5 },
+	},
+	{
+		.description = "one character, conversion",
+		.input  = { .src = "a", .srclen = 1, .destlen = 10 },
+		.output = { .dest = "A", .ret = 1 },
+	},
+	{
+		.description = "one character, no conversion",
+		.input  = { .src = "A", .srclen = 1, .destlen = 10 },
+		.output = { .dest = "A", .ret = 1 },
+	},
+	{
+		.description = "one character, conversion, truncation",
+		.input  = { .src = "a", .srclen = 1, .destlen = 0 },
+		.output = { .dest = "", .ret = 1 },
+	},
+	{
+		.description = "one character, conversion, NUL-terminated",
+		.input  = { .src = "a", .srclen = SIZE_MAX, .destlen = 10 },
+		.output = { .dest = "A", .ret = 1 },
+	},
+	{
+		.description = "one character, no conversion, NUL-terminated",
+		.input  = { .src = "A", .srclen = SIZE_MAX, .destlen = 10 },
+		.output = { .dest = "A", .ret = 1 },
+	},
+	{
+		.description = "one character, conversion, NUL-terminated, truncation",
+		.input  = { .src = "a", .srclen = SIZE_MAX, .destlen = 0 },
+		.output = { .dest = "", .ret = 1 },
+	},
+	{
+		.description = "one word, conversion",
+		.input  = { .src = "heLlo", .srclen = 5, .destlen = 10 },
+		.output = { .dest = "Hello", .ret = 5 },
+	},
+	{
+		.description = "one word, no conversion",
+		.input  = { .src = "Hello", .srclen = 5, .destlen = 10 },
+		.output = { .dest = "Hello", .ret = 5 },
+	},
+	{
+		.description = "one word, conversion, truncation",
+		.input  = { .src = "heLlo", .srclen = 5, .destlen = 2 },
+		.output = { .dest = "H", .ret = 5 },
+	},
+	{
+		.description = "one word, conversion, NUL-terminated",
+		.input  = { .src = "heLlo", .srclen = SIZE_MAX, .destlen = 10 },
+		.output = { .dest = "Hello", .ret = 5 },
+	},
+	{
+		.description = "one word, no conversion, NUL-terminated",
+		.input  = { .src = "Hello", .srclen = SIZE_MAX, .destlen = 10 },
+		.output = { .dest = "Hello", .ret = 5 },
+	},
+	{
+		.description = "one word, conversion, NUL-terminated, truncation",
+		.input  = { .src = "heLlo", .srclen = SIZE_MAX, .destlen = 3 },
+		.output = { .dest = "He", .ret = 5 },
+	},
+	{
+		.description = "two words, conversion",
+		.input  = { .src = "heLlo wORLd!", .srclen = 12, .destlen = 20 },
+		.output = { .dest = "Hello World!", .ret = 12 },
+	},
+	{
+		.description = "two words, no conversion",
+		.input  = { .src = "Hello World!", .srclen = 12, .destlen = 20 },
+		.output = { .dest = "Hello World!", .ret = 12 },
+	},
+	{
+		.description = "two words, conversion, truncation",
+		.input  = { .src = "heLlo wORLd!", .srclen = 12, .destlen = 8 },
+		.output = { .dest = "Hello W", .ret = 12 },
+	},
+	{
+		.description = "two words, conversion, NUL-terminated",
+		.input  = { .src = "heLlo wORLd!", .srclen = SIZE_MAX, .destlen = 20 },
+		.output = { .dest = "Hello World!", .ret = 12 },
+	},
+	{
+		.description = "two words, no conversion, NUL-terminated",
+		.input  = { .src = "Hello World!", .srclen = SIZE_MAX, .destlen = 20 },
+		.output = { .dest = "Hello World!", .ret = 12 },
+	},
+	{
+		.description = "two words, conversion, NUL-terminated, truncation",
+		.input  = { .src = "heLlo wORLd!", .srclen = SIZE_MAX, .destlen = 4 },
+		.output = { .dest = "Hel", .ret = 12 },
+	},
+};
+
+/*
+ * Run the test case at index off of the table t and check its outcome.
+ *
+ * The identity of t (lowercase_utf8, uppercase_utf8 or titlecase_utf8)
+ * selects which grapheme_to_*_utf8() function is called; any other
+ * table is reported as a failure. name and argv0 are only used to
+ * label the failure message written to stderr.
+ *
+ * Returns 0 if the test passed and 1 otherwise.
+ */
+static int
+unit_test_callback_to_case_utf8(void *t, size_t off, const char *name, const char *argv0)
+{
+	struct unit_test_to_case_utf8 *test = (struct unit_test_to_case_utf8 *)t + off;
+	size_t ret = 0, i;
+	char buf[512];
+
+	/* fill the array with canary values */
+	memset(buf, 0x7f, LEN(buf));
+
+	if (t == lowercase_utf8) {
+		ret = grapheme_to_lowercase_utf8(test->input.src, test->input.srclen,
+		                                 buf, test->input.destlen);
+	} else if (t == uppercase_utf8) {
+		ret = grapheme_to_uppercase_utf8(test->input.src, test->input.srclen,
+		                                 buf, test->input.destlen);
+	} else if (t == titlecase_utf8) {
+		ret = grapheme_to_titlecase_utf8(test->input.src, test->input.srclen,
+		                                 buf, test->input.destlen);
+	} else {
+		goto err;
+	}
+
+	/*
+	 * check results; only the bytes that fit into the destination
+	 * (at most destlen of them) are compared
+	 */
+	if (ret != test->output.ret ||
+	    memcmp(buf, test->output.dest, MIN(test->input.destlen, test->output.ret))) {
+		goto err;
+	}
+
+	/* check that none of the canary values have been overwritten */
+	for (i = test->input.destlen; i < LEN(buf); i++) {
+		if (buf[i] != 0x7f) {
+			goto err;
+		}
+	}
+
+	return 0;
+err:
+	/*
+	 * only print the part of buf the function was allowed to write
+	 * to, so the canary bytes beyond destlen stay out of the message
+	 */
+	fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
+	        "(returned (\"%.*s\", %zu) instead of (\"%.*s\", %zu)).\n", argv0,
+	        name, off, test->description,
+	        (int)MIN(test->input.destlen, ret), buf, ret,
+	        (int)test->output.ret, test->output.dest, test->output.ret);
+	return 1;
+}
+
+/*
+ * Run the lowercase, uppercase and titlecase unit-test tables in that
+ * order and return the sum of the run_unit_tests() results, so the exit
+ * status is non-zero as soon as one table fails.
+ */
+int
+main(int argc, char *argv[])
+{
+	int failed = 0;
+
+	(void)argc;
+
+	/*
+	 * The calls are separate statements rather than operands of a
+	 * single '+' expression, as the evaluation order of operands is
+	 * unspecified and would leave the order of the output undefined.
+	 */
+	failed += run_unit_tests(unit_test_callback_to_case_utf8, lowercase_utf8,
+	                         LEN(lowercase_utf8), "grapheme_to_lowercase_utf8", argv[0]);
+	failed += run_unit_tests(unit_test_callback_to_case_utf8, uppercase_utf8,
+	                         LEN(uppercase_utf8), "grapheme_to_uppercase_utf8", argv[0]);
+	failed += run_unit_tests(unit_test_callback_to_case_utf8, titlecase_utf8,
+	                         LEN(titlecase_utf8), "grapheme_to_titlecase_utf8", argv[0]);
+
+	return failed;
+}
diff --git a/test/util.c b/test/util.c
@@ -23,7 +23,7 @@ run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t),
 			/* check if our resulting offset matches */
 			if (j == test[i].lenlen ||
 			    res != test[i].len[j++]) {
-				fprintf(stderr, "%s: Failed test %zu \"%s\".\n",
+				fprintf(stderr, "%s: Failed conformance test %zu \"%s\".\n",
 				        argv0, i, test[i].descr);
 				fprintf(stderr, "J=%zu: EXPECTED len %zu, got %zu\n", j-1, test[i].len[j-1], res);
 				failed++;
@@ -31,7 +31,24 @@ run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t),
 			}
 		}
 	}
-	printf("%s: %zu/%zu tests passed.\n", argv0,
+	printf("%s: %zu/%zu conformance tests passed.\n", argv0,
+	       testlen - failed, testlen);
+
+	return (failed > 0) ? 1 : 0;
+}
+
+int
+run_unit_tests(int (*unit_test_callback)(void *, size_t, const char *,
+               const char *), void *test, size_t testlen, const char *name,
+               const char *argv0)
+{
+	size_t i, failed;
+
+	for (i = 0, failed = 0; i < testlen; i++) {
+		failed += (unit_test_callback(test, i, name, argv0) == 0) ? 0 : 1;
+	}
+
+	printf("%s: %s: %zu/%zu unit tests passed.\n", argv0, name,
 	       testlen - failed, testlen);
 
 	return (failed > 0) ? 1 : 0;
diff --git a/test/util.h b/test/util.h
@@ -5,10 +5,15 @@
 #include "../gen/types.h"
 #include "../grapheme.h"
 
+#undef MIN
+#define MIN(x,y)  ((x) < (y) ? (x) : (y))
+#undef LEN
 #define LEN(x) (sizeof(x) / sizeof(*(x)))
 
 int run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t),
                     const struct break_test *test, size_t testlen,
                     const char *);
+int run_unit_tests(int (*unit_test_callback)(void *, size_t, const char *,
+                   const char *), void *, size_t, const char *, const char *);
 
 #endif /* UTIL_H */

	libgrapheme unicode string library
	git clone git://git.suckless.org/libgrapheme
	Log \| Files \| Refs \| README \| LICENSE

M	Makefile	\|	3	+++
A	test/case.c	\|	329	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	test/util.c	\|	21	+++++++++++++++++++--
M	test/util.h	\|	5	+++++