commit 8a7e2ee85f0a2824e48e85e57534c5b18113cf07
parent 9f15d7eb0c9cf216f069d6972c58520013b80acb
Author: Laslo Hunhold <dev@frign.de>
Date: Sat, 24 Sep 2022 01:54:52 +0200
Compile the library in freestanding mode
Looking closely, we never explicitly depend on the standard library
within the actual library code. This can be explicitly expressed by
setting -ffreestanding during object-compilation and -nostdlib during
linking. The result is a clean library with zero libc-symbols, allowing
it to be used even without an operating system (kernel code, ELF,
etc.), by making use of the freestanding implementation form defined
in the standard[0].
To be freestanding, the code may only include <float.h>, <iso646.h>,
<limits.h>, <stdalign.h>, <stdarg.h>, <stdbool.h>, <stddef.h>,
<stdint.h> and <stdnoreturn.h>. We satisfy this condition implicitly,
but there are some erroneous supplementary includes that are removed
in this commit. Additionally, the strict compiler-implementation simply
adds the U-suffix to the argument of UINT16_C (et al.), which is why
calls to it have to be changed to really include only constants.
[0]:https://www.iso-9899.info/n1570.html#4.p6
Signed-off-by: Laslo Hunhold <dev@frign.de>
Diffstat:
7 files changed, 82 insertions(+), 81 deletions(-)
diff --git a/config.mk b/config.mk
@@ -15,8 +15,8 @@ BUILD_CPPFLAGS = $(CPPFLAGS)
BUILD_CFLAGS = $(CFLAGS)
BUILD_LDFLAGS = $(LDFLAGS)
-SHFLAGS = -fPIC
-SOFLAGS = -shared -Wl,--soname=libgrapheme.so
+SHFLAGS = -fPIC -ffreestanding
+SOFLAGS = -shared -nostdlib -Wl,--soname=libgrapheme.so
# tools
CC = cc
diff --git a/src/character.c b/src/character.c
@@ -1,8 +1,7 @@
/* See LICENSE file for copyright and license details. */
+#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
#include "../gen/character.h"
#include "../grapheme.h"
@@ -10,96 +9,96 @@
static const uint_least16_t dont_break[NUM_CHAR_BREAK_PROPS] = {
[CHAR_BREAK_PROP_OTHER] =
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_CR] =
- UINT16_C(1 << CHAR_BREAK_PROP_LF), /* GB3 */
+ UINT16_C(1) << CHAR_BREAK_PROP_LF, /* GB3 */
[CHAR_BREAK_PROP_EXTEND] =
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] =
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_HANGUL_L] =
- UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_L) | /* GB6 */
- UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_V) | /* GB6 */
- UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_LV) | /* GB6 */
- UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_LVT) | /* GB6 */
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_L | /* GB6 */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB6 */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LV | /* GB6 */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LVT | /* GB6 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_HANGUL_V] =
- UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_V) | /* GB7 */
- UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_T) | /* GB7 */
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB7 */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB7 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_HANGUL_T] =
- UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_T) | /* GB8 */
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB8 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_HANGUL_LV] =
- UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_V) | /* GB7 */
- UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_T) | /* GB7 */
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V | /* GB7 */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB7 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_HANGUL_LVT] =
- UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_T) | /* GB8 */
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T | /* GB8 */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_PREPEND] =
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK) | /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK | /* GB9a */
(UINT16_C(0xFFFF) &
- ~(UINT16_C(1 << CHAR_BREAK_PROP_CR) |
- UINT16_C(1 << CHAR_BREAK_PROP_LF) |
- UINT16_C(1 << CHAR_BREAK_PROP_CONTROL)
+ ~(UINT16_C(1) << CHAR_BREAK_PROP_CR |
+ UINT16_C(1) << CHAR_BREAK_PROP_LF |
+ UINT16_C(1) << CHAR_BREAK_PROP_CONTROL
)
), /* GB9b */
[CHAR_BREAK_PROP_REGIONAL_INDICATOR] =
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_SPACINGMARK] =
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
[CHAR_BREAK_PROP_ZWJ] =
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) | /* GB9 */
- UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK), /* GB9a */
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ | /* GB9 */
+ UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK, /* GB9a */
};
static const uint_least16_t flag_update_gb11[2 * NUM_CHAR_BREAK_PROPS] = {
[CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] =
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) |
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND),
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ |
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND,
[CHAR_BREAK_PROP_ZWJ + NUM_CHAR_BREAK_PROPS] =
- UINT16_C(1 << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC),
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC,
[CHAR_BREAK_PROP_EXTEND + NUM_CHAR_BREAK_PROPS] =
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND) |
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ),
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND |
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ,
[CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC + NUM_CHAR_BREAK_PROPS] =
- UINT16_C(1 << CHAR_BREAK_PROP_ZWJ) |
- UINT16_C(1 << CHAR_BREAK_PROP_EXTEND),
+ UINT16_C(1) << CHAR_BREAK_PROP_ZWJ |
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTEND,
};
static const uint_least16_t dont_break_gb11[2 * NUM_CHAR_BREAK_PROPS] = {
[CHAR_BREAK_PROP_ZWJ + NUM_CHAR_BREAK_PROPS] =
- UINT16_C(1 << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC),
+ UINT16_C(1) << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC,
};
static const uint_least16_t flag_update_gb12_13[2 * NUM_CHAR_BREAK_PROPS] = {
[CHAR_BREAK_PROP_REGIONAL_INDICATOR] =
- UINT16_C(1 << CHAR_BREAK_PROP_REGIONAL_INDICATOR),
+ UINT16_C(1) << CHAR_BREAK_PROP_REGIONAL_INDICATOR,
};
static const uint_least16_t dont_break_gb12_13[2 * NUM_CHAR_BREAK_PROPS] = {
[CHAR_BREAK_PROP_REGIONAL_INDICATOR + NUM_CHAR_BREAK_PROPS] =
- UINT16_C(1 << CHAR_BREAK_PROP_REGIONAL_INDICATOR),
+ UINT16_C(1) << CHAR_BREAK_PROP_REGIONAL_INDICATOR,
};
static inline enum char_break_property
@@ -135,23 +134,23 @@ grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1, GRAPHEME_STA
state->gb11_flag =
flag_update_gb11[cp0_prop + NUM_CHAR_BREAK_PROPS *
state->gb11_flag] &
- UINT16_C(1 << cp1_prop);
+ UINT16_C(1) << cp1_prop;
state->gb12_13_flag =
flag_update_gb12_13[cp0_prop + NUM_CHAR_BREAK_PROPS *
state->gb12_13_flag] &
- UINT16_C(1 << cp1_prop);
+ UINT16_C(1) << cp1_prop;
/*
* Apply grapheme cluster breaking algorithm (UAX #29), see
* http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules
*/
- notbreak = (dont_break[cp0_prop] & UINT16_C(1 << cp1_prop)) ||
+ notbreak = (dont_break[cp0_prop] & (UINT16_C(1) << cp1_prop)) ||
(dont_break_gb11[cp0_prop + state->gb11_flag *
NUM_CHAR_BREAK_PROPS] &
- UINT16_C(1 << cp1_prop)) ||
+ (UINT16_C(1) << cp1_prop)) ||
(dont_break_gb12_13[cp0_prop + state->gb12_13_flag *
NUM_CHAR_BREAK_PROPS] &
- UINT16_C(1 << cp1_prop));
+ (UINT16_C(1) << cp1_prop));
/* update or reset flags (when we have a break) */
if (likely(!notbreak)) {
@@ -168,9 +167,9 @@ grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1, GRAPHEME_STA
* Given we have no state, this behaves as if the state-booleans
* were all set to false
*/
- notbreak = (dont_break[cp0_prop] & UINT16_C(1 << cp1_prop)) ||
- (dont_break_gb11[cp0_prop] & UINT16_C(1 << cp1_prop)) ||
- (dont_break_gb12_13[cp0_prop] & UINT16_C(1 << cp1_prop));
+ notbreak = (dont_break[cp0_prop] & (UINT16_C(1) << cp1_prop)) ||
+ (dont_break_gb11[cp0_prop] & (UINT16_C(1) << cp1_prop)) ||
+ (dont_break_gb12_13[cp0_prop] & (UINT16_C(1) << cp1_prop));
}
return !notbreak;
diff --git a/src/line.c b/src/line.c
@@ -1,8 +1,6 @@
/* See LICENSE file for copyright and license details. */
#include <stdbool.h>
#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
#include "../gen/line.h"
#include "../grapheme.h"
diff --git a/src/sentence.c b/src/sentence.c
@@ -1,8 +1,6 @@
/* See LICENSE file for copyright and license details. */
#include <stdbool.h>
#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
#include "../gen/sentence.h"
#include "../grapheme.h"
diff --git a/src/utf8.c b/src/utf8.c
@@ -1,5 +1,6 @@
/* See LICENSE file for copyright and license details. */
-#include <stdio.h>
+#include <stddef.h>
+#include <stdint.h>
#include "../grapheme.h"
#include "util.h"
diff --git a/src/util.c b/src/util.c
@@ -1,7 +1,8 @@
/* See LICENSE file for copyright and license details. */
+#include <limits.h>
#include <stdbool.h>
+#include <stddef.h>
#include <stdint.h>
-#include <stdlib.h>
#include "../gen/types.h"
#include "../grapheme.h"
@@ -88,6 +89,12 @@ herodotus_reader_next_codepoint_break(const HERODOTUS_READER *r)
}
}
+size_t
+herodotus_reader_number_read(const HERODOTUS_READER *r)
+{
+ return r->off;
+}
+
enum herodotus_status
herodotus_read_codepoint(HERODOTUS_READER *r, bool advance, uint_least32_t *cp)
{
@@ -202,7 +209,7 @@ herodotus_writer_nul_terminate(HERODOTUS_WRITER *w)
}
size_t
-herodotus_writer_number_written(HERODOTUS_WRITER *w)
+herodotus_writer_number_written(const HERODOTUS_WRITER *w)
{
return w->off;
}
diff --git a/src/word.c b/src/word.c
@@ -1,8 +1,6 @@
/* See LICENSE file for copyright and license details. */
#include <stdbool.h>
#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
#include "../gen/word.h"
#include "../grapheme.h"