-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
dashodanger
committed
Jun 6, 2024
1 parent
35248eb
commit f1bcd7b
Showing
41 changed files
with
37,324 additions
and
413 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
########################################## | ||
# libgrapheme | ||
########################################## | ||
|
||
# Declares the CMake project "libgrapheme": C is the only enabled language
# and the project version is set to 0.1.0.
project( | ||
libgrapheme | ||
LANGUAGES C | ||
VERSION 0.1.0 | ||
) | ||
|
||
# Defines the library target "libgrapheme" from the eight C sources listed
# below. No STATIC/SHARED/MODULE keyword is given, so the library type is
# decided by CMake's defaults (BUILD_SHARED_LIBS).
# NOTE(review): the target name already starts with "lib"; with CMake's
# default "lib" prefix the output file would presumably be named
# "liblibgrapheme.*" on Unix-like platforms - confirm this is intended.
# NOTE(review): no include directories are attached to the target here;
# presumably consumers locate grapheme.h by other means - verify.
add_library( | ||
libgrapheme | ||
src/bidirectional.c | ||
src/case.c | ||
src/character.c | ||
src/line.c | ||
src/sentence.c | ||
src/utf8.c | ||
src/util.c | ||
src/word.c | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
ISC-License | ||
|
||
Copyright 2019-2022 Laslo Hunhold <dev@frign.de> | ||
|
||
Permission to use, copy, modify, and/or distribute this software for any | ||
purpose with or without fee is hereby granted, provided that the above | ||
copyright notice and this permission notice appear in all copies. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
libgrapheme | ||
=========== | ||
|
||
libgrapheme is an extremely simple freestanding C99 library providing | ||
utilities for properly handling strings according to the latest Unicode | ||
standard 15.0.0. It offers fully Unicode compliant | ||
|
||
- grapheme cluster (i.e. user-perceived character) segmentation | ||
- word segmentation | ||
- sentence segmentation | ||
- detection of permissible line break opportunities | ||
- case detection (lower-, upper- and title-case) | ||
- case conversion (to lower-, upper- and title-case) | ||
|
||
on UTF-8 strings and codepoint arrays, both of which can also be | ||
null-terminated. | ||
|
||
The necessary lookup-tables are automatically generated from the Unicode | ||
standard data (contained in the tarball) and heavily compressed. Over | ||
10,000 automatically generated conformance tests and over 150 unit tests | ||
ensure conformance and correctness. | ||
|
||
There is no complicated build-system involved and it's all done using one | ||
POSIX-compliant Makefile. All you need is a C99 compiler, given the | ||
lookup-table-generators and compressors are also written in C99. The | ||
resulting library is freestanding and thus not even dependent on a | ||
standard library to be present at runtime, making it a suitable choice | ||
for bare metal applications. | ||
|
||
It is also way smaller and much faster than the other established | ||
Unicode string libraries (ICU, GNU's libunistring, libutf8proc). | ||
|
||
Requirements | ||
------------ | ||
A C99-compiler and POSIX make. | ||
|
||
Installation | ||
------------ | ||
Run ./configure, which automatically edits config.mk to match your local | ||
setup. Edit config.mk by hand if necessary or desired for further | ||
customization. | ||
|
||
Afterwards enter the following command to build and install libgrapheme | ||
(if necessary as root): | ||
|
||
make install | ||
|
||
Conformance | ||
----------- | ||
The libgrapheme library is compliant with the Unicode 15.0.0 | ||
specification (September 2022). The tests can be run with | ||
|
||
make test | ||
|
||
to check standard conformance and correctness. | ||
|
||
Usage | ||
----- | ||
Include the header grapheme.h in your code and link against libgrapheme | ||
with "-lgrapheme" either statically ("-static") or dynamically. | ||
|
||
Author | ||
------ | ||
Laslo Hunhold <dev@frign.de> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
/* See LICENSE file for copyright and license details. */ | ||
#ifndef GRAPHEME_H | ||
#define GRAPHEME_H | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
#include <stdbool.h> | ||
#include <stddef.h> | ||
#include <stdint.h> | ||
|
||
/*
 * Codepoint value 0xFFFD (U+FFFD, the Unicode replacement character),
 * exposed under the name "invalid codepoint". Which functions produce or
 * accept it is not visible in this header - presumably the UTF-8 decoder
 * reports malformed input with it; confirm against the implementation.
 */
#define GRAPHEME_INVALID_CODEPOINT UINT32_C(0xFFFD) | ||
|
||
/*
 * Direction selector for the bidirectional API. It appears in the
 * signature of grapheme_bidirectional_preprocess_paragraph() both by value
 * and through a pointer. The enumerators carry the values 0 (NEUTRAL),
 * 1 (LTR) and 2 (RTL), following declaration order.
 */
enum grapheme_bidirectional_direction { | ||
GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL, | ||
GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR, | ||
GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL, | ||
}; | ||
|
||
/*
 * Bidirectional API: line embedding levels. Takes a read-only
 * uint_least32_t array with a size_t, plus a writable int_least8_t array
 * with a size_t, and returns a size_t.
 * NOTE(review): parameter names are omitted; presumably the arguments are
 * (input data, input length, level output buffer, output capacity) - TODO
 * confirm against the definition, which is not part of this header.
 */
size_t grapheme_bidirectional_get_line_embedding_levels(const uint_least32_t *, | ||
size_t, int_least8_t *, | ||
size_t); | ||
|
||
/*
 * Bidirectional API: paragraph preprocessing. Takes a read-only
 * uint_least32_t array with a size_t, a direction by value, a writable
 * uint_least32_t array with a size_t, and a pointer to a direction;
 * returns a size_t.
 * NOTE(review): parameter names are omitted; presumably the by-value
 * direction is an override and the pointed-to direction receives the
 * resolved one - TODO confirm against the definition.
 */
size_t grapheme_bidirectional_preprocess_paragraph( | ||
const uint_least32_t *, size_t, enum grapheme_bidirectional_direction, | ||
uint_least32_t *, size_t, enum grapheme_bidirectional_direction *); | ||
|
||
/*
 * Bidirectional API: line reordering. Takes two read-only uint_least32_t
 * arrays and a size_t, plus a writable uint_least32_t array with a size_t,
 * and returns a size_t.
 * NOTE(review): parameter names are omitted, so the role of each of the
 * two input arrays cannot be told from this declaration - TODO confirm
 * against the definition.
 */
size_t grapheme_bidirectional_reorder_line(const uint_least32_t *, | ||
const uint_least32_t *, size_t, | ||
uint_least32_t *, size_t); | ||
|
||
/*
 * UTF-8 codec pair.
 * grapheme_decode_utf8: reads from a const char buffer with a size_t and
 * writes through a uint_least32_t pointer; returns a size_t.
 * grapheme_encode_utf8: takes one uint_least32_t value and a writable char
 * buffer with a size_t; returns a size_t.
 * NOTE(review): presumably the return values are byte counts (consumed /
 * required) - TODO confirm against the definitions.
 */
size_t grapheme_decode_utf8(const char *, size_t, uint_least32_t *); | ||
size_t grapheme_encode_utf8(uint_least32_t, char *, size_t); | ||
|
||
/*
 * Pairwise character-break test: takes two uint_least32_t values and a
 * pointer to a uint_least16_t, and returns a bool.
 * NOTE(review): presumably the two values are adjacent codepoints and the
 * pointer carries state between successive calls - TODO confirm.
 */
bool grapheme_is_character_break(uint_least32_t, uint_least32_t, | ||
uint_least16_t *); | ||
|
||
/*
 * Case detection (lower-, title- and upper-case) on uint_least32_t arrays.
 * Each takes a read-only array with a size_t and a size_t pointer, and
 * returns a bool.
 * NOTE(review): the meaning of the size_t written through the pointer is
 * not visible here - TODO confirm against the definitions.
 */
bool grapheme_is_lowercase(const uint_least32_t *, size_t, size_t *); | ||
bool grapheme_is_titlecase(const uint_least32_t *, size_t, size_t *); | ||
bool grapheme_is_uppercase(const uint_least32_t *, size_t, size_t *); | ||
|
||
/*
 * Case detection (lower-, title- and upper-case), "_utf8" variants taking
 * a const char buffer with a size_t and a size_t pointer; each returns a
 * bool.
 * NOTE(review): the meaning of the size_t written through the pointer is
 * not visible here - TODO confirm against the definitions.
 */
bool grapheme_is_lowercase_utf8(const char *, size_t, size_t *); | ||
bool grapheme_is_titlecase_utf8(const char *, size_t, size_t *); | ||
bool grapheme_is_uppercase_utf8(const char *, size_t, size_t *); | ||
|
||
/*
 * Segmentation on uint_least32_t arrays: character, line, sentence and
 * word breaks. Each takes a read-only array with a size_t and returns a
 * size_t.
 * NOTE(review): presumably the result is the offset of the next break
 * measured in array elements - TODO confirm against the definitions.
 */
size_t grapheme_next_character_break(const uint_least32_t *, size_t); | ||
size_t grapheme_next_line_break(const uint_least32_t *, size_t); | ||
size_t grapheme_next_sentence_break(const uint_least32_t *, size_t); | ||
size_t grapheme_next_word_break(const uint_least32_t *, size_t); | ||
|
||
/*
 * Segmentation, "_utf8" variants: character, line, sentence and word
 * breaks. Each takes a const char buffer with a size_t and returns a
 * size_t.
 * NOTE(review): presumably the result is the offset of the next break
 * measured in bytes - TODO confirm against the definitions.
 */
size_t grapheme_next_character_break_utf8(const char *, size_t); | ||
size_t grapheme_next_line_break_utf8(const char *, size_t); | ||
size_t grapheme_next_sentence_break_utf8(const char *, size_t); | ||
size_t grapheme_next_word_break_utf8(const char *, size_t); | ||
|
||
/*
 * Case conversion (to lower-, title- and upper-case) on uint_least32_t
 * arrays. Each takes a read-only array with a size_t, plus a writable
 * array with a size_t, and returns a size_t.
 * NOTE(review): presumably the arguments are (source, source length,
 * destination, destination capacity) - TODO confirm, including what the
 * return value reports when the destination is too small.
 */
size_t grapheme_to_lowercase(const uint_least32_t *, size_t, uint_least32_t *, | ||
size_t); | ||
size_t grapheme_to_titlecase(const uint_least32_t *, size_t, uint_least32_t *, | ||
size_t); | ||
size_t grapheme_to_uppercase(const uint_least32_t *, size_t, uint_least32_t *, | ||
size_t); | ||
|
||
/*
 * Case conversion (to lower-, title- and upper-case), "_utf8" variants.
 * Each takes a const char buffer with a size_t, plus a writable char
 * buffer with a size_t, and returns a size_t.
 * NOTE(review): presumably the arguments are (source, source length,
 * destination, destination capacity) - TODO confirm, including what the
 * return value reports when the destination is too small.
 */
size_t grapheme_to_lowercase_utf8(const char *, size_t, char *, size_t); | ||
size_t grapheme_to_titlecase_utf8(const char *, size_t, char *, size_t); | ||
size_t grapheme_to_uppercase_utf8(const char *, size_t, char *, size_t); | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif | ||
|
||
#endif /* GRAPHEME_H */ |
Oops, something went wrong.