From ab8e05b898eead75d20bd117b7835cef59976323 Mon Sep 17 00:00:00 2001 From: Riey Date: Tue, 30 Jul 2024 20:23:24 +0900 Subject: [PATCH] Only add emojis to dict --- Cargo.lock | 314 ++++++++++++++++++++++++++++++++- src/engine/core/tests/emoji.rs | 2 +- src/engine/dict/Cargo.toml | 3 +- src/engine/dict/build.rs | 5 + 4 files changed, 321 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b3a13594..b9f8b2c7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1537,6 +1537,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.11" @@ -1747,10 +1756,11 @@ dependencies = [ name = "kime-engine-dict" version = "0.1.0" dependencies = [ - "itertools 0.10.5", + "itertools 0.13.0", "quick-xml", "serde", "serde_json", + "unic", ] [[package]] @@ -1932,6 +1942,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" +[[package]] +name = "matches" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" + [[package]] name = "matrixmultiply" version = "0.3.9" @@ -3337,6 +3353,302 @@ version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" +[[package]] +name = "unic" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31748f3e294dc6a9243a44686e8155a162af9a11cd56e07c0ebbc530b2a8a87" +dependencies = [ + "unic-bidi", + "unic-char", + "unic-common", + "unic-emoji", + "unic-idna", + "unic-normal", + "unic-segment", + "unic-ucd", +] + +[[package]] +name = 
"unic-bidi" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1356b759fb6a82050666f11dce4b6fe3571781f1449f3ef78074e408d468ec09" +dependencies = [ + "matches", + "unic-ucd-bidi", +] + +[[package]] +name = "unic-char" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af25df79bd134107f088ba725d9c470600f16263205d0be36c75e75b020bac0a" +dependencies = [ + "unic-char-basics", + "unic-char-property", + "unic-char-range", +] + +[[package]] +name = "unic-char-basics" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20e5d239bc6394309225a0c1b13e1d059565ff2cfef1a437aff4a5871fa06c4b" + +[[package]] +name = "unic-char-property" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221" +dependencies = [ + "unic-char-range", +] + +[[package]] +name = "unic-char-range" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc" + +[[package]] +name = "unic-common" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" + +[[package]] +name = "unic-emoji" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74193f32f7966ad20b819e70e29c6f1ac8c386692a9d5e90078eef80ea008bfb" +dependencies = [ + "unic-emoji-char", +] + +[[package]] +name = "unic-emoji-char" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d" +dependencies = [ + "unic-char-property", + "unic-char-range", + "unic-ucd-version", +] + +[[package]] +name = "unic-idna" +version = "0.9.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "621e9cf526f2094d2c2ced579766458a92f8f422d6bb934c503ba1a95823a62d" +dependencies = [ + "matches", + "unic-idna-mapping", + "unic-idna-punycode", + "unic-normal", + "unic-ucd-bidi", + "unic-ucd-normal", + "unic-ucd-version", +] + +[[package]] +name = "unic-idna-mapping" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4de70fd4e5331537347a50a0dbc938efb1f127c9f6e5efec980fc90585aa1343" +dependencies = [ + "unic-char-property", + "unic-char-range", + "unic-ucd-version", +] + +[[package]] +name = "unic-idna-punycode" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06feaedcbf9f1fc259144d833c0d630b8b15207b0486ab817d29258bc89f2f8a" + +[[package]] +name = "unic-normal" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f09d64d33589a94628bc2aeb037f35c2e25f3f049c7348b5aa5580b48e6bba62" +dependencies = [ + "unic-ucd-normal", +] + +[[package]] +name = "unic-segment" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4ed5d26be57f84f176157270c112ef57b86debac9cd21daaabbe56db0f88f23" +dependencies = [ + "unic-ucd-segment", +] + +[[package]] +name = "unic-ucd" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "625b18f7601e1127504a20ae731dc3c7826d0e86d5f7fe3434f8137669240efd" +dependencies = [ + "unic-ucd-age", + "unic-ucd-bidi", + "unic-ucd-block", + "unic-ucd-case", + "unic-ucd-category", + "unic-ucd-common", + "unic-ucd-hangul", + "unic-ucd-ident", + "unic-ucd-name", + "unic-ucd-name_aliases", + "unic-ucd-normal", + "unic-ucd-segment", + "unic-ucd-version", +] + +[[package]] +name = "unic-ucd-age" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8cfdfe71af46b871dc6af2c24fcd360e2f3392ee4c5111877f2947f311671c" +dependencies = [ + 
"unic-char-property", + "unic-char-range", + "unic-ucd-version", +] + +[[package]] +name = "unic-ucd-bidi" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1d568b51222484e1f8209ce48caa6b430bf352962b877d592c29ab31fb53d8c" +dependencies = [ + "unic-char-property", + "unic-char-range", + "unic-ucd-version", +] + +[[package]] +name = "unic-ucd-block" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b2a16f2d7ecd25325a1053ca5a66e7fa1b68911a65c5e97f8d2e1b236b6f1d7" +dependencies = [ + "unic-char-property", + "unic-char-range", + "unic-ucd-version", +] + +[[package]] +name = "unic-ucd-case" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d98d6246a79bac6cf66beee01422bda7c882e11d837fa4969bfaaba5fdea6d3" +dependencies = [ + "unic-char-property", + "unic-char-range", + "unic-ucd-version", +] + +[[package]] +name = "unic-ucd-category" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8d4591f5fcfe1bd4453baaf803c40e1b1e69ff8455c47620440b46efef91c0" +dependencies = [ + "matches", + "unic-char-property", + "unic-char-range", + "unic-ucd-version", +] + +[[package]] +name = "unic-ucd-common" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9b78b910beafa1aae5c59bf00877c6cece1c5db28a1241ad801e86cecdff4ad" +dependencies = [ + "unic-char-property", + "unic-char-range", + "unic-ucd-version", +] + +[[package]] +name = "unic-ucd-hangul" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb1dc690e19010e1523edb9713224cba5ef55b54894fe33424439ec9a40c0054" +dependencies = [ + "unic-ucd-version", +] + +[[package]] +name = "unic-ucd-ident" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e230a37c0381caa9219d67cf063aa3a375ffed5bf541a452db16e744bdab6987" 
+dependencies = [ + "unic-char-property", + "unic-char-range", + "unic-ucd-version", +] + +[[package]] +name = "unic-ucd-name" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8fc55a45b2531089dc1773bf60c1f104b38e434b774ffc37b9c29a9b0f492e" +dependencies = [ + "unic-char-property", + "unic-ucd-hangul", + "unic-ucd-version", +] + +[[package]] +name = "unic-ucd-name_aliases" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b7674212643087699ba247a63dd05f1204c7e4880ec9342e545a7cffcc6a46f" +dependencies = [ + "unic-char-property", + "unic-ucd-version", +] + +[[package]] +name = "unic-ucd-normal" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86aed873b8202d22b13859dda5fe7c001d271412c31d411fd9b827e030569410" +dependencies = [ + "unic-char-property", + "unic-char-range", + "unic-ucd-category", + "unic-ucd-hangul", + "unic-ucd-version", +] + +[[package]] +name = "unic-ucd-segment" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2079c122a62205b421f499da10f3ee0f7697f012f55b675e002483c73ea34700" +dependencies = [ + "unic-char-property", + "unic-char-range", + "unic-ucd-version", +] + +[[package]] +name = "unic-ucd-version" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4" +dependencies = [ + "unic-common", +] + [[package]] name = "unicode-bidi" version = "0.3.15" diff --git a/src/engine/core/tests/emoji.rs b/src/engine/core/tests/emoji.rs index e01ac2d2..9d22f2ce 100644 --- a/src/engine/core/tests/emoji.rs +++ b/src/engine/core/tests/emoji.rs @@ -12,7 +12,7 @@ fn thinking() { test_input(&[ (EMOJI, "🏻(light skin tone)🏼(medium-light skin tone)🏽(medium skin tone)🏾(medium-dark skin tone)🏿(dark skin tone)", ""), (Key::normal(T), "t🏻(light skin tone)🏼(medium-light skin 
tone)🏽(medium skin tone)🏾(medium-dark skin tone)🏿(dark skin tone)", ""), - (Key::normal(H), "th😁(beaming face with smiling eyes)😂(face with tears of joy)🤣(rolling on the floor laughing)😃(grinning face with big eyes)😄(grinning face with smiling eyes)", ""), + (Key::normal(H), "th😃(grinning face with big eyes)😄(grinning face with smiling eyes)😁(beaming face with smiling eyes)😅(grinning face with sweat)🤣(rolling on the floor laughing)", ""), (Key::normal(I), "thi🤔(thinking face)🕧(twelve-thirty)🕜(one-thirty)🕝(two-thirty)🕞(three-thirty)", ""), (Key::normal(N), "thin🤔(thinking face)", ""), (Key::normal(K), "think🤔(thinking face)", ""), diff --git a/src/engine/dict/Cargo.toml b/src/engine/dict/Cargo.toml index 3b8c9e8b..2cf93fff 100644 --- a/src/engine/dict/Cargo.toml +++ b/src/engine/dict/Cargo.toml @@ -8,5 +8,6 @@ license = "GPL-3.0-or-later" [build-dependencies] serde = {version = "1.0.118", features = ["derive"]} serde_json = "1.0" -itertools = "0.10.0" +itertools = "0.13.0" quick-xml = { version = "0.27.1", features = ["encoding"] } +unic = "0.9.0" diff --git a/src/engine/dict/build.rs b/src/engine/dict/build.rs index 11dd2db8..13c86cc6 100644 --- a/src/engine/dict/build.rs +++ b/src/engine/dict/build.rs @@ -11,6 +11,7 @@ use std::{ mem, path::PathBuf, }; +use unic::emoji::char::is_emoji; #[derive(Default, Debug, Clone, Copy)] struct HanjaEntry { @@ -203,6 +204,10 @@ fn main() { ) .unwrap(); for entry in load_unicode_annotations().unwrap() { + if !entry.cp.chars().any(|c| is_emoji(c)) { + continue; + } + writeln!( out, "UnicodeAnnotation {{ codepoint: \"{}\", tts: \"{}\" }},",