Skip to content

Commit eabbe88

Browse files
committed
[char_property] Implement *CharProperty traits
Character Properties are of different kinds and shapes, and as UNIC components grow, we need a better way to categorize them by their shape, and a way to make sure we have a consistent, non-colliding API for them. This is the first step toward building a CharProperty taxonomy, with as little as possible needed to provide the desired assurances. We hope that the implementation can be improved over time as new features are added to the language. There are already some proposals on this front. See these discussions for more details: * [Traits as contract, without changes to call-sites](https://users.rust-lang.org/t/traits-as-contract-without-changes-to-call-sites/11938/11) * [RFC: delegation of implementation](rust-lang/rfcs#1406)
1 parent a1c80d0 commit eabbe88

File tree

15 files changed

+266
-15
lines changed

15 files changed

+266
-15
lines changed

unic/ucd/age/src/age.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ use std::fmt;
1414
use unic_utils::CharDataTable;
1515

1616
pub use unic_ucd_core::UnicodeVersion;
17+
use unic_utils::CharProperty;
1718

1819

1920
/// Represents values of the Unicode character property
@@ -41,6 +42,12 @@ pub enum Age {
4142
Unassigned, // Unassigned is older (larger) than any age
4243
}
4344

45+
impl CharProperty for Age {
46+
fn of(ch: char) -> Self {
47+
Self::of(ch)
48+
}
49+
}
50+
4451
use Age::{Assigned, Unassigned};
4552

4653

unic/ucd/age/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
// except according to those terms.
1111

1212

13-
#![forbid(unsafe_code)]
13+
#![forbid(unsafe_code, unconditional_recursion)]
1414
#![deny(missing_docs)]
1515

1616
//! # UNIC — UCD — Character Age

unic/ucd/bidi/src/bidi_class.rs

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@
99
// option. This file may not be copied, modified, or distributed
1010
// except according to those terms.
1111

12+
1213
use std::fmt;
1314

14-
use unic_utils::CharDataTable;
15+
use unic_utils::{CharDataTable, CharProperty, EnumeratedCharProperty};
16+
1517

1618
/// Represents the Unicode character
1719
/// [*Bidi_Class*](http://www.unicode.org/reports/tr44/#Bidi_Class) property, also known as the
@@ -48,6 +50,18 @@ pub enum BidiClass {
4850
// [UNIC_UPDATE_ON_UNICODE_UPDATE] Source: `tables/bidi_class_type.rsv`
4951
}
5052

53+
impl CharProperty for BidiClass {
54+
fn of(ch: char) -> Self {
55+
Self::of(ch)
56+
}
57+
}
58+
59+
impl EnumeratedCharProperty for BidiClass {
60+
fn all_values() -> &'static [Self] {
61+
Self::all_values()
62+
}
63+
}
64+
5165

5266
/// Abbreviated name aliases for
5367
/// [*Bidi_Class*](http://www.unicode.org/reports/tr44/#Bidi_Class) property.
@@ -111,6 +125,37 @@ impl BidiClass {
111125
*TABLE.find_or(ch, &L)
112126
}
113127

128+
/// Exhaustive list of all `BidiClass` property values.
129+
pub fn all_values() -> &'static [BidiClass] {
130+
use BidiClass::*;
131+
const ALL_VALUES: &[BidiClass] = &[
132+
ArabicLetter,
133+
ArabicNumber,
134+
ParagraphSeparator,
135+
BoundaryNeutral,
136+
CommonSeparator,
137+
EuropeanNumber,
138+
EuropeanSeparator,
139+
EuropeanTerminator,
140+
FirstStrongIsolate,
141+
LeftToRight,
142+
LeftToRightEmbedding,
143+
LeftToRightIsolate,
144+
LeftToRightOverride,
145+
NonspacingMark,
146+
OtherNeutral,
147+
PopDirectionalFormat,
148+
PopDirectionalIsolate,
149+
RightToLeft,
150+
RightToLeftEmbedding,
151+
RightToLeftIsolate,
152+
RightToLeftOverride,
153+
SegmentSeparator,
154+
WhiteSpace,
155+
];
156+
ALL_VALUES
157+
}
158+
114159
/// Abbreviated name of the *Bidi_Class* property value.
115160
///
116161
/// <http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt#Bidi_Class>
@@ -213,6 +258,14 @@ impl BidiClass {
213258
}
214259
}
215260

261+
262+
impl Default for BidiClass {
263+
fn default() -> Self {
264+
BidiClass::LeftToRight
265+
}
266+
}
267+
268+
216269
impl fmt::Display for BidiClass {
217270
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
218271
write!(f, "{}", self.display())

unic/ucd/bidi/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
// except according to those terms.
1111

1212

13-
#![forbid(unsafe_code)]
13+
#![forbid(unsafe_code, unconditional_recursion)]
1414
#![deny(missing_docs)]
1515

1616
//! # UNIC — UCD — Bidi

unic/ucd/category/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,6 @@ exclude = []
1515
travis-ci = { repository = "behnam/rust-unic", branch = "master" }
1616

1717
[dependencies]
18+
matches = "0.1"
1819
unic-ucd-core = { path = "../core/", version = "0.5.0" }
1920
unic-utils = { path = "../../utils/", version = "0.5.0" }
20-
matches = "0.1"

unic/ucd/category/src/category.rs

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,19 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
use unic_utils::CharDataTable;
11+
12+
use std::fmt;
13+
14+
use unic_utils::{CharDataTable, CharProperty, EnumeratedCharProperty};
15+
1216

1317
/// Represents the Unicode Character
1418
/// [*General_Category*](http://unicode.org/reports/tr44/#General_Category) property.
1519
///
1620
/// This is a useful breakdown into various character types which can be used as a default
1721
/// categorization in implementations. For the property values, see
1822
/// [*General_Category Values*](http://unicode.org/reports/tr44/#General_Category_Values).
19-
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
23+
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
2024
pub enum GeneralCategory {
2125
/// An uppercase letter (Short form: `Lu`)
2226
UppercaseLetter,
@@ -80,6 +84,21 @@ pub enum GeneralCategory {
8084
Unassigned,
8185
}
8286

87+
88+
impl CharProperty for GeneralCategory {
89+
fn of(ch: char) -> Self {
90+
Self::of(ch)
91+
}
92+
}
93+
94+
95+
impl EnumeratedCharProperty for GeneralCategory {
96+
fn all_values() -> &'static [Self] {
97+
Self::all_values()
98+
}
99+
}
100+
101+
83102
pub mod abbr_names {
84103
pub use super::GeneralCategory::UppercaseLetter as Lu;
85104
pub use super::GeneralCategory::LowercaseLetter as Ll;
@@ -125,8 +144,6 @@ impl GeneralCategory {
125144
}
126145

127146
/// Exhaustive list of all `GeneralCategory` property values.
128-
///
129-
/// Reference: <http://unicode.org/reports/tr44/#General_Category_Values>
130147
pub fn all_values() -> &'static [GeneralCategory] {
131148
use GeneralCategory::*;
132149
const ALL_VALUES: &[GeneralCategory] = &[
@@ -163,8 +180,16 @@ impl GeneralCategory {
163180
];
164181
ALL_VALUES
165182
}
183+
184+
/// Human-readable description of the property value.
185+
// TODO: Needs to be improved by returning long-name with underscores replaced by space.
186+
#[inline]
187+
pub fn display(&self) -> String {
188+
format!("{:?}", self).to_owned()
189+
}
166190
}
167191

192+
168193
impl GeneralCategory {
169194
/// `Lu` | `Ll` | `Lt` (Short form: `LC`)
170195
pub fn is_cased_letter(&self) -> bool {
@@ -207,6 +232,21 @@ impl GeneralCategory {
207232
}
208233
}
209234

235+
236+
impl Default for GeneralCategory {
237+
fn default() -> Self {
238+
GeneralCategory::Unassigned
239+
}
240+
}
241+
242+
243+
impl fmt::Display for GeneralCategory {
244+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
245+
write!(f, "{}", self.display())
246+
}
247+
}
248+
249+
210250
#[cfg(test)]
211251
mod tests {
212252
use super::GeneralCategory as GC;
@@ -305,4 +345,11 @@ mod tests {
305345
assert_eq!(GC::of(c), GC::Unassigned);
306346
}
307347
}
348+
349+
#[test]
350+
fn test_display() {
351+
//assert_eq!(format!("{}", GC::UppercaseLetter), "Uppercase Letter");
352+
assert_eq!(format!("{}", GC::UppercaseLetter), "UppercaseLetter");
353+
assert_eq!(format!("{}", GC::Unassigned), "Unassigned");
354+
}
308355
}

unic/ucd/category/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
#![deny(unsafe_code, missing_docs)]
11+
#![deny(unsafe_code, missing_docs, unconditional_recursion)]
1212

1313
//! # UNIC — UCD — Category
1414
//!
@@ -38,6 +38,7 @@
3838
3939
#[macro_use]
4040
extern crate matches;
41+
4142
extern crate unic_ucd_core;
4243
extern crate unic_utils;
4344

unic/ucd/core/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
// except according to those terms.
1010

1111

12-
#![forbid(unsafe_code, missing_docs)]
12+
#![forbid(unsafe_code, missing_docs, unconditional_recursion)]
1313

1414
//! # UNIC — UCD — Core
1515
//!

unic/ucd/normal/src/canonical_combining_class.rs

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
//! Reference: <http://unicode.org/reports/tr44/#Canonical_Combining_Class_Values>
1616
1717

18-
use unic_utils::CharDataTable;
18+
use std::fmt;
19+
20+
use unic_utils::{CharDataTable, CharProperty};
1921

2022

2123
/// Represents *Canonical_Combining_Class* property of a Unicode character.
@@ -82,13 +84,33 @@ pub mod values {
8284
}
8385

8486

87+
impl CharProperty for CanonicalCombiningClass {
88+
fn of(ch: char) -> Self {
89+
Self::of(ch)
90+
}
91+
}
92+
93+
8594
impl CanonicalCombiningClass {
8695
/// Find the character *Canonical_Combining_Class* property value.
8796
pub fn of(ch: char) -> CanonicalCombiningClass {
8897
const TABLE: &'static [(char, char, CanonicalCombiningClass)] =
8998
include!("tables/canonical_combining_class_values.rsv");
9099
*TABLE.find_or(ch, &CanonicalCombiningClass(0))
91100
}
101+
102+
/// Human-readable description of the property value.
103+
// TODO: Needs to be improved by returning long-name with underscores replaced by space.
104+
#[inline]
105+
pub fn display(&self) -> String {
106+
format!("{}", self.number())
107+
}
108+
}
109+
110+
impl fmt::Display for CanonicalCombiningClass {
111+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
112+
write!(f, "{}", self.display())
113+
}
92114
}
93115

94116

@@ -224,4 +246,10 @@ mod tests {
224246
assert_eq!(CCC::of('\u{0315}').number(), 232);
225247
assert_eq!(CCC::of('\u{1e94a}').number(), 7);
226248
}
249+
250+
#[test]
251+
fn test_display() {
252+
assert_eq!(format!("{}", CCC::of('\u{0000}')), "0");
253+
assert_eq!(format!("{}", CCC::of('\u{0300}')), "230");
254+
}
227255
}

0 commit comments

Comments
 (0)