Skip to content

Commit

Permalink
Merge pull request #20 from dtolnay/ucdparse
Browse files Browse the repository at this point in the history
Replace ucd-generate dependency with ucd-parse
  • Loading branch information
dtolnay authored Nov 4, 2022
2 parents c384237 + 834e16a commit 5d1a139
Show file tree
Hide file tree
Showing 9 changed files with 100 additions and 48 deletions.
2 changes: 1 addition & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
generate/src/ucd.rs linguist-generated
src/tables.rs linguist-generated
tests/fst/xid_continue.fst linguist-generated
tests/fst/xid_start.fst linguist-generated
tests/tables/tables.rs linguist-generated
tests/trie/trie.rs linguist-generated
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ jobs:
- uses: actions/checkout@v3
- uses: dtolnay/rust-toolchain@stable
- id: ucd-generate
run: echo "version=$(grep 'ucd-generate [0-9]\+\.[0-9]\+\.[0-9]\+' generate/src/ucd.rs --only-matching)" >> $GITHUB_OUTPUT
run: echo "version=$(grep 'ucd-generate [0-9]\+\.[0-9]\+\.[0-9]\+' tests/tables/tables.rs --only-matching)" >> $GITHUB_OUTPUT
- run: cargo install ucd-generate
- run: curl -LO https://www.unicode.org/Public/zipped/latest/UCD.zip
- run: unzip UCD.zip -d UCD
- run: ucd-generate property-bool UCD --include XID_Start,XID_Continue > generate/src/ucd.rs
- run: ucd-generate property-bool UCD --include XID_Start,XID_Continue > tests/tables/tables.rs
- run: ucd-generate property-bool UCD --include XID_Start,XID_Continue --fst-dir tests/fst
- run: ucd-generate property-bool UCD --include XID_Start,XID_Continue --trie-set > tests/trie/trie.rs
- run: cargo run --manifest-path generate/Cargo.toml
- run: sed --in-place 's/ucd-generate [0-9]\+\.[0-9]\+\.[0-9]\+/${{steps.ucd-generate.outputs.version}}/' generate/src/ucd.rs tests/trie/trie.rs
- run: sed --in-place 's/ucd-generate [0-9]\+\.[0-9]\+\.[0-9]\+/${{steps.ucd-generate.outputs.version}}/' tests/tables/tables.rs tests/trie/trie.rs
- run: git diff --exit-code

test:
Expand Down
4 changes: 4 additions & 0 deletions generate/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,7 @@ version = "0.0.0"
authors = ["David Tolnay <[email protected]>"]
edition = "2018"
publish = false

[dependencies]
anyhow = "1"
ucd-parse = "0.1.10"
64 changes: 29 additions & 35 deletions generate/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
// $ cargo install ucd-generate
// $ curl -LO https://www.unicode.org/Public/zipped/15.0.0/UCD.zip
// $ unzip UCD.zip -d UCD
// $ ucd-generate property-bool UCD --include XID_Start,XID_Continue > generate/src/ucd.rs
// $ ucd-generate property-bool UCD --include XID_Start,XID_Continue > tests/tables/tables.rs
// $ ucd-generate property-bool UCD --include XID_Start,XID_Continue --fst-dir tests/fst
// $ ucd-generate property-bool UCD --include XID_Start,XID_Continue --trie-set > tests/trie/trie.rs
// $ cargo run --manifest-path generate/Cargo.toml
Expand All @@ -12,50 +12,43 @@
clippy::cast_lossless,
clippy::cast_possible_truncation, // https://github.com/rust-lang/rust-clippy/issues/9613
clippy::match_wild_err_arm,
clippy::module_name_repetitions,
clippy::too_many_lines,
clippy::uninlined_format_args
)]

#[rustfmt::skip]
#[allow(dead_code, clippy::all, clippy::pedantic)]
mod ucd;

mod output;
mod parse;
mod write;

use std::cmp::Ordering;
use crate::parse::parse_xid_properties;
use anyhow::Result;
use std::collections::{BTreeMap as Map, VecDeque};
use std::convert::TryFrom;
use std::fs;
use std::io;
use std::io::{self, Write};
use std::path::Path;
use std::process;

const CHUNK: usize = 64;
const PATH: &str = "../src/tables.rs";

/// Tests whether `ch` falls inside one of the ranges of `ucd::XID_START`.
fn is_xid_start(ch: char) -> bool {
    let ranges = ucd::XID_START;
    search(ch, ranges)
}

/// Tests whether `ch` falls inside one of the ranges of `ucd::XID_CONTINUE`.
fn is_xid_continue(ch: char) -> bool {
    let ranges = ucd::XID_CONTINUE;
    search(ch, ranges)
}
const UCD: &str = "UCD";
const TABLES: &str = "src/tables.rs";

fn main() -> Result<()> {
let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
let unicode_ident_dir = manifest_dir.parent().unwrap();
let ucd_dir = unicode_ident_dir.join(UCD);
if !ucd_dir.exists() {
writeln!(
io::stderr(),
"Not found: {}\nDownload from https://www.unicode.org/Public/zipped/15.0.0/UCD.zip and unzip.",
ucd_dir.display(),
)?;
process::exit(1);
}

/// Reports whether `ch` lies within one of the inclusive `(lo, hi)` ranges
/// in `table`.
///
/// The lookup is a binary search over the ranges, so `table` is expected to
/// be sorted in ascending order with non-overlapping entries. An empty table
/// never matches.
fn search(ch: char, table: &[(u32, u32)]) -> bool {
    let code = u32::from(ch);
    let found = table.binary_search_by(|range| match *range {
        // The range starts after the code point: look further left.
        (lo, _) if lo > code => Ordering::Greater,
        // The range ends before the code point: look further right.
        (_, hi) if hi < code => Ordering::Less,
        // lo <= code <= hi: the code point is covered by this range.
        _ => Ordering::Equal,
    });
    found.is_ok()
}
let properties = parse_xid_properties(&ucd_dir)?;

fn main() -> io::Result<()> {
let mut chunkmap = Map::<[u8; CHUNK], u8>::new();
let mut dense = Vec::<[u8; CHUNK]>::new();
let mut new_chunk = |chunk| {
Expand Down Expand Up @@ -87,8 +80,8 @@ fn main() -> io::Result<()> {
let code = (i * CHUNK as u32 + j) * 8 + k;
if code >= 0x80 {
if let Some(ch) = char::from_u32(code) {
*this_start |= (is_xid_start(ch) as u8) << k;
*this_continue |= (is_xid_continue(ch) as u8) << k;
*this_start |= (properties.is_xid_start(ch) as u8) << k;
*this_continue |= (properties.is_xid_continue(ch) as u8) << k;
}
}
}
Expand Down Expand Up @@ -163,7 +156,8 @@ fn main() -> io::Result<()> {
*index = dense_to_halfdense[index];
}

let out = write::output(&index_start, &index_continue, &halfdense);
let path = Path::new(env!("CARGO_MANIFEST_DIR")).join(PATH);
fs::write(path, out)
let out = write::output(&properties, &index_start, &index_continue, &halfdense);
let path = unicode_ident_dir.join(TABLES);
fs::write(path, out)?;
Ok(())
}
40 changes: 40 additions & 0 deletions generate/src/parse.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
use anyhow::Result;
use std::collections::BTreeSet as Set;
use std::path::Path;
use ucd_parse::CoreProperty;

/// Code point sets backing the `is_xid_start` / `is_xid_continue` queries.
///
/// Each set stores code points as raw `u32` values.
pub struct Properties {
/// Code points collected for the property named "XID_Start".
xid_start: Set<u32>,
/// Code points collected for the property named "XID_Continue".
xid_continue: Set<u32>,
}

impl Properties {
    /// Returns `true` when the scalar value of `ch` is present in the
    /// `xid_start` set.
    pub fn is_xid_start(&self, ch: char) -> bool {
        let code = u32::from(ch);
        self.xid_start.contains(&code)
    }

    /// Returns `true` when the scalar value of `ch` is present in the
    /// `xid_continue` set.
    pub fn is_xid_continue(&self, ch: char) -> bool {
        let code = u32::from(ch);
        self.xid_continue.contains(&code)
    }
}

/// Builds the XID_Start / XID_Continue code point sets from the Unicode
/// Character Database directory at `ucd_dir`.
///
/// Every `CoreProperty` record returned by `ucd_parse::parse` is inspected;
/// records whose property name is exactly "XID_Start" or "XID_Continue"
/// contribute all of their code points to the matching set, and records for
/// any other property are ignored.
///
/// # Errors
///
/// Propagates the error from `ucd_parse::parse` when the records cannot be
/// read from `ucd_dir`.
pub fn parse_xid_properties(ucd_dir: &Path) -> Result<Properties> {
    let mut xid_start = Set::new();
    let mut xid_continue = Set::new();

    // The explicit element type selects which UCD record type gets parsed.
    let core_properties: Vec<CoreProperty> = ucd_parse::parse(ucd_dir)?;
    for record in core_properties {
        let target = match record.property.as_str() {
            "XID_Start" => &mut xid_start,
            "XID_Continue" => &mut xid_continue,
            // Some other property; not relevant here.
            _ => continue,
        };
        target.extend(record.codepoints.into_iter().map(|cp| cp.value()));
    }

    Ok(Properties {
        xid_start,
        xid_continue,
    })
}
16 changes: 12 additions & 4 deletions generate/src/write.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::output::Output;
use crate::{is_xid_continue, is_xid_start, CHUNK};
use crate::parse::Properties;
use crate::CHUNK;

const HEAD: &str = "\
// \x40generated by ../generate. To regenerate, run the following in the repo root:
Expand All @@ -17,7 +18,12 @@ pub(crate) struct Align8<T>(pub(crate) T);
pub(crate) struct Align64<T>(pub(crate) T);
";

pub fn output(index_start: &[u8], index_continue: &[u8], halfdense: &[u8]) -> Output {
pub fn output(
properties: &Properties,
index_start: &[u8],
index_continue: &[u8],
halfdense: &[u8],
) -> Output {
let mut out = Output::new();
writeln!(out, "{}", HEAD);

Expand All @@ -29,7 +35,8 @@ pub fn output(index_start: &[u8], index_continue: &[u8], halfdense: &[u8]) -> Ou
write!(out, " ");
for j in 0..32 {
let ch = (i * 32 + j) as char;
write!(out, " {},", if is_xid_start(ch) { 'T' } else { 'F' });
let is_xid_start = properties.is_xid_start(ch);
write!(out, " {},", if is_xid_start { 'T' } else { 'F' });
}
writeln!(out);
}
Expand All @@ -44,7 +51,8 @@ pub fn output(index_start: &[u8], index_continue: &[u8], halfdense: &[u8]) -> Ou
write!(out, " ");
for j in 0..32 {
let ch = (i * 32 + j) as char;
write!(out, " {},", if is_xid_continue(ch) { 'T' } else { 'F' });
let is_xid_continue = properties.is_xid_continue(ch);
write!(out, " {},", if is_xid_continue { 'T' } else { 'F' });
}
writeln!(out);
}
Expand Down
9 changes: 4 additions & 5 deletions tests/static_size.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,13 @@ fn test_size() {
#[test]
fn test_xid_size() {
#[deny(dead_code)]
#[allow(clippy::redundant_static_lifetimes)]
#[path = "../generate/src/ucd.rs"]
mod ucd;
#[path = "tables/mod.rs"]
mod tables;

let size = size_of_val(ucd::XID_START) + size_of_val(ucd::XID_CONTINUE);
let size = size_of_val(tables::XID_START) + size_of_val(tables::XID_CONTINUE);
assert_eq!(11528, size);

let _ = ucd::BY_NAME;
let _ = tables::BY_NAME;
}

#[cfg(target_pointer_width = "64")]
Expand Down
7 changes: 7 additions & 0 deletions tests/tables/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Thin wrapper around the sibling `tables` submodule: it attaches lint and
// formatting attributes to that module and re-exports its items.
//
// `module_inception` is allowed because the child module declared below has
// the same name (`tables`) as this module.
#![allow(clippy::module_inception)]

// NOTE(review): the submodule is presumably the ucd-generate output stored in
// tests/tables/tables.rs, hence the lint allowance and the rustfmt opt-out so
// the generated text is left untouched. Confirm against the generator.
#[allow(clippy::redundant_static_lifetimes)]
#[rustfmt::skip]
mod tables;

// Re-export everything from the submodule so users of this module can write
// `tables::XID_START` rather than `tables::tables::XID_START`.
pub(crate) use self::tables::*;
File renamed without changes.

0 comments on commit 5d1a139

Please sign in to comment.