From 57642532162258ddb523caae361573e1eebf16a7 Mon Sep 17 00:00:00 2001 From: Evgeniy Reizner Date: Thu, 16 Jul 2020 14:51:01 +0300 Subject: [PATCH] (CFF) Added seac support. --- CHANGELOG.md | 1 + examples/font2svg.rs | 2 - src/parser.rs | 7 ++ src/tables/cff.rs | 250 +++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 250 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e2d31db4..bdb9dac2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] ### Added - (CFF) CID fonts support. +- (CFF) `seac` support. - `Font::global_bounding_box` ## [0.6.1] - 2020-07-02 diff --git a/examples/font2svg.rs b/examples/font2svg.rs index 8d3ff553..3cf757df 100644 --- a/examples/font2svg.rs +++ b/examples/font2svg.rs @@ -223,8 +223,6 @@ fn glyph_to_path( svg: &mut xmlwriter::XmlWriter, path_buf: &mut svgtypes::Path, ) { - - path_buf.clear(); let mut builder = Builder(path_buf); let bbox = match font.outline_glyph(glyph_id, &mut builder) { diff --git a/src/parser.rs b/src/parser.rs index e1a67d47..76f81c01 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -142,6 +142,13 @@ pub trait TryNumFrom: Sized { fn try_num_from(_: T) -> Option; } +impl TryNumFrom for u8 { + #[inline] + fn try_num_from(v: f32) -> Option { + i32::try_num_from(v).and_then(|v| u8::try_from(v).ok()) + } +} + impl TryNumFrom for i16 { #[inline] fn try_num_from(v: f32) -> Option { diff --git a/src/tables/cff.rs b/src/tables/cff.rs index 8698cf1e..4da0ce90 100644 --- a/src/tables/cff.rs +++ b/src/tables/cff.rs @@ -72,7 +72,6 @@ mod private_dict_operator { /// Enumerates Charset IDs defined in the Adobe Technical Note #5176, Table 22 mod charset_id { - #![allow(dead_code)] pub const ISO_ADOBE: usize = 0; pub const EXPERT: usize = 1; pub const EXPERT_SUBSET: usize = 2; @@ -98,6 +97,21 @@ pub enum CFFError { InvalidNumberOfBlendOperands, BlendRegionsLimitReached, NoLocalSubroutines, + InvalidSeacCode, +} + + +/// A type-safe wrapper for string ID. +#[derive(Clone, Copy, PartialEq, PartialOrd, Debug)] +struct StringId(u16); + +impl FromData for StringId { + const SIZE: usize = 2; + + #[inline] + fn parse(data: &[u8]) -> Option { + u16::parse(data).map(StringId) + } } @@ -108,6 +122,7 @@ pub struct Metadata<'a> { table_data: &'a [u8], global_subrs: DataIndex<'a>, + charset: Charset<'a>, char_strings: DataIndex<'a>, kind: FontKind<'a>, } @@ -172,9 +187,18 @@ pub(crate) fn parse_metadata(data: &[u8]) -> Option { return None; } + // 'The number of glyphs is the value of the count field in the CharStrings INDEX.' + let number_of_glyphs = u16::try_from(char_strings.len()).ok()?; + + let charset = match top_dict.charset_offset { + Some(charset_id::ISO_ADOBE) => Charset::ISOAdobe, + Some(charset_id::EXPERT) => Charset::Expert, + Some(charset_id::EXPERT_SUBSET) => Charset::ExpertSubset, + Some(offset) => parse_charset(number_of_glyphs, &mut Stream::new_at(data, offset)?)?, + None => Charset::ISOAdobe, // default + }; + let kind = if top_dict.has_ros { - // 'The number of glyphs is the value of the count field in the CharStrings INDEX.' - let number_of_glyphs = u16::try_from(char_strings.len()).ok()?; parse_cid_metadata(data, top_dict, number_of_glyphs)? } else { parse_sid_metadata(data, top_dict)? @@ -183,6 +207,7 @@ pub(crate) fn parse_metadata(data: &[u8]) -> Option { Some(Metadata { table_data: data, global_subrs, + charset, char_strings, kind, }) @@ -351,6 +376,7 @@ struct CharStringParserContext<'a> { width_parsed: bool, stems_len: u32, has_endchar: bool, + has_seac: bool, glyph_id: GlyphId, // Required to parse local subroutine in CID fonts. local_subrs: Option>, } @@ -373,6 +399,7 @@ fn parse_char_string( width_parsed: false, stems_len: 0, has_endchar: false, + has_seac: false, glyph_id, local_subrs, }; @@ -635,7 +662,7 @@ fn _parse_char_string( return Err(CFFError::NoLocalSubroutines); } - if ctx.has_endchar { + if ctx.has_endchar && !ctx.has_seac { if !s.at_end() { return Err(CFFError::DataAfterEndChar); } @@ -771,8 +798,33 @@ fn _parse_char_string( } } operator::ENDCHAR => { - if !stack.is_empty() && !ctx.width_parsed { - stack.clear(); + if stack.len() == 4 || (!ctx.width_parsed && stack.len() == 5) { + // Process 'seac'. + let accent_char = seac_code_to_glyph_id(&ctx.metadata.charset, stack.pop()) + .ok_or(CFFError::InvalidSeacCode)?; + let base_char = seac_code_to_glyph_id(&ctx.metadata.charset, stack.pop()) + .ok_or(CFFError::InvalidSeacCode)?; + let dy = stack.pop(); + let dx = stack.pop(); + + if !ctx.width_parsed { + stack.pop(); + ctx.width_parsed = true; + } + + ctx.has_seac = true; + + let base_char_string = ctx.metadata.char_strings.get(u32::from(base_char.0)) + .ok_or(CFFError::InvalidSeacCode)?; + _parse_char_string(ctx, base_char_string, x, y, stack, depth + 1, builder)?; + x = dx; + y = dy; + + let accent_char_string = ctx.metadata.char_strings.get(u32::from(accent_char.0)) + .ok_or(CFFError::InvalidSeacCode)?; + _parse_char_string(ctx, accent_char_string, x, y, stack, depth + 1, builder)?; + } else if stack.len() == 1 && !ctx.width_parsed { + stack.pop(); ctx.width_parsed = true; } @@ -1009,7 +1061,7 @@ fn _parse_char_string( x = pos.0; y = pos.1; - if ctx.has_endchar { + if ctx.has_endchar && !ctx.has_seac { if !s.at_end() { return Err(CFFError::DataAfterEndChar); } @@ -1159,6 +1211,22 @@ pub fn calc_subroutine_bias(len: u32) -> u16 { } } +fn seac_code_to_glyph_id(charset: &Charset, n: f32) -> Option { + let code = u8::try_num_from(n)?; + + let sid = STANDARD_ENCODING[code as usize]; + let sid = StringId(u16::from(sid)); + + match charset { + Charset::ISOAdobe => { + // Not sure why code should be less than 228/zcaron, but this is what harfbuzz does. + if code < 228 { Some(GlyphId(sid.0)) } else { None } + } + Charset::Expert | Charset::ExpertSubset => None, + _ => charset.sid_to_gid(sid), + } +} + fn parse_index<'a>(s: &mut Stream<'a>) -> Option> { let count: u16 = s.read()?; if count != 0 { @@ -1565,10 +1633,153 @@ pub fn skip_number(b0: u8, s: &mut Stream) -> Option<()> { } +#[derive(Clone, Copy, Debug)] +struct CharsetFormat1Range { + first: StringId, + left: u8, +} + +impl FromData for CharsetFormat1Range { + const SIZE: usize = 3; + + #[inline] + fn parse(data: &[u8]) -> Option { + let mut s = Stream::new(data); + Some(CharsetFormat1Range { + first: s.read()?, + left: s.read()?, + }) + } +} + + +#[derive(Clone, Copy, Debug)] +struct CharsetFormat2Range { + first: StringId, + left: u16, +} + +impl FromData for CharsetFormat2Range { + const SIZE: usize = 4; + + #[inline] + fn parse(data: &[u8]) -> Option { + let mut s = Stream::new(data); + Some(CharsetFormat2Range { + first: s.read()?, + left: s.read()?, + }) + } +} + + +#[derive(Clone, Copy, Debug)] +enum Charset<'a> { + ISOAdobe, + Expert, + ExpertSubset, + Format0(LazyArray16<'a, StringId>), + Format1(LazyArray16<'a, CharsetFormat1Range>), + Format2(LazyArray16<'a, CharsetFormat2Range>), +} + +impl Charset<'_> { + fn sid_to_gid(&self, sid: StringId) -> Option { + if sid.0 == 0 { + return Some(GlyphId(0)); + } + + match self { + Charset::ISOAdobe | Charset::Expert | Charset::ExpertSubset => None, + Charset::Format0(ref array) => { + // First glyph is omitted, so we have to add 1. + array.into_iter().position(|n| n == sid).map(|n| GlyphId(n as u16 + 1)) + } + Charset::Format1(array) => { + let mut glyph_id = GlyphId(1); + for range in *array { + let last = u32::from(range.first.0) + u32::from(range.left); + if range.first <= sid && u32::from(sid.0) <= last { + glyph_id.0 += sid.0 - range.first.0; + return Some(glyph_id) + } + + glyph_id.0 += u16::from(range.left) + 1; + } + + None + } + Charset::Format2(array) => { + // The same as format 1, but Range::left is u16. + let mut glyph_id = GlyphId(1); + for range in *array { + let last = u32::from(range.first.0) + u32::from(range.left); + if sid >= range.first && u32::from(sid.0) <= last { + glyph_id.0 += sid.0 - range.first.0; + return Some(glyph_id) + } + + glyph_id.0 += range.left + 1; + } + + None + } + } + } +} + +fn parse_charset<'a>(number_of_glyphs: u16, s: &mut Stream<'a>) -> Option> { + if number_of_glyphs < 2 { + return None; + } + + // -1 everywhere, since `.notdef` is omitted. + let format: u8 = s.read()?; + match format { + 0 => Some(Charset::Format0(s.read_array16(number_of_glyphs - 1)?)), + 1 => { + // The number of ranges is not defined, so we have to + // read until no glyphs are left. + let mut count = 0; + { + let mut s = s.clone(); + let mut total_left = number_of_glyphs - 1; + while total_left > 0 { + s.skip::(); // first + let left: u8 = s.read()?; + total_left = total_left.checked_sub(u16::from(left) + 1)?; + count += 1; + } + } + + s.read_array16(count).map(Charset::Format1) + } + 2 => { + // The same as format 1, but Range::left is u16. + let mut count = 0; + { + let mut s = s.clone(); + let mut total_left = number_of_glyphs - 1; + while total_left > 0 { + s.skip::(); // first + let left: u16 = s.read()?; + let left = left.checked_add(1)?; + total_left = total_left.checked_sub(left)?; + count += 1; + } + } + + s.read_array16(count).map(Charset::Format2) + } + _ => None, + } +} + + #[derive(Clone, Copy, Debug)] enum FDSelect<'a> { Format0(LazyArray16<'a, u8>), - Format3(&'a [u8]), // It's easier to parse it inplace. + Format3(&'a [u8]), // It's easier to parse it in-place. } impl Default for FDSelect<'_> { @@ -1712,6 +1923,26 @@ pub fn f32_abs(n: f32) -> f32 { if n.is_sign_negative() { -n } else { n } } +/// The Standard Encoding as defined in the Adobe Technical Note #5176 Appendix B. +const STANDARD_ENCODING: [u8;256] = [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, + 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, + 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, + 0, 111, 112, 113, 114, 0, 115, 116, 117, 118, 119, 120, 121, 122, 0, 123, + 0, 124, 125, 126, 127, 128, 129, 130, 131, 0, 132, 133, 0, 134, 135, 136, + 137, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 138, 0, 139, 0, 0, 0, 0, 140, 141, 142, 143, 0, 0, 0, 0, + 0, 144, 0, 0, 0, 145, 0, 0, 146, 147, 148, 149, 0, 0, 0, 0, +]; + #[cfg(test)] mod tests { @@ -1796,6 +2027,9 @@ mod tests { CFFError::NoLocalSubroutines => { write!(f, "no local subroutines") } + CFFError::InvalidSeacCode => { + write!(f, "invalid seac code") + } } } }