Skip to content

Commit

Permalink
(cmap) Add cmap::Subtable::codepoints method.
Browse files Browse the repository at this point in the history
Closes #20
  • Loading branch information
laurmaedje authored Jul 31, 2020
1 parent f6bb9bf commit a0ec717
Show file tree
Hide file tree
Showing 9 changed files with 254 additions and 7 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).

## [Unreleased]
### Added
- `cmap::Subtable::codepoints`

### Fixed
- (cmap) Incorrectly returning glyph ID `0` instead of `None` for format 0
- (cmap) Possible invalid glyph mapping for format 2
Expand Down
29 changes: 27 additions & 2 deletions src/tables/cmap/format0.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,30 @@ pub fn parse(data: &[u8], code_point: u32) -> Option<u16> {
}
}

/// Calls `f` for each code point in `0..256` whose glyph ID byte is non-zero.
///
/// Returns `None` as soon as a glyph ID byte cannot be read from `data`;
/// `f` may already have been called for earlier code points at that point.
pub fn codepoints(data: &[u8], mut f: impl FnMut(u32)) -> Option<()> {
    let mut stream = Stream::new(data);
    // Header fields that are irrelevant here: format, length, language.
    stream.skip::<u16>();
    stream.skip::<u16>();
    stream.skip::<u16>();

    // Unlike the other formats, the glyph ID itself is inspected: the glyph
    // array is always read as 256 entries, so reporting every index would
    // just enumerate `0..256` regardless of what the face actually contains.
    (0..256u32).try_for_each(|code_point| {
        let glyph_id: u8 = stream.read()?;
        if glyph_id != 0 {
            f(code_point);
        }
        Some(())
    })
}

#[cfg(test)]
mod format0_tests {
use super::parse;
mod tests {
use super::{parse, codepoints};

#[test]
fn maps_not_all_256_codepoints() {
Expand All @@ -39,5 +60,9 @@ mod format0_tests {
assert_eq!(parse(&data, 0), None);
assert_eq!(parse(&data, 0x40), Some(100));
assert_eq!(parse(&data, 100), None);

let mut vec = vec![];
codepoints(&data, |c| vec.push(c));
assert_eq!(vec, [0x40]);
}
}
17 changes: 17 additions & 0 deletions src/tables/cmap/format10.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,20 @@ pub fn parse(data: &[u8], code_point: u32) -> Option<u16> {
let idx = code_point.checked_sub(first_code_point)?;
glyphs.get(idx)
}

/// Calls `f` for every code point covered by a format 10 subtable.
///
/// The covered code points are `first_code_point + i` for `i` in `0..count`,
/// both values being read from the subtable header. Returns `None` if the
/// header cannot be read or if a code point would overflow `u32`.
pub fn codepoints(data: &[u8], mut f: impl FnMut(u32)) -> Option<()> {
    let mut stream = Stream::new(data);
    stream.skip::<u16>(); // format
    stream.skip::<u16>(); // reserved
    stream.skip::<u32>(); // length
    stream.skip::<u32>(); // language
    let start: u32 = stream.read()?;
    let len: u32 = stream.read()?;

    (0..len).try_for_each(|offset| {
        f(start.checked_add(offset)?);
        Some(())
    })
}
17 changes: 17 additions & 0 deletions src/tables/cmap/format12.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,20 @@ pub fn parse(data: &[u8], code_point: u32) -> Option<u16> {

None
}

/// Calls `f` for every code point covered by the groups of a format 12 subtable.
///
/// Each group contributes the inclusive range
/// `start_char_code..=end_char_code`. Returns `None` if the group count or
/// the group array cannot be read from `data`.
pub fn codepoints(data: &[u8], mut f: impl FnMut(u32)) -> Option<()> {
    let mut stream = Stream::new(data);
    stream.skip::<u16>(); // format
    stream.skip::<u16>(); // reserved
    stream.skip::<u32>(); // length
    stream.skip::<u32>(); // language
    let group_count: u32 = stream.read()?;
    let groups = stream.read_array32::<SequentialMapGroup>(group_count)?;

    for group in groups {
        (group.start_char_code..=group.end_char_code).for_each(&mut f);
    }

    Some(())
}
6 changes: 6 additions & 0 deletions src/tables/cmap/format13.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,9 @@ pub fn parse(data: &[u8], code_point: u32) -> Option<u16> {

None
}

/// Calls `f` for every code point covered by a format 13 subtable.
///
/// Formats 12 and 13 differ only in how glyph IDs are derived; the set of
/// code points is enumerated exactly as for format 12, so this forwards to it.
pub fn codepoints(data: &[u8], f: impl FnMut(u32)) -> Option<()> {
    use super::format12;

    format12::codepoints(data, f)
}
71 changes: 69 additions & 2 deletions src/tables/cmap/format2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,77 @@ pub fn parse(data: &[u8], code_point: u32) -> Option<u16> {
u16::try_from((i32::from(glyph) + i32::from(sub_header.id_delta)) % 65536).ok()
}

/// Calls `f` for every code point defined by a format 2 subtable.
///
/// For each possible first byte the matching sub header is looked up:
/// - sub header 0 marks a single byte code, reported only when the byte lies
///   in `first_code..first_code + entry_count`;
/// - any other sub header marks a two byte code, and `entry_count` code points
///   starting at `(first_byte << 8) + first_code` are reported.
///
/// Returns `None` if the tables cannot be read or if a computed code falls
/// outside of `u16`; `f` may already have been called by then.
pub fn codepoints(data: &[u8], mut f: impl FnMut(u32)) -> Option<()> {
    let mut stream = Stream::new(data);
    stream.skip::<u16>(); // format
    stream.skip::<u16>(); // length
    stream.skip::<u16>(); // language
    let keys = stream.read_array16::<u16>(256)?;

    // The largest sub header index referenced by the keys determines how
    // many sub header records there are.
    let record_count = keys.into_iter().map(|key| key / 8).max()? + 1;
    let records = stream.read_array16::<SubHeaderRecord>(record_count)?;

    for high_byte in 0u16..256 {
        let index = keys.get(high_byte)? / 8;
        let record = records.get(index)?;

        if index != 0 {
            // Two byte code: `high_byte` is the first byte of the pair.
            let base = record.first_code.checked_add(high_byte << 8)?;
            for offset in 0..record.entry_count {
                f(u32::from(base.checked_add(offset)?));
            }
            continue;
        }

        // Single byte code: the byte itself is the code point.
        let range_end = record.first_code.checked_add(record.entry_count)?;
        if record.first_code <= high_byte && high_byte < range_end {
            f(u32::from(high_byte));
        }
    }

    Some(())
}

#[cfg(test)]
mod format2_tests {
mod tests {
use crate::parser::FromData;
use super::parse;
use super::{parse, codepoints};

#[test]
fn collect_codepoints() {
    let mut data: Vec<u8> = vec![
        0x00, 0x02, // format: 2
        0x02, 0x16, // subtable size: 534
        0x00, 0x00, // language ID: 0
    ];

    // Sub header keys: every first byte uses sub header 0 (single byte),
    // except 0x28 which gets key 8, i.e. sub header 1 (two byte).
    let mut keys = vec![0u8; 256 * u16::SIZE];
    keys[0x28 * u16::SIZE + 1] = 0x08;
    data.extend(keys);

    data.extend_from_slice(&[
        // Sub header 0 (single byte mapping)
        0x00, 0xFE, // first code: 254
        0x00, 0x02, // entry count: 2
        0x00, 0x00, // id delta: uninteresting
        0x00, 0x00, // id range offset: uninteresting
        // Sub header 1 (first byte 0x28)
        0x00, 0x10, // first code: 0x10, so code points start at (0x28 << 8) + 0x10 = 10256
        0x00, 0x03, // entry count: 3
        0x00, 0x00, // id delta: uninteresting
        0x00, 0x00, // id range offset: uninteresting
    ]);

    // Only glyph IDs would follow; `codepoints` never reads them.

    let mut collected = Vec::new();
    codepoints(&data, |code_point| collected.push(code_point));
    assert_eq!(collected, [10256, 10257, 10258, 254, 255]);
}

#[test]
fn codepoint_at_range_end() {
Expand Down
53 changes: 51 additions & 2 deletions src/tables/cmap/format4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,33 @@ pub fn parse(data: &[u8], code_point: u32) -> Option<u16> {
None
}

/// Calls `f` for every code point covered by the segments of a format 4 subtable.
///
/// Each segment contributes the inclusive range `start_code..=end_code`.
/// Enumeration stops at the terminating segment, whose start and end codes
/// are both `0xFFFF`; that segment only marks the end of the segment list
/// and is therefore not reported as a code point.
///
/// Returns `None` if the segment count is zero or if the segment arrays
/// cannot be read from `data`.
pub fn codepoints(data: &[u8], mut f: impl FnMut(u32)) -> Option<()> {
    let mut s = Stream::new(data);
    s.advance(6); // format + length + language
    let seg_count_x2: u16 = s.read()?;
    if seg_count_x2 < 2 {
        return None;
    }

    let seg_count = seg_count_x2 / 2;
    s.advance(6); // searchRange + entrySelector + rangeShift

    let end_codes = s.read_array16::<u16>(seg_count)?;
    s.skip::<u16>(); // reservedPad
    let start_codes = s.read_array16::<u16>(seg_count)?;

    for (start, end) in start_codes.into_iter().zip(end_codes) {
        // 0xFFFF..=0xFFFF is the special end-of-segments marker, not a real
        // mapping. A segment that merely ends at 0xFFFF (start < 0xFFFF) is
        // still a regular segment and is reported in full.
        if start == 0xFFFF && end == 0xFFFF {
            break;
        }

        for code_point in start..=end {
            f(u32::from(code_point));
        }
    }

    Some(())
}

#[cfg(test)]
mod format4_tests {
use super::parse;
mod tests {
use super::{parse, codepoints};

#[test]
fn single_glyph() {
Expand Down Expand Up @@ -457,4 +481,29 @@ mod format4_tests {

assert_eq!(parse(data, 0x41), None);
}

#[test]
fn collect_codepoints() {
    let data = &[
        0x00, 0x04, // format: 4
        0x00, 0x18, // subtable size: 24
        0x00, 0x00, // language ID: 0
        0x00, 0x04, // 2 x segCount: 4
        0x00, 0x02, // search range: 2
        0x00, 0x00, // entry selector: 0
        0x00, 0x02, // range shift: 2
        // End character codes
        0x00, 0x22, // char code [0]: 34
        0xFF, 0xFF, // char code [1]: 65535
        0x00, 0x00, // reserved: 0
        // Start character codes
        0x00, 0x1B, // char code [0]: 27
        0xFF, 0xFD, // char code [1]: 65533
        // No glyph ID data follows: `codepoints` never reads it.
    ];

    let mut collected = Vec::new();
    codepoints(data, |code_point| collected.push(code_point));

    // Both segments are reported in full, in table order.
    let expected: Vec<u32> = (27..=34).chain(65533..=65535).collect();
    assert_eq!(collected, expected);
}
}
16 changes: 16 additions & 0 deletions src/tables/cmap/format6.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,19 @@ pub fn parse(data: &[u8], code_point: u32) -> Option<u16> {
let idx = code_point.checked_sub(first_code_point)?;
glyphs.get(idx)
}

/// Calls `f` for every code point covered by a format 6 subtable.
///
/// The covered code points are `first_code_point + i` for `i` in `0..count`,
/// both values being read from the subtable header. Returns `None` if the
/// header cannot be read or if a code point would overflow `u16`.
pub fn codepoints(data: &[u8], mut f: impl FnMut(u32)) -> Option<()> {
    let mut stream = Stream::new(data);
    stream.skip::<u16>(); // format
    stream.skip::<u16>(); // length
    stream.skip::<u16>(); // language
    let start: u16 = stream.read()?;
    let len: u16 = stream.read()?;

    for offset in 0..len {
        match start.checked_add(offset) {
            Some(code_point) => f(u32::from(code_point)),
            None => return None,
        }
    }

    Some(())
}
49 changes: 48 additions & 1 deletion src/tables/cmap/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ impl<'a> Subtable<'a> {

/// Maps a character to a glyph ID.
///
/// This is a low-level method and unlike `Face::glyph_index` is doesn't
/// This is a low-level method and unlike `Face::glyph_index` it doesn't
/// check that the current encoding is Unicode.
/// It simply maps a `u32` codepoint number to a glyph ID.
///
Expand Down Expand Up @@ -180,6 +180,53 @@ impl<'a> Subtable<'a> {
None
}
}

/// Calls `f` for all codepoints contained in this subtable.
///
/// This is a low-level method: it does not check that the current encoding
/// is Unicode and simply reports every `u32` codepoint that is present in
/// this subtable.
///
/// A reported codepoint is one that is _defined_ in the subtable, so
/// `glyph_index` may still return `None` for it (the subtable may map it
/// to glyph ID `0`).
///
/// Failures of the per-format enumeration are ignored; in that case `f`
/// may have been called for only some of the codepoints, or not at all.
///
/// Returns without doing anything:
/// - when format is `MixedCoverage`, since it's not supported.
/// - when format is `UnicodeVariationSequences`, since it's not supported.
pub fn codepoints<F: FnMut(u32)>(&self, f: F) {
    let data = self.subtable_data;

    // The per-format result only signals whether parsing succeeded, and
    // this method has nothing to report, so it is deliberately dropped.
    let _ = match self.format {
        Format::ByteEncodingTable => format0::codepoints(data, f),
        Format::HighByteMappingThroughTable => format2::codepoints(data, f),
        Format::SegmentMappingToDeltaValues => format4::codepoints(data, f),
        Format::TrimmedTableMapping => format6::codepoints(data, f),
        Format::TrimmedArray => format10::codepoints(data, f),
        Format::SegmentedCoverage => format12::codepoints(data, f),
        Format::ManyToOneRangeMappings => format13::codepoints(data, f),
        // Unsupported
        Format::MixedCoverage | Format::UnicodeVariationSequences => None,
    };
}
}

impl<'a> core::fmt::Debug for Subtable<'a> {
Expand Down

0 comments on commit a0ec717

Please sign in to comment.