Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/files/lambda.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ Resources:
- Ref: MartinLayer
CodeUri: lambda-function/
Handler: config.yaml
MemorySize: 350
Timeout: 10
MemorySize: 128
Timeout: 3
115 changes: 46 additions & 69 deletions martin-core/src/resources/fonts/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use std::collections::HashMap;
use std::ffi::OsStr;
use std::fmt::Debug;
use std::path::PathBuf;
use std::sync::LazyLock;
use std::sync::{Arc, LazyLock};

use bit_set::BitSet;
use dashmap::{DashMap, Entry};
Expand All @@ -37,7 +37,7 @@ use tracing::{debug, info, warn};
/// Although U+FFFF covers the Basic Multilingual Plane, the Unicode standard
/// allows to use up to U+10FFFF, including for private use.
/// (cf. <https://en.wikipedia.org/wiki/Unicode_block>)
const MAX_UNICODE_CP: usize = 0x0010_FFFF;
const MAX_UNICODE_CP: u32 = 0x0010_FFFF;
/// Size of each Unicode codepoint range (256 characters).
const CP_RANGE_SIZE: usize = 256;
/// Font size in pixels for SDF glyph rendering.
Expand All @@ -51,10 +51,6 @@ const BUFFER_SIZE: usize = 3;
const RADIUS: usize = 8;
/// Cutoff threshold for SDF generation (0.0 to 1.0).
const CUTOFF: f64 = 0.25_f64;
/// Maximum Unicode codepoint range ID.
///
/// Each range is 256 codepoints long, so the highest range ID is 0xFFFF / 256 = 255.
const MAX_UNICODE_CP_RANGE_ID: usize = MAX_UNICODE_CP / CP_RANGE_SIZE;

mod error;
pub use error::FontError;
Expand All @@ -63,36 +59,37 @@ mod cache;
pub use cache::{FontCache, NO_FONT_CACHE, OptFontCache};

/// Glyph information: (codepoints, count, ranges, first, last).
type GetGlyphInfo = (BitSet, usize, Vec<(usize, usize)>, usize, usize);
type GetGlyphInfo = (BitSet, u32, Vec<(usize, usize)>, usize, usize);

/// Extracts available codepoints from a font face.
///
/// Returns `None` if the font contains no usable glyphs.
fn get_available_codepoints(face: &mut Face) -> Option<GetGlyphInfo> {
let mut codepoints = BitSet::with_capacity(MAX_UNICODE_CP);
let mut codepoints = BitSet::new();
Copy link
Copy Markdown
Contributor Author

@Auspicus Auspicus Jan 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without explicit capacity there will be some required resizing but it allows for this BitSet to be as small as 8 bytes (256 bits) when the font only provides codepoints within the range 0-255. If the font provides even a single codepoint at the end of the range this will need to be resized to 139KB. So we're trading off a small amount of start up time here for a lower memory footprint per font.

let mut spans = Vec::new();
let mut first: Option<usize> = None;
let mut count = 0;
let mut last = 0;

for cp in 0..=MAX_UNICODE_CP {
if face.get_char_index(cp).is_ok() {
codepoints.insert(cp);
count += 1;
if first.is_none() {
for (cp, _) in face.chars() {
codepoints.insert(cp);
if let Some(start) = first {
if cp != last + 1 {
spans.push((start, last));
first = Some(cp);
}
} else if let Some(start) = first {
spans.push((start, cp - 1));
first = None;
} else {
first = Some(cp);
}
last = cp;
}

if count == 0 {
None
} else {
if let Some(first) = first {
spans.push((first, last));
let count = u32::try_from(face.num_glyphs()).unwrap_or(0);
let start = spans[0].0;
let end = spans[spans.len() - 1].1;
Some((codepoints, count, spans, start, end))
Some((codepoints, count, spans, start, last))
} else {
None
}
}

Expand All @@ -110,40 +107,18 @@ pub struct CatalogFontEntry {
/// None for regular style.
pub style: Option<String>,
/// Total number of glyphs in this font.
pub glyphs: usize,
pub glyphs: u32,
/// First Unicode codepoint available.
pub start: usize,
/// Last Unicode codepoint available.
pub end: usize,
}

/// Thread-safe font manager for discovery, cataloging, and serving fonts as Protocol Buffers.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Default)]
pub struct FontSources {
/// Map of font name to font source data.
fonts: DashMap<String, FontSource>,
/// Pre-computed bitmasks for each 256-character Unicode range.
masks: Vec<BitSet>,
}

impl Default for FontSources {
fn default() -> Self {
let mut masks = Vec::with_capacity(MAX_UNICODE_CP_RANGE_ID + 1);

let mut bs = BitSet::with_capacity(CP_RANGE_SIZE);
for v in 0..=MAX_UNICODE_CP {
bs.insert(v);
Copy link
Copy Markdown
Contributor Author

@Auspicus Auspicus Jan 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the core issue is that although we allocate a BitSet::with_capacity(256) the value for v here might be 0x10FFFF causing a MASSIVE reallocation to fit values 0-0x10FFFF in the BitSet. From what I understand, the values must be contiguous without holes so each BitSet could be as large as (0x10FFFF / 32) (bits per u32) * 4 (bytes per u32) and then each of these is inserted for each range up to 4352.

Copy link
Copy Markdown
Member

@nyurik nyurik Jan 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my understanding is that each file would use a 136KB of RAM (136*1024*8 = 1_114_112 = 0x11_0000 codepoint presence bits) - not that much memory. If we follow the optimization I proposed below, most fonts won't even get that high, but it will keep the complexity to the minimal.

Copy link
Copy Markdown
Contributor Author

@Auspicus Auspicus Jan 22, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The issue is less with each font source since it's only a single BitSet of 34816 u32 (139KB). The issue is with the pre-computed masks. Each range (of which there are 4352) carries a masking BitSet of up to 0x11_0000 (34816 u32 [4 bytes] in the backing BitVec). Assuming every range is this size (inaccurately) we get 600MB (34816*4*4352 bytes). More accurately we get masking BitSets with sizes growing from 8-34816 u32 and more likely around 300MB total. Prior to adjusting the max codepoint size we had smaller BitSets (8-2048 u32) and less of them 256 (0xFFFF/256). Resulting in at most ~2MB (2048*4*256 bytes) for masks but likely less due to not all BitSets being 2048 size.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See images attached with debugger running on main branch in the default for FontSources:

Screenshot_20260123_083203 Screenshot_20260123_083257

if v % CP_RANGE_SIZE == (CP_RANGE_SIZE - 1) {
masks.push(bs);
bs = BitSet::with_capacity(CP_RANGE_SIZE);
}
}

Self {
fonts: DashMap::new(),
masks,
}
}
}

impl FontSources {
Expand All @@ -168,6 +143,9 @@ impl FontSources {
/// Range must be exactly 256 characters (e.g., 0-255, 256-511).
#[expect(clippy::cast_possible_truncation)]
pub fn get_font_range(&self, ids: &str, start: u32, end: u32) -> Result<Vec<u8>, FontError> {
if start > MAX_UNICODE_CP || end > MAX_UNICODE_CP {
return Err(FontError::InvalidFontRangeStartEnd(start, end));
}
if start > end {
return Err(FontError::InvalidFontRangeStartEnd(start, end));
}
Expand All @@ -181,23 +159,16 @@ impl FontSources {
return Err(FontError::InvalidFontRange(start, end));
}

let mut needed = self.masks[(start as usize) / CP_RANGE_SIZE].clone();
let fonts = ids
.split(',')
.filter_map(|id| match self.fonts.get(id) {
None => Some(Err(FontError::FontNotFound(id.to_string()))),
Some(v) => {
let mut ds = needed.clone();
ds.intersect_with(&v.codepoints);
if ds.is_empty() {
None
} else {
needed.difference_with(&v.codepoints);
Some(Ok((id, v, ds)))
}
.map(|id| {
if self.fonts.get(id).is_none() {
return Err(FontError::FontNotFound(id.to_string()));
}

Ok(id)
})
.collect::<Result<Vec<_>, FontError>>()?;
.collect::<Result<Vec<&str>, FontError>>()?;

if fonts.is_empty() {
return Ok(Vec::new());
Expand All @@ -206,7 +177,11 @@ impl FontSources {
let lib = Library::init()?;
let mut stack = Fontstack::default();

for (id, font, ds) in fonts {
for id in fonts {
let Some(font) = self.fonts.get(id) else {
continue;
};

if stack.name.is_empty() {
stack.name = id.to_string();
} else {
Expand All @@ -225,9 +200,12 @@ impl FontSources {
// and https://www.freetype.org/freetype2/docs/tutorial/step1.html for details.
face.set_char_size(0, CHAR_HEIGHT, 0, 0)?;

for cp in &ds {
let glyph = render_sdf_glyph(&face, cp as u32, BUFFER_SIZE, RADIUS, CUTOFF)?;
stack.glyphs.push(glyph);
for codepoint in start..=end {
if !font.codepoints.contains(codepoint as usize) {
continue;
}
let g = render_sdf_glyph(&face, codepoint, BUFFER_SIZE, RADIUS, CUTOFF)?;
stack.glyphs.push(g);
}
}

Expand All @@ -247,7 +225,7 @@ pub struct FontSource {
/// Face index within the font file (for .ttc collections).
face_index: isize,
/// Unicode codepoints this font supports.
codepoints: BitSet,
codepoints: Arc<BitSet>,
/// Font metadata for the catalog.
catalog_entry: CatalogFontEntry,
}
Expand Down Expand Up @@ -356,7 +334,7 @@ fn parse_font(
v.insert(FontSource {
path: path.clone(),
face_index,
codepoints,
codepoints: Arc::new(codepoints),
catalog_entry: CatalogFontEntry {
family,
style,
Expand All @@ -383,14 +361,13 @@ mod tests {
// U+3320: SQUARE SANTIIMU, U+1F60A: SMILING FACE WITH SMILING EYES
for codepoint in [0x3320, 0x1f60a] {
let font_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join(format!("src/resources/fonts/tests/u+{codepoint:x}.ttf"));
assert!(font_path.is_file()); // make sure the file path is correct

.join(format!("../tests/fixtures/fonts2/u+{codepoint:x}.ttf"));
assert!(font_path.is_file(), "{}", font_path.display());
let mut face = lib.new_face(&font_path, 0).unwrap();

let (_codepoints, count, _ranges, first, last) =
get_available_codepoints(&mut face).unwrap();
assert_eq!(count, 1);
assert_eq!(count, 2);
assert_eq!(format!("U+{first:X}"), format!("U+{codepoint:X}"));
assert_eq!(format!("U+{last:X}"), format!("U+{codepoint:X}"));
}
Expand Down
4 changes: 2 additions & 2 deletions tests/expected/auto/catalog_auto.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
"Overpass Mono Light": {
"end": 128276,
"family": "Overpass Mono",
"glyphs": 934,
"glyphs": 988,
"start": 0,
"style": "Light"
},
"Overpass Mono Regular": {
"end": 128276,
"family": "Overpass Mono",
"glyphs": 934,
"glyphs": 988,
"start": 0,
"style": "Regular"
}
Expand Down
4 changes: 2 additions & 2 deletions tests/expected/configured/catalog_cfg.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
"Overpass Mono Light": {
"end": 128276,
"family": "Overpass Mono",
"glyphs": 934,
"glyphs": 988,
"start": 0,
"style": "Light"
},
"Overpass Mono Regular": {
"end": 128276,
"family": "Overpass Mono",
"glyphs": 934,
"glyphs": 988,
"start": 0,
"style": "Regular"
}
Expand Down
Binary file modified tests/expected/configured/font_3.pbf
Binary file not shown.
2 changes: 1 addition & 1 deletion tests/expected/configured/font_3.pbf.headers
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
content-encoding: br
content-type: application/x-protobuf
etag: W/"13762-12Po9LfxnAVc5cwSQZI0Bg=="
etag: W/"2680d-HiEdEbbGH_qT-oPkzbO5zg=="
transfer-encoding: chunked
vary: accept-encoding, Origin, Access-Control-Request-Method, Access-Control-Request-Headers
Loading