diff --git a/.yapfignore b/.yapfignore new file mode 100644 index 0000000..15971ac --- /dev/null +++ b/.yapfignore @@ -0,0 +1,4 @@ +.eggs +.tox +.venv +.vscode diff --git a/README.md b/README.md index 43b14e9..3ad77ac 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,8 @@ and the context in the document, such as only at the beginning of lines. Please see the OpenType spec for more details. -This package can add these features to any OpenType fonts +This package adds these features to any OpenType fonts +if any of these features are missing, by computing the feature tables from the data such as Unicode code points and glyph outlines diff --git a/east_asian_spacing/builder.py b/east_asian_spacing/builder.py index f3e7292..3a14d32 100755 --- a/east_asian_spacing/builder.py +++ b/east_asian_spacing/builder.py @@ -216,10 +216,7 @@ async def main(): "--index", help="font index, or a list of font indices" " for a font collection (TTC)") - parser.add_argument("-g", - "--glyph-out", - type=pathlib.Path, - help="output glyph list") + parser.add_argument("-g", "--glyph-out", help="output glyph list") parser.add_argument("-l", "--language", help="language if the font is language-specific," @@ -252,7 +249,11 @@ async def main(): args = parser.parse_args() init_logging(args.verbose, main=logger) if args.glyph_out: - args.glyph_out.mkdir(exist_ok=True, parents=True) + if args.glyph_out == '-': + args.glyph_out = sys.stdout + else: + args.glyph_out = pathlib.Path(args.glyph_out) + args.glyph_out.mkdir(exist_ok=True, parents=True) if args.output: args.output.mkdir(exist_ok=True, parents=True) for input in Builder.expand_paths(args.inputs): diff --git a/east_asian_spacing/shaper.py b/east_asian_spacing/shaper.py index 3fb69ea..4e3cccd 100755 --- a/east_asian_spacing/shaper.py +++ b/east_asian_spacing/shaper.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +from abc import abstractmethod import argparse import asyncio import enum @@ -175,12 +176,18 @@ def __str__(self): class ShaperBase(object): - def __init__(self, font, language=None, script=None, features=None): + def __init__(self, + font, + language=None, + script=None, + features=None, + log_name=None): assert isinstance(font.path, pathlib.Path) self.font = font self.language = language self.script = script self.features = features + self.log_name = log_name @property def features_dict(self): @@ -220,10 +227,11 @@ async def ensure_fullwidth_advance(font): logger.info('No fullwidth advance for "%s"', font) return False - def log_result(self, result, text): + def _log_result(self, result, text) -> None: if logger.getEffectiveLevel() <= logging.DEBUG: result.set_text(text) - logger.debug('ShapeResult=%s', result) + result.compute_ink_parts(self.font) + logger.debug('%s=%s', self.log_name or 'ShapeResult', result) _dump_images = False _shapers = None @@ -272,7 +280,7 @@ async def shape(self, text): pos.x_offset) for info, pos in zip(infos, positions)) result = ShapeResult(glyphs) - self.log_result(result, text) + self._log_result(result, text) return result @@ -310,7 +318,7 @@ async def shape(self, text): glyphs = (GlyphData(g["g"], g["cl"], g["ax"], g["dx"]) for g in glyphs) result = ShapeResult(glyphs) - self.log_result(result, text) + self._log_result(result, text) return result async def dump(self, text): diff --git a/east_asian_spacing/spacing.py b/east_asian_spacing/spacing.py index 07b144a..c5c1139 100755 --- a/east_asian_spacing/spacing.py +++ b/east_asian_spacing/spacing.py @@ -5,7 +5,6 @@ import logging import math import sys -from typing import Iterator from typing import List from typing import Tuple @@ -20,10 +19,15 @@ from east_asian_spacing.config import Config from east_asian_spacing.font import Font import east_asian_spacing.log_utils as log_utils -from east_asian_spacing.shaper import GlyphData, InkPart, Shaper -from east_asian_spacing.shaper import show_dump_images +from east_asian_spacing.shaper import InkPart +from east_asian_spacing.shaper import Shaper logger = logging.getLogger('spacing') +_log_shaper = logging.getLogger('shaper') + + +def _is_shaper_log_enabled(): + return _log_shaper.getEffectiveLevel() <= logging.DEBUG class GlyphSets(object): @@ -39,7 +43,7 @@ def __init__(self, left=None, right=None, middle=None, space=None): # except they share the same glyph set. self._root_font = None # For debug/font analysis purpose, keeps all `GlyphData`. - self._glyph_data_list = None + self._glyph_data_list = [] if _is_shaper_log_enabled() else None def assert_font(self, font): if self._root_font: @@ -79,13 +83,48 @@ def _to_str(self, glyph_ids=False): def __str__(self): return self._to_str() + @property + def _glyph_data_by_glyph_id(self): + if not self._glyph_data_list: + return None + result = dict() + for glyph_data in self._glyph_data_list: + glyph_data.cluster_index = 0 + glyph_data_list = result.get(glyph_data.glyph_id) + if glyph_data_list: + if glyph_data not in glyph_data_list: + glyph_data_list.append(glyph_data) + else: + result[glyph_data.glyph_id] = [glyph_data] + return result + def save_glyphs(self, output, prefix='', separator='\n'): + glyph_data_by_glyph_id = self._glyph_data_by_glyph_id + + def str_from_glyph_id(glyph_id): + if glyph_data_by_glyph_id: + glyph_data_list = glyph_data_by_glyph_id.get(glyph_id) + if glyph_data_list: + glyph_data_list = ', '.join( + str(glyph_data) for glyph_data in glyph_data_list) + return f'{glyph_id} # {glyph_data_list}' + return str(glyph_id) + for name, glyphs in self._name_and_glyphs: output.write(f'# {prefix}{name}\n') - glyphs = (str(glyph_id) for glyph_id in sorted(glyphs)) + glyphs = sorted(glyphs) + glyphs = (str_from_glyph_id(glyph_id) for glyph_id in glyphs) output.write(separator.join(glyphs)) output.write('\n') + if glyph_data_by_glyph_id: + output.write(f'# {prefix}filtered\n') + glyph_ids = self.glyph_ids + for glyph_id, glyph_data_list in glyph_data_by_glyph_id.items(): + if glyph_id not in glyph_ids: + for glyph_data in glyph_data_list: + output.write(f'# {glyph_data}\n') + def unite(self, other): if not other: return @@ -93,19 +132,9 @@ def unite(self, other): self.middle |= other.middle self.right |= other.right self.space |= other.space - if self._glyph_data_list and other._glyph_data_list: + if self._glyph_data_list is not None and other._glyph_data_list: self._glyph_data_list.extend(other._glyph_data_list) - def _keep_glyph_data(self) -> None: - if self._glyph_data_list is None: - self._glyph_data_list = [] - - def _removed_glyph_data(self) -> Iterator[GlyphData]: - assert self._glyph_data_list is not None - glyph_ids = self.glyph_ids - return (glyph for glyph in self._glyph_data_list - if glyph.glyph_id not in glyph_ids) - def add_by_ink_part(self, glyphs, font): for glyph in glyphs: ink_pos = glyph.get_ink_part(font) @@ -114,7 +143,7 @@ def add_by_ink_part(self, glyphs, font): elif ink_pos == InkPart.MIDDLE: self.middle.add(glyph.glyph_id) else: - logger.debug('add_by_ink_part: ignored %s', glyph) + _log_shaper.debug('ink_part: ignored %s', glyph) self.assert_glyphs_are_disjoint() async def add_glyphs(self, font, config): @@ -136,47 +165,51 @@ async def add_glyphs(self, font, config): self.assert_glyphs_are_disjoint() self._add_glyphs_count += 1 - async def _shape(self, font, unicodes, language=None, temporary=False): - text = ''.join(chr(c) for c in unicodes) - # Unified code points (e.g., U+2018-201D) in most fonts are Latin glyphs. - # Enable "fwid" feature to get fullwidth glyphs. - features = ['fwid', 'vert'] if font.is_vertical else ['fwid'] - shaper = Shaper(font, - language=language, - script='hani', - features=features) - result = await shaper.shape(text) - result.filter_missing_glyphs() - - if not temporary and self._glyph_data_list is not None: - result.ensure_multi_iterations() - self._glyph_data_list.extend(result) - - # East Asian spacing applies only to fullwidth glyphs. - em = font.fullwidth_advance - result.filter(lambda g: g.advance == em) - - if logger.getEffectiveLevel() <= logging.DEBUG: - result.ensure_multi_iterations() - if len(result): - result.compute_ink_parts(font) - logger.debug('ShapeResult=%s', result) - return result + class _ShapeHelper(object): + def __init__(self, glyph_sets, font, log_name=None): + self._font = font + self._glyph_data_list = glyph_sets._glyph_data_list + self._log_name = log_name + + async def shape(self, unicodes, language=None, temporary=False): + font = self._font + text = ''.join(chr(c) for c in unicodes) + # Unified code points (e.g., U+2018-201D) in most fonts are Latin glyphs. + # Enable "fwid" feature to get fullwidth glyphs. + features = ['fwid', 'vert'] if font.is_vertical else ['fwid'] + shaper = Shaper(font, + language=language, + script='hani', + features=features, + log_name=self._log_name) + result = await shaper.shape(text) + result.filter_missing_glyphs() + + if not temporary and self._glyph_data_list is not None: + result.ensure_multi_iterations() + self._glyph_data_list.extend(result) + + # East Asian spacing applies only to fullwidth glyphs. + em = font.fullwidth_advance + result.filter(lambda g: g.advance == em) + + return result async def _glyph_id_set(self, font, unicodes, language=None): - result = await self._shape(font, - unicodes, - language=language, - temporary=True) + shaper = GlyphSets._ShapeHelper(self, font) + result = await shaper.shape(unicodes, + language=language, + temporary=True) return set(result.glyph_ids) async def get_opening_closing(self, font, config): opening = config.cjk_opening | config.quotes_opening closing = config.cjk_closing | config.quotes_closing + shaper = GlyphSets._ShapeHelper(self, font, log_name='opening_closing') left, right, middle, space = await asyncio.gather( - self._shape(font, closing), self._shape(font, opening), - self._shape(font, config.cjk_middle), - self._shape(font, config.fullwidth_space)) + shaper.shape(closing), shaper.shape(opening), + shaper.shape(config.cjk_middle), + shaper.shape(config.fullwidth_space)) if config.use_ink_bounds: left.filter_ink_part(font, InkPart.LEFT) right.filter_ink_part(font, InkPart.RIGHT) @@ -200,8 +233,9 @@ async def get_period_comma(self, font, config): text = config.cjk_period_comma if not text: return None - ja, zht = await asyncio.gather(self._shape(font, text, language="JAN"), - self._shape(font, text, language="ZHT")) + shaper = GlyphSets._ShapeHelper(self, font, log_name='period_comma') + ja, zht = await asyncio.gather(shaper.shape(text, language="JAN"), + shaper.shape(text, language="ZHT")) if config.use_ink_bounds: ja.filter_ink_part(font, InkPart.LEFT) zht.filter_ink_part(font, InkPart.MIDDLE) @@ -222,8 +256,9 @@ async def get_colon_semicolon(self, font, config): # Colon/semicolon are at middle for Japanese, left in ZHS. text = config.cjk_colon_semicolon trio = GlyphSets() - ja, zhs = await asyncio.gather(self._shape(font, text, language="JAN"), - self._shape(font, text, language="ZHS")) + shaper = GlyphSets._ShapeHelper(self, font, log_name='colon_semicolon') + ja, zhs = await asyncio.gather(shaper.shape(text, language="JAN"), + shaper.shape(text, language="ZHS")) if config.use_ink_bounds: trio.add_by_ink_part(itertools.chain(ja, zhs), font) else: @@ -262,8 +297,9 @@ async def get_exclam_question(self, font, config): return None # Fullwidth exclamation mark and question mark are on left only in ZHS. text = config.cjk_exclam_question - ja, zhs = await asyncio.gather(self._shape(font, text, language="JAN"), - self._shape(font, text, language="ZHS")) + shaper = GlyphSets._ShapeHelper(self, font, log_name='exclam_question') + ja, zhs = await asyncio.gather(shaper.shape(text, language="JAN"), + shaper.shape(text, language="ZHS")) if config.use_ink_bounds: ja = set() zhs.filter_ink_part(font, InkPart.LEFT) @@ -569,26 +605,15 @@ async def main(): default=0) args = parser.parse_args() log_utils.init_logging(args.verbose) + font = Font.load(args.path) if font.is_collection: font = font.fonts_in_collection[args.index] spacing = EastAsianSpacing() - spacing.horizontal._keep_glyph_data() - spacing.vertical._keep_glyph_data() config = Config.default await spacing.add_glyphs(font, config) - print('horizontal:', spacing.horizontal._to_str(True)) - print('vertical:', spacing.vertical._to_str(True)) - spacing.save_glyphs(sys.stdout, separator=', ') - - print('Removed GlyphData:') - for name, glyph_sets in (('horizontal', spacing.horizontal), - ('vertical', spacing.vertical)): - print(name) - print('\n'.join( - str(glyph_data) - for glyph_data in glyph_sets._removed_glyph_data())) + spacing.save_glyphs(sys.stdout) if __name__ == '__main__': diff --git a/tests/shaper_test.py b/tests/shaper_test.py index 0d0199b..c7d1cef 100644 --- a/tests/shaper_test.py +++ b/tests/shaper_test.py @@ -14,13 +14,17 @@ def test_glyph_data_eq(): assert glyph1 == glyph2 glyph3 = GlyphData(2, 1, 1000, 0) + assert glyph1 != glyph3 + assert glyph2 != glyph3 glyph4 = GlyphData(2, 1, 1000, 0) + assert glyph3 == glyph4 + result1 = ShapeResult((glyph1, glyph3)) result2 = ShapeResult((glyph2, glyph4)) assert result1 == result2 glyph3.advance = 500 - assert glyph1 != glyph3 + assert glyph3 != glyph4 assert result1 != result2