Skip to content

Commit

Permalink
Add GlyphData to glyph files when debugging
Browse files Browse the repository at this point in the history
When debug logging is enabled, GlyphData is saved to the glyph
files to help with debugging.

Also changed the `-g` option to accept `-`, which writes the glyph list to stdout.
  • Loading branch information
kojiishi committed Jul 31, 2021
1 parent 9a6037a commit 67dfa91
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 81 deletions.
4 changes: 4 additions & 0 deletions .yapfignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.eggs
.tox
.venv
.vscode
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ and the context in the document,
such as only at the beginning of lines.
Please see the OpenType spec for more details.

This package can add these features to any OpenType fonts
This package adds these features to any OpenType fonts
if any of these features are missing,
by computing the feature tables from the data
such as Unicode code points and glyph outlines

Expand Down
11 changes: 6 additions & 5 deletions east_asian_spacing/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,10 +216,7 @@ async def main():
"--index",
help="font index, or a list of font indices"
" for a font collection (TTC)")
parser.add_argument("-g",
"--glyph-out",
type=pathlib.Path,
help="output glyph list")
parser.add_argument("-g", "--glyph-out", help="output glyph list")
parser.add_argument("-l",
"--language",
help="language if the font is language-specific,"
Expand Down Expand Up @@ -252,7 +249,11 @@ async def main():
args = parser.parse_args()
init_logging(args.verbose, main=logger)
if args.glyph_out:
args.glyph_out.mkdir(exist_ok=True, parents=True)
if args.glyph_out == '-':
args.glyph_out = sys.stdout
else:
args.glyph_out = pathlib.Path(args.glyph_out)
args.glyph_out.mkdir(exist_ok=True, parents=True)
if args.output:
args.output.mkdir(exist_ok=True, parents=True)
for input in Builder.expand_paths(args.inputs):
Expand Down
18 changes: 13 additions & 5 deletions east_asian_spacing/shaper.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/usr/bin/env python3
from abc import abstractmethod
import argparse
import asyncio
import enum
Expand Down Expand Up @@ -175,12 +176,18 @@ def __str__(self):


class ShaperBase(object):
def __init__(self, font, language=None, script=None, features=None):
def __init__(self,
font,
language=None,
script=None,
features=None,
log_name=None):
assert isinstance(font.path, pathlib.Path)
self.font = font
self.language = language
self.script = script
self.features = features
self.log_name = log_name

@property
def features_dict(self):
Expand Down Expand Up @@ -220,10 +227,11 @@ async def ensure_fullwidth_advance(font):
logger.info('No fullwidth advance for "%s"', font)
return False

def log_result(self, result, text):
def _log_result(self, result, text) -> None:
if logger.getEffectiveLevel() <= logging.DEBUG:
result.set_text(text)
logger.debug('ShapeResult=%s', result)
result.compute_ink_parts(self.font)
logger.debug('%s=%s', self.log_name or 'ShapeResult', result)

_dump_images = False
_shapers = None
Expand Down Expand Up @@ -272,7 +280,7 @@ async def shape(self, text):
pos.x_offset)
for info, pos in zip(infos, positions))
result = ShapeResult(glyphs)
self.log_result(result, text)
self._log_result(result, text)
return result


Expand Down Expand Up @@ -310,7 +318,7 @@ async def shape(self, text):
glyphs = (GlyphData(g["g"], g["cl"], g["ax"], g["dx"])
for g in glyphs)
result = ShapeResult(glyphs)
self.log_result(result, text)
self._log_result(result, text)
return result

async def dump(self, text):
Expand Down
163 changes: 94 additions & 69 deletions east_asian_spacing/spacing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import logging
import math
import sys
from typing import Iterator
from typing import List
from typing import Tuple

Expand All @@ -20,10 +19,15 @@
from east_asian_spacing.config import Config
from east_asian_spacing.font import Font
import east_asian_spacing.log_utils as log_utils
from east_asian_spacing.shaper import GlyphData, InkPart, Shaper
from east_asian_spacing.shaper import show_dump_images
from east_asian_spacing.shaper import InkPart
from east_asian_spacing.shaper import Shaper

logger = logging.getLogger('spacing')
_log_shaper = logging.getLogger('shaper')


def _is_shaper_log_enabled():
return _log_shaper.getEffectiveLevel() <= logging.DEBUG


class GlyphSets(object):
Expand All @@ -39,7 +43,7 @@ def __init__(self, left=None, right=None, middle=None, space=None):
# except they share the same glyph set.
self._root_font = None
# For debug/font analysis purpose, keeps all `GlyphData`.
self._glyph_data_list = None
self._glyph_data_list = [] if _is_shaper_log_enabled() else None

def assert_font(self, font):
if self._root_font:
Expand Down Expand Up @@ -79,33 +83,58 @@ def _to_str(self, glyph_ids=False):
def __str__(self):
return self._to_str()

@property
def _glyph_data_by_glyph_id(self):
if not self._glyph_data_list:
return None
result = dict()
for glyph_data in self._glyph_data_list:
glyph_data.cluster_index = 0
glyph_data_list = result.get(glyph_data.glyph_id)
if glyph_data_list:
if glyph_data not in glyph_data_list:
glyph_data_list.append(glyph_data)
else:
result[glyph_data.glyph_id] = [glyph_data]
return result

def save_glyphs(self, output, prefix='', separator='\n'):
glyph_data_by_glyph_id = self._glyph_data_by_glyph_id

def str_from_glyph_id(glyph_id):
if glyph_data_by_glyph_id:
glyph_data_list = glyph_data_by_glyph_id.get(glyph_id)
if glyph_data_list:
glyph_data_list = ', '.join(
str(glyph_data) for glyph_data in glyph_data_list)
return f'{glyph_id} # {glyph_data_list}'
return str(glyph_id)

for name, glyphs in self._name_and_glyphs:
output.write(f'# {prefix}{name}\n')
glyphs = (str(glyph_id) for glyph_id in sorted(glyphs))
glyphs = sorted(glyphs)
glyphs = (str_from_glyph_id(glyph_id) for glyph_id in glyphs)
output.write(separator.join(glyphs))
output.write('\n')

if glyph_data_by_glyph_id:
output.write(f'# {prefix}filtered\n')
glyph_ids = self.glyph_ids
for glyph_id, glyph_data_list in glyph_data_by_glyph_id.items():
if glyph_id not in glyph_ids:
for glyph_data in glyph_data_list:
output.write(f'# {glyph_data}\n')

def unite(self, other):
if not other:
return
self.left |= other.left
self.middle |= other.middle
self.right |= other.right
self.space |= other.space
if self._glyph_data_list and other._glyph_data_list:
if self._glyph_data_list is not None and other._glyph_data_list:
self._glyph_data_list.extend(other._glyph_data_list)

def _keep_glyph_data(self) -> None:
if self._glyph_data_list is None:
self._glyph_data_list = []

def _removed_glyph_data(self) -> Iterator[GlyphData]:
assert self._glyph_data_list is not None
glyph_ids = self.glyph_ids
return (glyph for glyph in self._glyph_data_list
if glyph.glyph_id not in glyph_ids)

def add_by_ink_part(self, glyphs, font):
for glyph in glyphs:
ink_pos = glyph.get_ink_part(font)
Expand All @@ -114,7 +143,7 @@ def add_by_ink_part(self, glyphs, font):
elif ink_pos == InkPart.MIDDLE:
self.middle.add(glyph.glyph_id)
else:
logger.debug('add_by_ink_part: ignored %s', glyph)
_log_shaper.debug('ink_part: ignored %s', glyph)
self.assert_glyphs_are_disjoint()

async def add_glyphs(self, font, config):
Expand All @@ -136,47 +165,51 @@ async def add_glyphs(self, font, config):
self.assert_glyphs_are_disjoint()
self._add_glyphs_count += 1

async def _shape(self, font, unicodes, language=None, temporary=False):
text = ''.join(chr(c) for c in unicodes)
# Unified code points (e.g., U+2018-201D) in most fonts are Latin glyphs.
# Enable "fwid" feature to get fullwidth glyphs.
features = ['fwid', 'vert'] if font.is_vertical else ['fwid']
shaper = Shaper(font,
language=language,
script='hani',
features=features)
result = await shaper.shape(text)
result.filter_missing_glyphs()

if not temporary and self._glyph_data_list is not None:
result.ensure_multi_iterations()
self._glyph_data_list.extend(result)

# East Asian spacing applies only to fullwidth glyphs.
em = font.fullwidth_advance
result.filter(lambda g: g.advance == em)

if logger.getEffectiveLevel() <= logging.DEBUG:
result.ensure_multi_iterations()
if len(result):
result.compute_ink_parts(font)
logger.debug('ShapeResult=%s', result)
return result
class _ShapeHelper(object):
def __init__(self, glyph_sets, font, log_name=None):
self._font = font
self._glyph_data_list = glyph_sets._glyph_data_list
self._log_name = log_name

async def shape(self, unicodes, language=None, temporary=False):
font = self._font
text = ''.join(chr(c) for c in unicodes)
# Unified code points (e.g., U+2018-201D) in most fonts are Latin glyphs.
# Enable "fwid" feature to get fullwidth glyphs.
features = ['fwid', 'vert'] if font.is_vertical else ['fwid']
shaper = Shaper(font,
language=language,
script='hani',
features=features,
log_name=self._log_name)
result = await shaper.shape(text)
result.filter_missing_glyphs()

if not temporary and self._glyph_data_list is not None:
result.ensure_multi_iterations()
self._glyph_data_list.extend(result)

# East Asian spacing applies only to fullwidth glyphs.
em = font.fullwidth_advance
result.filter(lambda g: g.advance == em)

return result

async def _glyph_id_set(self, font, unicodes, language=None):
result = await self._shape(font,
unicodes,
language=language,
temporary=True)
shaper = GlyphSets._ShapeHelper(self, font)
result = await shaper.shape(unicodes,
language=language,
temporary=True)
return set(result.glyph_ids)

async def get_opening_closing(self, font, config):
opening = config.cjk_opening | config.quotes_opening
closing = config.cjk_closing | config.quotes_closing
shaper = GlyphSets._ShapeHelper(self, font, log_name='opening_closing')
left, right, middle, space = await asyncio.gather(
self._shape(font, closing), self._shape(font, opening),
self._shape(font, config.cjk_middle),
self._shape(font, config.fullwidth_space))
shaper.shape(closing), shaper.shape(opening),
shaper.shape(config.cjk_middle),
shaper.shape(config.fullwidth_space))
if config.use_ink_bounds:
left.filter_ink_part(font, InkPart.LEFT)
right.filter_ink_part(font, InkPart.RIGHT)
Expand All @@ -200,8 +233,9 @@ async def get_period_comma(self, font, config):
text = config.cjk_period_comma
if not text:
return None
ja, zht = await asyncio.gather(self._shape(font, text, language="JAN"),
self._shape(font, text, language="ZHT"))
shaper = GlyphSets._ShapeHelper(self, font, log_name='period_comma')
ja, zht = await asyncio.gather(shaper.shape(text, language="JAN"),
shaper.shape(text, language="ZHT"))
if config.use_ink_bounds:
ja.filter_ink_part(font, InkPart.LEFT)
zht.filter_ink_part(font, InkPart.MIDDLE)
Expand All @@ -222,8 +256,9 @@ async def get_colon_semicolon(self, font, config):
# Colon/semicolon are at middle for Japanese, left in ZHS.
text = config.cjk_colon_semicolon
trio = GlyphSets()
ja, zhs = await asyncio.gather(self._shape(font, text, language="JAN"),
self._shape(font, text, language="ZHS"))
shaper = GlyphSets._ShapeHelper(self, font, log_name='colon_semicolon')
ja, zhs = await asyncio.gather(shaper.shape(text, language="JAN"),
shaper.shape(text, language="ZHS"))
if config.use_ink_bounds:
trio.add_by_ink_part(itertools.chain(ja, zhs), font)
else:
Expand Down Expand Up @@ -262,8 +297,9 @@ async def get_exclam_question(self, font, config):
return None
# Fullwidth exclamation mark and question mark are on left only in ZHS.
text = config.cjk_exclam_question
ja, zhs = await asyncio.gather(self._shape(font, text, language="JAN"),
self._shape(font, text, language="ZHS"))
shaper = GlyphSets._ShapeHelper(self, font, log_name='exclam_question')
ja, zhs = await asyncio.gather(shaper.shape(text, language="JAN"),
shaper.shape(text, language="ZHS"))
if config.use_ink_bounds:
ja = set()
zhs.filter_ink_part(font, InkPart.LEFT)
Expand Down Expand Up @@ -569,26 +605,15 @@ async def main():
default=0)
args = parser.parse_args()
log_utils.init_logging(args.verbose)

font = Font.load(args.path)
if font.is_collection:
font = font.fonts_in_collection[args.index]
spacing = EastAsianSpacing()
spacing.horizontal._keep_glyph_data()
spacing.vertical._keep_glyph_data()
config = Config.default
await spacing.add_glyphs(font, config)

print('horizontal:', spacing.horizontal._to_str(True))
print('vertical:', spacing.vertical._to_str(True))
spacing.save_glyphs(sys.stdout, separator=', ')

print('Removed GlyphData:')
for name, glyph_sets in (('horizontal', spacing.horizontal),
('vertical', spacing.vertical)):
print(name)
print('\n'.join(
str(glyph_data)
for glyph_data in glyph_sets._removed_glyph_data()))
spacing.save_glyphs(sys.stdout)


if __name__ == '__main__':
Expand Down
6 changes: 5 additions & 1 deletion tests/shaper_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,17 @@ def test_glyph_data_eq():
assert glyph1 == glyph2

glyph3 = GlyphData(2, 1, 1000, 0)
assert glyph1 != glyph3
assert glyph2 != glyph3
glyph4 = GlyphData(2, 1, 1000, 0)
assert glyph3 == glyph4

result1 = ShapeResult((glyph1, glyph3))
result2 = ShapeResult((glyph2, glyph4))
assert result1 == result2

glyph3.advance = 500
assert glyph1 != glyph3
assert glyph3 != glyph4
assert result1 != result2


Expand Down

0 comments on commit 67dfa91

Please sign in to comment.