
Commit

Merge pull request #2 from 8uos/develop

Add get_chars_from_ttf.py

8uos authored Apr 6, 2021
2 parents eafc156 + e3aba4c commit 096c262
Showing 10 changed files with 100 additions and 18 deletions.
1 change: 1 addition & 0 deletions data/ttfs/train/MaShanZheng-Regular.txt

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions data/ttfs/train/ZCOOLKuaiLe-Regular.txt

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions data/ttfs/val/MaShanZheng-Regular.txt

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions data/ttfs/val/ZCOOLKuaiLe-Regular.txt

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions datasets/__init__.py
@@ -8,7 +8,7 @@

from .imagefolder_dataset import ImageTestDataset
from .ttf_dataset import TTFTrainDataset, TTFValDataset
from .ttf_utils import get_available_chars, read_font, render
from .ttf_utils import get_filtered_chars, read_font, render
from torch.utils.data import DataLoader


@@ -51,4 +51,4 @@ def get_test_loader(cfg, transform, **kwargs):
return dset, loader


__all__ = ["get_trn_loader", "get_val_loader", "get_test_loader", "get_available_chars", "read_font", "render"]
__all__ = ["get_trn_loader", "get_val_loader", "get_test_loader", "get_filtered_chars", "read_font", "render"]
6 changes: 3 additions & 3 deletions datasets/imagefolder_dataset.py
@@ -11,18 +11,18 @@
import torch
from torch.utils.data import Dataset

from .ttf_utils import get_available_chars, read_font, render
from .ttf_utils import get_filtered_chars, read_font, render


class ImageTestDataset(Dataset):
def __init__(self, data_dir, source_font, gen_chars_file=None, transform=None, extension="png"):

self.data_dir = Path(data_dir)
self.source_font = read_font(source_font)
self.gen_chars = get_available_chars(source_font)
self.gen_chars = get_filtered_chars(source_font)
if gen_chars_file is not None:
gen_chars = json.load(open(gen_chars_file))
self.gen_chars = [x for x in self.gen_chars if x in gen_chars]
self.gen_chars = list(set(self.gen_chars).intersection(set(gen_chars)))

self.font_ref_chars = self.load_data_list(self.data_dir, extension)

29 changes: 22 additions & 7 deletions datasets/ttf_dataset.py
@@ -11,7 +11,7 @@
import torch
from torch.utils.data import Dataset

from .ttf_utils import get_available_chars, read_font, render
from .ttf_utils import read_font, render


class TTFTrainDataset(Dataset):
@@ -53,7 +53,7 @@ def filter_chars(self):
filtered_chars = list(char_key_dict)
key_char_dict = {}
for key, chars in self.key_char_dict.items():
key_char_dict[key] = [c for c in chars if c in filtered_chars]
key_char_dict[key] = list(set(chars).intersection(filtered_chars))

return key_char_dict, char_key_dict

@@ -122,11 +122,18 @@ class TTFValDataset(Dataset):
def __init__(self, data_dir, source_font, char_filter, n_ref=4, n_gen=20, transform=None):

self.data_dir = data_dir
self.source_font = read_font(source_font)
self.source_font = read_font(source_font) if source_font is not None else None
self.n_ref = n_ref
self.n_gen = n_gen

self.key_font_dict, self.key_char_dict = load_data_list(data_dir, char_filter=char_filter)
if self.source_font is None:
self.char_key_dict = {}
for key, charlist in self.key_char_dict.items():
for char in charlist:
self.char_key_dict.setdefault(char, []).append(key)

self.key_char_dict, self.char_key_dict = self.filter_chars()
self.ref_chars, self.gen_chars = self.sample_ref_gen_chars(self.key_char_dict)

self.gen_char_dict = {k: self.gen_chars for k in self.key_font_dict}
@@ -148,7 +155,13 @@ def __getitem__(self, index):
ref_imgs = torch.stack([self.transform(render(font, c))
for c in self.ref_chars])

source_img = self.transform(render(self.source_font, char))
if self.source_font is not None:
source_font = self.source_font
else:
source_key = random.choice(self.char_key_dict[char])
source_font = self.key_font_dict[source_key]

source_img = self.transform(render(source_font, char))
trg_img = self.transform(render(font, char))

ret = {
@@ -202,9 +215,11 @@ def load_data_list(data_dir, char_filter=None):
font = read_font(font_path)
key_font_dict[font_path.stem] = font

chars = get_available_chars(font_path)
with open(str(font_path).replace(".ttf", ".txt")) as f:
chars = f.read()

if char_filter is not None:
chars = [x for x in chars if x in char_filter]
key_char_dict[font_path.stem] = chars
chars = set(chars).intersection(char_filter)
key_char_dict[font_path.stem] = list(chars)

return key_font_dict, key_char_dict
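
For context (illustrative only, not part of the diff): after this change, `load_data_list` no longer inspects the .ttf itself for its available characters; it reads a sibling `FontName.txt` holding them as one string and optionally intersects them with `char_filter`. A minimal sketch of calling it directly, assuming `load_data_list` is importable from `datasets.ttf_dataset`; the directory and filter characters are placeholders:

```
# Sketch only; the directory and the filter characters are hypothetical.
from datasets.ttf_dataset import load_data_list

key_font_dict, key_char_dict = load_data_list(
    "data/ttfs/val",        # expects FontName.ttf + FontName.txt pairs in this directory
    char_filter="天地人",    # optional: keep only characters that also appear in this set
)
for key, chars in key_char_dict.items():
    print(key, len(chars))  # font stem -> number of usable characters
```
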
21 changes: 18 additions & 3 deletions datasets/ttf_utils.py
@@ -5,16 +5,31 @@
"""

from fontTools.ttLib import TTFont
from itertools import chain
from PIL import Image, ImageFont, ImageDraw
import numpy as np


def get_available_chars(fontfile):
def get_defined_chars(fontfile):
ttf = TTFont(fontfile)
chars = sorted(set(chain.from_iterable([chr(y) for y in x.cmap.keys()] for x in ttf["cmap"].tables)))
chars = [chr(y) for y in ttf["cmap"].tables[0].cmap.keys()]
return chars


def get_filtered_chars(fontpath):
ttf = read_font(fontpath)
defined_chars = get_defined_chars(fontpath)
avail_chars = []

for char in defined_chars:
img = np.array(render(ttf, char))
if img.mean() == 255.:
pass
else:
avail_chars.append(char.encode('utf-16', 'surrogatepass').decode('utf-16'))

return avail_chars


def read_font(fontfile, size=150):
font = ImageFont.truetype(str(fontfile), size=size)
return font
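
For context (illustrative only): `get_defined_chars` returns every character listed in the font's cmap table, while `get_filtered_chars` renders each of them and keeps only those whose glyph is not blank. A minimal usage sketch; the font path is an assumption based on the example data in this repository:

```
# Sketch only; the font path is an assumption based on the example data.
from datasets.ttf_utils import get_defined_chars, get_filtered_chars

font_path = "data/ttfs/train/MaShanZheng-Regular.ttf"
defined = get_defined_chars(font_path)    # every character in the cmap table
filtered = get_filtered_chars(font_path)  # only characters whose rendered glyph is non-blank
print(f"{len(defined)} defined, {len(filtered)} renderable")  # renders every glyph, so it can take a while
```
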
31 changes: 31 additions & 0 deletions get_chars_from_ttf.py
@@ -0,0 +1,31 @@
"""
MX-Font
Copyright (c) 2021-present NAVER Corp.
MIT license
"""

import argparse
from tqdm import tqdm
from pathlib import Path

from datasets import get_filtered_chars


def main():
parser = argparse.ArgumentParser()
parser.add_argument("--root_dir")
args = parser.parse_args()

print(args.root_dir)
ttffiles = list(Path(args.root_dir).rglob("*.ttf"))

for ttffile in tqdm(ttffiles):
filename = ttffile.stem
dirname = ttffile.parent
avail_chars = get_filtered_chars(ttffile)
with open((dirname / (filename+".txt")), "w") as f:
f.write("".join(avail_chars))


if __name__ == "__main__":
main()
23 changes: 20 additions & 3 deletions readme.md
@@ -34,14 +34,29 @@ conda install numpy scipy scikit-image tqdm jsonlib-python3 fonttools


# Usage
The examples of datasets are in *(./data)*

Note that we only provide example font files, not the font files used to train the provided weights *(generator.pth)*.
The example font files are downloaded from [here](https://www.freechinesefont.com/tag/commercial-use-ok/).

## Training
### Preparing font files (.ttf)
## Preparing Data
* Example datasets are in *(./data)*.

### Font files (.ttf)
* Prepare the TrueType font files (.ttf) to use for training and validation.
* Put the training font files and validation font files into separate directories.

### The text files containing the available characters of .ttf files (.txt)
* If you already have the list of available characters for a .ttf file, save it to a text file (.txt) with the same name, in the same directory as the .ttf file. The text file is simply the available characters concatenated into a single string (a short inspection sketch follows this subsection).
* (example) **TTF file**: data/ttfs/train/MaShanZheng-Regular.ttf, **its available characters**: data/ttfs/train/MaShanZheng-Regular.txt
* You can also generate these character files automatically with `get_chars_from_ttf.py`:
```
# Generating the available characters file
python get_chars_from_ttf.py --root_dir path/to/ttf/dir
```
* `--root_dir`: the root directory to search for .ttf files. All .ttf files under this directory and its subdirectories are processed.
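
For illustration only (the path assumes the example fonts shipped with this repository), a generated .txt file is just the usable characters written as a single string, so it can be inspected like this:

```
# Sketch only; the path assumes the example data shipped with the repository.
with open("data/ttfs/train/MaShanZheng-Regular.txt", encoding="utf-8") as f:
    chars = f.read()
print(len(chars), chars[:10])  # number of usable characters and a small sample
```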

### The json files with decomposition information (.json)
* The files for the decomposition information are needed.
* The files for the Chinese characters are provided. (data/chn_decomposition.json, data/primals.json)
* If you want to train the model with a language other than Chinese, the files for the decomposition rule (see below) are also needed.
@@ -55,6 +70,8 @@ The example font files are downloaded from [here](https://www.freechinesefont.co
* example: ['亠', '厶', '川', '囗', '口']


## Training

### Modify the configuration file (cfgs/train.yaml)

```
