Skip to content

Commit

Permalink
[postprocessor:metadata] speed up JSON encoding
Browse files (browse the repository at this point in the history)
  • Loading branch information
mikf committed Feb 6, 2023
1 parent 762a689 commit 3436c6b
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 14 deletions.
17 changes: 12 additions & 5 deletions gallery_dl/postprocessor/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from .common import PostProcessor
from .. import util, formatter
+ import json
import sys
import os

Expand Down Expand Up @@ -46,14 +47,20 @@ def __init__(self, job, options):
ext = "txt"
elif mode == "jsonl":
self.write = self._write_json
- self.indent = None
- self.ascii = options.get("ascii", False)
+ self._json_encode = json.JSONEncoder(
+ ensure_ascii=options.get("ascii", False),
+ sort_keys=True, indent=None, default=str,
+ ).encode
omode = "a"
filename = "data.jsonl"
else:
self.write = self._write_json
- self.indent = options.get("indent", 4)
- self.ascii = options.get("ascii", False)
+ self._json_encode = json.JSONEncoder(
+ ensure_ascii=options.get("ascii", False),
+ indent=options.get("indent", 4),
+ sort_keys=True,
+ default=str,
+ ).encode
ext = "json"

directory = options.get("directory")
Expand Down Expand Up @@ -191,7 +198,7 @@ def _write_tags(self, fp, kwdict):
def _write_json(self, fp, kwdict):
if not self.private:
kwdict = util.filter_dict(kwdict)
- util.dump_json(kwdict, fp, self.ascii, self.indent)
+ fp.write(self._json_encode(kwdict) + "\n")


__postprocessor__ = MetadataPP
47 changes: 38 additions & 9 deletions test/test_postprocessor.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

- # Copyright 2019-2022 Mike Fährmann
+ # Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
Expand Down Expand Up @@ -171,9 +171,8 @@ def test_metadata_default(self):

# default arguments
self.assertEqual(pp.write , pp._write_json)
- self.assertEqual(pp.ascii , False)
- self.assertEqual(pp.indent , 4)
  self.assertEqual(pp.extension, "json")
+ self.assertTrue(callable(pp._json_encode))

def test_metadata_json(self):
pp = self._create({
Expand All @@ -182,26 +181,56 @@ def test_metadata_json(self):
"indent" : 2,
"extension": "JSON",
}, {
- "public" : "hello",
- "_private" : "world",
+ "public" : "hello ワールド",
+ "_private" : "foo バール",
})

self.assertEqual(pp.write , pp._write_json)
- self.assertEqual(pp.ascii , True)
- self.assertEqual(pp.indent , 2)
  self.assertEqual(pp.extension, "JSON")
+ self.assertTrue(callable(pp._json_encode))

with patch("builtins.open", mock_open()) as m:
self._trigger()

path = self.pathfmt.realpath + ".JSON"
m.assert_called_once_with(path, "w", encoding="utf-8")
- self.assertEqual(self._output(m), """{
+ self.assertEqual(self._output(m), r"""{
"category": "test",
"extension": "ext",
"filename": "file",
- "public": "hello"
+ "public": "hello \u30ef\u30fc\u30eb\u30c9"
}
""")

+ def test_metadata_json_options(self):
+ pp = self._create({
+ "mode" : "json",
+ "ascii" : False,
+ "private" : True,
+ "indent" : None,
+ "open" : "a",
+ "encoding" : "UTF-8",
+ "extension": "JSON",
+ }, {
+ "public" : "hello ワールド",
+ "_private" : "foo バール",
+ })
+
+ self.assertEqual(pp.write , pp._write_json)
+ self.assertEqual(pp.extension, "JSON")
+ self.assertTrue(callable(pp._json_encode))
+
+ with patch("builtins.open", mock_open()) as m:
+ self._trigger()
+
+ path = self.pathfmt.realpath + ".JSON"
+ m.assert_called_once_with(path, "a", encoding="UTF-8")
+ self.assertEqual(self._output(m), """{\
+ "_private": "foo バール", \
+ "category": "test", \
+ "extension": "ext", \
+ "filename": "file", \
+ "public": "hello ワールド"}
+ """)

def test_metadata_tags(self):
Expand Down

0 comments on commit 3436c6b

Please sign in to comment.