From 3436c6b1176d520c66d949537ef44703d0c4c544 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 6 Feb 2023 12:35:28 +0100 Subject: [PATCH] [postprocessor:metadata] speed up JSON encoding --- gallery_dl/postprocessor/metadata.py | 17 +++++++--- test/test_postprocessor.py | 47 ++++++++++++++++++++++------ 2 files changed, 50 insertions(+), 14 deletions(-) diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py index 09f54fbe07..cac78b7900 100644 --- a/gallery_dl/postprocessor/metadata.py +++ b/gallery_dl/postprocessor/metadata.py @@ -10,6 +10,7 @@ from .common import PostProcessor from .. import util, formatter +import json import sys import os @@ -46,14 +47,20 @@ def __init__(self, job, options): ext = "txt" elif mode == "jsonl": self.write = self._write_json - self.indent = None - self.ascii = options.get("ascii", False) + self._json_encode = json.JSONEncoder( + ensure_ascii=options.get("ascii", False), + sort_keys=True, indent=None, default=str, + ).encode omode = "a" filename = "data.jsonl" else: self.write = self._write_json - self.indent = options.get("indent", 4) - self.ascii = options.get("ascii", False) + self._json_encode = json.JSONEncoder( + ensure_ascii=options.get("ascii", False), + indent=options.get("indent", 4), + sort_keys=True, + default=str, + ).encode ext = "json" directory = options.get("directory") @@ -191,7 +198,7 @@ def _write_tags(self, fp, kwdict): def _write_json(self, fp, kwdict): if not self.private: kwdict = util.filter_dict(kwdict) - util.dump_json(kwdict, fp, self.ascii, self.indent) + fp.write(self._json_encode(kwdict) + "\n") __postprocessor__ = MetadataPP diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py index 7da2089de7..63dcfc57c4 100644 --- a/test/test_postprocessor.py +++ b/test/test_postprocessor.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright 2019-2022 Mike Fährmann +# Copyright 2019-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -171,9 +171,8 @@ def test_metadata_default(self): # default arguments self.assertEqual(pp.write , pp._write_json) - self.assertEqual(pp.ascii , False) - self.assertEqual(pp.indent , 4) self.assertEqual(pp.extension, "json") + self.assertTrue(callable(pp._json_encode)) def test_metadata_json(self): pp = self._create({ @@ -182,26 +181,56 @@ def test_metadata_json(self): "indent" : 2, "extension": "JSON", }, { - "public" : "hello", - "_private" : "world", + "public" : "hello ワールド", + "_private" : "foo バール", }) self.assertEqual(pp.write , pp._write_json) - self.assertEqual(pp.ascii , True) - self.assertEqual(pp.indent , 2) self.assertEqual(pp.extension, "JSON") + self.assertTrue(callable(pp._json_encode)) with patch("builtins.open", mock_open()) as m: self._trigger() path = self.pathfmt.realpath + ".JSON" m.assert_called_once_with(path, "w", encoding="utf-8") - self.assertEqual(self._output(m), """{ + self.assertEqual(self._output(m), r"""{ "category": "test", "extension": "ext", "filename": "file", - "public": "hello" + "public": "hello \u30ef\u30fc\u30eb\u30c9" } +""") + + def test_metadata_json_options(self): + pp = self._create({ + "mode" : "json", + "ascii" : False, + "private" : True, + "indent" : None, + "open" : "a", + "encoding" : "UTF-8", + "extension": "JSON", + }, { + "public" : "hello ワールド", + "_private" : "foo バール", + }) + + self.assertEqual(pp.write , pp._write_json) + self.assertEqual(pp.extension, "JSON") + self.assertTrue(callable(pp._json_encode)) + + with patch("builtins.open", mock_open()) as m: + self._trigger() + + path = self.pathfmt.realpath + ".JSON" + m.assert_called_once_with(path, "a", encoding="UTF-8") + self.assertEqual(self._output(m), """{\ +"_private": "foo バール", \ +"category": "test", \ +"extension": "ext", \ +"filename": "file", \ +"public": "hello ワールド"} """) def test_metadata_tags(self):