From 17c9c47ca051f0929f7405ee41c18a54445988c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Thu, 13 Jan 2022 16:45:46 +0100
Subject: [PATCH] [hitomi] fix 'tag' extraction (fixes #2189)

---
 gallery_dl/extractor/hitomi.py | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py
index ce6c7ce8a3..e132bf96cb 100644
--- a/gallery_dl/extractor/hitomi.py
+++ b/gallery_dl/extractor/hitomi.py
@@ -159,6 +159,7 @@ class HitomiTagExtractor(Extractor):
     """Extractor for galleries from tag searches on hitomi.la"""
     category = "hitomi"
     subcategory = "tag"
+    root = "https://hitomi.la"
     pattern = (r"(?:https?://)?hitomi\.la/"
                r"(tag|artist|group|series|type|character)/"
                r"([^/?#]+)\.html")
@@ -183,12 +184,29 @@ def __init__(self, match):
         self.tag = tag
 
     def items(self):
-        url = "https://ltn.hitomi.la/{}/{}.nozomi".format(self.type, self.tag)
         data = {"_extractor": HitomiGalleryExtractor}
+        nozomi_url = "https://ltn.hitomi.la/{}/{}.nozomi".format(
+            self.type, self.tag)
+        headers = {
+            "Origin": self.root,
+            "Cache-Control": "max-age=0",
+        }
 
-        for gallery_id in decode_nozomi(self.request(url).content):
-            url = "https://hitomi.la/galleries/{}.html".format(gallery_id)
-            yield Message.Queue, url, data
+        offset = 0
+        while True:
+            headers["Referer"] = "{}/{}/{}.html?page={}".format(
+                self.root, self.type, self.tag, offset // 100 + 1)
+            headers["Range"] = "bytes={}-{}".format(offset, offset+99)
+            nozomi = self.request(nozomi_url, headers=headers).content
+
+            for gallery_id in decode_nozomi(nozomi):
+                gallery_url = "{}/galleries/{}.html".format(
+                    self.root, gallery_id)
+                yield Message.Queue, gallery_url, data
+
+            if len(nozomi) < 100:
+                return
+            offset += 100
 
 
 @memcache()