# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://2ch.hk/"""

from .common import Extractor, Message
from .. import text, util


class _2chThreadExtractor(Extractor):
    """Extractor for 2ch threads"""
    category = "2ch"
    subcategory = "thread"
    root = "https://2ch.hk"
    directory_fmt = ("{category}", "{board}", "{thread} {title}")
    filename_fmt = "{tim}{filename:? //}.{extension}"
    archive_fmt = "{board}_{thread}_{tim}"
    pattern = r"(?:https?://)?2ch\.hk/([^/?#]+)/res/(\d+)"
    example = "https://2ch.hk/a/res/12345.html"

    def __init__(self, match):
        """Store the board and thread captured by 'pattern'"""
        Extractor.__init__(self, match)
        self.board = match.group(1)
        self.thread = match.group(2)

    def items(self):
        """Yield one Directory message, then a Url message per attached file

        The thread is read from '<root>/<board>/res/<thread>.json'; only
        the first entry of its "threads" list is used.
        """
        api_url = "{}/{}/res/{}.json".format(
            self.root, self.board, self.thread)
        response = self.request(api_url)
        posts = response.json()["threads"][0]["posts"]

        # the first post supplies the title: its subject if non-empty,
        # otherwise its comment with HTML removed
        first_post = posts[0]
        title = first_post.get("subject")
        if not title:
            title = text.remove_html(first_post["comment"])

        thread = {
            "board" : self.board,
            "thread": self.thread,
            "title" : text.unescape(title)[:50],
        }
        yield Message.Directory, thread

        for post in posts:
            attachments = post.get("files")
            if not attachments:
                # nothing to download for this post
                continue

            # Move the post-level "name" to "post_name" and drop "files"
            # before the post is merged into each file dict below;
            # otherwise the file's own "name" entry would be overwritten.
            post["post_name"] = post.pop("name")
            post["date"] = text.parse_timestamp(post["timestamp"])
            del post["files"]

            for media in attachments:
                # thread metadata first, post metadata second
                media.update(thread)
                media.update(post)

                # "filename": "fullname" up to its last dot
                media["filename"] = media["fullname"].rpartition(".")[0]
                # "tim" / "extension": "name" split at its last dot
                stem, _, ext = media["name"].rpartition(".")
                media["tim"] = stem
                media["extension"] = ext

                yield Message.Url, self.root + media["path"], media


class _2chBoardExtractor(Extractor):
    """Extractor for 2ch boards"""
    category = "2ch"
    subcategory = "board"
    root = "https://2ch.hk"
    pattern = r"(?:https?://)?2ch\.hk/([^/?#]+)/?$"
    example = "https://2ch.hk/a/"

    def __init__(self, match):
        """Store the board captured by 'pattern'"""
        Extractor.__init__(self, match)
        self.board = match.group(1)

    def items(self):
        """Yield a Queue message for every thread listed on the board"""
        # index page
        index_url = "{}/{}/index.json".format(self.root, self.board)
        index = self.request(index_url).json()
        yield from self._queue_threads(index)

        # pages 1..n
        for num in util.advance(index["pages"], 1):
            page_url = "{}/{}/{}.json".format(self.root, self.board, num)
            yield from self._queue_threads(self.request(page_url).json())

    def _queue_threads(self, page):
        """Yield a Queue message for each entry in page["threads"]

        'page' itself is passed along as the message metadata, with
        "_extractor" set to the thread extractor class.
        """
        page["_extractor"] = _2chThreadExtractor
        for thread in page["threads"]:
            thread_url = "{}/{}/res/{}.html".format(
                self.root, self.board, thread["thread_num"])
            yield Message.Queue, thread_url, page
a/test/results/coomerparty.py b/test/results/coomerparty.py index dfc4a188bc..87c932e83d 100644 --- a/test/results/coomerparty.py +++ b/test/results/coomerparty.py @@ -8,12 +8,19 @@ __tests__ = ( +{ + "#url" : "https://coomer.su/onlyfans/user/alinity/post/125962203", + "#comment" : "coomer (#2100)", + "#category": ("", "coomerparty", "onlyfans"), + "#class" : kemonoparty.KemonopartyPostExtractor, + "#urls" : "https://coomer.su/data/7d/3f/7d3fd9804583dc224968c0591163ec91794552b04f00a6c2f42a15b68231d5a8.jpg", +}, + { "#url" : "https://coomer.party/onlyfans/user/alinity/post/125962203", - "#comment" : "coomer.party (#2100)", "#category": ("", "coomerparty", "onlyfans"), "#class" : kemonoparty.KemonopartyPostExtractor, - "#pattern" : r"https://coomer\.party/data/7d/3f/7d3fd9804583dc224968c0591163ec91794552b04f00a6c2f42a15b68231d5a8\.jpg", + "#urls" : "https://coomer.party/data/7d/3f/7d3fd9804583dc224968c0591163ec91794552b04f00a6c2f42a15b68231d5a8.jpg", }, ) diff --git a/test/results/kemonoparty.py b/test/results/kemonoparty.py index ad94a4960a..5bd541a3ae 100644 --- a/test/results/kemonoparty.py +++ b/test/results/kemonoparty.py @@ -297,6 +297,7 @@ "#category": ("", "kemonoparty", "favorite"), "#class" : kemonoparty.KemonopartyFavoriteExtractor, "#pattern" : kemonoparty.KemonopartyUserExtractor.pattern, + "#auth" : True, "#count" : 3, "#sha1_url": "f4b5b796979bcba824af84206578c79101c7f0e1", }, @@ -306,6 +307,7 @@ "#category": ("", "kemonoparty", "favorite"), "#class" : kemonoparty.KemonopartyFavoriteExtractor, "#pattern" : kemonoparty.KemonopartyPostExtractor.pattern, + "#auth" : True, "#count" : 3, "#sha1_url": "ecfccf5f0d50b8d14caa7bbdcf071de5c1e5b90f", }, @@ -315,6 +317,7 @@ "#category": ("", "kemonoparty", "favorite"), "#class" : kemonoparty.KemonopartyFavoriteExtractor, "#pattern" : kemonoparty.KemonopartyPostExtractor.pattern, + "#auth" : True, "#count" : 3, "#sha1_url": "4be8e84cb384a907a8e7997baaf6287b451783b5", }, diff --git a/test/test_results.py 
b/test/test_results.py index 575fc0f3eb..12fe59d5dc 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -38,6 +38,15 @@ }, } +AUTH = { + "pixiv", + "nijie", + "horne", + "seiga", + "instagram", + "twitter", +} + class TestExtractorResults(unittest.TestCase): @@ -76,6 +85,18 @@ def _run_test(self, result): for key, value in result["#options"].items(): key = key.split(".") config.set(key[:-1], key[-1], value) + + requires_auth = result.get("#auth") + if requires_auth is None: + requires_auth = (result["#category"][1] in AUTH) + if requires_auth: + extr = result["#class"].from_url(result["#url"]) + if not any(extr.config(key) for key in ( + "username", "cookies", "api-key", "client-id")): + msg = "no auth" + self._skipped.append((result["#url"], msg)) + self.skipTest(msg) + if "#range" in result: config.set((), "image-range" , result["#range"]) config.set((), "chapter-range", result["#range"]) @@ -371,7 +392,7 @@ def load_test_config(): except FileNotFoundError: pass except Exception as exc: - print("Error when loading {}: {}: {}".format( + sys.exit("Error when loading {}: {}: {}".format( path, exc.__class__.__name__, exc)) @@ -422,7 +443,7 @@ def test(self): setattr(TestExtractorResults, method.__name__, method) -load_test_config() generate_tests() if __name__ == "__main__": + load_test_config() unittest.main(warnings="ignore")