diff --git a/docs/configuration.rst b/docs/configuration.rst index edae17111b..3a049acbde 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -385,6 +385,7 @@ Type Default * ``"0.5-1.5"`` ``[Danbooru]``, ``[E621]``, ``[foolfuuka]:search``, ``itaku``, + ``koharu``, ``newgrounds``, ``[philomena]``, ``pixiv:novel``, ``plurk``, ``poipiku`` , ``pornpics``, ``soundgasm``, ``urlgalleries``, ``vk``, ``zerochan`` @@ -438,6 +439,7 @@ Description * ``imgbb`` * ``inkbunny`` * ``kemonoparty`` + * ``koharu`` * ``mangadex`` * ``mangoxo`` * ``pillowfort`` diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 1058011192..d78dccd6a2 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -478,7 +478,7 @@ Consider all listed sites to potentially be NSFW. Koharu https://koharu.to/ - Galleries, Search Results + Favorites, Galleries, Search Results diff --git a/gallery_dl/extractor/koharu.py b/gallery_dl/extractor/koharu.py index 675f8e70ef..ac238a2e68 100644 --- a/gallery_dl/extractor/koharu.py +++ b/gallery_dl/extractor/koharu.py @@ -10,15 +10,53 @@ from .common import GalleryExtractor, Extractor, Message from .. import text, exception +from ..cache import cache BASE_PATTERN = r"(?i)(?:https?://)?(?:koharu|anchira)\.to" -class KoharuGalleryExtractor(GalleryExtractor): - """Extractor for koharu galleries""" +class KoharuExtractor(Extractor): + """Base class for koharu extractors""" category = "koharu" root = "https://koharu.to" root_api = "https://api.koharu.to" + request_interval = (0.5, 1.5) + + def _init(self): + self.headers = { + "Accept" : "*/*", + "Referer": self.root + "/", + "Origin" : self.root, + } + + def _pagination(self, endpoint, params): + url_api = self.root_api + endpoint + + while True: + data = self.request( + url_api, params=params, headers=self.headers).json() + + try: + entries = data["entries"] + except KeyError: + return + + for entry in entries: + url = "{}/g/{}/{}".format( + self.root, entry["id"], entry["public_key"]) + entry["_extractor"] = KoharuGalleryExtractor + yield Message.Queue, url, entry + + try: + if data["limit"] * data["page"] >= data["total"]: + return + except Exception: + pass + params["page"] += 1 + + +class KoharuGalleryExtractor(KoharuExtractor, GalleryExtractor): + """Extractor for koharu galleries""" filename_fmt = "{num:>03}.{extension}" directory_fmt = ("{category}", "{id} {title}") archive_fmt = "{id}_{num}" @@ -130,46 +168,47 @@ def _select_format(self, formats): return fmt -class KoharuSearchExtractor(Extractor): +class KoharuSearchExtractor(KoharuExtractor): """Extractor for koharu search results""" - category = "koharu" subcategory = "search" - root = "https://koharu.to" - root_api = "https://api.koharu.to" - request_interval = (1.0, 2.0) pattern = BASE_PATTERN + r"/\?([^#]*)" example = "https://koharu.to/?s=QUERY" - def _init(self): - self.headers = { - "Accept" : "*/*", - "Referer": self.root + "/", - "Origin" : self.root, - } + def items(self): + params = text.parse_query(self.groups[0]) + params["page"] = text.parse_int(params.get("page"), 1) + return self._pagination("/books", params) + + +class KoharuFavoriteExtractor(KoharuExtractor): + """Extractor for koharu favorites""" + subcategory = "favorite" + pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?" + example = "https://koharu.to/favorites" def items(self): - url_api = self.root_api + "/books" + self.login() + params = text.parse_query(self.groups[0]) params["page"] = text.parse_int(params.get("page"), 1) + return self._pagination("/favorites", params) - while True: - data = self.request( - url_api, params=params, headers=self.headers).json() + def login(self): + username, password = self._get_auth_info() + if username: + self.headers["Authorization"] = \ + "Bearer " + self._login_impl(username, password) + return - try: - entries = data["entries"] - except KeyError: - return + raise exception.AuthenticationError("Username and password required") - for entry in entries: - url = "{}/g/{}/{}/".format( - self.root, entry["id"], entry["public_key"]) - entry["_extractor"] = KoharuGalleryExtractor - yield Message.Queue, url, entry + @cache(maxage=28*86400, keyarg=1) + def _login_impl(self, username, password): + self.log.info("Logging in as %s", username) - try: - if data["limit"] * data["page"] >= data["total"]: - return - except Exception: - pass - params["page"] += 1 + url = "https://auth.koharu.to/login" + data = {"uname": username, "passwd": password} + response = self.request( + url, method="POST", headers=self.headers, data=data) + + return response.json()["session"] diff --git a/test/results/koharu.py b/test/results/koharu.py index e972b2a8b9..91688ebfbe 100644 --- a/test/results/koharu.py +++ b/test/results/koharu.py @@ -85,4 +85,26 @@ "#count" : ">= 50", }, +{ + "#url" : "https://koharu.to/favorites", + "#category": ("", "koharu", "favorite"), + "#class" : koharu.KoharuFavoriteExtractor, + "#pattern" : koharu.KoharuGalleryExtractor.pattern, + "#auth" : True, + "#urls" : [ + "https://koharu.to/g/14216/6c67076fdd45", + ], +}, + +{ + "#url" : "https://koharu.to/favorites?cat=6&sort=4", + "#category": ("", "koharu", "favorite"), + "#class" : koharu.KoharuFavoriteExtractor, + "#pattern" : koharu.KoharuGalleryExtractor.pattern, + "#auth" : True, + "#urls" : [ + "https://koharu.to/g/14216/6c67076fdd45", + ], +}, + )