diff --git a/docs/configuration.rst b/docs/configuration.rst
index edae17111b..3a049acbde 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -385,6 +385,7 @@ Type
Default
* ``"0.5-1.5"``
``[Danbooru]``, ``[E621]``, ``[foolfuuka]:search``, ``itaku``,
+ ``koharu``,
``newgrounds``, ``[philomena]``, ``pixiv:novel``, ``plurk``,
``poipiku`` , ``pornpics``, ``soundgasm``, ``urlgalleries``,
``vk``, ``zerochan``
@@ -438,6 +439,7 @@ Description
* ``imgbb``
* ``inkbunny``
* ``kemonoparty``
+ * ``koharu``
* ``mangadex``
* ``mangoxo``
* ``pillowfort``
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 1058011192..d78dccd6a2 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -478,7 +478,7 @@ Consider all listed sites to potentially be NSFW.
Koharu |
https://koharu.to/ |
- Galleries, Search Results |
+ Favorites, Galleries, Search Results |
|
diff --git a/gallery_dl/extractor/koharu.py b/gallery_dl/extractor/koharu.py
index 675f8e70ef..ac238a2e68 100644
--- a/gallery_dl/extractor/koharu.py
+++ b/gallery_dl/extractor/koharu.py
@@ -10,15 +10,53 @@
from .common import GalleryExtractor, Extractor, Message
from .. import text, exception
+from ..cache import cache
BASE_PATTERN = r"(?i)(?:https?://)?(?:koharu|anchira)\.to"
-class KoharuGalleryExtractor(GalleryExtractor):
- """Extractor for koharu galleries"""
+class KoharuExtractor(Extractor):
+    """Base class for koharu extractors
+
+    Provides the site and API roots, 'request_interval', the headers
+    passed along with API requests, and the listing pagination helper
+    shared by the extractor classes of this module.
+    """
     category = "koharu"
     root = "https://koharu.to"
     root_api = "https://api.koharu.to"
+    request_interval = (0.5, 1.5)
+
+    def _init(self):
+        """Set up the headers passed to self.request() by this module"""
+        self.headers = {
+            "Accept" : "*/*",
+            "Referer": self.root + "/",
+            "Origin" : self.root,
+        }
+
+    def _pagination(self, endpoint, params):
+        """Yield a Message.Queue item for every entry of a listing
+
+        'endpoint' is appended to 'root_api'. 'params' is sent as the
+        request parameters and must contain an integer "page" value,
+        which gets incremented in place after each processed response.
+        """
+        url_api = self.root_api + endpoint
+
+        while True:
+            data = self.request(
+                url_api, params=params, headers=self.headers).json()
+
+            try:
+                entries = data["entries"]
+            except KeyError:
+                # response without "entries": nothing (more) to yield
+                return
+
+            for entry in entries:
+                url = "{}/g/{}/{}".format(
+                    self.root, entry["id"], entry["public_key"])
+                entry["_extractor"] = KoharuGalleryExtractor
+                yield Message.Queue, url, entry
+
+            try:
+                if data["limit"] * data["page"] >= data["total"]:
+                    return
+            except (KeyError, TypeError):
+                # best effort: missing or non-numeric paging metadata
+                # must not abort the run; continue with the next page
+                pass
+            params["page"] += 1
+
+
+class KoharuGalleryExtractor(KoharuExtractor, GalleryExtractor):
+ """Extractor for koharu galleries"""
filename_fmt = "{num:>03}.{extension}"
directory_fmt = ("{category}", "{id} {title}")
archive_fmt = "{id}_{num}"
@@ -130,46 +168,47 @@ def _select_format(self, formats):
return fmt
-class KoharuSearchExtractor(Extractor):
+class KoharuSearchExtractor(KoharuExtractor):
     """Extractor for koharu search results"""
-    category = "koharu"
     subcategory = "search"
-    root = "https://koharu.to"
-    root_api = "https://api.koharu.to"
-    request_interval = (1.0, 2.0)
     pattern = BASE_PATTERN + r"/\?([^#]*)"
     example = "https://koharu.to/?s=QUERY"
-    def _init(self):
-        self.headers = {
-            "Accept" : "*/*",
-            "Referer": self.root + "/",
-            "Origin" : self.root,
-        }
+    def items(self):
+        """Queue the galleries listed for the query string of the URL"""
+        query = text.parse_query(self.groups[0])
+        # "page" is taken from the URL query; 1 is passed as fallback
+        query["page"] = text.parse_int(query.get("page"), 1)
+        return self._pagination("/books", query)
+
+
+class KoharuFavoriteExtractor(KoharuExtractor):
+    """Extractor for koharu favorites"""
+    subcategory = "favorite"
+    pattern = BASE_PATTERN + r"/favorites(?:\?([^#]*))?"
+    example = "https://koharu.to/favorites"
     def items(self):
-        url_api = self.root_api + "/books"
+        # authenticate first; login() raises if no username is available
+        self.login()
+
     params = text.parse_query(self.groups[0])
     params["page"] = text.parse_int(params.get("page"), 1)
+        return self._pagination("/favorites", params)
-        while True:
-            data = self.request(
-                url_api, params=params, headers=self.headers).json()
+    def login(self):
+        """Add a 'Bearer' Authorization header to self.headers
+
+        Raises exception.AuthenticationError when _get_auth_info()
+        does not provide a username.
+        """
+        username, password = self._get_auth_info()
+        if not username:
+            raise exception.AuthenticationError(
+                "Username and password required")
-            try:
-                entries = data["entries"]
-            except KeyError:
-                return
+        token = self._login_impl(username, password)
+        self.headers["Authorization"] = "Bearer " + token
-            for entry in entries:
-                url = "{}/g/{}/{}/".format(
-                    self.root, entry["id"], entry["public_key"])
-                entry["_extractor"] = KoharuGalleryExtractor
-                yield Message.Queue, url, entry
+    # 28*86400 equals 28 days, assuming 'maxage' is given in seconds
+    @cache(maxage=28*86400, keyarg=1)
+    def _login_impl(self, username, password):
+        """POST the credentials and return the response's "session" value"""
+        self.log.info("Logging in as %s", username)
-            try:
-                if data["limit"] * data["page"] >= data["total"]:
-                    return
-            except Exception:
-                pass
-            params["page"] += 1
+        response = self.request(
+            "https://auth.koharu.to/login", method="POST",
+            headers=self.headers,
+            data={"uname": username, "passwd": password})
+
+        return response.json()["session"]
diff --git a/test/results/koharu.py b/test/results/koharu.py
index e972b2a8b9..91688ebfbe 100644
--- a/test/results/koharu.py
+++ b/test/results/koharu.py
@@ -85,4 +85,26 @@
"#count" : ">= 50",
},
+{
+ "#url" : "https://koharu.to/favorites",
+ "#category": ("", "koharu", "favorite"),
+ "#class" : koharu.KoharuFavoriteExtractor,
+ "#pattern" : koharu.KoharuGalleryExtractor.pattern,
+ "#auth" : True,
+ "#urls" : [
+ "https://koharu.to/g/14216/6c67076fdd45",
+ ],
+},
+
+{
+ "#url" : "https://koharu.to/favorites?cat=6&sort=4",
+ "#category": ("", "koharu", "favorite"),
+ "#class" : koharu.KoharuFavoriteExtractor,
+ "#pattern" : koharu.KoharuGalleryExtractor.pattern,
+ "#auth" : True,
+ "#urls" : [
+ "https://koharu.to/g/14216/6c67076fdd45",
+ ],
+},
+
)