[bunkr] fix extraction (#2732)

move bunkr.is code to its own module
mikf · Jul 15, 2022 · 46f11a3
1 parent baf3815
commit 46f11a3
Show file tree

Hide file tree

Showing 4 changed files with 97 additions and 38 deletions.
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
@@ -103,6 +103,12 @@ Consider all sites to be NSFW unless otherwise known.
     <td>Blogs, Posts, Search Results</td>
     <td></td>
 </tr>
+<tr>
+    <td>Bunkr</td>
+    <td>https://bunkr.is/</td>
+    <td>Albums</td>
+    <td></td>
+</tr>
 <tr>
     <td>Comic Vine</td>
     <td>https://comicvine.gamespot.com/</td>
@@ -1261,12 +1267,6 @@ Consider all sites to be NSFW unless otherwise known.
 <tr>
     <td colspan="4"><strong>lolisafe and chibisafe</strong></td>
 </tr>
-<tr>
-    <td>Bunkr</td>
-    <td>https://app.bunkr.is/</td>
-    <td>Albums</td>
-    <td></td>
-</tr>
 <tr>
     <td>ZzZz</td>
     <td>https://zz.ht/</td>

diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
@@ -25,6 +25,7 @@
     "bcy",
     "behance",
     "blogger",
+    "bunkr",
     "comicvine",
     "cyberdrop",
     "danbooru",

diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py
@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2022 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://bunkr.is/"""
+
+from .lolisafe import LolisafeAlbumExtractor
+from .. import text
+import json
+
+
+class BunkrAlbumExtractor(LolisafeAlbumExtractor):
+    """Extractor for bunkr.is albums"""
+    category = "bunkr"
+    root = "https://app.bunkr.is"
+    pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:is|to)/a/([^/?#]+)"
+    test = (
+        ("https://app.bunkr.is/a/Lktg9Keq", {
+            "pattern": r"https://cdn\.bunkr\.is/test-テスト-\"&>-QjgneIQv\.png",
+            "content": "0c8768055e4e20e7c7259608b67799171b691140",
+            "keyword": {
+                "album_id": "Lktg9Keq",
+                "album_name": 'test テスト "&>',
+                "count": 1,
+                "filename": 'test-テスト-"&>-QjgneIQv',
+                "id": "QjgneIQv",
+                "name": 'test-テスト-"&>',
+                "num": int,
+            },
+        }),
+        # mp4 (#2239)
+        ("https://bunkr.is/a/ptRHaCn2", {
+            "pattern": r"https://media-files\.bunkr\.is/_-RnHoW69L\.mp4",
+            "content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",
+        }),
+        ("https://bunkr.to/a/Lktg9Keq"),
+    )
+
+    def fetch_album(self, album_id):
+        if "//app." in self.root:
+            return self._fetch_album_api(album_id)
+        else:
+            return self._fetch_album_site(album_id)
+
+    def _fetch_album_api(self, album_id):
+        files, data = LolisafeAlbumExtractor.fetch_album(self, album_id)
+
+        for file in files:
+            url = file["file"]
+            if url.endswith(".mp4"):
+                file["file"] = url.replace(
+                    "//cdn.bunkr.is/", "//media-files.bunkr.is/", 1)
+            else:
+                file["_fallback"] = (url.replace("//cdn.", "//cdn3.", 1),)
+
+        return files, data
+
+    def _fetch_album_site(self, album_id):
+        url = self.root + "/a/" + self.album_id
+
+        try:
+            data = json.loads(text.extract(
+                self.request(url).text,
+                'id="__NEXT_DATA__" type="application/json">', '<')[0])
+            props = data["props"]["pageProps"]
+            album = props["album"]
+            files = props["files"]
+        except Exception as exc:
+            self.log.debug(exc)
+            self.root = self.root.replace("bunkr", "app.bunkr", 1)
+            return self._fetch_album_api(album_id)
+
+        for file in files:
+            name = file["name"]
+            if name.endswith(".mp4"):
+                file["file"] = "https://media-files.bunkr.is/" + name
+            else:
+                file["file"] = file["cdn"] + "/" + name
+
+        return files, {
+            "album_id"   : self.album_id,
+            "album_name" : text.unescape(album["name"]),
+            "description": text.unescape(album["description"]),
+            "count"      : len(files),
+        }
diff --git a/gallery_dl/extractor/lolisafe.py b/gallery_dl/extractor/lolisafe.py
@@ -20,10 +20,6 @@ class LolisafeExtractor(BaseExtractor):
 
 
 BASE_PATTERN = LolisafeExtractor.update({
-    "bunkr": {
-        "root": "https://app.bunkr.is",
-        "pattern": r"(?:app\.)?bunkr\.(?:is|to)",
-    },
     "zzzz" : {
         "root": "https://zz.ht",
         "pattern": r"zz\.(?:ht|fo)",
@@ -35,25 +31,6 @@ class LolisafeAlbumExtractor(LolisafeExtractor):
     subcategory = "album"
     pattern = BASE_PATTERN + "/a/([^/?#]+)"
     test = (
-        ("https://app.bunkr.is/a/Lktg9Keq", {
-            "pattern": r"https://cdn\.bunkr\.is/test-テスト-\"&>-QjgneIQv\.png",
-            "content": "0c8768055e4e20e7c7259608b67799171b691140",
-            "keyword": {
-                "album_id": "Lktg9Keq",
-                "album_name": 'test テスト "&>',
-                "count": 1,
-                "filename": 'test-テスト-"&>-QjgneIQv',
-                "id": "QjgneIQv",
-                "name": 'test-テスト-"&>',
-                "num": int,
-            },
-        }),
-        # mp4 (#2239)
-        ("https://bunkr.is/a/ptRHaCn2", {
-            "pattern": r"https://media-files\.bunkr\.is/_-RnHoW69L\.mp4",
-            "content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",
-        }),
-        ("https://bunkr.to/a/Lktg9Keq"),
         ("https://zz.ht/a/lop7W6EZ", {
             "pattern": r"https://z\.zz\.fo/(4anuY|ih560)\.png",
             "count": 2,
@@ -71,11 +48,7 @@ def __init__(self, match):
 
         domain = self.config("domain")
         if domain is None or domain == "auto":
-            if self.category == "bunkr":
-                self.root = "https://app.bunkr.is"
-            else:
-                self.root = text.root_from_url(match.group(0))
-
+            self.root = text.root_from_url(match.group(0))
         else:
             self.root = text.ensure_http_scheme(domain)
 
@@ -89,10 +62,6 @@ def items(self):
                 data["_fallback"] = file["_fallback"]
             text.nameext_from_url(url, data)
             data["name"], sep, data["id"] = data["filename"].rpartition("-")
-
-            if data["extension"] == "mp4":
-                url = url.replace(
-                    "//cdn.bunkr.is/", "//media-files.bunkr.is/", 1)
             yield Message.Url, url, data
 
     def fetch_album(self, album_id):