Skip to content

Commit

Permalink
[bunkr] fix extraction (#2732)
Browse files Browse the repository at this point in the history
move bunkr.is code to its own module
  • Loading branch information
mikf committed Jul 15, 2022
1 parent baf3815 commit 46f11a3
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 38 deletions.
12 changes: 6 additions & 6 deletions docs/supportedsites.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,12 @@ Consider all sites to be NSFW unless otherwise known.
<td>Blogs, Posts, Search Results</td>
<td></td>
</tr>
<tr>
<td>Bunkr</td>
<td>https://bunkr.is/</td>
<td>Albums</td>
<td></td>
</tr>
<tr>
<td>Comic Vine</td>
<td>https://comicvine.gamespot.com/</td>
Expand Down Expand Up @@ -1261,12 +1267,6 @@ Consider all sites to be NSFW unless otherwise known.
<tr>
<td colspan="4"><strong>lolisafe and chibisafe</strong></td>
</tr>
<tr>
<td>Bunkr</td>
<td>https://app.bunkr.is/</td>
<td>Albums</td>
<td></td>
</tr>
<tr>
<td>ZzZz</td>
<td>https://zz.ht/</td>
Expand Down
1 change: 1 addition & 0 deletions gallery_dl/extractor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"bcy",
"behance",
"blogger",
"bunkr",
"comicvine",
"cyberdrop",
"danbooru",
Expand Down
89 changes: 89 additions & 0 deletions gallery_dl/extractor/bunkr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-

# Copyright 2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://bunkr.is/"""

from .lolisafe import LolisafeAlbumExtractor
from .. import text
import json


class BunkrAlbumExtractor(LolisafeAlbumExtractor):
    """Extractor for bunkr.is albums"""
    category = "bunkr"
    root = "https://app.bunkr.is"
    pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:is|to)/a/([^/?#]+)"
    test = (
        ("https://app.bunkr.is/a/Lktg9Keq", {
            "pattern": r"https://cdn\.bunkr\.is/test-テスト-\"&>-QjgneIQv\.png",
            "content": "0c8768055e4e20e7c7259608b67799171b691140",
            "keyword": {
                "album_id": "Lktg9Keq",
                "album_name": 'test テスト "&>',
                "count": 1,
                "filename": 'test-テスト-"&>-QjgneIQv',
                "id": "QjgneIQv",
                "name": 'test-テスト-"&>',
                "num": int,
            },
        }),
        # mp4 (#2239)
        ("https://bunkr.is/a/ptRHaCn2", {
            "pattern": r"https://media-files\.bunkr\.is/_-RnHoW69L\.mp4",
            "content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",
        }),
        ("https://bunkr.to/a/Lktg9Keq"),
    )

    def fetch_album(self, album_id):
        """Return (files, metadata) for an album.

        Uses the API code path when the current root contains "//app."
        and the website (HTML page) code path otherwise.
        """
        fetcher = (
            self._fetch_album_api if "//app." in self.root
            else self._fetch_album_site
        )
        return fetcher(album_id)

    def _fetch_album_api(self, album_id):
        """Fetch an album through the inherited lolisafe implementation.

        Each returned file entry is adjusted in place: ".mp4" URLs get
        their "cdn.bunkr.is" host swapped for "media-files.bunkr.is";
        every other entry gains a one-element "_fallback" tuple that
        points at the "cdn3." variant of its URL.
        """
        files, data = LolisafeAlbumExtractor.fetch_album(self, album_id)

        for entry in files:
            src = entry["file"]
            if not src.endswith(".mp4"):
                # non-video file: keep the URL, register an alternative host
                entry["_fallback"] = (src.replace("//cdn.", "//cdn3.", 1),)
                continue
            entry["file"] = src.replace(
                "//cdn.bunkr.is/", "//media-files.bunkr.is/", 1)

        return files, data

    def _fetch_album_site(self, album_id):
        """Fetch an album by parsing the JSON embedded in its web page.

        The album page is requested and the "__NEXT_DATA__" JSON payload
        is decoded to obtain the album and file records. If anything in
        that process raises, the exception is logged at debug level, the
        root is switched to its "app." form, and the API code path is
        used instead.
        """
        page_url = self.root + "/a/" + self.album_id

        try:
            page = self.request(page_url).text
            payload = text.extract(
                page, 'id="__NEXT_DATA__" type="application/json">', '<')[0]
            props = json.loads(payload)["props"]["pageProps"]
            album = props["album"]
            files = props["files"]
        except Exception as exc:
            self.log.debug(exc)
            # fall back to the API host for this and any later request
            self.root = self.root.replace("bunkr", "app.bunkr", 1)
            return self._fetch_album_api(album_id)

        for entry in files:
            filename = entry["name"]
            if filename.endswith(".mp4"):
                base = "https://media-files.bunkr.is"
            else:
                base = entry["cdn"]
            entry["file"] = base + "/" + filename

        metadata = {
            "album_id"   : self.album_id,
            "album_name" : text.unescape(album["name"]),
            "description": text.unescape(album["description"]),
            "count"      : len(files),
        }
        return files, metadata
33 changes: 1 addition & 32 deletions gallery_dl/extractor/lolisafe.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@ class LolisafeExtractor(BaseExtractor):


BASE_PATTERN = LolisafeExtractor.update({
"bunkr": {
"root": "https://app.bunkr.is",
"pattern": r"(?:app\.)?bunkr\.(?:is|to)",
},
"zzzz" : {
"root": "https://zz.ht",
"pattern": r"zz\.(?:ht|fo)",
Expand All @@ -35,25 +31,6 @@ class LolisafeAlbumExtractor(LolisafeExtractor):
subcategory = "album"
pattern = BASE_PATTERN + "/a/([^/?#]+)"
test = (
("https://app.bunkr.is/a/Lktg9Keq", {
"pattern": r"https://cdn\.bunkr\.is/test-テスト-\"&>-QjgneIQv\.png",
"content": "0c8768055e4e20e7c7259608b67799171b691140",
"keyword": {
"album_id": "Lktg9Keq",
"album_name": 'test テスト "&>',
"count": 1,
"filename": 'test-テスト-"&>-QjgneIQv',
"id": "QjgneIQv",
"name": 'test-テスト-"&>',
"num": int,
},
}),
# mp4 (#2239)
("https://bunkr.is/a/ptRHaCn2", {
"pattern": r"https://media-files\.bunkr\.is/_-RnHoW69L\.mp4",
"content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",
}),
("https://bunkr.to/a/Lktg9Keq"),
("https://zz.ht/a/lop7W6EZ", {
"pattern": r"https://z\.zz\.fo/(4anuY|ih560)\.png",
"count": 2,
Expand All @@ -71,11 +48,7 @@ def __init__(self, match):

domain = self.config("domain")
if domain is None or domain == "auto":
if self.category == "bunkr":
self.root = "https://app.bunkr.is"
else:
self.root = text.root_from_url(match.group(0))

self.root = text.root_from_url(match.group(0))
else:
self.root = text.ensure_http_scheme(domain)

Expand All @@ -89,10 +62,6 @@ def items(self):
data["_fallback"] = file["_fallback"]
text.nameext_from_url(url, data)
data["name"], sep, data["id"] = data["filename"].rpartition("-")

if data["extension"] == "mp4":
url = url.replace(
"//cdn.bunkr.is/", "//media-files.bunkr.is/", 1)
yield Message.Url, url, data

def fetch_album(self, album_id):
Expand Down

0 comments on commit 46f11a3

Please sign in to comment.