Skip to content

Commit

Permalink
[pixiv] implement 'include' option
Browse files Browse the repository at this point in the history
- split 'user' extractor and its 'avatar' and 'background' options into
  separate extractors ('artworks', 'avatar', 'background')
- avatars can now be downloaded with
  https://www.pixiv.net/en/users/ID/avatar
  as URL and will use a proper archive key; similar for backgrounds
- options for the 'user' subcategory must be moved to 'artworks' to have
  the same effect as before
  • Loading branch information
mikf committed May 2, 2022
1 parent d11e219 commit 8475698
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 92 deletions.
30 changes: 15 additions & 15 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1776,28 +1776,28 @@ Description
Download from video pins.


extractor.pixiv.user.avatar
---------------------------
extractor.pixiv.include
-----------------------
Type
``bool``
* ``string``
* ``list`` of ``strings``
Default
``false``
``"artworks"``
Example
* ``"avatar,background,artworks"``
* ``["avatar", "background", "artworks"]``
Description
Download user avatars.
A (comma-separated) list of subcategories to include
when processing a user profile.

Possible values are
``"artworks"``, ``"avatar"``, ``"background"``, ``"favorite"``.

extractor.pixiv.user.background
-------------------------------
Type
``bool``
Default
``false``
Description
Download user background banners.
It is possible to use ``"all"`` instead of listing all values separately.


extractor.pixiv.user.metadata
-----------------------------
extractor.pixiv.artworks.metadata
---------------------------------
Type
``bool``
Default
Expand Down
2 changes: 1 addition & 1 deletion docs/gallery-dl.conf
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@
"pixiv":
{
"refresh-token": null,
"avatar": false,
"include": "artworks",
"tags": "japanese",
"ugoira": true
},
Expand Down
2 changes: 1 addition & 1 deletion docs/supportedsites.md
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,7 @@ Consider all sites to be NSFW unless otherwise known.
<tr>
<td>Pixiv</td>
<td>https://www.pixiv.net/</td>
<td>Favorites, Follows, pixiv.me Links, pixivision, Rankings, Search Results, Sketch, User Profiles, individual Images</td>
<td>Artworks, Avatars, Backgrounds, Favorites, Follows, pixiv.me Links, pixivision, Rankings, Search Results, Sketch, User Profiles, individual Images</td>
<td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
</tr>
<tr>
Expand Down
195 changes: 120 additions & 75 deletions gallery_dl/extractor/pixiv.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
class PixivExtractor(Extractor):
"""Base class for pixiv extractors"""
category = "pixiv"
root = "https://www.pixiv.net"
directory_fmt = ("{category}", "{user[id]} {user[account]}")
filename_fmt = "{id}_p{num}.{extension}"
archive_fmt = "{id}{suffix}.{extension}"
Expand Down Expand Up @@ -90,22 +91,79 @@ def transform_tags(work):
work["suffix"] = "_p{:02}".format(work["num"])
yield Message.Url, url, text.nameext_from_url(url, work)

@staticmethod
def _make_work(kind, url, user):
return {
"create_date" : None,
"height" : 0,
"id" : kind,
"image_urls" : None,
"meta_pages" : (),
"meta_single_page": {"original_image_url": url},
"page_count" : 1,
"sanity_level" : 0,
"tags" : (),
"title" : kind,
"type" : kind,
"user" : user,
"width" : 0,
"x_restrict" : 0,
}

def works(self):
"""Return an iterable containing all relevant 'work'-objects"""
"""Return an iterable containing all relevant 'work' objects"""

def metadata(self):
"""Collect metadata for extractor-job"""
"""Collect metadata for extractor job"""
return {}


class PixivUserExtractor(PixivExtractor):
"""Extractor for works of a pixiv user"""
"""Extractor for a pixiv user profile"""
subcategory = "user"
pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:"
r"(?:en/)?users/(\d+)(?:/(?:artworks|illustrations|manga)"
r"(?:/([^/?#]+))?)?/?(?:$|[?#])"
r"|member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?"
r"|(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+))")
r"(?:en/)?u(?:sers)?/|member\.php\?id=|(?:mypage\.php)?#id="
r")(\d+)(?:$|[?#])")
test = (
("https://www.pixiv.net/en/users/173530"),
("https://www.pixiv.net/u/173530"),
("https://www.pixiv.net/member.php?id=173530"),
("https://www.pixiv.net/mypage.php#id=173530"),
("https://www.pixiv.net/#id=173530"),
)

def __init__(self, match):
    """Initialise the extractor and remember the matched user ID.

    Args:
        match: Regex match object for the URL; its first capture
            group is stored as ``self.user_id``.
    """
    PixivExtractor.__init__(self, match)
    self.user_id = match.group(1)

def items(self):
    """Hand a user profile over to the per-category extractors.

    Builds the list of default categories and a table of
    (extractor class, URL) pairs for this user, then returns whatever
    ``self._dispatch_extractors()`` produces for them.

    The legacy 'avatar' and 'background' options are still read: each
    one that is set logs a deprecation warning and puts its category
    in front of "artworks" in the default list.
    """
    default = []
    if self.config("avatar"):
        self.log.warning("'avatar' is deprecated, "
                         "use \"include\": \"…,avatar\" instead")
        default.append("avatar")
    if self.config("background"):
        self.log.warning("'background' is deprecated, "
                         "use \"include\": \"…,background\" instead")
        default.append("background")
    # "artworks" is always part of the default selection.
    default.append("artworks")

    # NOTE(review): 'default' is presumably only used when no 'include'
    # option is configured -- confirm in Extractor._dispatch_extractors.
    base = "{}/users/{}/".format(self.root, self.user_id)
    return self._dispatch_extractors((
        (PixivAvatarExtractor    , base + "avatar"),
        (PixivBackgroundExtractor, base + "background"),
        (PixivArtworksExtractor  , base + "artworks"),
        (PixivFavoriteExtractor  , base + "bookmarks/artworks"),
    ), default)


class PixivArtworksExtractor(PixivExtractor):
"""Extractor for artworks of a pixiv user"""
subcategory = "artworks"
pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:"
r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)"
r"(?:/([^/?#]+))?/?(?:$|[?#])"
r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)")
test = (
("https://www.pixiv.net/en/users/173530/artworks", {
"url": "852c31ad83b6840bacbce824d85f2a997889efb7",
Expand All @@ -119,42 +177,25 @@ class PixivUserExtractor(PixivExtractor):
"&tag=%E6%89%8B%E3%81%B6%E3%82%8D"), {
"url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
}),
# avatar (#595, #623, #1124)
("https://www.pixiv.net/en/users/173530", {
"options": (("avatar", True),),
"content": "4e57544480cc2036ea9608103e8f024fa737fe66",
"range": "1",
}),
# background (#623, #1124, #2495)
("https://www.pixiv.net/en/users/194921", {
"options": (("background", True),),
"content": "aeda3536003ea3002f70657cb93c5053f26f5843",
"range": "1",
}),
# deleted account
("http://www.pixiv.net/member_illust.php?id=173531", {
"options": (("metadata", True),),
"exception": exception.NotFoundError,
}),
("https://www.pixiv.net/en/users/173530"),
("https://www.pixiv.net/en/users/173530/manga"),
("https://www.pixiv.net/en/users/173530/illustrations"),
("https://www.pixiv.net/member_illust.php?id=173530"),
("https://www.pixiv.net/u/173530"),
("https://www.pixiv.net/user/173530"),
("https://www.pixiv.net/mypage.php#id=173530"),
("https://www.pixiv.net/#id=173530"),
("https://touch.pixiv.net/member_illust.php?id=173530"),
)

def __init__(self, match):
PixivExtractor.__init__(self, match)
u1, t1, u2, t2, u3 = match.groups()
u1, t1, u2, t2 = match.groups()
if t1:
t1 = text.unquote(t1)
elif t2:
t2 = text.parse_query(t2).get("tag")
self.user_id = u1 or u2 or u3
self.user_id = u1 or u2
self.tag = t1 or t2

def metadata(self):
Expand All @@ -172,54 +213,58 @@ def works(self):
if tag in [t["name"].lower() for t in work["tags"]]
)

avatar = self.config("avatar")
background = self.config("background")
if avatar or background:
work_list = []
detail = self.api.user_detail(self.user_id)
user = detail["user"]

if avatar:
url = user["profile_image_urls"]["medium"]
work_list.append((self._make_work(
"avatar", url.replace("_170.", "."), user),))

if background:
url = detail["profile"]["background_image_url"]
if url:
if "/c/" in url:
parts = url.split("/")
del parts[3:5]
url = "/".join(parts)
url = url.replace("_master1200.", ".")
work = self._make_work("background", url, user)
if url.endswith(".jpg"):
work["_fallback"] = (url[:-4] + ".png",)
work_list.append((work,))

work_list.append(works)
works = itertools.chain.from_iterable(work_list)

return works

@staticmethod
def _make_work(kind, url, user):
return {
"create_date" : None,
"height" : 0,
"id" : kind,
"image_urls" : None,
"meta_pages" : (),
"meta_single_page": {"original_image_url": url},
"page_count" : 1,
"sanity_level" : 0,
"tags" : (),
"title" : kind,
"type" : kind,
"user" : user,
"width" : 0,
"x_restrict" : 0,
}

class PixivAvatarExtractor(PixivExtractor):
    """Extractor for pixiv avatars"""
    subcategory = "avatar"
    # Archive key is built from the user ID rather than a work ID.
    archive_fmt = "avatar_{user[id]}"
    pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net"
               r"/(?:en/)?users/(\d+)/avatar")
    test = ("https://www.pixiv.net/en/users/173530/avatar", {
        "content": "4e57544480cc2036ea9608103e8f024fa737fe66",
    })

    def __init__(self, match):
        """Store the user ID captured by the first group of 'pattern'."""
        PixivExtractor.__init__(self, match)
        self.user_id = match.group(1)

    def works(self):
        """Return a one-element tuple holding the user's avatar work.

        The URL is the "medium" profile image URL from the user detail
        response with "_170." replaced by ".".
        """
        user = self.api.user_detail(self.user_id)["user"]
        # NOTE(review): dropping "_170" presumably selects the
        # full-size file instead of the 170px thumbnail -- confirm.
        url = user["profile_image_urls"]["medium"].replace("_170.", ".")
        return (self._make_work("avatar", url, user),)


class PixivBackgroundExtractor(PixivExtractor):
    """Extractor for pixiv background banners"""
    subcategory = "background"
    # Archive key is built from the user ID rather than a work ID.
    archive_fmt = "background_{user[id]}"
    pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net"
               r"/(?:en/)?users/(\d+)/background")
    test = ("https://www.pixiv.net/en/users/194921/background", {
        "pattern": r"https://i\.pximg\.net/background/img/2021/01/30/16/12/02"
                   r"/194921_af1f71e557a42f499213d4b9eaccc0f8\.jpg",
    })

    def __init__(self, match):
        """Store the user ID captured by the first group of 'pattern'."""
        PixivExtractor.__init__(self, match)
        self.user_id = match.group(1)

    def works(self):
        """Return the user's background banner as a tuple of works.

        Returns an empty tuple when the profile has no background
        image URL, otherwise a one-element tuple. A work whose URL
        ends in ".jpg" also gets a "_fallback" entry with the same URL
        ending in ".png".
        """
        detail = self.api.user_detail(self.user_id)
        url = detail["profile"]["background_image_url"]
        if not url:
            return ()

        if "/c/" in url:
            # Drop path elements 3 and 4 of the "/"-split URL.
            # NOTE(review): assumes "/c/<size>" directly follows the
            # host, i.e. ['https:', '', host, 'c', size, ...] -- confirm.
            parts = url.split("/")
            del parts[3:5]
            url = "/".join(parts)
        url = url.replace("_master1200.", ".")

        work = self._make_work("background", url, detail["user"])
        if url.endswith(".jpg"):
            # NOTE(review): "_fallback" is presumably an alternative URL
            # tried when the primary download fails -- confirm.
            work["_fallback"] = (url[:-4] + ".png",)
        return (work,)


class PixivMeExtractor(PixivExtractor):
Expand Down Expand Up @@ -311,10 +356,10 @@ class PixivFavoriteExtractor(PixivExtractor):
r"|bookmark\.php)(?:\?([^#]*))?")
test = (
("https://www.pixiv.net/en/users/173530/bookmarks/artworks", {
"url": "e717eb511500f2fa3497aaee796a468ecf685cc4",
"url": "85a3104eaaaf003c7b3947117ca2f1f0b1cfc949",
}),
("https://www.pixiv.net/bookmark.php?id=173530", {
"url": "e717eb511500f2fa3497aaee796a468ecf685cc4",
"url": "85a3104eaaaf003c7b3947117ca2f1f0b1cfc949",
}),
# bookmarks with specific tag
(("https://www.pixiv.net/en/users/3137110"
Expand Down Expand Up @@ -759,7 +804,7 @@ def user_bookmarks_illust(self, user_id, tag=None, restrict="public"):
params = {"user_id": user_id, "tag": tag, "restrict": restrict}
return self._pagination("/v1/user/bookmarks/illust", params)

@memcache()
@memcache(keyarg=1)
def user_detail(self, user_id):
params = {"user_id": user_id}
return self._call("/v1/user/detail", params)
Expand Down

0 comments on commit 8475698

Please sign in to comment.