[pixiv] implement 'include' option

- Split the 'user' extractor and its 'avatar' and 'background' options into separate extractors ('artworks', 'avatar', 'background').
- Avatars can now be downloaded by using https://www.pixiv.net/en/users/ID/avatar as the URL, and they will use a proper archive key; the same applies to backgrounds.
- Options for the 'user' subcategory must be moved to 'artworks' to have the same effect as before.
mikf · May 2, 2022 · 8475698 · 8475698
1 parent d11e219
commit 8475698
Show file tree

Hide file tree

Showing 4 changed files with 137 additions and 92 deletions.
diff --git a/docs/configuration.rst b/docs/configuration.rst
@@ -1776,28 +1776,28 @@ Description
     Download from video pins.
 
 
-extractor.pixiv.user.avatar
----------------------------
+extractor.pixiv.include
+-----------------------
 Type
-    ``bool``
+    * ``string``
+    * ``list`` of ``strings``
 Default
-    ``false``
+    ``"artworks"``
+Example
+    * ``"avatar,background,artworks"``
+    * ``["avatar", "background", "artworks"]``
 Description
-    Download user avatars.
+    A (comma-separated) list of subcategories to include
+    when processing a user profile.
 
+    Possible values are
+    ``"artworks"``, ``"avatar"``, ``"background"``, ``"favorite"``.
 
-extractor.pixiv.user.background
--------------------------------
-Type
-    ``bool``
-Default
-    ``false``
-Description
-    Download user background banners.
+    It is possible to use ``"all"`` instead of listing all values separately.
 
 
-extractor.pixiv.user.metadata
------------------------------
+extractor.pixiv.artworks.metadata
+---------------------------------
 Type
     ``bool``
 Default

diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
@@ -201,7 +201,7 @@
         "pixiv":
         {
             "refresh-token": null,
-            "avatar": false,
+            "include": "artworks",
             "tags": "japanese",
             "ugoira": true
         },

diff --git a/docs/supportedsites.md b/docs/supportedsites.md
@@ -604,7 +604,7 @@ Consider all sites to be NSFW unless otherwise known.
 <tr>
     <td>Pixiv</td>
     <td>https://www.pixiv.net/</td>
-    <td>Favorites, Follows, pixiv.me Links, pixivision, Rankings, Search Results, Sketch, User Profiles, individual Images</td>
+    <td>Artworks, Avatars, Backgrounds, Favorites, Follows, pixiv.me Links, pixivision, Rankings, Search Results, Sketch, User Profiles, individual Images</td>
     <td><a href="https://github.com/mikf/gallery-dl#oauth">OAuth</a></td>
 </tr>
 <tr>

diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py
@@ -19,6 +19,7 @@
 class PixivExtractor(Extractor):
     """Base class for pixiv extractors"""
     category = "pixiv"
+    root = "https://www.pixiv.net"
     directory_fmt = ("{category}", "{user[id]} {user[account]}")
     filename_fmt = "{id}_p{num}.{extension}"
     archive_fmt = "{id}{suffix}.{extension}"
@@ -90,22 +91,79 @@ def transform_tags(work):
                     work["suffix"] = "_p{:02}".format(work["num"])
                     yield Message.Url, url, text.nameext_from_url(url, work)
 
+    @staticmethod
+    def _make_work(kind, url, user):
+        return {
+            "create_date"     : None,
+            "height"          : 0,
+            "id"              : kind,
+            "image_urls"      : None,
+            "meta_pages"      : (),
+            "meta_single_page": {"original_image_url": url},
+            "page_count"      : 1,
+            "sanity_level"    : 0,
+            "tags"            : (),
+            "title"           : kind,
+            "type"            : kind,
+            "user"            : user,
+            "width"           : 0,
+            "x_restrict"      : 0,
+        }
+
     def works(self):
-        """Return an iterable containing all relevant 'work'-objects"""
+        """Return an iterable containing all relevant 'work' objects"""
 
     def metadata(self):
-        """Collect metadata for extractor-job"""
+        """Collect metadata for extractor job"""
         return {}
 
 
 class PixivUserExtractor(PixivExtractor):
-    """Extractor for works of a pixiv user"""
+    """Extractor for a pixiv user profile"""
     subcategory = "user"
     pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:"
-               r"(?:en/)?users/(\d+)(?:/(?:artworks|illustrations|manga)"
-               r"(?:/([^/?#]+))?)?/?(?:$|[?#])"
-               r"|member(?:_illust)?\.php\?id=(\d+)(?:&([^#]+))?"
-               r"|(?:u(?:ser)?/|(?:mypage\.php)?#id=)(\d+))")
+               r"(?:en/)?u(?:sers)?/|member\.php\?id=|(?:mypage\.php)?#id="
+               r")(\d+)(?:$|[?#])")
+    test = (
+        ("https://www.pixiv.net/en/users/173530"),
+        ("https://www.pixiv.net/u/173530"),
+        ("https://www.pixiv.net/member.php?id=173530"),
+        ("https://www.pixiv.net/mypage.php#id=173530"),
+        ("https://www.pixiv.net/#id=173530"),
+    )
+
+    def __init__(self, match):
+        PixivExtractor.__init__(self, match)
+        self.user_id = match.group(1)
+
+    def items(self):
+        default = []
+        if self.config("avatar"):
+            self.log.warning("'avatar' is deprecated, "
+                             "use \"include\": \"…,avatar\" instead")
+            default.append("avatar")
+        if self.config("background"):
+            self.log.warning("'background' is deprecated, "
+                             "use \"include\": \"…,background\" instead")
+            default.append("background")
+        default.append("artworks")
+
+        base = "{}/users/{}/".format(self.root, self.user_id)
+        return self._dispatch_extractors((
+            (PixivAvatarExtractor    , base + "avatar"),
+            (PixivBackgroundExtractor, base + "background"),
+            (PixivArtworksExtractor  , base + "artworks"),
+            (PixivFavoriteExtractor  , base + "bookmarks/artworks"),
+        ), default)
+
+
+class PixivArtworksExtractor(PixivExtractor):
+    """Extractor for artworks of a pixiv user"""
+    subcategory = "artworks"
+    pattern = (r"(?:https?://)?(?:www\.|touch\.)?pixiv\.net/(?:"
+               r"(?:en/)?users/(\d+)/(?:artworks|illustrations|manga)"
+               r"(?:/([^/?#]+))?/?(?:$|[?#])"
+               r"|member_illust\.php\?id=(\d+)(?:&([^#]+))?)")
     test = (
         ("https://www.pixiv.net/en/users/173530/artworks", {
             "url": "852c31ad83b6840bacbce824d85f2a997889efb7",
@@ -119,42 +177,25 @@ class PixivUserExtractor(PixivExtractor):
           "&tag=%E6%89%8B%E3%81%B6%E3%82%8D"), {
             "url": "25b1cd81153a8ff82eec440dd9f20a4a22079658",
         }),
-        # avatar (#595, #623, #1124)
-        ("https://www.pixiv.net/en/users/173530", {
-            "options": (("avatar", True),),
-            "content": "4e57544480cc2036ea9608103e8f024fa737fe66",
-            "range": "1",
-        }),
-        # background (#623, #1124, #2495)
-        ("https://www.pixiv.net/en/users/194921", {
-            "options": (("background", True),),
-            "content": "aeda3536003ea3002f70657cb93c5053f26f5843",
-            "range": "1",
-        }),
         # deleted account
         ("http://www.pixiv.net/member_illust.php?id=173531", {
             "options": (("metadata", True),),
             "exception": exception.NotFoundError,
         }),
-        ("https://www.pixiv.net/en/users/173530"),
         ("https://www.pixiv.net/en/users/173530/manga"),
         ("https://www.pixiv.net/en/users/173530/illustrations"),
         ("https://www.pixiv.net/member_illust.php?id=173530"),
-        ("https://www.pixiv.net/u/173530"),
-        ("https://www.pixiv.net/user/173530"),
-        ("https://www.pixiv.net/mypage.php#id=173530"),
-        ("https://www.pixiv.net/#id=173530"),
         ("https://touch.pixiv.net/member_illust.php?id=173530"),
     )
 
     def __init__(self, match):
         PixivExtractor.__init__(self, match)
-        u1, t1, u2, t2, u3 = match.groups()
+        u1, t1, u2, t2 = match.groups()
         if t1:
             t1 = text.unquote(t1)
         elif t2:
             t2 = text.parse_query(t2).get("tag")
-        self.user_id = u1 or u2 or u3
+        self.user_id = u1 or u2
         self.tag = t1 or t2
 
     def metadata(self):
@@ -172,54 +213,58 @@ def works(self):
                 if tag in [t["name"].lower() for t in work["tags"]]
             )
 
-        avatar = self.config("avatar")
-        background = self.config("background")
-        if avatar or background:
-            work_list = []
-            detail = self.api.user_detail(self.user_id)
-            user = detail["user"]
-
-            if avatar:
-                url = user["profile_image_urls"]["medium"]
-                work_list.append((self._make_work(
-                    "avatar", url.replace("_170.", "."), user),))
-
-            if background:
-                url = detail["profile"]["background_image_url"]
-                if url:
-                    if "/c/" in url:
-                        parts = url.split("/")
-                        del parts[3:5]
-                        url = "/".join(parts)
-                    url = url.replace("_master1200.", ".")
-                    work = self._make_work("background", url, user)
-                    if url.endswith(".jpg"):
-                        work["_fallback"] = (url[:-4] + ".png",)
-                    work_list.append((work,))
-
-            work_list.append(works)
-            works = itertools.chain.from_iterable(work_list)
-
         return works
 
-    @staticmethod
-    def _make_work(kind, url, user):
-        return {
-            "create_date"     : None,
-            "height"          : 0,
-            "id"              : kind,
-            "image_urls"      : None,
-            "meta_pages"      : (),
-            "meta_single_page": {"original_image_url": url},
-            "page_count"      : 1,
-            "sanity_level"    : 0,
-            "tags"            : (),
-            "title"           : kind,
-            "type"            : kind,
-            "user"            : user,
-            "width"           : 0,
-            "x_restrict"      : 0,
-        }
+
+class PixivAvatarExtractor(PixivExtractor):
+    """Extractor for pixiv avatars"""
+    subcategory = "avatar"
+    archive_fmt = "avatar_{user[id]}"
+    pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net"
+               r"/(?:en/)?users/(\d+)/avatar")
+    test = ("https://www.pixiv.net/en/users/173530/avatar", {
+        "content": "4e57544480cc2036ea9608103e8f024fa737fe66",
+    })
+
+    def __init__(self, match):
+        PixivExtractor.__init__(self, match)
+        self.user_id = match.group(1)
+
+    def works(self):
+        user = self.api.user_detail(self.user_id)["user"]
+        url = user["profile_image_urls"]["medium"].replace("_170.", ".")
+        return (self._make_work("avatar", url, user),)
+
+
+class PixivBackgroundExtractor(PixivExtractor):
+    """Extractor for pixiv background banners"""
+    subcategory = "background"
+    archive_fmt = "background_{user[id]}"
+    pattern = (r"(?:https?://)?(?:www\.)?pixiv\.net"
+               r"/(?:en/)?users/(\d+)/background")
+    test = ("https://www.pixiv.net/en/users/194921/background", {
+        "pattern": r"https://i\.pximg\.net/background/img/2021/01/30/16/12/02"
+                   r"/194921_af1f71e557a42f499213d4b9eaccc0f8\.jpg",
+    })
+
+    def __init__(self, match):
+        PixivExtractor.__init__(self, match)
+        self.user_id = match.group(1)
+
+    def works(self):
+        detail = self.api.user_detail(self.user_id)
+        url = detail["profile"]["background_image_url"]
+        if not url:
+            return ()
+        if "/c/" in url:
+            parts = url.split("/")
+            del parts[3:5]
+            url = "/".join(parts)
+        url = url.replace("_master1200.", ".")
+        work = self._make_work("background", url, detail["user"])
+        if url.endswith(".jpg"):
+            work["_fallback"] = (url[:-4] + ".png",)
+        return (work,)
 
 
 class PixivMeExtractor(PixivExtractor):
@@ -311,10 +356,10 @@ class PixivFavoriteExtractor(PixivExtractor):
                r"|bookmark\.php)(?:\?([^#]*))?")
     test = (
         ("https://www.pixiv.net/en/users/173530/bookmarks/artworks", {
-            "url": "e717eb511500f2fa3497aaee796a468ecf685cc4",
+            "url": "85a3104eaaaf003c7b3947117ca2f1f0b1cfc949",
         }),
         ("https://www.pixiv.net/bookmark.php?id=173530", {
-            "url": "e717eb511500f2fa3497aaee796a468ecf685cc4",
+            "url": "85a3104eaaaf003c7b3947117ca2f1f0b1cfc949",
         }),
         # bookmarks with specific tag
         (("https://www.pixiv.net/en/users/3137110"
@@ -759,7 +804,7 @@ def user_bookmarks_illust(self, user_id, tag=None, restrict="public"):
         params = {"user_id": user_id, "tag": tag, "restrict": restrict}
         return self._pagination("/v1/user/bookmarks/illust", params)
 
-    @memcache()
+    @memcache(keyarg=1)
     def user_detail(self, user_id):
         params = {"user_id": user_id}
         return self._call("/v1/user/detail", params)