From b6a40217d6ffc1f6188410428f12d953c43eaaf2 Mon Sep 17 00:00:00 2001 From: mix5003 Date: Sat, 10 Aug 2024 11:04:36 +0700 Subject: [PATCH 1/2] [pixiv] use api for illust series --- gallery_dl/extractor/pixiv.py | 61 ++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index d732894a9c..f6fe21c708 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -562,34 +562,39 @@ def __init__(self, match): self.user_id, self.series_id = match.groups() def works(self): - url = self.root + "/ajax/series/" + self.series_id - params = {"p": 1} - headers = { - "Accept": "application/json", - "Referer": "{}/user/{}/series/{}".format( - self.root, self.user_id, self.series_id), - "Alt-Used": "www.pixiv.net", - } + offset = 0 + series = None + works = [] while True: - data = self.request(url, params=params, headers=headers).json() - body = data["body"] - page = body["page"] - - series = body["extraData"]["meta"] - series["id"] = self.series_id - series["total"] = page["total"] - series["title"] = text.extr(series["title"], '"', '"') - - for info in page["series"]: - work = self.api.illust_detail(info["workId"]) - work["num_series"] = info["order"] - work["series"] = series - yield work - - if len(page["series"]) < 10: - return - params["p"] += 1 + data = (self.api.illust_series(self.series_id, offset)) + + if series is None: + detail = data['illust_series_detail'] + series = { + 'id': self.series_id, + 'total': detail['series_work_count'], + 'title': detail["title"], + 'description': detail['caption'], + } + + works = works + data['illusts'] + + if data['next_url'] is None: + break + + offset = len(works) + + works.reverse() + + chapterNo = 0 + for work in works: + chapterNo += 1 + + work["num_series"] = chapterNo + work["series"] = series + + yield work class PixivNovelExtractor(PixivExtractor): @@ -916,6 +921,10 @@ def illust_related(self, illust_id): params = {"illust_id": illust_id} return self._pagination("/v2/illust/related", params) + def illust_series(self, series_id, offset=0): + params = {"illust_series_id": series_id, "offset": offset} + return self._call("/v1/illust/series", params) + def novel_bookmark_detail(self, novel_id): params = {"novel_id": novel_id} return self._call( From b33a53968d1011bd5ec3e7369c486f6cf198a531 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 12 Aug 2024 11:50:41 +0200 Subject: [PATCH 2/2] [pixiv] incorporate into '_pagination' --- gallery_dl/extractor/pixiv.py | 48 +++++++++++------------------------ test/results/pixiv.py | 14 +++++----- 2 files changed, 23 insertions(+), 39 deletions(-) diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index f6fe21c708..e7947f0b68 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -551,9 +551,6 @@ class PixivSeriesExtractor(PixivExtractor): directory_fmt = ("{category}", "{user[id]} {user[account]}", "{series[id]} {series[title]}") filename_fmt = "{num_series:>03}_{id}_p{num}.{extension}" - cookies_domain = ".pixiv.net" - browser = "firefox" - tls12 = False pattern = BASE_PATTERN + r"/user/(\d+)/series/(\d+)" example = "https://www.pixiv.net/user/12345/series/12345" @@ -562,38 +559,17 @@ def __init__(self, match): self.user_id, self.series_id = match.groups() def works(self): - offset = 0 - series = None - works = [] - while True: - data = (self.api.illust_series(self.series_id, offset)) + for work in self.api.illust_series(self.series_id): if series is None: - detail = data['illust_series_detail'] - series = { - 'id': self.series_id, - 'total': detail['series_work_count'], - 'title': detail["title"], - 'description': detail['caption'], - } - - works = works + data['illusts'] - - if data['next_url'] is None: - break - - offset = len(works) - - works.reverse() - - chapterNo = 0 - for work in works: - chapterNo += 1 + series = self.api.data + series["total"] = num_series = series.pop("series_work_count") + else: + num_series -= 1 - work["num_series"] = chapterNo + work["num_series"] = num_series work["series"] = series - yield work @@ -923,7 +899,8 @@ def illust_related(self, illust_id): def illust_series(self, series_id, offset=0): params = {"illust_series_id": series_id, "offset": offset} - return self._call("/v1/illust/series", params) + return self._pagination("/v1/illust/series", params, + key_data="illust_series_detail") def novel_bookmark_detail(self, novel_id): params = {"novel_id": novel_id} @@ -1022,10 +999,15 @@ def _call(self, endpoint, params=None, parse=None): raise exception.StopExtraction("API request failed: %s", error) - def _pagination(self, endpoint, params, key="illusts"): + def _pagination(self, endpoint, params, + key_items="illusts", key_data=None): while True: data = self._call(endpoint, params) - yield from data[key] + + if key_data: + self.data = data.get(key_data) + key_data = None + yield from data[key_items] if not data["next_url"]: return diff --git a/test/results/pixiv.py b/test/results/pixiv.py index 6711cebc16..8d0394a7b4 100644 --- a/test/results/pixiv.py +++ b/test/results/pixiv.py @@ -405,12 +405,14 @@ "num_series": int, "series" : { - "canonical" : "https://www.pixiv.net/user/10509347/series/21859", - "description": str, - "ogp" : dict, - "title" : "先輩がうざい後輩の話", - "total" : int, - "twitter" : dict, + "create_date": "2017-10-22T14:07:42+09:00", + "width" : 4250, + "height": 3009, + "id" : 21859, + "title" : "先輩がうざい後輩の話", + "total" : range(100, 500), + "user" : dict, + "watchlist_added": False, }, },