From 24873c27240a53859224b6ad0a56e3438c47aaf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 6 Mar 2024 01:27:45 +0100 Subject: [PATCH] [warosu] fix crash for threads with deleted posts (#5289) --- gallery_dl/extractor/warosu.py | 14 +++++++++----- test/results/warosu.py | 12 ++++++++++++ 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/gallery_dl/extractor/warosu.py b/gallery_dl/extractor/warosu.py index 3bb635d657..11f0c18f38 100644 --- a/gallery_dl/extractor/warosu.py +++ b/gallery_dl/extractor/warosu.py @@ -64,8 +64,7 @@ def posts(self, page): def parse(self, post): """Build post object by extracting data from an HTML post""" data = self._extract_post(post) - if " File:" in post: - self._extract_image(post, data) + if " File:" in post and self._extract_image(post, data): part = data["image"].rpartition("/")[2] data["tim"], _, data["extension"] = part.partition(".") data["ext"] = "." + data["extension"] @@ -91,6 +90,11 @@ def _extract_image(self, post, data): "", "<").rstrip().rpartition(".")[0]) extr("
", "") - data["image"] = url = extr("") - if url[0] == "/": - data["image"] = self.root + url + url = extr("") + if url: + if url[0] == "/": + data["image"] = self.root + url + else: + data["image"] = url + return True + return False diff --git a/test/results/warosu.py b/test/results/warosu.py index efc7f83249..fd095183d4 100644 --- a/test/results/warosu.py +++ b/test/results/warosu.py @@ -54,6 +54,18 @@ "w" : 450, }, +{ + "#url" : "https://warosu.org/jp/thread/45886210", + "#comment" : "deleted post (#5289)", + "#category": ("", "warosu", "thread"), + "#class" : warosu.WarosuThreadExtractor, + "#count" : "> 150", + + "board" : "jp", + "board_name": "Otaku Culture", + "title" : "/07/th Expansion Thread", +}, + { "#url" : "https://warosu.org/ic/thread/4604652", "#category": ("", "warosu", "thread"),