Skip to content

Commit

Permalink
[paheal] restore 'extension' metadata (#4976)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed Dec 26, 2023
1 parent a50c147 commit f954419
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 9 deletions.
13 changes: 9 additions & 4 deletions gallery_dl/extractor/paheal.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def items(self):
post["tags"] = text.unquote(post["tags"])
post.update(data)
yield Message.Directory, post
yield Message.Url, url, text.nameext_from_url(url, post)
yield Message.Url, url, post

def get_metadata(self):
"""Return general metadata"""
Expand All @@ -59,11 +59,13 @@ def _extract_post(self, post_id):
extr(">Source&nbsp;Link<", "</td>"), "href='", "'")),
}

dimensions, size, ext = extr("Info</th><td>", ">").split(" // ")
post["width"], _, height = dimensions.partition("x")
dimensions, size, ext = extr("Info</th><td>", "<").split(" // ")
post["size"] = text.parse_bytes(size[:-1])
post["width"], _, height = dimensions.partition("x")
post["height"], _, duration = height.partition(", ")
post["duration"] = text.parse_float(duration[:-1])
post["filename"] = "{} - {}".format(post_id, post["tags"])
post["extension"] = ext

return post

Expand Down Expand Up @@ -112,16 +114,19 @@ def _extract_data(post):

tags, data, date = data.split("\n")
dimensions, size, ext = data.split(" // ")
tags = text.unescape(tags)
width, _, height = dimensions.partition("x")
height, _, duration = height.partition(", ")

return {
"id": pid, "md5": md5, "file_url": url,
"width": width, "height": height,
"duration": text.parse_float(duration[:-1]),
"tags": text.unescape(tags),
"tags": tags,
"size": text.parse_bytes(size[:-1]),
"date": text.parse_datetime(date, "%B %d, %Y; %H:%M"),
"filename" : "{} - {}".format(pid, tags),
"extension": ext,
}

def _extract_data_ex(self, post):
Expand Down
23 changes: 18 additions & 5 deletions test/results/paheal.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,21 @@
"#url" : "https://rule34.paheal.net/post/list/Ayane_Suzuki/1",
"#category": ("shimmie2", "paheal", "tag"),
"#class" : paheal.PahealTagExtractor,
"#pattern" : r"https://[^.]+\.paheal\.net/_images/\w+/\d+%20-%20",
"#count" : ">= 15",
"#pattern" : r"https://[^.]+\.paheal\.net/_images/\w+/\d+%20-%20|https://r34i\.paheal-cdn\.net/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{32}$",
"#count" : range(70, 200),

"date" : "type:datetime",
"extension": r"re:jpg|png",
"filename" : r"re:\d+ - \w+",
"duration" : float,
"height" : int,
"id" : int,
"md5" : r"re:[0-9a-f]{32}",
"search_tags": "Ayane_Suzuki",
"size" : int,
"tags" : str,
"width" : int,

},

{
Expand Down Expand Up @@ -42,12 +55,12 @@
"#url" : "https://rule34.paheal.net/post/view/481609",
"#category": ("shimmie2", "paheal", "post"),
"#class" : paheal.PahealPostExtractor,
"#pattern" : r"https://tulip\.paheal\.net/_images/bbdc1c33410c2cdce7556c7990be26b7/481609%20-.+\.jpg",
"#urls" : "https://r34i.paheal-cdn.net/bb/dc/bbdc1c33410c2cdce7556c7990be26b7",
"#sha1_content": "7b924bcf150b352ac75c9d281d061e174c851a11",

"date" : "dt:2010-06-17 15:40:23",
"extension": "jpg",
"file_url" : r"re:https://tulip.paheal.net/_images/bbdc1c33410c",
"file_url" : "https://r34i.paheal-cdn.net/bb/dc/bbdc1c33410c2cdce7556c7990be26b7",
"filename" : "481609 - Ayumu_Kasuga Azumanga_Daioh inanimate Vuvuzela",
"height" : 660,
"id" : 481609,
Expand Down Expand Up @@ -79,7 +92,7 @@
"#comment" : "video",
"#category": ("shimmie2", "paheal", "post"),
"#class" : paheal.PahealPostExtractor,
"#pattern" : r"https://[\w]+\.paheal\.net/_images/7629fc0ff77e32637dde5bf4f992b2cb/3864982%20-%20animated%20Metal_Gear%20Metal_Gear_Solid_V%20Quiet%20Vg_erotica%20webm\.webm",
"#urls" : "https://r34i.paheal-cdn.net/76/29/7629fc0ff77e32637dde5bf4f992b2cb",

"date" : "dt:2020-09-06 01:59:03",
"duration" : 30.0,
Expand Down

0 comments on commit f954419

Please sign in to comment.