Skip to content

Commit

Permalink
[cohost] add 'tag' extractor (#4483)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed Sep 13, 2024
1 parent 0d67d54 commit 7abf6e4
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 1 deletion.
2 changes: 1 addition & 1 deletion docs/supportedsites.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ Consider all listed sites to potentially be NSFW.
<tr>
<td>cohost!</td>
<td>https://cohost.org/</td>
<td>Posts, User Profiles</td>
<td>Posts, Tag Searches, User Profiles</td>
<td></td>
</tr>
<tr>
Expand Down
33 changes: 33 additions & 0 deletions gallery_dl/extractor/cohost.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,36 @@ def posts(self):
post["comments"] = ()

return (post,)


class CohostTagExtractor(CohostExtractor):
    """Extractor for a user's posts filed under a specific tag"""
    subcategory = "tag"
    pattern = BASE_PATTERN + r"/([^/?#]+)/tagged/([^/?#]+)(?:\?([^#]+))?"
    example = "https://cohost.org/USER/tagged/TAG"

    def posts(self):
        """Yield every post of the tag feed, page after page

        The URL's own query string (if any) seeds the request parameters;
        'refTimestamp' and 'skipPosts' are then overwritten from each
        page's pagination info until no further forward page is reported.
        """
        user, tag, query = self.groups
        url = "{}/{}/tagged/{}".format(self.root, user, tag)
        params = text.parse_query(query)

        # the 'rc' user exposes its feed under a different state key
        if user == "rc":
            feed_key = "tagged-post-feed"
        else:
            feed_key = "project-tagged-post-feed"

        while True:
            response = self.request(url, params=params)
            # NOTE(review): presumably text.extr() returns the text between
            # the two markers, i.e. the embedded JSON loader state - confirm
            state = text.extr(
                response.text, 'id="__COHOST_LOADER_STATE__">', '</script>')
            data = util.json_loads(state)

            try:
                feed = data[feed_key]
            except KeyError:
                # expected key is absent: use whichever entry popitem() gives
                _, feed = data.popitem()

            yield from feed["posts"]

            paging = feed["paginationMode"]
            if paging.get("morePagesForward"):
                # advance the skip offset by one page stride
                next_skip = paging["currentSkip"] + paging["idealPageStride"]
                params["refTimestamp"] = paging["refTimestamp"]
                params["skipPosts"] = next_skip
            else:
                return

0 comments on commit 7abf6e4

Please sign in to comment.