Commit 392f20b

fixing error with "?..." in url
1 parent b414d8a commit 392f20b

1 file changed: +5 -16 lines changed

minet/reddit/scraper.py (+5 -16)
@@ -17,7 +17,7 @@
 
 def add_slash(url: str):
     path = url.split("/")
-    if path[-1] == "?limit=500":
+    if path[-1][0] == "?":
         return url
     elif url[-1] != "/":
         return url + "/"
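The old guard only matched the literal segment "?limit=500", so any other query string fell through and had a slash appended after it. The new check treats any last segment beginning with "?" as a query string and returns the URL untouched. A minimal standalone sketch of the patched behavior, with illustrative test URLs; it uses .startswith("?"), which is equivalent to path[-1][0] == "?" but also sidesteps an IndexError on a URL that already ends in "/" (where the last split segment is empty):

def add_slash(url: str) -> str:
    # Leave the URL alone when its last "/"-separated segment is a
    # query string; otherwise make sure it ends with a slash.
    last = url.split("/")[-1]
    if last.startswith("?"):
        return url
    if not url.endswith("/"):
        return url + "/"
    return url

assert add_slash("https://old.reddit.com/r/python/?limit=500") == "https://old.reddit.com/r/python/?limit=500"
assert add_slash("https://old.reddit.com/r/python") == "https://old.reddit.com/r/python/"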
@@ -29,21 +29,11 @@ def resolve_relative_url(path):
 
 
 def get_old_url(url):
-    domain = get_domain_name(url)
-    path = urlpathsplit(url)
-    old_url = f"https://old.{domain}"
-    for ele in path:
-        old_url = urljoin(old_url, f"{ele}/")
-    return old_url
+    return url.replace("www.reddit", "old.reddit")
 
 
 def get_new_url(url):
-    domain = get_domain_name(url)
-    path = urlpathsplit(url)
-    new_url = f"https://www.{domain}"
-    for ele in path:
-        new_url = urljoin(new_url, f"{ele}/")
-    return new_url
+    return url.replace("old.reddit", "www.reddit")
 
 
 def get_url_from_subreddit(name: str):
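The removed implementations rebuilt the URL segment by segment from urlpathsplit, which yields only path components, so a trailing "?limit=500" (or any "?..." query) was silently dropped; that is presumably the error named in the commit message. The str.replace one-liners keep the rest of the URL, query string included. An illustrative round trip, assuming the usual www./old. reddit hosts:

url = "https://www.reddit.com/r/python/?limit=500"

old = url.replace("www.reddit", "old.reddit")
# 'https://old.reddit.com/r/python/?limit=500' -- query string preserved

back = old.replace("old.reddit", "www.reddit")
assert back == url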
@@ -133,14 +123,13 @@ def data_posts(
     link,
     error,
 ):
-    try_author = post.select_one("a[class*='author']")
-    author = try_author.get_text() if try_author else "[Deleted]"
+    author = post.scrape_one("a[class*='author']")
     if get_domain_name(link) == "reddit.com":
         link = ""
     data = RedditPost(
         title=title,
         url=get_new_url(url),
-        author=author,
+        author=author if author else "[Deleted]",
         author_text=author_text,
         points=points,
         scraped_number_comments=scraped_number_comments,
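post.scrape_one collapses the manual select_one + get_text pair into one call; moving the "[Deleted]" fallback into the RedditPost constructor implies scrape_one yields a falsy value (presumably None) when the author link is absent, e.g. on deleted accounts. A stand-in written with plain BeautifulSoup to show the same pattern (the helper name and sample HTML are hypothetical, not from the repo):

from bs4 import BeautifulSoup

def scrape_author(post) -> str:
    # Mimics the new code path: text of the first matching element,
    # or None when nothing matches, with the fallback applied at the
    # point of use rather than at extraction time.
    node = post.select_one("a[class*='author']")
    author = node.get_text() if node else None
    return author if author else "[Deleted]"

live = BeautifulSoup('<a class="author-link">some_user</a>', "html.parser")
gone = BeautifulSoup('<span>[removed]</span>', "html.parser")
assert scrape_author(live) == "some_user"
assert scrape_author(gone) == "[Deleted]"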
