1 file changed: +5 -16 lines changed

@@ -17,7 +17,7 @@
 
 def add_slash(url: str):
     path = url.split("/")
-    if path[-1] == "?limit=500":
+    if path[-1][0] == "?":
         return url
     elif url[-1] != "/":
         return url + "/"
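For context, a minimal sketch (not part of the diff) of how the broadened check behaves; the final return is a hypothetical completion, since the rest of add_slash lies outside this hunk:

def add_slash(url: str):
    path = url.split("/")
    if path[-1][0] == "?":
        return url
    elif url[-1] != "/":
        return url + "/"
    return url  # hypothetical: the remainder of the function is not shown in the hunk

# Any query-only tail is now left untouched, not just "?limit=500".
assert add_slash("https://old.reddit.com/r/python/?after=t3_abc") == "https://old.reddit.com/r/python/?after=t3_abc"
assert add_slash("https://old.reddit.com/r/python") == "https://old.reddit.com/r/python/"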
@@ -29,21 +29,11 @@ def resolve_relative_url(path):
 
 
 def get_old_url(url):
-    domain = get_domain_name(url)
-    path = urlpathsplit(url)
-    old_url = f"https://old.{domain}"
-    for ele in path:
-        old_url = urljoin(old_url, f"{ele}/")
-    return old_url
+    return url.replace("www.reddit", "old.reddit")
 
 
 def get_new_url(url):
-    domain = get_domain_name(url)
-    path = urlpathsplit(url)
-    new_url = f"https://www.{domain}"
-    for ele in path:
-        new_url = urljoin(new_url, f"{ele}/")
-    return new_url
+    return url.replace("old.reddit", "www.reddit")
 
 
 def get_url_from_subreddit(name: str):
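A minimal usage sketch of the simplified helpers (outside the diff; the post URL is illustrative only): plain substring replacement swaps the subdomain in both directions instead of rebuilding the URL segment by segment.

def get_old_url(url):
    return url.replace("www.reddit", "old.reddit")

def get_new_url(url):
    return url.replace("old.reddit", "www.reddit")

url = "https://www.reddit.com/r/python/comments/abc123/some_post/"
assert get_old_url(url) == "https://old.reddit.com/r/python/comments/abc123/some_post/"
assert get_new_url(get_old_url(url)) == url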
@@ -133,14 +123,13 @@ def data_posts(
     link,
     error,
 ):
-    try_author = post.select_one("a[class*='author']")
-    author = try_author.get_text() if try_author else "[Deleted]"
+    author = post.scrape_one("a[class*='author']")
     if get_domain_name(link) == "reddit.com":
         link = ""
     data = RedditPost(
         title=title,
         url=get_new_url(url),
-        author=author,
+        author=author if author else "[Deleted]",
         author_text=author_text,
         points=points,
         scraped_number_comments=scraped_number_comments,
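A small sketch of the author fallback using BeautifulSoup's select_one (the API the removed lines used); it assumes, as the new code does with scrape_one, that a missing author element yields None, e.g. for deleted accounts. The sample HTML is illustrative only.

from bs4 import BeautifulSoup

html = '<div class="thing"><a class="author may-blank">some_user</a></div>'
post = BeautifulSoup(html, "html.parser")

# None when the selector matches nothing, the username otherwise.
node = post.select_one("a[class*='author']")
author = node.get_text() if node else None

# The diff moves the "[Deleted]" fallback to the RedditPost construction.
print(author if author else "[Deleted]")  # -> some_user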