1
1
import re
2
2
from time import sleep
3
- from ural import get_domain_name , is_url
3
+ from ural import is_url
4
4
from urllib .parse import urljoin
5
5
6
6
from minet .reddit .exceptions import RedditInvalidTargetError
@@ -156,7 +156,7 @@ def data_user_posts(
156
156
link ,
157
157
error ,
158
158
):
159
- sub = post .scrape_one ("a[class*=' subreddit'] " , "href" )
159
+ sub = post .scrape_one ("a. subreddit" , "href" )
160
160
data = RedditUserPost (
161
161
title = title ,
162
162
url = get_new_url (url ),
@@ -179,28 +179,28 @@ def __init__(self):
179
179
180
180
def get_childs_l500(self, url, list_comments, parent_id):
    """Collect comment nodes from the page at *url* into *list_comments*.

    The page is fetched with ``reddit_request`` through ``self.pool_manager``
    and the ``div.comment`` elements directly under the comment area are
    selected.

    * When ``parent_id`` is ``None``, every selected comment is appended as
      ``(None, comment)``.
    * Otherwise, for every selected comment whose ``child`` container has
      text, the direct ``div`` children of that container's first ``div``
      whose class matches a thread marker are appended as
      ``(parent_id, element)``.

    Args:
        url: Address of the page to fetch.
        list_comments: List extended in place with ``(parent, element)``
            tuples.
        parent_id: Value paired with each nested element, or ``None`` to
            collect the selected comments themselves.

    Returns:
        The same ``list_comments`` object that was passed in.
    """
    _, soup, _ = reddit_request(url, self.pool_manager)
    # Class selector written without embedded whitespace: "div. comment"
    # is not a valid CSS selector.
    comments = soup.select("div.commentarea>div>div.comment")

    if parent_id is None:
        list_comments.extend((None, com) for com in comments)
        return list_comments

    # Substring markers looked for in a div's class value. The original
    # "deleted comment" test is subsumed by "comment" (any string containing
    # the former also contains the latter), so it is not listed separately.
    thread_markers = ("comment", "morerecursion", "morechildren")

    def is_thread_node(css_class):
        return bool(css_class) and any(
            marker in css_class for marker in thread_markers
        )

    for com in comments:
        child = com.find("div", class_="child")
        # find() returns None when there is no match; skip instead of
        # failing on the attribute access below.
        if child is None or child.text == "":
            continue
        container = child.find("div")
        if container is None:
            continue
        for ele in container.find_all(
            "div", class_=is_thread_node, recursive=False
        ):
            list_comments.append((parent_id, ele))

    return list_comments
205
205
206
206
def get_comments (self , url : str , all ):
@@ -220,9 +220,9 @@ def get_comments(self, url: str, all):
220
220
error = error ,
221
221
)
222
222
else :
223
- first_comments = soup .select ("div.commentarea>div>div[class*=' comment'] " )
223
+ first_comments = soup .select ("div.commentarea>div>div. comment" )
224
224
if all :
225
- more = soup .select ("div.commentarea>div>div[class*=' morechildren'] " )
225
+ more = soup .select ("div.commentarea>div>div. morechildren" )
226
226
for ele in more :
227
227
a = ele .select_one ("a" )
228
228
onclick = a ["onclick" ]
@@ -241,8 +241,7 @@ def get_comments(self, url: str, all):
241
241
points = None
242
242
else :
243
243
comment_url = com .scrape_one ("a.bylink" , "href" )
244
- try_author = com .select_one ("div.entry.unvoted" )
245
- author = try_author .scrape_one ("a[class^='author']" )
244
+ author = com .scrape_one ("div.entry.unvoted a.author" )
246
245
if not author :
247
246
author = "[Deleted]"
248
247
points = get_points (com )
@@ -286,7 +285,7 @@ def get_comments(self, url: str, all):
286
285
m_comments .append ((current_id , ele ))
287
286
data = RedditComment (
288
287
comment_url = get_new_url (resolve_relative_url (comment_url )),
289
- author = author ,
288
+ author = author if author else "[Deleted]" ,
290
289
id = current_id ,
291
290
parent = parent ,
292
291
points = points ,
@@ -311,12 +310,10 @@ def get_general_post(self, url: str, type: str, add_text: bool, limit: int):
311
310
break
312
311
list_buttons = post .select_one ("ul.flat-list.buttons" )
313
312
if len (list_buttons .scrape ("span.promoted-span" )) == 0 :
314
- title = post .force_select_one ("a[class*='title']" ).get_text ()
315
- post_url = list_buttons .scrape_one (
316
- "a[class^='bylink comments']" , "href"
317
- )
313
+ title = post .force_select_one ("a.title" ).get_text ()
314
+ post_url = list_buttons .scrape_one ("a.bylink.comments" , "href" )
318
315
n_comments_scraped = list_buttons .select_one (
319
- "a[class^=' bylink comments'] "
316
+ "a. bylink. comments"
320
317
).get_text ()
321
318
match = re .match (r"(\d+)\s+comment(s)?" , n_comments_scraped )
322
319
if match :
@@ -325,9 +322,7 @@ def get_general_post(self, url: str, type: str, add_text: bool, limit: int):
325
322
n_comments = 0
326
323
upvote = get_points (post )
327
324
published_date , edited_date = get_dates (post )
328
- link = resolve_relative_url (
329
- post .scrape_one ("a[class*='title']" , "href" )
330
- )
325
+ link = resolve_relative_url (post .scrape_one ("a.title" , "href" ))
331
326
if link == post_url :
332
327
link = ""
333
328
if add_text :
@@ -363,9 +358,7 @@ def get_general_post(self, url: str, type: str, add_text: bool, limit: int):
363
358
link ,
364
359
text_error ,
365
360
)
366
- try_content = text_soup .select_one (
367
- "div#siteTable div[class^='usertext']"
368
- )
361
+ try_content = text_soup .select_one ("div#siteTable div.usertext" )
369
362
if try_content :
370
363
content = try_content .get_text ()
371
364
else :
@@ -431,8 +424,8 @@ def get_user_comments(self, url: str, limit: int):
431
424
break
432
425
post_title = comment .scrape_one ("a.title" )
433
426
post_url = comment .scrape_one ("a.bylink.may-blank" , "href" )
434
- post_author = comment .scrape_one ("p.parent>a[class^=' author'] " )
435
- post_subreddit = comment .scrape_one ("a[class^=' subreddit'] " , "href" )
427
+ post_author = comment .scrape_one ("p.parent>a. author" )
428
+ post_subreddit = comment .scrape_one ("a. subreddit" , "href" )
436
429
points = get_points (comment )
437
430
published_date , edited_date = get_dates (comment )
438
431
text = comment .scrape_one ("div.content div.md" )
0 commit comments