#!/usr/bin/env python
"""Export a Reddit user's posts to a '~#~'-delimited CSV file, or print them to stdout."""
from datetime import datetime
import html
import random
import sys
import time

import common

current_page = 0
exported_count = 0
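
# NOTE: assumed interface of the local `common` helper module, inferred from how it is
# used below; names, signatures, and return shapes are assumptions, not guarantees:
#   common.setup_arguments()           -> parsed args exposing .dump, .sub_filter, .page_limit
#   common.get_usernames(args)         -> list of usernames taken from -u or -f
#   common.get_data(url, user, page)   -> parsed JSON response for one page of the listing
#   common.URL_ALL                     -> endpoint/URL template for fetching a user's posts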


def parse_data(data):
    """Extract post rows from one page of a Reddit listing, applying the optional subreddit filter."""
    posts = []
    for post in data['children']:
        # 't3' is the Reddit type prefix for submissions; skip anything else
        if post['kind'] != 't3':
            continue
        post = post['data']
        if args.sub_filter is not None and post['subreddit'] not in args.sub_filter.split(','):
            continue
        # post['name'] is '<type>_<id>'; remove the content type and keep the id
        post_id = post['name'].split('_')[1]
        date_created = datetime.fromtimestamp(post['created']).strftime('%Y-%m-%d %H:%M:%S')
        # escape newlines so each post body stays on a single output line
        body = html.unescape(post['selftext'].replace('\n', '\\n'))
        posts.append([post_id, post['author'], post['title'], post['subreddit'],
                      str(post['link_flair_text']), date_created, post['url'], body])
    return posts


def run(username, page):
    global current_page
    global exported_count
    response = common.get_data(common.URL_ALL, username, page)
    # exit if no data in page
    data = response['data'] if 'data' in response else None
    if data is None:
        return
    posts = parse_data(data)
    for post in posts:
        if not args.dump:
            exported_count += 1
            file_output.write('{}\n'.format('~#~'.join(post)))
            continue
        print('~#~'.join(post))
    current_page += 1
    next_page = data['after']
    # didn't reach the first post or the page limit (if set)
    if next_page is not None and (args.page_limit is None or current_page < args.page_limit):
        # wait for a random number of seconds to avoid getting blocked
        time.sleep(random.randint(1, 5))
        # recurse for the next page
        run(username, next_page)
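

# Example invocation (only the -u/-f options are confirmed by the message below;
# the dump flag spelling is an assumption based on the parsed args.dump attribute):
#   python posts.py -u some_user           # write some_user_posts_<timestamp>.csv
#   python posts.py -u some_user --dump    # assumed flag: print rows to stdout instead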


if __name__ == '__main__':
    args = common.setup_arguments()
    csv_columns = ['id', 'username', 'title', 'subreddit', 'flair', 'date_created', 'url', 'body']
    # get a usernames array, either from the parameter or the file
    usernames = common.get_usernames(args)
    if not usernames:
        print('Define one or more usernames using -u or -f.\nCheck the help dialog for more options.')
        sys.exit(1)
    if args.dump:
        # in dump mode, print the header once and write all rows to stdout
        print('~#~'.join(csv_columns))
    for username in usernames:
        current_page = 0
        exported_count = 0
        if not args.dump:
            filename = '{}_posts_{}.csv'.format(username, datetime.today().strftime('%Y%m%d_%H%M%S'))
            file_output = open(filename, 'a')
            file_output.write('~#~'.join(csv_columns) + '\n')
            run(username, None)
            file_output.close()
            print('{}: {} posts exported.'.format(filename, exported_count))
        else:
            run(username, None)
    sys.exit(0)