-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathrun.py
179 lines (149 loc) · 7.19 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
"""
This is the main file to run to generate your output.
"""
import logging
import os
import random
from dotenv import load_dotenv
import modules.data_bin_convert
import modules.genre
import modules.html
import modules.justwatch
import modules.runtime
# Get user variables from .env
load_dotenv(dotenv_path='./.env')


def _require_env(name):
    """Return the value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is not set, so the failure names the
            missing setting instead of surfacing as a TypeError/AttributeError
            on None further down.
    """
    value = os.environ.get(name)
    if value is None:
        raise RuntimeError(f'Required setting {name} is not set (expected in ./.env)')
    return value


def _env_flag(name):
    """Read *name* as a boolean: only the text 'true' (any letter case) is True."""
    return _require_env(name).lower() == 'true'


when_to_start = int(_require_env('WHEN_TO_START'))
hours_to_print = int(_require_env('HOURS_TO_PRINT'))
# NOTE(review): if either of these two is unset, str(None) yields the literal text
# 'None'. Left as-is to stay backward-compatible; consider making them required too.
OUTFILE = str(os.environ.get('OUTFILE'))
STYLESHEET_PATH = str(os.environ.get('STYLESHEET_PATH'))
use_keyword_lists = _env_flag('USE_KEYWORD_LIST')
dev_mode = _env_flag('DEV_MODE')
# Logging
# Both modes share the same file, encoding, file mode and format; DEV_MODE only
# lowers the threshold from INFO to DEBUG.
logging.basicConfig(
    filename='./my_data/run.log',
    encoding='utf-8',
    level=logging.DEBUG if dev_mode else logging.INFO,
    filemode='w',
    format="%(asctime)s %(levelname)s %(message)s",
)
# Scrape your data from JustWatch and store in .bin files for later
logging.info('Scraping data from JustWatch')

# Manually remove shows
# modules.justwatch.remove_manually_by_url('https://www.justwatch.com/us/tv-show/great-news')
# modules.justwatch.remove_manually_by_percentage(1)

# Clean up shows already seen
modules.justwatch.remove_already_seen(
    'https://www.justwatch.com/us/lists/my-lists?inner_tab=seenlist')

# Lists to scrape, processed in this order
for list_url in (
    # TV in progress
    'https://www.justwatch.com/us/lists/tv-show-tracking?inner_tab=continue_watching',
    # TV not started
    'https://www.justwatch.com/us/lists/tv-show-tracking?inner_tab=havent_started',
    # Movies
    'https://www.justwatch.com/us/lists/my-lists?content_type=movie&sort_by=popular_30_day',
):
    modules.justwatch.scrape_justwatch(list_url)

# Read all genres from scraped data and store in .bin file for later
logging.info('Reading genres from scraped data')
modules.genre.get_genres_from_scraped_lists()
# Restore scraped data from stored .bin files and combine into a full list of all shows
# data_list_movies = modules.data_bin_convert.bin_to_data('./my_data/saved_data_movies.bin')
# data_list_tv = modules.data_bin_convert.bin_to_data('./my_data/saved_data_tv.bin')
# balance_factor = 4 # Issue #116
# logging.info('Combining movie and tv data into balanced list')
# logging.info(f'{balance_factor=}')
# data_list_everything = modules.justwatch.balance_movie_and_tv_lists(data_list_movies,
#                                                                     data_list_tv, balance_factor)
data_list_everything = modules.data_bin_convert.bin_to_data(
    './my_data/saved_data.bin')

# Restore genres from stored .bin file
all_genres = modules.data_bin_convert.bin_to_data(
    './my_data/saved_data_genres.bin')

# Randomize the list (shuffled in place)
logging.info('Shuffling data')
random.shuffle(data_list_everything)

# Sort by genre frequency so you don't just end up with super-popular Action/Adventure movies.
# Ascending order: the entries that occur least often in all_genres come first.
all_genres = sorted(all_genres, key=lambda genre: all_genres.count(genre))
# Look through data for keyword matches
# This can be used for special lists (e.g. movies you only watch during the holidays),
# or for things you might want to filter out (e.g. trigger warnings)
logging.info('Splitting by keyword')
if not use_keyword_lists:
    # Keyword rows are switched off: the whole list moves on to the genre split
    remainder = data_list_everything
else:
    # Each split returns (matches, everything else); the second split works on
    # what the first one left behind.
    logging.debug('triggers')
    genre_triggers, remainder = modules.genre.split_by_keyword(
        data_list_everything, modules.genre.trigger_keywords())
    logging.debug('Christmas')
    genre_christmas, remainder = modules.genre.split_by_keyword(
        remainder, modules.genre.christmas_keywords())
# Loop through all_genres to separate data by genre
# Starts with data left over after pulling out keywords (if this is used), and splits into
# genre groupings. remainder gets smaller each pass and the loop stops as soon as it is empty.
# genre_lists[n] ends up holding the shows split out for all_genres[n].
logging.info('Splitting by standard genre')
genre_lists = []
for genre in all_genres:
    if len(remainder) == 0:
        break
    list_with_genre, remainder = modules.genre.split_by_genre(remainder, genre)
    genre_lists.append(list_with_genre)

# Iterating all_genres directly (instead of indexing with an unbounded counter) means
# running out of genres can no longer raise IndexError; report the leftovers instead.
if len(remainder) > 0:
    logging.warning('%d item(s) matched none of the known genres and are in no genre list',
                    len(remainder))
# Begin writing HTML output
logging.info('Writing HTML output')
logging.info('-------------------')
# The with-block closes (and flushes) the output file even if one of the generator
# calls below raises part-way through. Mode 'w' truncates any previous output; the
# handle is never read back, so update mode ('+') is not needed.
with open(OUTFILE, 'w', encoding="utf-8") as html_handle:
    html_handle.write(modules.html.generate_html_start(STYLESHEET_PATH))

    # Begin writing the main table for your personal TV guide
    logging.info('Writing main table start')
    html_handle.write(modules.html.generate_table_start())
    html_handle.write(modules.html.generate_table_header_row(when_to_start, hours_to_print))

    # Write table rows for keyword lists (if using)
    logging.info('Writing keyword rows')
    if use_keyword_lists:
        html_handle.write(modules.html.generate_table_genre_row(
            genre_triggers, 'Trigger Warning', hours_to_print))
        html_handle.write(modules.html.generate_table_genre_row(
            genre_christmas, 'Christmas', hours_to_print))

    # Write table rows for previously-separated genre lists, sorted alphabetically
    logging.info('Generating table HTML for standard genre rows')
    genre_lists_str = []
    # genre_lists[n] was split out using all_genres[n], so walk the two in step
    for genre_list, genre in zip(genre_lists, all_genres):
        logging.debug(genre)
        if genre_list:  # genres that ended up with no shows get no row
            genre_lists_str.append(
                str(modules.html.generate_table_genre_row(genre_list, genre, hours_to_print)))
    logging.info('Sorting rows')
    # The rendered row strings themselves are sorted.
    # NOTE(review): presumably each row's markup leads with the genre name, which is
    # what makes this alphabetical by genre - confirm in generate_table_genre_row.
    genre_lists_str_sorted = sorted(genre_lists_str)
    logging.info('Writing standard genre rows')
    html_handle.writelines(genre_lists_str_sorted)
    html_handle.write(modules.html.generate_table_end())
    # End of writing the main table for your personal TV guide

    # Featured Film
    logging.info('Writing Featured Film table')
    html_handle.write(modules.html.generate_featured_film_table(
        modules.justwatch.get_random_show(data_list_everything)))

    # Write table for time left in TV series
    logging.info('Writing time left in TV series table')
    time_info_list = modules.runtime.time_left_in_tv_series_report(data_list_everything)
    html_handle.write(
        '<p>\n<table>\n<th>Title</th><th>Minutes Left</th>' +
        '<th>Minutes Per Episode</th><th>Next Episode</th>\n')
    for show_with_time_info in time_info_list:
        # Fields used from each report entry, in column order:
        # [0] title, [1] minutes left, [4] minutes per episode, [5] next episode
        time_info_title = show_with_time_info[0]
        time_info_min_left = show_with_time_info[1]
        time_info_ep_runtime = show_with_time_info[4]
        time_info_next_ep = show_with_time_info[5]
        # Flag every show whose next episode is anything other than 'S1 E1'
        if time_info_next_ep != 'S1 E1':
            time_info_next_ep += ' ▶️'
        html_handle.write(
            f'<tr><td>{time_info_title}</td><td>{time_info_min_left}</td>'
            f'<td>{time_info_ep_runtime}</td><td>{time_info_next_ep}</td></tr>')
    html_handle.write('</table>\n</p>\n')

    # Movies sorted by runtime
    logging.info('Writing Movies sorted by runtime table')
    html_handle.write(modules.justwatch.generate_movies_by_runtime_table(data_list_everything))

    # Finish writing HTML output
    html_handle.write(modules.html.generate_html_end())
logging.info('-------------------')