-
Notifications
You must be signed in to change notification settings - Fork 0
/
rerun_predictions.py
executable file
·44 lines (36 loc) · 1.38 KB
/
rerun_predictions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/bin/env python
"""Re-run all predictions."""
import pandas as pd
import predict
def main():
    """Re-run the prediction for every past day and pickle the results.

    For each distinct value in the Hacker News index (ascending, stopping
    when today's date is reached) a model is built with
    ``predict.make_model(up_to=<day>)`` and asked to predict from that day's
    rows.  Each prediction is stored under the following calendar day.

    The collected predictions are then reduced to the dates that also appear
    in the stocks index, plus the most recent prediction, de-duplicated
    (keeping the last entry per date), printed newest-first and written to
    ``predict.PREDICTIONS`` with ``DataFrame.to_pickle``.

    Days for which ``predict.make_model`` raises ``ValueError`` are reported
    and skipped.  If no prediction could be produced at all, a message is
    printed and the function returns without writing anything, instead of
    failing inside ``pd.concat`` with "No objects to concatenate".
    """
    hackernews_df, stocks_df = predict.load_data()
    # Only the 'regular_market_change_percent' selection is needed: its
    # column labels shape each prediction frame and its index is used to
    # filter the final result.
    stocks_df = stocks_df['regular_market_change_percent']
    today_str = pd.Timestamp.now().strftime('%Y-%m-%d')

    prediction_dfs = []
    for day in sorted(hackernews_df.index.unique()):
        day_str = day.strftime('%Y-%m-%d')
        # Days are visited in ascending order, so reaching today ends the
        # historical re-run.
        if day_str == today_str:
            break
        try:
            multi_output_clf = predict.make_model(up_to=day_str)
        except ValueError as exc:
            print(f'Value Error: {exc}')
            continue
        prediction = multi_output_clf.predict(
            predict.vectorize(hackernews_df.loc[day_str:day_str]))
        print(f'Prediction: {prediction}')
        # A prediction made from one day's rows is filed under the next day.
        next_day = day + pd.Timedelta(1, unit='D')
        prediction_dfs.append(pd.DataFrame(
            columns=stocks_df.columns, data=prediction, index=[next_day]))

    # pd.concat raises ValueError on an empty list; bail out with a clear
    # message and leave any existing predictions file untouched.
    if not prediction_dfs:
        print('No predictions were generated; nothing to save.')
        return

    prediction_df = pd.concat(prediction_dfs)
    # Keep predictions whose date exists in the stocks index, and always keep
    # the latest prediction even when its date is not in that index.
    prediction_df = pd.concat([
        prediction_df.loc[prediction_df.index.isin(stocks_df.index)],
        prediction_df.iloc[-1:]]).sort_index()
    # Remove any duplicate dates, keeping last.
    prediction_df = prediction_df[~prediction_df.index.duplicated(
        keep='last')]
    print(prediction_df[::-1])
    prediction_df.to_pickle(predict.PREDICTIONS)
# Run the full re-prediction only when executed as a script, not on import.
if __name__ == '__main__':
    main()