This repository has been archived by the owner on Oct 7, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpredict_parser.py
69 lines (62 loc) · 2.51 KB
/
predict_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import pandas as pd
import numpy
import copy
def processData(raw_data, candidates):
    """Load a price CSV and run it through the full preparation pipeline.

    Args:
        raw_data: Anything ``pandas.read_csv`` accepts as its first argument.
        candidates: Contract names to keep; forwarded unchanged to every
            stage that needs them.

    Returns:
        The table produced by ``createDifferential`` after the filtering,
        date-numbering and restructuring stages have run in that order.
    """
    # These three price columns are discarded before any further processing.
    unused_columns = ["OpenSharePrice", "HighSharePrice", "LowSharePrice"]
    frame = pd.read_csv(raw_data).drop(columns=unused_columns)
    frame = convertDates(removeIrrelevants(frame, candidates))
    return createDifferential(restructure(frame, candidates), candidates)
def removeIrrelevants(data, candidates):
    """Drop, in place, every row whose ``ContractName`` is not a candidate.

    Args:
        data: DataFrame with a ``ContractName`` column. It is modified in
            place.
        candidates: Iterable of contract names to keep.

    Returns:
        The same ``data`` object, now holding only the matching rows. The
        surviving rows keep their original index labels.
    """
    # Compute the mask in one vectorised pass and drop once, instead of
    # dropping row by row from the frame that is being iterated over.
    unwanted = ~data['ContractName'].isin(list(candidates))
    data.drop(index=data.index[unwanted.to_numpy()], inplace=True)
    return data
def convertDates(data):
    """Replace the ``Date`` column with consecutive run numbers, in place.

    Rows are scanned in order. The first row gets 0 and the number increases
    by one every time the ``Date`` value differs from the previous row's, so
    a value that reappears after a different one gets a new number.

    Args:
        data: DataFrame with a ``Date`` column. It is modified in place.

    Returns:
        The same ``data`` object with ``Date`` holding the run numbers.
    """
    run_numbers = []
    numbering = -1
    # Same sentinel as before: an initial "" date would stay in run -1.
    previous_date = ""
    for current_date in data['Date']:
        if current_date != previous_date:
            previous_date = current_date
            numbering += 1
        run_numbers.append(numbering)
    # Assign the whole column at once rather than writing cell by cell into
    # the frame while iterating over it.
    data['Date'] = run_numbers
    return data
def restructure(data, candidates):
    """Build a one-hot table with one output row per input row.

    Every output row carries the input row's ``Date``, one indicator column
    per candidate (1 in the column named by the row's ``ContractName``, 0 in
    the others), ``Price`` (taken from ``CloseSharePrice``) and
    ``TradeVolume``.

    Args:
        data: DataFrame with ``Date``, ``ContractName``, ``CloseSharePrice``
            and ``TradeVolume`` columns. It is not modified.
        candidates: Sequence of contract names; it is not modified.

    Returns:
        A new DataFrame whose columns are ``Date``, the candidates in order,
        ``Price`` and ``TradeVolume``, with rows in input order and a fresh
        0..n-1 index.
    """
    column_names = ['Date'] + list(candidates) + ['Price', 'TradeVolume']
    template = dict.fromkeys(column_names, 0)

    # Collect plain dicts and build the frame once: DataFrame.append no
    # longer exists in pandas 2.x, and growing a frame row by row copies the
    # whole table on every iteration.
    rows = []
    fields = zip(
        data['Date'],
        data['ContractName'],
        data['CloseSharePrice'],
        data['TradeVolume'],
    )
    for date, name, price, volume in fields:
        row = dict(template)
        row['Date'] = date
        row[name] = 1
        row['Price'] = price
        row['TradeVolume'] = volume
        rows.append(row)

    if not rows:
        # Keep the expected columns even when there is nothing to convert.
        return pd.DataFrame(columns=column_names)
    return pd.DataFrame(rows)
def createDifferential(data, candidates):
    """Add a ``Predictions Diff`` column and strip ``$`` from ``Price``.

    For each row, the difference is ``(price - later_price) / price``, where
    ``later_price`` is the price ``len(candidates) * 5`` rows further down.
    Rows without such a later row get NaN.

    Args:
        data: DataFrame with a string ``Price`` column. It is modified in
            place.
        candidates: Sequence of contract names; only its length is used.

    Returns:
        The same ``data`` object with ``Predictions Diff`` set and ``Price``
        holding the strings with every ``$`` removed.
    """
    # NOTE(review): presumably five dates ahead, assuming each date has
    # exactly one row per candidate -- confirm against the input data.
    push_length = len(candidates) * 5

    # regex=False makes the '$' literal on every pandas version; as a regex
    # it would be the end-of-string anchor.
    stripped = data["Price"].str.replace('$', '', regex=False)
    current_prices = stripped.astype(float)
    late_prices = current_prices.shift(periods=-push_length)

    data["Predictions Diff"] = (current_prices - late_prices) / current_prices
    data["Price"] = stripped
    return data