-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspecial_data_reader.py
57 lines (40 loc) · 1.36 KB
/
special_data_reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
""" Data Reading utilities """
import os
import pandas as pd
def read_data(file_path):
    """
    Load a data file as a pandas dataframe

    The file named exactly 'wishes.csv' goes through the dedicated wishes
    parser; every other path is handed to pandas' CSV reader unchanged.

    :param file_path: str
        path of the file to load
    :return: pd.DataFrame
    """
    _, file_name = os.path.split(file_path)
    # Only the selected reader is looked up and called.
    reader = _read_wishes_data if file_name == 'wishes.csv' else pd.read_csv
    return reader(file_path)
def _parse_wishes_row(line):
    """
    Split one data line of the wishes file into its fields

    Fields are separated by commas. A field starting with '{' opens a list
    that runs (across further commas) up to the first field ending with '}';
    the braces themselves are dropped.

    :param line: str
        a single line, without its trailing newline
    :return: list
        plain fields as str, brace-delimited fields as list of str
    :raises ValueError: if a '{' list is not closed before the line ends
    """
    row = []
    items = []
    in_list = False
    for field in line.split(','):
        # startswith (rather than field[0]) keeps empty fields from raising
        if not in_list and not field.startswith('{'):
            row.append(field)
            continue
        opens = not in_list
        closes = field.endswith('}')
        item = field[1:] if opens else field
        if closes:
            item = item[:-1]
        # '{}' denotes an empty list, not a list holding one empty string
        if item or not (opens and closes):
            items.append(item)
        in_list = not closes
        if closes:
            row.append(items)
            items = []
    if in_list:
        raise ValueError("unterminated '{' list")
    return row


def _read_wishes_data(file_path):
    """
    Read the wishes file, whose rows embed comma-separated lists in braces

    The first line is the header; double quotes are removed from it. Every
    following non-blank line is one record. A value written as '{a,b,c}'
    becomes the list ['a', 'b', 'c'] in the resulting dataframe; all other
    values are kept as strings.

    :param file_path: str
        string filename
    :return: pd.DataFrame
    :raises pd.errors.EmptyDataError: if the file has no header line
    :raises ValueError: if a row has an unterminated list or its number of
        fields differs from the header's
    """
    # utf-8 matches the default encoding pd.read_csv applies to other files
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    if not lines:
        raise pd.errors.EmptyDataError('No columns to parse from file')

    columns = lines[0].replace('"', '').replace('\n', '').split(',')
    res = []
    # Data starts on the second line of the file, hence start=2.
    for line_no, line in enumerate(lines[1:], start=2):
        line = line.rstrip('\n')
        if not line:
            # Blank lines carry no record.
            continue
        try:
            parsed_line = _parse_wishes_row(line)
        except ValueError as err:
            raise ValueError(
                '{}, line {}: {}'.format(file_path, line_no, err))
        if len(parsed_line) != len(columns):
            # Fail loudly instead of dropping into an interactive debugger.
            raise ValueError(
                '{}, line {}: expected {} fields, got {}'.format(
                    file_path, line_no, len(columns), len(parsed_line)))
        res.append(parsed_line)
    return pd.DataFrame(res, columns=columns)