-
Notifications
You must be signed in to change notification settings - Fork 0
/
convert_metasys_data.py
108 lines (81 loc) · 3.96 KB
/
convert_metasys_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import sqlite3
import pandas as pd
import numpy as np
# metasys data format:
# each row is a timestamp and each column is a sensor (each sensor either measures temperature or co2 for one room)
# goal data format:
# each row is a timestamp x room number, columns are temp and co2 values for that timestamp and room
# 1. parse room numbers from column headers & maybe match room numbers to each other --> it's always RM something except if it's the caf, so we'd remove the "RM"
# 2. we should be able to tell whether the column in question is temperature or co2 -- looks like it's marked with a -T or a CO2 (no dash)
# 3. move the room number factor from a row to a column/reorganize the data into the goal format
# basically, reading column headers to group data
import sqlalchemy
def read_room(x):
if "RM" in x:
rm = x.split(' ')[0]
return rm[2:]
if x == "Outside Air CO2" or x == "Outside Air RTU1":
return "Outside Air"
else:
return x
def is_co2_sensor(x):
# if "CO2" not in x:
# print("hello")
# print(x)
return "CO2" in x or "-Q" in x # not sure this works 100% of the time
df = pd.read_csv("2021-Q1-IAQ-AHS-Temp-CO2.csv", error_bad_lines=False, low_memory=False) # low_memory=False added b/c of potential data type issues
df = df.drop(df.tail(2).index) # removes informational lines at the bottom of the file
df.to_csv("tester.csv")
rooms = pd.Series(df.T.index)[1:].reset_index(drop=True)
room_nums = rooms.apply(read_room) # for some reason this adds an extra row at the start so I'm just getting rid of it
# print(room_nums[100:110])
# print("Temp Sensors: ")
is_co2 = rooms.apply(is_co2_sensor)
# this goes into the multiindex now
rooms_plus_sensors = pd.concat([room_nums, is_co2], axis=1)
print("rooms plus sensors")
print(rooms_plus_sensors)
#rooms_plus_sensors.to_csv("tester.csv")
# save a transposed copy of df so that we can index by rooms
# print("End of temp sensors")
# print(df.columns)
transposed = df.set_index("Unnamed: 0").T
transposed = transposed.reset_index()
transposed.insert(1, "Room Number", room_nums, True)
transposed.insert(2, "CO2 Sensor?", is_co2, True)
# transposed.to_csv("new_tester.csv")
# print("CO2" in "Cafe UV01 ZN08 Q CO2")
# my_fake_df = pd.DataFrame()
# my_fake_df.insert(0, "Room Number", room_nums, True)
# my_fake_df.to_csv("new_tester.csv")
transposed = transposed.sort_values("CO2 Sensor?")
transposed = transposed.sort_values("Room Number")
transposed = transposed.reset_index().drop("index", axis=1).drop("level_0", axis=1)
# Final stage of modifying data
pivot = transposed.melt(id_vars=["Room Number", "CO2 Sensor?"], var_name="Timestamp", value_name="Value")
pivot = pivot.set_index(["Room Number", "Timestamp"])
pivot = pd.pivot_table(pivot, index=["Room Number", "Timestamp"], values="Value", columns=["CO2 Sensor?"], aggfunc='first')
pivot.columns = ["Temperature", "CO2"]
temp_units = ["deg F"]*len(pivot.axes[0])
co2_units = ["ppm"]*len(pivot.axes[0])
pivot["Temp Units"] = temp_units
pivot["CO2 Units"] = co2_units
pivot = pivot.reset_index()
pivot = pivot.rename(columns={"Room Number": "Room #", "Temp Units": "Temp. Units"})
pivot = pivot.set_index("Room #")
def custom_conv(x):
if type(x) != float or not np.isnan(x):
return int(round(float(x)))
return x
pivot["Temperature"] = pivot["Temperature"].apply(custom_conv)
pivot["CO2"] = pivot["CO2"].apply(custom_conv) # FOUND IT !!
SERVER_PATH = '' # '/media/ea/Data/Students/jade/buildingEnergyApi/'
PATH = 'my_file'
engine = sqlalchemy.create_engine('sqlite:///' + SERVER_PATH + PATH)
conn = sqlite3.connect(SERVER_PATH + PATH)
# new_df = pd.read_sql("MetasysLog", engine)
pivot.to_csv(SERVER_PATH + "tester.csv")
pivot.to_sql("MetasysLog", conn, if_exists='append') # actual permanent database
pivot.to_sql("TempAndCO2LogDaily", conn, if_exists='append') # copy used for tasks 3 and 4 in this branch, must be cleared out every week
test2 = pd.read_sql("TempAndCO2Log", engine)
test2.to_csv(SERVER_PATH + "tester.csv")