diff --git a/.gitignore b/.gitignore index ba74660..8f67b15 100644 --- a/.gitignore +++ b/.gitignore @@ -55,3 +55,6 @@ docs/_build/ # PyBuilder target/ + +# IPython +.ipynb_checkpoints diff --git a/README.md b/README.md index 3c82e1c..cb2e2ad 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ $ pip install git+https://github.com/platypusllc/analytics.git * **API documentation** can be found on [ReadTheDocs](http://platypus-analytics.readthedocs.org/en/latest/). * **Usage examples** of this library can be found in the [examples](examples) directory. +* **IPython/Jupyter notebooks** using this library can be found in the [notebooks](notebooks) directory. [1]: http://docs.python-guide.org/en/latest/dev/virtualenvs/ [2]: https://www.continuum.io/documentation diff --git a/examples/EC_zero_trim.py b/examples/EC_zero_trim.py new file mode 100644 index 0000000..2cda1a8 --- /dev/null +++ b/examples/EC_zero_trim.py @@ -0,0 +1,197 @@ +import collections +import platypus.io.logs +import platypus.util.conversions +import numpy as np +import datetime +import json +import six +import re +import pandas +import glob + + +# FILE TO TEST JAR DATA EXTRACTION +PATH = "/home/jason/Documents/INTCATCH/phone logs/Gardaland outlet/2017-10-3/" +FILE = "platypus_20171003_050016" +EXT = ".txt" + +# FILES TO TEST MERGING +PATH2 = "/home/jason/Documents/INTCATCH/phone logs/Gardaland outlet/2017-10-4/" +FILE1 = "platypus_20171004_040203" +FILE2 = "platypus_20171004_054619" + +""" +def trim_EC(): + global PATH, FILE + print "\nLoading all the data in " + PATH + FILE + "\n" + data = platypus.io.logs.load(PATH + FILE) + if "ES2" in data: + print "ES2 sensor is present. Trimming all data within EC < 100 time windows\n" + # find all time windows where EC is exactly 0 + ES2_data = data["ES2"] + values = ES2_data["ec"].values + ec_eq_zero_indices = np.where(values < 100)[0] + windows = list() + windows.append([ec_eq_zero_indices[0]]) + left = ec_eq_zero_indices[0] + for ii in range(1, ec_eq_zero_indices.shape[0]): + i = ec_eq_zero_indices[ii] + if i - left > 5: + # there has been a jump in index, a new time window has started + windows[-1].append(left) + windows.append([i]) + left = i + windows[-1].append(ec_eq_zero_indices[-1]) + # print ec_eq_zero_indices + # print windows + for window in windows: + time_window = [ES2_data["ec"].index.values[window[0]], ES2_data["ec"].index.values[window[1]]] + for k in data.keys(): + data[k] = data[k].loc[np.logical_or(data[k].index < time_window[0], data[k].index > time_window[1])] + + else: + print "No ES2 sensor present. No trimming will be performed." 
+ + # do stuff with data +""" + + +def merge_files(filename_list): + """ + + :param: filename_list: list of full path filename strings + :return: One result will all the dataframes merged + :rtype: {str: pandas.DataFrame} + """ + logfile_result_list = [platypus.io.logs.load(filename) for filename in filename_list] + if len(logfile_result_list) == 1: + return logfile_result_list[0] + all_data_types = set() + for i in range(1, len(logfile_result_list)): + all_data_types = all_data_types.union(set(logfile_result_list[i].keys())) + print all_data_types + + # merged_dataframe = pandas.DataFrame.merge(merged_dataframe[data_type], dataframe_list[i][data_type], how='outer') + merged_dataframe_dict = dict() + + for data_type in all_data_types: + for i in range(len(logfile_result_list)): + if data_type in logfile_result_list[i]: + first_log_index = i + break + merged_dataframe_dict[data_type] = logfile_result_list[first_log_index][data_type] + for i in range(first_log_index + 1, len(logfile_result_list)): + if data_type in logfile_result_list[i]: + merged_dataframe_dict[data_type] = merged_dataframe_dict[data_type].combine_first(logfile_result_list[i][data_type]).dropna(how='any') + return merged_dataframe_dict + + +def trim_using_EC(dataframe, threshold=100): + """ + Trims any data when EC < 100 + :return: trimmed dataframe + """ + if "ES2" in dataframe: + print "ES2 sensor is present. Trimming all data within EC < {:.0f} time windows\n".format(threshold) + # find all time windows where EC is exactly 0 + ES2_data = dataframe["ES2"] + values = ES2_data["ec"].values + ec_eq_zero_indices = np.where(values < threshold)[0] + windows = list() + windows.append([ec_eq_zero_indices[0]]) + left = ec_eq_zero_indices[0] + for ii in range(1, ec_eq_zero_indices.shape[0]): + i = ec_eq_zero_indices[ii] + if i - left > 5: + # there has been a jump in index, a new time window has started + windows[-1].append(left) + windows.append([i]) + left = i + windows[-1].append(ec_eq_zero_indices[-1]) + # print ec_eq_zero_indices + # print windows + for window in windows: + time_window = [ES2_data["ec"].index.values[window[0]], ES2_data["ec"].index.values[window[1]]] + for k in dataframe: + dataframe[k] = dataframe[k].loc[np.logical_or(dataframe[k].index < time_window[0], dataframe[k].index > time_window[1])] + else: + print "No ES2 sensor present. No trimming will be performed." + return dataframe + + +def data_with_sampler(filename): + data = platypus.io.logs.load(filename) + is_EC_gt_100 = False + + jar_start_timestamps = dict() + with open(filename, 'r') as logfile: + raw_data = collections.defaultdict(list) + start_time = datetime.datetime.utcfromtimestamp(0) + + for line in logfile: + # Extract each line fron the logfile and convert the timestamp. + time_offset_ms, level, message = line.split('\t', 2) + + # Compute the timestamp for each log entry. + time_offset = datetime.timedelta(milliseconds=int(time_offset_ms)) + timestamp = start_time + time_offset + + # Try to parse the log as a JSON object. + try: + entry = json.loads(message) + except ValueError as e: + raise ValueError( + "Aborted after invalid JSON log message '{:s}': {:s}".format(message, e)) + + # If the line is a datetime, compute subsequent timestamps from this. + # We assume that "date" and "time" are always together in the entry. + if 'date' in entry: + timestamp = datetime.datetime.utcfromtimestamp(entry['time'] / 1000.) + start_time = timestamp - time_offset + + # Extract appropriate data from each entry. 
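+            # The loop below toggles is_EC_gt_100 whenever the latest ES2 reading crosses
+            # an EC of 100 (used as an "in the water" indicator); while it is set, it records
+            # the timestamp of each sampler "start" message, keyed by the jar number parsed
+            # out of the message text.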
+ for k, v in six.viewitems(entry): + if k == 'sensor': + if v['type'] == "ES2": + ec = v['data'][0] + if not is_EC_gt_100 and ec > 100: + is_EC_gt_100 = True + if is_EC_gt_100 and ec < 100: + is_EC_gt_100 = False + if k == 'sampler' and is_EC_gt_100: + if "start" in v: + # the in-water sampler start messages + m = re.search('[0-9]+', v) + jar_id = m.group(0) + jar_start_timestamps[jar_id] = timestamp + + # TODO: MUST MERGE IN THE LATITUDE AND LONGITUDE!!! + + return data, jar_start_timestamps + + +def extract_sampler_data_by_jar(): + global PATH, FILE, EXT + filename = PATH + FILE + EXT + data, jar_start_timestamps = data_with_sampler(filename) + trimmed_data = trim_using_EC(data) + for k in jar_start_timestamps: + start_time = jar_start_timestamps[k] + end_time = start_time + datetime.timedelta(minutes=3.75) + print "Jar {} lasts from {} to {}".format(k, start_time, end_time) + for sensor in data.keys(): + print sensor + if sensor not in ["ES2", "ATLAS_DO", "ATLAS_PH"]: + continue + dataframe = trimmed_data[sensor] + relevantframe = dataframe.between_time(start_time.time(), end_time.time()) + output_filename = PATH + FILE + "__JAR_{}".format(k) + "__{}".format(sensor) + ".csv" + relevantframe.to_csv(output_filename) + + +if __name__ == "__main__": + merged_data = merge_files(glob.glob("/home/shawn/day2/*.txt")) + + + + diff --git a/examples/histogram-insitu.py b/examples/histogram-insitu.py new file mode 100644 index 0000000..fd90158 --- /dev/null +++ b/examples/histogram-insitu.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python +# coding: utf-8 + +""" +Example of loading Platypus vehicle logs from file. + +Data is loaded as time series into Pandas[1] DataFrames, from which they can +be interpolated and filtered for other purposes, or used as input into Numpy[2] +or Scipy[3] functions. + +[1]: http://pandas.pydata.org/ +[2]: http://www.numpy.org/ +[3]: http://www.scipy.org/ +""" +import matplotlib.pyplot as plt +import platypus.io.insitu_logs +import platypus.util.conversions +import glob +import os +import numpy as np +import math + +# Read the data log from file. +# Note: for log versions <5.0, this filename must be 'airboat_[timestamp].txt]. + +def trim_using_EC(dataframe, threshold=100): + """ + Trims any data when EC < 100 + :return: trimmed dataframe + """ + if "ES2" in dataframe: + print("ES2 sensor is present. Trimming all data within EC < {:.0f} time windows\n".format(threshold)) + # find all time windows where EC is exactly 0 + ES2_data = dataframe["ES2"] + values = ES2_data["ec"].values + ec_eq_zero_indices = np.where(values < threshold)[0] + windows = list() + windows.append([ec_eq_zero_indices[0]]) + left = ec_eq_zero_indices[0] + for ii in range(1, ec_eq_zero_indices.shape[0]): + i = ec_eq_zero_indices[ii] + if i - left > 5: + # there has been a jump in index, a new time window has started + windows[-1].append(left) + windows.append([i]) + left = i + windows[-1].append(ec_eq_zero_indices[-1]) + # print(ec_eq_zero_indices) + # print(windows) + for window in windows: + time_window = [ES2_data["ec"].index.values[window[0]], ES2_data["ec"].index.values[window[1]]] + for k in dataframe: + dataframe[k] = dataframe[k].loc[np.logical_or(dataframe[k].index < time_window[0], dataframe[k].index > time_window[1])] + else: + print("No ES2 sensor present. 
No trimming will be performed.") + return dataframe + + +def load_data(folders = [], files = [], ec_trim_value = 50): + if (len(folders) == 0): + folders = [sys.path.cwd] + + # print("folders", str(folders)) + + for folder in folders: + files.extend(glob.glob(folder+'/*fixed*.csv')) + + # print("log files: " + str(files)) + # todo: remove duplicates? + + data = platypus.io.insitu_logs.merge_files(files) + data = trim_using_EC(data, ec_trim_value) + return data + +def plot_hist_sensor(data, sensor = 'ES2', num_bins = 10, hide_top_n_percent = 0, hide_bot_n_percent = 0, save_dir = "~/save"): + num_readings = len(data[sensor]) + # Get the std of the data + sensor_stddev = data[sensor].std() + # Get the mean of the data + sensor_mean = data[sensor].mean() + # Get the min of the data + sensor_min = data[sensor].min() + # Get the max of the data + sensor_max = data[sensor].max() + + print(sensor+" number of readings", num_readings) + print(sensor+" std", sensor_stddev) + print(sensor+" mean", sensor_mean) + print(sensor+" min", sensor_min) + print(sensor+" max", sensor_max) + + hist_max = math.ceil(sensor_max - hide_top_n_percent * 0.01 * sensor_max) + hist_min = math.floor(sensor_min + hide_bot_n_percent * 0.01 * sensor_min) + bin_size = (hist_max - hist_min)/float(num_bins) + + bins = np.arange(hist_min, hist_max, bin_size) + # print(hist_max, hist_min, bin_size, bins) + + # n, bins, patches = plt.hist(data[sensor], bins=xrange(200,1600,100)) + weights = np.ones_like(data[sensor])/float(num_readings) * 100 + if (num_bins <= 0): + n, bins, patches = plt.hist(data[sensor], weights=weights) + else: + n, bins, patches = plt.hist(data[sensor], weights=weights, bins=bins) + + # print(n, bins, patches) + + plt.xlabel(sensor) + plt.ylabel('Percentage of values in the given range') + plt.ylim(0,50) + plt.title('Histogram of ' + sensor + ' ' + save_dir.split('/')[-1]) + plt.savefig(save_dir + "/"+'Histogram of ' + sensor + ' ' + save_dir.split('/')[-1]+'.png') + # plt.text(0, .25, "Standard Dev: " + str(es2_stddev)) + plt.figtext(.16, .75, "Mean: " + str(sensor_mean)) + plt.figtext(.16, .7, "std: " + str(sensor_stddev)) + plt.grid(True) + plt.show() + +def get_folders(): + # folders = ['/home/shawn/NL2/grokeneveldse_polder/grokeneveldse_polder_feb_2018/'] + folders = ['/home/shawn/data/june 18 2018 - NL delfgauw/day_1', '/home/shawn/data/june 18 2018 - NL delfgauw/day_2'] + return folders + +def main(): + print("enter EC trim value: ") + new_ec_trim = int(raw_input()) + + folders = get_folders() + + data = load_data(folders=folders, ec_trim_value = new_ec_trim) + print(data) + print("data columns: ", data.keys) + + num_bins = 10 + hide_bot_n_percent = 10 + hide_top_n_percent = 10 + while ( True ): + print("what would you like to do?") + print("0: "+"change number of bins (currently: " +str(num_bins)+")") + print("1: "+"change percentage of bottom to hide (currently: " +str(hide_bot_n_percent)+")") + print("2: "+"change percentage of top to hide (currently: " +str(hide_top_n_percent)+")") + for i,x in enumerate(data.keys()): + print(i+3, ": plot " + x) + + command = raw_input() + + if (command == '0'): + break + elif (command == '1'): + num_bins = int(command) + elif (command == '2'): + hide_bot_n_percent = int(command) + elif (command == '3'): + hide_top_n_percent = int(command) + elif(int(command) < len(data.keys()) ): + plot_hist_sensor(data, data.keys()[int(command)], num_bins = num_bins, hide_top_n_percent = hide_top_n_percent, hide_bot_n_percent=hide_bot_n_percent, save_dir = folders[0]) + else: + 
print(command +" is not valid") + + +if __name__ == '__main__': + main() diff --git a/examples/histogram.py b/examples/histogram.py new file mode 100644 index 0000000..2991e49 --- /dev/null +++ b/examples/histogram.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python +# coding: utf-8 + +""" +Example of loading Platypus vehicle logs from file. + +Data is loaded as time series into Pandas[1] DataFrames, from which they can +be interpolated and filtered for other purposes, or used as input into Numpy[2] +or Scipy[3] functions. + +[1]: http://pandas.pydata.org/ +[2]: http://www.numpy.org/ +[3]: http://www.scipy.org/ +""" +import matplotlib.pyplot as plt +import platypus.io.logs +import platypus.util.conversions +import glob +import os +import numpy as np +import math + +# Read the data log from file. +# Note: for log versions <5.0, this filename must be 'airboat_[timestamp].txt]. + +def trim_using_EC(dataframe, threshold=100): + """ + Trims any data when EC < 100 + :return: trimmed dataframe + """ + if "ES2" in dataframe: + print "ES2 sensor is present. Trimming all data within EC < {:.0f} time windows\n".format(threshold) + # find all time windows where EC is exactly 0 + ES2_data = dataframe["ES2"] + values = ES2_data["ec"].values + ec_eq_zero_indices = np.where(values < threshold)[0] + windows = list() + windows.append([ec_eq_zero_indices[0]]) + left = ec_eq_zero_indices[0] + for ii in range(1, ec_eq_zero_indices.shape[0]): + i = ec_eq_zero_indices[ii] + if i - left > 5: + # there has been a jump in index, a new time window has started + windows[-1].append(left) + windows.append([i]) + left = i + windows[-1].append(ec_eq_zero_indices[-1]) + # print ec_eq_zero_indices + # print windows + for window in windows: + time_window = [ES2_data["ec"].index.values[window[0]], ES2_data["ec"].index.values[window[1]]] + for k in dataframe: + dataframe[k] = dataframe[k].loc[np.logical_or(dataframe[k].index < time_window[0], dataframe[k].index > time_window[1])] + else: + print "No ES2 sensor present. No trimming will be performed." + return dataframe + + +def load_data(folders = [], files = [], ec_trim_value = 50): + if (len(folders) == 0): + folders = [sys.path.cwd] + + for folder in folders: + files.extend(glob.glob(folder+'/*.txt')) + + print files + # todo: remove duplicates? 
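+    # One way to handle the TODO above (added sketch, not part of the original script;
+    # assumes duplicate entries are byte-identical path strings): drop repeated paths
+    # while preserving the order in which they were found.
+    seen = set()
+    files = [f for f in files if not (f in seen or seen.add(f))]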
+ + data = platypus.io.logs.merge_files(files) + data = trim_using_EC(data, ec_trim_value) + return data + +def plot_hist_sensor(data, sensor = 'ES2', channel='ec', num_bins = 10, hide_top_n_percent = 0, hide_bot_n_percent = 0, save_dir = "~/save"): + num_readings = len(data[sensor][channel]) + # Get the std of the data + sensor_stddev = data[sensor][channel].std() + # Get the mean of the data + sensor_mean = data[sensor][channel].mean() + # Get the min of the data + sensor_min = data[sensor][channel].min() + # Get the max of the data + sensor_max = data[sensor][channel].max() + + print channel+" number of readings", num_readings + print channel+" std", sensor_stddev + print channel+" mean", sensor_mean + print channel+" min", sensor_min + print channel+" max", sensor_max + + hist_max = math.ceil(sensor_max - hide_top_n_percent * 0.01 * sensor_max) + hist_min = math.floor(sensor_min + hide_bot_n_percent * 0.01 * sensor_min) + bin_size = (hist_max - hist_min)/float(num_bins) + + bins = np.arange(hist_min, hist_max, bin_size) + # print hist_max, hist_min, bin_size, bins + + # n, bins, patches = plt.hist(data[sensor][channel], bins=xrange(200,1600,100)) + weights = np.ones_like(data[sensor][channel])/float(num_readings) * 100 + if (num_bins <= 0): + n, bins, patches = plt.hist(data[sensor][channel], weights=weights) + else: + n, bins, patches = plt.hist(data[sensor][channel], weights=weights, bins=bins) + + # print n, bins, patches + + plt.xlabel(channel) + plt.ylabel('Percentage of values in the given range') + plt.ylim(0,50) + plt.title('Histogram of ' + channel + ' ' + save_dir.split('/')[-1]) + plt.savefig(save_dir + "/"+'Histogram of ' + channel + ' ' + save_dir.split('/')[-1]+'.png') + # plt.text(0, .25, "Standard Dev: " + str(es2_stddev)) + plt.figtext(.16, .75, "Mean: " + str(sensor_mean)) + plt.figtext(.16, .7, "std: " + str(sensor_stddev)) + plt.grid(True) + plt.show() + +def get_folders(): + # folders = ['/home/shawn/NL2/grokeneveldse_polder/grokeneveldse_polder_feb_2018/'] + folders = ['/home/shawn/NL1/all_nov_2017'] + return folders + +def main(): + print "enter EC trim value: " + new_ec_trim = int(raw_input()) + + folders = get_folders() + + data = load_data(folders=folders, ec_trim_value = new_ec_trim) + while ( True ): + print "what would you like to do?\n0: quit\n1: plot EC\n2: plot pH\n3: plot DO\n4: plot temp\n5: change EC trim value" + command = raw_input() + + if (command == '0'): + break + elif (command == '1'): + plot_hist_sensor(data, 'ES2', 'ec', num_bins = 10, hide_top_n_percent = 0, save_dir = folders[0]) + elif (command == '2'): + plot_hist_sensor(data, 'ATLAS_PH', 'ph', num_bins = 10, hide_bot_n_percent = 0, save_dir = folders[0]) + elif (command == '3'): + plot_hist_sensor(data, 'ATLAS_DO', 'do', num_bins = 10, save_dir = folders[0]) + elif (command == '4'): + plot_hist_sensor(data, 'ES2', 'temperature', num_bins = 10, save_dir = folders[0]) + else: + print command +" is not valid" + + +if __name__ == '__main__': + main() diff --git a/examples/histogram_gui.py b/examples/histogram_gui.py new file mode 100644 index 0000000..c835115 --- /dev/null +++ b/examples/histogram_gui.py @@ -0,0 +1,26 @@ +from Tkinter import Tk, BOTH + +class Example(Frame): + + def __init__(self): + super().__init__() + + self.initUI() + + + def initUI(self): + + self.master.title("Simple") + self.pack(fill=BOTH, expand=1) + + +def main(): + + root = Tk() + root.geometry("250x150+300+300") + app = Example() + root.mainloop() + + +if __name__ == '__main__': + main() diff --git 
a/examples/loading_logs.py b/examples/loading_logs.py index 1feb320..64d861b 100644 --- a/examples/loading_logs.py +++ b/examples/loading_logs.py @@ -15,26 +15,88 @@ import matplotlib.pyplot as plt import platypus.io.logs import platypus.util.conversions +import glob +import os +import numpy as np # Read the data log from file. # Note: for log versions <5.0, this filename must be 'airboat_[timestamp].txt]. -data = platypus.io.logs.load('./airboat_20130807_063622.txt') +def trim_using_EC(dataframe, threshold=100): + """ + Trims any data when EC < 100 + :return: trimmed dataframe + """ + if "ES2" in dataframe: + print "ES2 sensor is present. Trimming all data within EC < {:.0f} time windows\n".format(threshold) + # find all time windows where EC is exactly 0 + ES2_data = dataframe["ES2"] + values = ES2_data["ec"].values + ec_eq_zero_indices = np.where(values < threshold)[0] + windows = list() + windows.append([ec_eq_zero_indices[0]]) + left = ec_eq_zero_indices[0] + for ii in range(1, ec_eq_zero_indices.shape[0]): + i = ec_eq_zero_indices[ii] + if i - left > 5: + # there has been a jump in index, a new time window has started + windows[-1].append(left) + windows.append([i]) + left = i + windows[-1].append(ec_eq_zero_indices[-1]) + # print ec_eq_zero_indices + # print windows + for window in windows: + time_window = [ES2_data["ec"].index.values[window[0]], ES2_data["ec"].index.values[window[1]]] + for k in dataframe: + dataframe[k] = dataframe[k].loc[np.logical_or(dataframe[k].index < time_window[0], dataframe[k].index > time_window[1])] + else: + print "No ES2 sensor present. No trimming will be performed." + return dataframe -# Access the first 100 GPS locations for the vehicle. -poses = data['pose'][100:] -# Plot the first 100 GPS locations as UTM coordinates using matplotlib. -plt.plot(poses['easting'], poses['northing']) -plt.show() +sensor = "ES2" +channel = 'ec' -# Retrieve temperature data from the ES2 sensor. -temp = data['es2']['temperature'] +for folder in glob.glob('/home/shawn/NL2/nordpolder_van_delfgauw/'): + print "processing folder: ", folder + for file in glob.glob(folder+'platypus_20180215_104248.txt'): + if (os.path.exists(file)): + print file + out_name = os.path.basename(os.path.splitext(file)[0]) + print out_name + data = platypus.io.logs.merge_files(glob.glob(folder+"/*.txt")) + data = trim_using_EC(data, 100) -# Plot ES2 electrical conductivity data using matplotlib. -plt.plot(data['es2'].index, data['es2']['ec']) -plt.show() + # Access the first 100 GPS locations for the vehicle. + poses = data['pose'] -# Get the standard deviation of the ES2 data. -es2_stddev = data['es2'].std() + output_base = folder+out_name+"_"+channel + + # plt.title("Path of vehicle: " + out_name) + # # Plot the first 100 GPS locations as UTM coordinates using matplotlib. + # plt.plot(poses['easting'], poses['northing']) + # plt.savefig(output_base + "_path.png") + # plt.show() + # # plt.clear() + + # Retrieve temperature data from the ES2 sensor. + # temp = data[sensor]['temp'] + + + # Plot ES2 electrical conductivity data using matplotlib. + + # print data[sensor][channel] + + plt.title("Graph of "+channel+" data: " + out_name) + plt.plot(data[sensor].index, data[sensor][channel]) + # plt.savefig(folder+"_"+channel+"_graph.png") + plt.savefig(output_base + "_graph.png") + + plt.show() + # plt.clear() + + # Get the standard deviation of the ES2 data. 
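+            # Note: data[sensor] is a DataFrame, so .std() below returns one standard
+            # deviation per channel (e.g. 'ec' and 'temperature'), not a single number.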
+ es2_stddev = data[sensor].std() + print "deviation", es2_stddev diff --git a/examples/pose_fixer.py b/examples/pose_fixer.py new file mode 100644 index 0000000..3172c36 --- /dev/null +++ b/examples/pose_fixer.py @@ -0,0 +1,78 @@ +import sys +import csv +from csv import DictWriter, DictReader +from dateutil import parser +import datetime +import os +import glob +import append_csvs + +def get_poses(filename_poses, pose_offset): + poses = [] + with open(filename_poses) as csvfile: + reader = DictReader(csvfile) + print "pose offset: " + str(pose_offset) + last_row = {"time": 0, "latitude": 0, "longitude": 0} + for row in reader: + dt = parser.parse(row["time"]) + datetime.timedelta(hours = pose_offset) + if (last_row["latitude"] == row["latitude"] and last_row["longitude"] == row["longitude"] and abs(last_row["time"] - dt) < datetime.timedelta(seconds = 1)): + continue + row["time"] = dt + poses.append(row) + last_row = row + + print "first point: " + str(poses[0]) +"\nLast point: "+ str(poses[-1])+"\nnum points: "+str(len(poses))+"\n\n" + return poses + +def fix_insitu_csv(filename_insitu, poses, output_filename): + with open(filename_insitu) as csvfile: + reader = DictReader(csvfile) + + with open(output_filename, 'w') as csvoutfile: + fieldnames = reader.fieldnames + writer = csv.DictWriter(csvoutfile, fieldnames=fieldnames) + writer.writeheader() + print "insitu data: " + for row in reader: + # print(row) + insitu_date = parser.parse(row['Date Time']) + latest_time_diff = 0 + min_val = min(poses, key=lambda x:abs(x["time"]-insitu_date)) + # for x in poses: + # pose_date = x["time"] + # if (pose_date > insitu_date): + # print "found a pose after " + str(pose_date) + " for insitu data point: " + str(insitu_date) + " - diff = " + str(abs(insitu_date -pose_date)) + diff_time = abs(insitu_date - min_val["time"]) + if (diff_time > datetime.timedelta(seconds = 1)): + print "found the closest pose (" + str(min_val) + ") for insitu data point: " + str(insitu_date) + " - diff = " + str(diff_time) + + row['Date Time'] = min_val["time"] + row['Latitude'] = min_val["latitude"] + row['Longitude'] = min_val["longitude"] + writer.writerow(row) + +if __name__ == '__main__': + filename_poses = sys.argv[1] + pose_offset = int(sys.argv[2]) + filename_insitu = sys.argv[3] + if (os.path.exists(filename_insitu) == False): + print("file doesn't exist: "+filename_insitu) + + "filename of poses: ", filename_poses + dict_poses = get_poses(filename_poses, pose_offset) + + if (os.path.isdir(filename_insitu)): + files_in_folder = glob.glob(filename_insitu+'/VuSitu_*.csv') + else: + files_in_folder = [filename_insitu] + + + files_output = [] + + for x in files_in_folder: + print "filename of insitu data: ", x + fix_insitu_csv(x, dict_poses, x+".fixed") + files_output.append(x + ".fixed") + + if os.path.isdir(filename_insitu): + append_csvs(files = files_output, output_filename = filename_insitu + "/combined.csv") \ No newline at end of file diff --git a/image_server/.gitignore b/image_server/.gitignore new file mode 100644 index 0000000..3da24a9 --- /dev/null +++ b/image_server/.gitignore @@ -0,0 +1,3 @@ +overlay/* +stats/* + diff --git a/image_server/css/css.css b/image_server/css/css.css new file mode 100644 index 0000000..49ce431 --- /dev/null +++ b/image_server/css/css.css @@ -0,0 +1 @@ +#mapid { height: 250px; width: 250px;} \ No newline at end of file diff --git a/image_server/csv/.gitignore b/image_server/csv/.gitignore new file mode 100644 index 0000000..5e7d273 --- /dev/null +++ b/image_server/csv/.gitignore 
@@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore diff --git a/image_server/data_processor.py b/image_server/data_processor.py new file mode 100644 index 0000000..a5aa59d --- /dev/null +++ b/image_server/data_processor.py @@ -0,0 +1,539 @@ +from sklearn.neighbors import RadiusNeighborsRegressor +import matplotlib +matplotlib.use('Agg') + +import matplotlib.pyplot as plt +import math +import matplotlib.cm +from matplotlib import pyplot +import numpy as np +import numpy.lib.recfunctions +import scipy +import scipy.interpolate +import pandas +import platypus.io.logs +import os +import glob +import sys +import json + +log_path = '/home/ubuntu/process_ERM/' + +def sensor_id_to_name(id): + if (id == 0): + # Select the sensor and the name of the channel for that sensor. + sensor_name = 'PH_ATLAS' + sensor_channel = 'ph' + sensor_units = "pH" + elif (id == 1): + sensor_name = 'EC_DECAGON' + sensor_channel = 'ec' + sensor_units = 'Electrical Conductivity (uS/cm)' + elif (id == 2): + sensor_name = 'T_DECAGON' + sensor_channel = 'temperature' + sensor_units = 'Temperature (C)' + elif (id == 3): + sensor_name = 'DO_ATLAS' + sensor_channel = 'do' + sensor_units = 'Turbidty (NTU)' + + return (sensor_name, sensor_channel, sensor_units) + +def generate_overlay(log_path, log_file, sensor_id, ec_bounds, ph_bounds, turbidity_bounds): + print "processing: " + log_file + min_ec = ec_bounds[0] + max_ec = ec_bounds[1] + min_ph = ph_bounds[0] + max_ph = ph_bounds[1] + min_turbidity = turbidity_bounds[0] + max_turbidity = turbidity_bounds[1] + data_padding = [0., 0.] # degrees lat/lon + data_resolution = [0.00001, 0.00001] # degrees lat/lon + data_interpolation_radius = 0.00001 # degrees lat/lon + + data_boundaries = [[37.756664, -122.381500], [37.760387, -122.377216]] +# data_boundaries = [] + + # read the old generation stats file + stats_in = {} + old_stats = {"settings": {}} + + try: + with open("./stats/"+log_file+'.json', 'r') as infile: + print "reading previous stats from: " + "./stats/"+log_file+'.json' + stats_in = json.load(infile) + print stats_in + # old_stats = stats_in[str(sensor_id)] + except: + print "failed to load from input stats file" + + data_stats = {} + data_stats["settings"] = {} + data_stats["settings"]["log_path"] = log_path + data_stats["settings"]["log_file"] = log_file + data_stats["settings"]["sensor_id"] = sensor_id + data_stats["settings"]["ec_bounds"] = ec_bounds + data_stats["settings"]["ph_bounds"] = ph_bounds + data_stats["settings"]["turbidity_bounds"] = turbidity_bounds + + # Import the data from the specified logfile + + log_ext = ".txt" + + if (os.path.exists(log_path + log_file + log_ext) == False): + print "File doesn't exist: " + log_path + log_file + log_ext + log_ext = ".txt.incomplete" + + if (os.path.exists(log_path + log_file + log_ext) == False): + print "Error. log does not exist: " + log_path + log_file + log_ext + return False + + data_stats["settings"]["log_ext"] = log_ext + print(str(data_stats["settings"])) + + if (data_stats["settings"] == old_stats["settings"]): + print "old processing settings == new processing settings. don't re-run" + return + + data = platypus.io.logs.load(log_path+log_file+log_ext) + + (sensor_name, sensor_channel, sensor_units) = sensor_id_to_name(sensor_id) + + # Define useful access variables. 
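+    # Geographic trim: the block below finds pose samples that fall outside the
+    # data_boundaries lat/lon box, groups them into contiguous index windows, and
+    # then cuts every dataframe to exclude those time windows.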
+ if (data_boundaries != []): + print "Trimming all data within long/lat = "+str(data_boundaries) + # find all time windows where EC is exactly 0 + ES2_data = data["pose"] + # print ES2_data["time"] + values_lat = ES2_data["latitude"].values + values_lon = ES2_data["longitude"].values + + # ec_eq_zero_indices = np.where(values == 0)[0] + lat_min = min(data_boundaries[0][0], data_boundaries[1][0]) + lat_max = max(data_boundaries[0][0], data_boundaries[1][0]) + lon_min = min(data_boundaries[0][1], data_boundaries[1][1]) + lon_max = max(data_boundaries[0][1], data_boundaries[1][1]) + print lat_min, lat_max, " | ", lon_min, lon_max + ec_eq_zero_indices = np.where( (values_lat < lat_min) | (values_lat > lat_max) | + (values_lon < lon_min) | (values_lon > lon_max) + )[0] + + if (len(ec_eq_zero_indices) == 0): + print "no poses outside the bounds" + else: + # print ec_eq_zero_indices + # ec_eq_zero_indices = np.where(values < 50)[0] + windows = list() + windows.append([ec_eq_zero_indices[0]]) + left = ec_eq_zero_indices[0] + for ii in range(1, ec_eq_zero_indices.shape[0]): + i = ec_eq_zero_indices[ii] + if i - left > 5: + # there has been a jump in index, a new time window has started + windows[-1].append(left) + windows.append([i]) + left = i + windows[-1].append(ec_eq_zero_indices[-1]) + # print ec_eq_zero_indices + # print windows + for window in windows: + time_window = [ES2_data.index.values[window[0]], ES2_data.index.values[window[1]]] + # print time_window + for k in data: + print "trimming: " + k +" for interval " + str(time_window) + data[k] = data[k].loc[np.logical_or(data[k].index < time_window[0], data[k].index > time_window[1])] + + + # if "T_DECAGON" in data: + # print "Temperature sensor is present. Trimming all data where temperature is changing a lot in a given time windows\n" + # # find all time windows where EC is exactly 0 + + # # print ES2_data + # T_data = data["T_DECAGON"] + # values = T_data["temperature"].values + + # dtemp_dt_limit = 0.5/60.0 + + # # pose_lat_vals = position["latitude"].values + # # pose_lon_vals = position["longitude"].values + # stddevs = [] + # zero_indices = [] + # for i, x in enumerate(values): + # zero_indices.append(0) + # stddevs.append(0) + # if (i + 60 < len(values)): + # vals = [] + # for x in xrange(i, i+60): + # vals.append(values[x]) + # vals = np.array(vals) + # stddev = vals.std() + # stddevs[i] = stddev + # # if (stddev > 0.1): + # # print "@ i = " + str(i)+", time = " + str(T_data["temperature"].index[i]) + " - std dev: " + str(stddev) + + # for i, x in enumerate(stddevs): + # # print i + # if (x > 0.1): + # # print x + # for y in xrange(i, i+60): + # zero_indices[y] = 1 + + # # # print zero_indices + + # # ec_eq_zero_indices = np.where(values == 0)[0] + # ec_eq_zero_indices = np.where( (np.array(zero_indices) == 1) )[0] # | out_of_bouds_lat | out_of_bouds_lon )[0] + # # print ec_eq_zero_indices + # # ec_eq_zero_indices = np.where(values < 50)[0] + # windows = list() + # windows.append([ec_eq_zero_indices[0]]) + # left = ec_eq_zero_indices[0] + # for ii in range(1, ec_eq_zero_indices.shape[0]): + # i = ec_eq_zero_indices[ii] + # if i - left > 5: + # # there has been a jump in index, a new time window has started + # windows[-1].append(left) + # windows.append([i]) + # left = i + # windows[-1].append(ec_eq_zero_indices[-1]) + # # print ec_eq_zero_indices + # # print windows + # for window in windows: + # print "window: " + str(window) + # time_window = [T_data["temperature"].index.values[window[0]], 
T_data["temperature"].index.values[window[1]]] + # for k in data: + # data[k] = data[k].loc[np.logical_or(data[k].index < time_window[0], data[k].index > time_window[1])] + # print T_data["temperature"].values[np.where( (np.array(zero_indices) ==1) )[0]] + # else: + # print "No ES2 sensor present. No trimming will be performed." + + if "EC_DECAGON" in data: + print "ES2 sensor is present. Trimming all data within EC = "+str(min_ec)+" time windows\n" + # find all time windows where EC is exactly 0 + ES2_data = data["EC_DECAGON"] + values = ES2_data["ec"].values + + # ec_eq_zero_indices = np.where(values == 0)[0] + ec_eq_zero_indices = np.where( (values < min_ec) | (values > max_ec) )[0] # | out_of_bouds_lat | out_of_bouds_lon )[0] + if (len(ec_eq_zero_indices) == 0): + print "no ec data to trim" + else: + windows = list() + windows.append([ec_eq_zero_indices[0]]) + left = ec_eq_zero_indices[0] + for ii in range(1, ec_eq_zero_indices.shape[0]): + i = ec_eq_zero_indices[ii] + if i - left > 5: + # there has been a jump in index, a new time window has started + windows[-1].append(left) + windows.append([i]) + left = i + windows[-1].append(ec_eq_zero_indices[-1]) + # print ec_eq_zero_indices + # print windows + for window in windows: + time_window = [ES2_data["ec"].index.values[window[0]], ES2_data["ec"].index.values[window[1]]] + for k in data: + data[k] = data[k].loc[np.logical_or(data[k].index < time_window[0], data[k].index > time_window[1])] + else: + print "No ES2 sensor present. No trimming will be performed." + + if "PH_ATLAS" in data: + print "pH sensor is present. Trimming all data within PH = ["+str(min_ph)+", "+str(max_ph)+ "] time windows\n" + PH_data = data["PH_ATLAS"] + values = PH_data["ph"].values + + ph_outofbounds_indices = np.where( (values < min_ph) | (values > max_ph) )[0] # | out_of_bouds_lat | out_of_bouds_lon )[0] + + if (len(ph_outofbounds_indices) == 0): + print("no sensor values to prune. all values between "+ str(min_ph) +" and " + str(max_ph)) + else: + windows = list() + windows.append([ph_outofbounds_indices[0]]) + left = ph_outofbounds_indices[0] + for ii in range(1, ph_outofbounds_indices.shape[0]): + i = ph_outofbounds_indices[ii] + if i - left > 5: + # there has been a jump in index, a new time window has started + windows[-1].append(left) + windows.append([i]) + left = i + windows[-1].append(ph_outofbounds_indices[-1]) + # print ph_outofbounds_indices + # print windows + for window in windows: + time_window = [PH_data["ph"].index.values[window[0]], PH_data["ph"].index.values[window[1]]] + for k in data: + data[k] = data[k].loc[np.logical_or(data[k].index < time_window[0], data[k].index > time_window[1])] + else: + print "No PH sensor present. No trimming will be performed." + + # Print the available sensors and channels for this logfile. + print "Available sensors/channels:" + for s in data.keys(): + if s == 'pose' or s == 'BATTERY': + continue + for c in data[s].dtypes.keys(): + print " {:s}, {:s}".format(s, str(c)) + + pose = data['pose'] + position = pose[['latitude', 'longitude']] + + out_prefix = log_file + "-"+sensor_name+'-' + + # Extract the pose timing and the sensor data of interest. + pose_times = pose.index.values.astype(np.float64) + + if sensor_name in data: + if (sensor_name == "DO_ATLAS"): + data[sensor_name]["do"] = data[sensor_name]["do"]/20.0 + sensor = data[sensor_name] + sensor_times = sensor.index.values.astype(np.float64) + + # Linearly interpolate the position of the sensor at every sample. 
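+        # With bounds_error=False, interp1d returns NaN for sensor samples whose
+        # timestamps fall outside the range covered by the pose data; those rows are
+        # removed further down by the np.isfinite() filter before gridding.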
+ sensor_pose_interpolator = scipy.interpolate.interp1d(pose_times, position, + axis=0, bounds_error=False) + + # Add the position information back to the sensor data. + sensor = sensor.join(pandas.DataFrame(sensor_pose_interpolator(sensor_times), sensor.index, + columns=('latitude', 'longitude'))) + + csv_output_filename = './csv/'+out_prefix+'csv.csv' + data_stats["csv_output_filename"] = out_prefix+'csv.csv' + sensor.to_csv(csv_output_filename) + + # Remove columns that have NaN values (no pose information). + sensor_valid = np.all(np.isfinite(sensor), axis=1) + sensor = sensor[sensor_valid] + + ## Add a data overlay for the map + data_bounds = [(position.min() - data_padding).tolist(), + (position.max() + data_padding).tolist()] + + # print data + print position.min() + print position.max() + print data_bounds + print data_resolution + + # Create a rectangular grid of overlay points. + data_xv, data_yv = np.meshgrid( + np.arange(data_bounds[1][0], data_bounds[0][0], -data_resolution[0]), + np.arange(data_bounds[0][1], data_bounds[1][1], data_resolution[1]) + ) + data_shape = data_xv.shape + data_xy = np.vstack([data_xv.ravel(), data_yv.ravel()]).T + + print data_shape + + data_stats["number_of_points"] = data_shape[0] + data_stats["data_bounds"] = data_bounds + + print "starting major processing..." + if sensor_name in data: + # Create a radial-basis interpolator over the sensor dataset + # Then, query it at each point of the rectangular grid. + #from sklearn.neighbors import RadiusNeighborsClassifier + #data_estimator = RadiusNeighborsClassifier(radius=data_interpolation_radius, outlier_label=np.nan) + print "creating radius neighbors regressor" + data_estimator = RadiusNeighborsRegressor(radius=data_interpolation_radius) + + print "running neighbors fit" + data_estimator.fit(sensor[['latitude','longitude']], sensor[sensor_channel].astype(np.float)) + print "running predict" + data_zv = data_estimator.predict(data_xy) + print "running reshape" + data_zv = data_zv.reshape(data_shape).T + + print "normalizing data" + + # Normalize data from [0, 1) + data_max = data_zv[np.isfinite(data_zv)].max() + data_min = data_zv[np.isfinite(data_zv)].min() + print "Data min = {:f} Data max = {:f}".format(data_min, data_max) + # data_stats["data_min"] = data_min + if (sensor_id == 0): + # pH + color_data_min = 6.5 + color_data_max = 9.5 + num_bins = 20 + elif (sensor_id == 1): + # ec + num_bins = 20 + color_data_min = 30000 + color_data_max = 95000 + elif (sensor_id == 2): + # temp + num_bins = 20 + color_data_min = 5 + color_data_max = 30 + elif (sensor_id == 3): + # turbidity (DO_ATLAS) + num_bins = 40 + color_data_min = 0 + color_data_max = 50 + + data_stats["data_stddev"] = data[sensor_name][sensor_channel].std() + data_stats["data_min"] = data[sensor_name][sensor_channel].min() + data_stats["data_max"] = data[sensor_name][sensor_channel].max() + data_stats["data_mean"] = data[sensor_name][sensor_channel].mean() + histogram_filename = './histograms/'+out_prefix+'histogram.png' + data_stats["histogram_filename"] = out_prefix+'histogram.png' + plot_hist_sensor(data, sensor_name, sensor_channel, num_bins, color_data_min, color_data_max, histogram_filename) + + NORMALIZER = color_data_max # 800 + data_zv = (data_zv - color_data_min) / (NORMALIZER - color_data_min) + + + + + print "create color map" + # Update a color map only at the points that have valid values. 
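+        # The alpha channel is set to 255 only where the gridded value is finite, so
+        # cells with no nearby samples come out fully transparent in the overlay PNG.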
+ data_rgb = np.zeros((data_shape[0], data_shape[1], 4), dtype=np.uint8) + data_rgb = matplotlib.cm.jet(data_zv) * 255 + data_rgb[:,:,3] = 255 * np.isfinite(data_zv) + + # Remove any old image files w/ the same name + old_png_files = glob.glob('./[overlay,histograms,csv]/'+out_prefix+'-'+sensor_name+'-[overlay,histogram,csv].png') + for old_png_file in old_png_files: + print "removing file: " + old_png_file + os.remove(old_png_file) + + print "creating overlay files" + png_filename_rgb = './overlay/'+out_prefix+'overlay.png' + data_stats["overlay_filename"] = out_prefix+'overlay.png' + scipy.misc.imsave(png_filename_rgb, data_rgb) + + # Create image overlay that references generated image. + # makes an image w/ a slight opaque tint to it + # data_overlay_tint = np.ones((data_shape[0], data_shape[1], 4), dtype=np.uint8) * 50 + # data_overlay_tint_filename = './overlay/'+out_prefix+'tint.png' + # scipy.misc.imsave(data_overlay_tint_filename, data_overlay_tint) + + # data_bounds_tint = [(position.min() - [x * 500 for x in data_padding]).tolist(), + # (position.max() + [x * 500 for x in data_padding]).tolist()] + + # Make a figure and axes with dimensions as desired. + fig = pyplot.figure(figsize=(15, 3)) + ax1 = fig.add_axes([0.05, 0.80, 0.9, 0.15]) + # Set the colormap and norm to correspond to the data for which + # the colorbar will be used. + cmap = matplotlib.cm.jet + norm = matplotlib.colors.Normalize(vmin=color_data_min, vmax=NORMALIZER) + cb1 = matplotlib.colorbar.ColorbarBase(ax1, cmap=cmap, + norm=norm, + orientation='horizontal') + cb1.set_label(sensor_units) + + scale_bar_filename = './overlay/'+out_prefix+'bar.png' + data_stats["bar_filename"] = out_prefix+'bar.png' + pyplot.savefig(scale_bar_filename) + + pyplot.close('all') + + print data_stats + stats_out = stats_in + stats_out[str(sensor_id)] = data_stats + + with open("./stats/"+log_file+'.json', 'w') as outfile: + json.dump(stats_out, outfile, sort_keys=True, indent=4) + else: + print "sensor name: " + sensor_name +" is not in:\n", data + +def plot_hist_sensor(data, sensor, channel, num_bins, min_value, max_value, filename): + num_readings = len(data[sensor][channel]) + + hist_min = math.floor(min_value) + hist_max = math.ceil(max_value) + bin_size = (hist_max - hist_min)/float(num_bins) + + std_dev = data[sensor][channel].std() + mean = data[sensor][channel].mean() + + bins = np.arange(hist_min, hist_max, bin_size) + # print bins + # print hist_max, hist_min, bin_size, bins + + # n, bins, patches = plt.hist(data[sensor][channel], bins=xrange(200,1600,100)) + weights = np.ones_like(data[sensor][channel])/float(num_readings) * 100 + if (num_bins <= 0): + n, bins, patches = plt.hist(data[sensor][channel], weights=weights) + else: + n, bins, patches = plt.hist(data[sensor][channel], weights=weights, bins=bins) + + # print n, bins, patches + + plt.xlabel(channel) + plt.ylabel('Percentage of values in the given range') + plt.ylim(0,100) + plt.title('Histogram of ' + sensor + " $\mu$="+ "{:.2f}".format(std_dev) +" $\sigma$=" + "{:.2f}".format(mean)) + plt.savefig(filename) + # plt.text(0, .25, "Standard Dev: " + str(es2_stddev)) + plt.figtext(.16, .75, "Mean: " + str(mean)) + plt.figtext(.16, .7, "std: " + str(std_dev)) + plt.grid(True) + plt.show() + +if __name__ == '__main__': + if (len(sys.argv) < 3 or (["-h", "help", "h", "--help"] in sys.argv)): + print "args: python data_processor.py log_file_name (w/o .txt appended) sensor_id min_ec" + quit(1) + log_file = sys.argv[1] + sensor_id = int(sys.argv[2]) + min_ec = 
int(sys.argv[3]) + max_ec = int(sys.argv[4]) + min_ph = float(sys.argv[5]) + max_ph = float(sys.argv[6]) + min_turbidity = 0 + max_turbidity = 1000 + if (os.path.isfile(log_file)): + # log_path = sys.argv[1].split("/").join() + log_file = sys.argv[1].split("/")[-1] + log_file = log_file.split(".")[-2] + if ("platypus" not in log_file): + log_file = sys.argv[1].split(".")[-2] + if ("platypus" not in log_file): + print "invalid filename" + quit(2) + print log_file + if sensor_id == -1: + for x in range(0, 4): + # try: + generate_overlay(log_path, log_file, x, [min_ec, max_ec], [min_ph, max_ph], [min_turbidity, max_turbidity]) + # except: + # print "Failed to generate overlay: " + log_path + ", " + log_file + ", " + str(x) + print "\n\n\n\n\n\n\n\n\n" + else: + # try: + generate_overlay(log_path, log_file, sensor_id, [min_ec, max_ec], [min_ph, max_ph], [min_turbidity, max_turbidity]) + # except: + # print "Failed to generate overlay: " + log_path + ", " + log_file + ", " + str(sensor_id) + print "\n\n\n\n\n\n\n\n\n" + else: + log_folder = log_file + log_files = [] + for file in os.listdir(log_folder): + if (os.path.isfile(log_folder+"/"+file)): + print "adding file: " + file + log_files.append(os.path.splitext(file)[0]) + else: + print(file +" is not a file") + + print log_files + + for x in log_files: + if (sensor_id == -1): + for y in range(0, 4): + # try: + generate_overlay(log_path, x, y, [min_ec, max_ec], [min_ph, max_ph], [min_turbidity, max_turbidity]) + # except: + # print "Failed to generate overlay: " + log_path + ", " + log_file + ", " + str(y) + print "\n\n\n\n\n\n\n\n\n" + else: + # try: + generate_overlay(log_path, x, sensor_id, [min_ec, max_ec], [min_ph, max_ph], [min_turbidity, max_turbidity]) + # except: + # print "Failed to generate overlay: " + log_path + ", " + log_file + ", " + str(sensor_id) + print "\n\n\n\n\n\n\n\n\n" + + # generate_histogram() diff --git a/image_server/histograms/.gitignore b/image_server/histograms/.gitignore new file mode 100644 index 0000000..5e7d273 --- /dev/null +++ b/image_server/histograms/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore diff --git a/image_server/image_server.py b/image_server/image_server.py new file mode 100644 index 0000000..977dc90 --- /dev/null +++ b/image_server/image_server.py @@ -0,0 +1,141 @@ +import matplotlib +import matplotlib.cm +from matplotlib import pyplot +import numpy as np +import numpy.lib.recfunctions +import scipy +import scipy.interpolate +import pandas +import platypus.io.logs +import os +import uuid +import glob +import flask +from flask import send_from_directory, render_template +import json + +UPLOAD_FOLDER = '/home/shawn/data/ERM/log_files/' +ALLOWED_EXTENSIONS = set(['txt']) + +from flask import Flask +app = Flask(__name__, static_url_path='', ) + +app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER + +class PlatypusDataStore(object): + """docstring for PlatypusDataStore""" + def __init__(self, data): + super(PlatypusDataStore, self).__init__() + self.data = data + + +@app.route('/overlay/') +def serve_static(filename): + print "getting static file: ", filename, "from dir: ",os.path.join(".", 'overlay') + return send_from_directory(os.path.join(".", 'overlay'), filename) + + +@app.route('/csv/') +def serve_csv(filename): + print "getting static file: ", filename, "from dir: ",os.path.join(".", 'csv') + return send_from_directory(os.path.join(".", 'csv'), filename) + + +@app.route('/histograms/') +def serve_histograms(filename): + print "getting static 
file: ", filename, "from dir: ",os.path.join(".", 'histograms') + return send_from_directory(os.path.join(".", 'histograms'), filename) + +# @app.route('/overlay/') +# def send(path): +# path = "/overlay/"+path +# if (os.path.exists("./"+path)): +# print "sending file: " + path +# return send_from_directory("overlay", path) +# else: +# print path +" does not exist" + +def sensor_id_to_name(id): + if (id == 0): + # Select the sensor and the name of the channel for that sensor. + sensor_name = 'PH_ATLAS' + sensor_channel = 'ph' + sensor_units = "pH" + elif (id == 1): + sensor_name = 'EC_DECAGON' + sensor_channel = 'ec' + sensor_units = 'Electrical Conductivity (uS/cm)' + elif (id == 2): + sensor_name = 'T_DECAGON' + sensor_channel = 'temperature' + sensor_units = 'Temperature (C)' + elif (id == 3): + sensor_name = 'DO_ATLAS' + sensor_channel = 'do' + sensor_units = 'Turbidty (NTU)' + + return (sensor_name, sensor_channel, sensor_units) + +@app.route('/logs//') +def render_page(log_file, sensor_id): + print "rendering log file page from stats" + # open stats file for this log + with open("./stats/"+log_file+'.json', 'r') as readfile: + data = json.load(readfile) + print data + data = data[str(sensor_id)] + + print data + data["bar_filename"] = "/overlay/"+data["bar_filename"] + data["overlay_filename"] = "/overlay/"+data["overlay_filename"] + (sensor_name, sensor_channel, sensor_units) = sensor_id_to_name(sensor_id) + return render_template('render.html', log_file = log_file, sensor_name = sensor_name, sensor_id=sensor_id, data=data) + +@app.route('/reprocess//') +def reprocess(log_file, sensor_id): + with open("./stats/"+log_file+'.json', 'r') as readfile: + data = json.load(readfile) + data = data[str(sensor_id)] + print "stats data: ", data + import subprocess + + try: + base = data["settings"]["log_path"] + except: + log_path = "~/mount_nas/" + name = log_file + try: + ext = data["settings"]["log_ext"] + except: + ext = ".txt" + path = os.path.join(base, name) + subprocess.Popen(["python", "data_processor.py", str(path)+ext, str(sensor_id), "5000", "1000000", "5", "12"]) + + return "HOME
queued: " + str(["python", "data_processor.py", str(path), "sensor_id", "5000", "1000000", "5", "12"]) + +@app.route('/index') +@app.route('/') +def render_index(): + log_folder = "./stats/" + log_files = [] + for file in os.listdir(log_folder): + if (str(file) == ".gitignore"): + continue + if (os.path.isfile(log_folder+file)): + print "adding file: " + file + log_files.append(os.path.splitext(file)[0]) + else: + print(file +" is not a file") + + log_files.sort() + print log_files + + return render_template('index.html', log_folder = log_folder, log_files = log_files) + +def root_dir(): # pragma: no cover + return os.path.abspath(os.path.dirname(__file__)) + +if __name__ == '__main__': + app.debug = True + app.run(host='0.0.0.0') + #app.run() diff --git a/image_server/run_periodic.sh b/image_server/run_periodic.sh new file mode 100644 index 0000000..c2b59fa --- /dev/null +++ b/image_server/run_periodic.sh @@ -0,0 +1,27 @@ +#!/bin/bash +while [ 1 ] +do + cp -n -r ~/mount_nas/ERM/phone\ \#2/ ~/backup_ERM + cp -n -r ~/mount_nas/ERM/phone\ \#1/ ~/backup_ERM + cp -n ~/mount_nas/ERM/*txt* ~/backup_ERM + + rm -r ~/process_ERM/ + mkdir -p ~/process_ERM/ + cp -n ~/mount_nas/ERM/phone\ \#2/* ~/process_ERM/ + cp -n ~/mount_nas/ERM/phone\ \#1/* ~/process_ERM/ + cp -n ~/mount_nas/ERM/*txt* ~/process_ERM/ + + for filename in ~/process_ERM/*; do + if [[ $(find "$filename" -mtime +10 -print) ]]; then + echo "skipping old file: $filename" + # sleep 0.5 + # continue + fi + for ((i=0; i<=3; i++)); do + python data_processor.py "$filename" "$i" 5000 1000000 6 10.5 + done + + done + echo "Done for now" + sleep 3600 +done diff --git a/image_server/stats/.gitignore b/image_server/stats/.gitignore new file mode 100644 index 0000000..5e7d273 --- /dev/null +++ b/image_server/stats/.gitignore @@ -0,0 +1,4 @@ +# Ignore everything in this directory +* +# Except this file +!.gitignore diff --git a/image_server/templates/index.html b/image_server/templates/index.html new file mode 100644 index 0000000..2657842 --- /dev/null +++ b/image_server/templates/index.html @@ -0,0 +1,23 @@ + + + + + Platypus DataVis - index + + + + + + + + +

+ ERM Robotic Boat Sensor Data
diff --git a/image_server/templates/render.html b/image_server/templates/render.html new file mode 100644 index 0000000..a52d9ec --- /dev/null +++ b/image_server/templates/render.html @@ -0,0 +1,81 @@
+ Quick Start - Leaflet
+ Data Home
+ {{ log_file }} - {{ sensor_name }}
+ This run's raw {{ sensor_name }} data
+ This run's pH data | This run's EC data | This run's Temperature data | This run's Turbidity data
+ REPROCESS DATA + + diff --git a/notebooks/Data_Exporter_Combined.ipynb b/notebooks/Data_Exporter_Combined.ipynb new file mode 100644 index 0000000..663b9d3 --- /dev/null +++ b/notebooks/Data_Exporter_Combined.ipynb @@ -0,0 +1,115 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Log Data Exporter - Combined Sensor CSV\n", + "This data exporter takes a log file and exports it to a single CSV files, with data values for each sensor.\n", + "\n", + "Since each sensor is exported together, these CSV files will have interpolated data values. This is because each row represents an update in _one_ sensor value, but there must be valid entries for the other sensors at the time. The values of all of the other sensors will be interpolated to the nearest valid reading (in time) for these events." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas\n", + "import platypus.io.logs\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Import the data from the specified logfile\n", + "log_filename = '../logs/platypus_20161126_161922.txt'\n", + "data = platypus.io.logs.load(log_filename)\n", + "\n", + "# To selectively export certain sensors, change this value to an array of names.\n", + "# By default, all sensors are exported (meaning they are also interpolated).\n", + "sensor_names = None\n", + "# sensor_names = ['ATLAS_DO', 'ES2']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Extract position of boat and remove duplicate entries.\n", + "data[\"pose\"][\"time\"] = data[\"pose\"].index\n", + "position = data['pose'][['time', 'latitude', 'longitude']].drop_duplicates(subset='time', keep='first')\n", + "position = position[['latitude', 'longitude']] # remove unneeded time column after de-duplication\n", + "\n", + "# If no sensor is specified, try to export all detected sensors.\n", + "if not sensor_names:\n", + " sensor_names = [k for k in data.keys() if k not in ['pose', 'BATTERY']]\n", + "\n", + "# Put together all the sensor data we are looking for.\n", + "# This also combines the name of the sensor with the name of each channel. 
\n", + "sensor_frames = []\n", + "for sensor_name in sensor_names:\n", + " sensor_data = data[sensor_name].copy()\n", + " sensor_data.columns = [ sensor_name + '_' + str(k) for k in sensor_data.columns ]\n", + " sensor_frames.append(sensor_data)\n", + "sensor_data = pandas.concat(sensor_frames, axis=1)\n", + "\n", + "# Find the position for each sensor reading.\n", + "sensor_position = position.reindex(sensor_data.index, method='nearest')\n", + "output = pandas.concat((sensor_position, sensor_data), axis=1)\n", + "\n", + "# Fill in missing values with last known values.\n", + "output = output.apply(pandas.Series.interpolate, method='nearest')\n", + "output['epoch_time'] = output.index.astype(np.int64)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Output the file as a CSV in the same directory as the original logfile.\n", + "output_filename = os.path.splitext(log_filename)[0] + '.csv'\n", + "output.to_csv(output_filename, index=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.9" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/Data_Exporter_Independent.ipynb b/notebooks/Data_Exporter_Independent.ipynb new file mode 100644 index 0000000..ac18813 --- /dev/null +++ b/notebooks/Data_Exporter_Independent.ipynb @@ -0,0 +1,126 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Log Data Exporter - Independent Sensor CSVs\n", + "This data exporter takes a log file and exports it to a series of CSV files, one per sensor.\n", + "\n", + "Since each sensor is exported separately, these CSV files will not have interpolated data values, only the raw reported sensor values." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas\n", + "import platypus.io.logs\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Import the data from the specified logfile\n", + "log_filename = '../logs/platypus_20161126_161922.txt'\n", + "data = platypus.io.logs.load(log_filename)\n", + "\n", + "# To selectively export certain sensors, change this value to an array of names.\n", + "# By default, all sensors are exported (meaning they are also interpolated).\n", + "sensor_names = None\n", + "# sensor_names = ['ATLAS_DO', 'ES2']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "def export_csv(data, sensor_name, output_filename):\n", + " \"\"\"\n", + " Export a CSV for a particular sensor in a logfile.\n", + "\n", + " :param data: a loaded Platypus data log\n", + " :param sensor_name: the name of a sensor in the data log\n", + " :param output_file: the filename to use for the output CSV\n", + " \"\"\"\n", + " # Extract position of boat and remove duplicate entries.\n", + " data[\"pose\"][\"time\"] = data[\"pose\"].index\n", + " position = data['pose'][['time', 'latitude', 'longitude']].drop_duplicates(subset='time', keep='first')\n", + " position = position[['latitude', 'longitude']] # remove unneeded time column after de-duplication\n", + "\n", + " # Check if the sensor name exists in the data log.\n", + " if sensor_name not in data.keys():\n", + " raise ValueError(\"'{:s}' sensor was not found in the log file.\"\n", + " .format(sensor_name))\n", + "\n", + " # Put together all the sensor data we are looking for.\n", + " sensor_data = data[sensor_name].copy()\n", + "\n", + " # Find the position for each sensor reading.\n", + " sensor_position = position.reindex(sensor_data.index, method='nearest')\n", + " output = pandas.concat((sensor_position, sensor_data), axis=1)\n", + "\n", + " # Fill in missing values with last known values.\n", + " output = output.apply(pandas.Series.interpolate, method='nearest')\n", + " output['epoch_time'] = output.index.astype(np.int64)\n", + " \n", + " # Output the file as a CSV in the same directory as the original logfile.\n", + " output.to_csv(output_filename, index=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# If no sensor is specified, try to export all detected sensors.\n", + "if not sensor_names:\n", + " sensor_names = [k for k in data.keys() if k not in ['pose', 'BATTERY']]\n", + "\n", + "# Iterate through and export each sensor as the original logfile name + sensor_name\n", + "for sensor_name in sensor_names:\n", + " output_filename = os.path.splitext(log_filename)[0] + '_' + str(sensor_name) + '.csv'\n", + " export_csv(data, sensor_name, output_filename)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.9" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/Data_Interpolation.ipynb 
b/notebooks/Data_Interpolation.ipynb new file mode 100644 index 0000000..cc23f2f --- /dev/null +++ b/notebooks/Data_Interpolation.ipynb @@ -0,0 +1,484 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "\n", + "from ipyleaflet import Map, ImageOverlay, Polyline\n", + "import matplotlib\n", + "import matplotlib.cm\n", + "from matplotlib import pyplot\n", + "import numpy as np\n", + "import numpy.lib.recfunctions\n", + "import scipy\n", + "import scipy.interpolate\n", + "import pandas\n", + "import platypus.io.logs\n", + "import os\n", + "import uuid\n", + "import glob" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['/home/shawn/Downloads/Archive/platypus_20180221_172549.txt', '/home/shawn/Downloads/Archive/platypus_20180619_112857.txt', '/home/shawn/Downloads/Archive/platypus_20180618_130514.txt', '/home/shawn/Downloads/Archive/platypus_20180619_093422.txt', '/home/shawn/Downloads/Archive/platypus_20180617_214447.txt', '/home/shawn/Downloads/Archive/platypus_20180617_200121.txt']\n", + "set([u'DO_ATLAS', u'BATTERY', 'pose', u'EC_DECAGON', u'PH_ATLAS', u'T_DECAGON'])\n", + "ES2 sensor is present. Trimming all data within EC = 0 time windows\n", + "\n", + "Available sensors/channels:\n", + " DO_ATLAS, do\n", + " EC_DECAGON, ec\n", + " PH_ATLAS, ph\n", + " T_DECAGON, temperature\n" + ] + } + ], + "source": [ + "# Import the data from the specified logfile\n", + "\n", + "log_ext = \".txt\"\n", + "\n", + "log_path = \"/home/shawn/Downloads/Archive\"\n", + "log_filenames = [\n", + "# log_path + \"/\" + \"platypus_20180619_120536.txt\",\n", + "# log_path + \"/\" + \"platypus_20180619_120706.txt\",\n", + "# log_path + \"/\" + \"platypus_20180619_130313.txt\",\n", + "# log_path + \"/\" + \"platypus_20180619_131024.txt\",\n", + "# log_path + \"/\" + \"platypus_20180619_133227.txt\",\n", + "# log_path + \"/\" + \"platypus_20180619_133645.txt\",\n", + "# log_path + \"/\" + \"platypus_20180619_141055.txt\",\n", + "# log_path + \"/\" + \"platypus_20180619_141145.txt\",\n", + "# log_path + \"/\" + \"platypus_20180619_141345.txt\",\n", + "# log_path + \"/\" + \"platypus_20180619_141356.txt\",\n", + "# log_path + \"/\" + \"platypus_20180619_141442.txt\",\n", + "# log_path + \"/\" + \"platypus_20180619_161419.txt\",\n", + "# log_path + \"/\" + \"platypus_20180619_161511.txt\",\n", + "# log_path + \"/\" + \"platypus_20180619_165615.txt\",\n", + "# log_path + \"/\" + \"platypus_20180620_092526.txt\",\n", + "# log_path + \"/\" + \"platypus_20180620_120001.txt\",\n", + "# log_path + \"/\" + \"platypus_20180620_123406.txt\",\n", + "# log_path + \"/\" + \"platypus_20180620_134304.txt\",\n", + "# log_path + \"/\" + \"platypus_20180620_161350.txt\",\n", + "# log_path + \"/\" + \"platypus_20180620_164312.txt\",\n", + "# log_path + \"/\" + \"platypus_20180620_173053.txt\",\n", + "# log_path + \"/\" + \"platypus_20180620_173712.txt\",\n", + "# log_path + \"/\" + \"platypus_20180620_175217.txt\",\n", + "# log_path + \"/\" + \"platypus_20180620_181927.txt\",\n", + "# log_path + \"/\" + \"platypus_20180620_182042.txt\",\n", + "\n", + "]\n", + "\n", + "log_path = \"/home/shawn/Downloads/Archive/\"\n", + "log_filenames = glob.glob(log_path+'/'+\"*.txt\")\n", + "print log_filenames\n", + "\n", + "# log_path = \"/home/shawn/NL1/all_nov_2017/\"\n", + "# log_filenames = 
glob.glob(log_path+'/'+\"*.txt\")\n", + "\n", + "csv_output_filename = \"NL_Nootdorp_June_2018\"\n", + "\n", + "# data = platypus.io.logs.load(log_path + log_filename + log_ext)\n", + "data = platypus.io.logs.merge_files(log_filenames)\n", + "\n", + "for x in log_filenames:\n", + " if not os.path.exists:\n", + " print \"file \" + x + \" does not exist!!!\"\n", + "\n", + "\n", + "if \"EC_DECAGON\" in data:\n", + " print \"ES2 sensor is present. Trimming all data within EC = 0 time windows\\n\"\n", + " # find all time windows where EC is exactly 0\n", + " ES2_data = data[\"EC_DECAGON\"]\n", + " values = ES2_data[\"ec\"].values\n", + "# ec_eq_zero_indices = np.where(values == 0)[0]\n", + " ec_eq_zero_indices = np.where( (values < 20))[0]\n", + "# ec_eq_zero_indices = np.where(values < 50)[0]\n", + " windows = list()\n", + " windows.append([ec_eq_zero_indices[0]])\n", + " left = ec_eq_zero_indices[0]\n", + " for ii in range(1, ec_eq_zero_indices.shape[0]):\n", + " i = ec_eq_zero_indices[ii]\n", + " if i - left > 5:\n", + " # there has been a jump in index, a new time window has started\n", + " windows[-1].append(left)\n", + " windows.append([i])\n", + " left = i\n", + " windows[-1].append(ec_eq_zero_indices[-1])\n", + " # print ec_eq_zero_indices\n", + " # print windows\n", + " for window in windows:\n", + " time_window = [ES2_data[\"ec\"].index.values[window[0]], ES2_data[\"ec\"].index.values[window[1]]]\n", + " for k in data:\n", + " data[k] = data[k].loc[np.logical_or(data[k].index < time_window[0], data[k].index > time_window[1])]\n", + "else:\n", + " print \"No ES2 sensor present. No trimming will be performed.\"\n", + "\n", + "\n", + "\n", + "# if \"PH_ATLAS\" in data:\n", + "# print \"pH sensor is present. Trimming all data within pH < 6 time windows\\n\"\n", + "# # find all time windows where pH is less than 6\n", + "# pH_data = data[\"PH_ATLAS\"]\n", + "# values = pH_data[\"ph\"].values\n", + "# # pH_lt_6_indices = np.where( (values < 6) | (values > 8.5))[0]\n", + "# pH_lt_6_indices = np.where( (values < 6) )[0]\n", + "# windows = list()\n", + "# windows.append([pH_lt_6_indices[0]])\n", + "# left = pH_lt_6_indices[0]\n", + "# for ii in range(1, pH_lt_6_indices.shape[0]):\n", + "# i = pH_lt_6_indices[ii]\n", + "# if i - left > 5:\n", + "# windows[-1].append(left)\n", + "# windows.append([i])\n", + "# left = i\n", + "# windows[-1].append(pH_lt_6_indices[-1])\n", + "# for window in windows:\n", + "# time_window = [pH_data[\"ph\"].index.values[window[0]], pH_data[\"ph\"].index.values[window[1]]]\n", + "# for k in data:\n", + "# data[k] = data[k].loc[np.logical_or(data[k].index < time_window[0], data[k].index > time_window[1])]\n", + "\n", + "\n", + "# Define useful access variables.\n", + "pose = data['pose']\n", + "position = pose[['latitude', 'longitude']]\n", + "\n", + "# Print the available sensors and channels for this logfile.\n", + "print \"Available sensors/channels:\"\n", + "for s in data.keys():\n", + " if s == 'pose' or s == 'BATTERY':\n", + " continue\n", + " for c in data[s].dtypes.keys():\n", + " print \" {:s}, {:s}\".format(s, str(c))" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "# Select the sensor and the name of the channel for that sensor.\n", + "sensor_name = 'PH_ATLAS'\n", + "sensor_channel = 'ph'\n", + "sensor_units = \"pH\"\n", + "\n", + "# sensor_name = 'EC_DECAGON'\n", + "# sensor_channel = 'ec'\n", + "# sensor_units = 'Electrical Conductivity (uS/cm)'\n", + "\n", + "# sensor_name = 
'T_DECAGON'\n", + "# sensor_channel = 'temperature'\n", + "# sensor_units = 'Temperature (C)'\n", + "\n", + "# sensor_name = 'DO_ATLAS'\n", + "# sensor_channel = 'do'\n", + "# sensor_units = 'Dissolved Oxygen'\n", + "\n", + "\n", + "# print data[\"pose\"]\n", + "# data[\"pose\"].to_csv(log_path + \"/poses.csv\", columns=[\"latitude\", \"longitude\"])\n", + "data[\"PH_ATLAS\"][\"ph\"].to_csv(log_path + \"/ph.csv\")\n", + "data[\"DO_ATLAS\"][\"do\"].to_csv(log_path + \"/do.csv\")\n", + "data[\"EC_DECAGON\"][\"ec\"].to_csv(log_path + \"/ec.csv\")\n", + "data[\"T_DECAGON\"][\"temperature\"].to_csv(log_path + \"/temp.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# Extract the pose timing and the sensor data of interest.\n", + "pose_times = pose.index.values.astype(np.float64)\n", + "\n", + "if sensor_name in data:\n", + " sensor = data[sensor_name]\n", + " sensor_times = sensor.index.values.astype(np.float64)\n", + "\n", + " # Linearly interpolate the position of the sensor at every sample.\n", + " sensor_pose_interpolator = scipy.interpolate.interp1d(pose_times, position,\n", + " axis=0, bounds_error=False)\n", + "\n", + " # Add the position information back to the sensor data.\n", + " sensor = sensor.join(pandas.DataFrame(sensor_pose_interpolator(sensor_times), sensor.index,\n", + " columns=('latitude', 'longitude')))\n", + " \n", + " # print sensor data to csv file\n", + " sensor.to_csv(log_path + csv_output_filename + \"__\" + sensor_name + \".csv\")\n", + "\n", + " # Remove columns that have NaN values (no pose information).\n", + " sensor_valid = np.all(np.isfinite(sensor), axis=1)\n", + " sensor = sensor[sensor_valid]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/shawn/src/analytics/venv/lib/python2.7/site-packages/ipykernel_launcher.py:2: FutureWarning: Method .as_matrix will be removed in a future version. 
Use .values instead.\n", + " \n" + ] + } + ], + "source": [ + "# Create a trail of the vehicle's path on the map.\n", + "pl = Polyline(locations=position.as_matrix().tolist())\n", + "pl.fill_opacity = 0.0\n", + "pl.weight = 2" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "latitude 37.756384\n", + "longitude -122.382500\n", + "dtype: float64\n", + "latitude 37.757332\n", + "longitude -122.380777\n", + "dtype: float64\n", + "[1e-05, 1e-05]\n", + "Data min = 53.866667 Data max = 1126.130000\n" + ] + } + ], + "source": [ + "## Add a data overlay for the map\n", + "data_padding = [0.00001, 0.00001] # degrees lat/lon\n", + "data_resolution = [0.00001, 0.00001] # degrees lat/lon\n", + "data_interpolation_radius = 0.00001 # degrees lat/lon\n", + "data_bounds = [(position.min() - data_padding).tolist(),\n", + " (position.max() + data_padding).tolist()]\n", + "print position.min()\n", + "print position.max()\n", + "print data_resolution\n", + "\n", + "# Create a rectangular grid of overlay points.\n", + "data_xv, data_yv = np.meshgrid(\n", + " np.arange(data_bounds[1][0], data_bounds[0][0], -data_resolution[0]),\n", + " np.arange(data_bounds[0][1], data_bounds[1][1], data_resolution[1])\n", + ")\n", + "data_shape = data_xv.shape\n", + "data_xy = np.vstack([data_xv.ravel(), data_yv.ravel()]).T\n", + "\n", + "if sensor_name in data:\n", + " # Create a radial-basis interpolator over the sensor dataset\n", + " # Then, query it at each point of the rectangular grid.\n", + " #from sklearn.neighbors import RadiusNeighborsClassifier\n", + " #data_estimator = RadiusNeighborsClassifier(radius=data_interpolation_radius, outlier_label=np.nan)\n", + " from sklearn.neighbors import RadiusNeighborsRegressor\n", + " data_estimator = RadiusNeighborsRegressor(radius=data_interpolation_radius)\n", + "\n", + " data_estimator.fit(sensor[['latitude','longitude']], sensor[sensor_channel].astype(np.float))\n", + " data_zv = data_estimator.predict(data_xy)\n", + " data_zv = data_zv.reshape(data_shape).T\n", + "\n", + " # Normalize data from [0, 1)\n", + " data_max = data_zv[np.isfinite(data_zv)].max()\n", + " data_min = data_zv[np.isfinite(data_zv)].min()\n", + " print \"Data min = {:f} Data max = {:f}\".format(data_min, data_max)\n", + " NORMALIZER = data_max # 800\n", + " data_zv = (data_zv - data_min) / (NORMALIZER - data_min)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "removing file: ./platypus_data_4f9c525b-15cb-4c02-86b5-ff6068d9c0f5.png\n", + "removing file: ./platypus_data_4e7f86d5-1ae2-4a07-b950-ec10ec93a1b9.png\n", + "removing file: ./platypus_data_628d10c5-64a1-4ed1-9e9d-9ec81b3e3f87.png\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/shawn/Downloads/analytics-shawn_netherlands_save/analytics-shawn_netherlands_save/venv/lib/python2.7/site-packages/ipykernel_launcher.py:15: DeprecationWarning: `imsave` is deprecated!\n", + "`imsave` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.\n", + "Use ``imageio.imwrite`` instead.\n", + " from ipykernel import kernelapp as app\n", + "/home/shawn/Downloads/analytics-shawn_netherlands_save/analytics-shawn_netherlands_save/venv/lib/python2.7/site-packages/ipykernel_launcher.py:21: DeprecationWarning: `imsave` is deprecated!\n", + "`imsave` is deprecated in 
SciPy 1.0.0, and will be removed in 1.2.0.\n", + "Use ``imageio.imwrite`` instead.\n" + ] + } + ], + "source": [ + "\n", + "if sensor_name in data:\n", + " # Update a color map only at the points that have valid values.\n", + " data_rgb = np.zeros((data_shape[0], data_shape[1], 4), dtype=np.uint8)\n", + " data_rgb = matplotlib.cm.jet(data_zv) * 255\n", + " data_rgb[:,:,3] = 255 * np.isfinite(data_zv)\n", + "\n", + " # Remove any old image files.\n", + " old_png_files = glob.glob('./*.png')\n", + " for old_png_file in old_png_files:\n", + " print \"removing file: \" + old_png_file\n", + " os.remove(old_png_file)\n", + "\n", + " png_filename = './platypus_data_{:s}.png'.format(uuid.uuid4())\n", + " scipy.misc.imsave(png_filename, data_rgb)\n", + "\n", + " # Create image overlay that references generated image.\n", + " \n", + " data_rgb2 = np.ones((data_shape[0], data_shape[1], 4), dtype=np.uint8) * 50\n", + " png_filename2 = './platypus_data_{:s}.png'.format(uuid.uuid4())\n", + " scipy.misc.imsave(png_filename2, data_rgb2)\n", + " \n", + " data_bounds2 = [(position.min() - [x * 500 for x in data_padding]).tolist(),\n", + " (position.max() + [x * 500 for x in data_padding]).tolist()]\n", + " io_blank = ImageOverlay(url=png_filename2, bounds=data_bounds2)\n", + " io = ImageOverlay(url=png_filename, bounds=data_bounds)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b66f7a07105b446a88823d2713173e3f", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "TWFwKGJhc2VtYXA9eyd1cmwnOiAnaHR0cHM6Ly97c30udGlsZS5vcGVuc3RyZWV0bWFwLm9yZy97en0ve3h9L3t5fS5wbmcnLCAnbWF4X3pvb20nOiAxOSwgJ2F0dHJpYnV0aW9uJzogJ01hcCDigKY=\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA+EAAABRCAYAAAC9m6cOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAACylJREFUeJzt3X+MZWV9x/H3hx0QpY3sgrHC0kgD2kAT0dBCU2u0EtCCQhq1pral1saa1NQabUNbUzCpDU2arjZNbIkUrViK2Rqx1WA3QqP9o5uA2IIgQmyVRWD5JRJtkV2+/eM+I3fP3pk7l3vnzNw771ey2fP8vN9z95ln5zvnnDupKiRJkiRJ0vo7YqMDkCRJkiRpqzAJlyRJkiSpJybhkiRJkiT1xCRckiRJkqSemIRLkiRJktQTk3BJkiRJknpiEi5JkiRJUk9MwiVJkiRJ6olJuCRJkiRJPVmapHNySsH3l0vd1lXKk/Sd9VxdWy2uGZrl2zPN3PMy1zRzO9f6fsn0NleNGXtoe1bp321bre+gWCOPR+m2r9b/8Lbu2HH9n9nrTtp/mrk3SxyTzj3LubbC+zXZ3M98nXfbN/ScaiiO1b+UD2/vWvntOby82thx/cf1Xa+x3fZZnqPv12SvvZ5zTTJ20r4zHHtYc40+HlleJYxu+6SnNM0/23q+1esZ17jldB98vqpeM6LpEBMl4YME/LdXGHrkKlOv1jZp+7ixs5xro85xlnNNaPg7i20Tvmy3PDx+mrHd8dOM7fafZmy3PK/v13rOtVp5lu/9tHFN894fVh7akpcOdtoOLR+x7cChzUce2r5tqP+2pW5bZ2y3/YihsXTaDit35lql/zRj13euZz62O37ysbOca+XzmCaOyeeaZk1slvU17ZrYfOtr4jgOdsoHhuI4+FSnjVXL6Wxnh4RyYJU2gNXGdsuT9B33Wsa1cXFNM3YrxDXh2Cc77UNfyjx5cOW2kWM7L/XkCsfj+o5rn2Zst73bNsu5ZnmOAJfB8SOqD+Pt6JIkSZIk9cQkXJIkSZKknpiES5IkSZLUE5NwSZIkSZJ6YhIuSZIkSVJPTMIlSZIkSeqJSbgkSZIkST0xCZckSZIkqScm4ZIkSZIk9cQkXJIkSZKknpiES5IkSZLUE5NwSZIkSZJ6YhIuSZIkSVJPTMIlSZIkSeqJSbgkSZIkST0xCZckSZIkqScm4ZIkSZIk9cQkXJIkSZKknpiES5IkSZLUE5NwSZIkSZJ6YhIuSZIkSVJPTMIlSZIkSeqJSbgkSZIkST0xCZckSZIkqScm4ZIkSZIk9cQkXJIkSZKknpiES5IkSZLUE5NwSZIkSZJ6YhIuSZIkSVJPTMIlSZIkSeqJSbgkSZIkST1JVa29c3I9cPz6haM5czzw0EYHIc2Qa1qLyHWtReOa1qJxTS+Oh6rqNeM6TZSES8OS3FRVZ250HNKsuKa1iFzXWjSuaS0a1/TW4+3okiRJkiT1xCRckiRJkqSemIRrGldsdADSjLmmtYhc11o0rmktGtf0FuMz4ZIkSZIk9cQr4ZIkSZIk9cQkXCMlOSnJjUluT/LVJO9q9TuS7ElyV/t7e6tPkr9KcneS/0ryso09A2llSbYluSXJv7TyyUn2tvV7bZKjWv2zWvnu1v7CjYxbGiXJsUl2J/lakjuS/Kx7teZZkne37z1uS3JNkqPdpzVvkvxdkv1Jbhuqm3hvTnJx639Xkos34lw0eybhWskB4D1VdRpwNvA7SU4DLgG+UFWnAl9oZYDXAqe2P28HPtx/yNKavQu4Y6j858CuqjoFeBR4W6t/G/Boq9/V+kmbzYeA66vqJ4GXMFjb7tWaS0lOBH4XOLOqfgrYBrwZ92nNn48C3d8XPdHenGQHcClwFvAzwKXLibvmm0m4Rqqq+6rqy+34cQbf1J0IXAh8rHX7GHBRO74Q+Psa+A/g2CQv6DlsaawkO4HzgY+0coBfAHa3Lt11vbzedwOvbv2lTSHJc4FXAFcCVNUPquo7uFdrvi0Bz06yBDwHuA/3ac2Zqvoi8EinetK9+TxgT1U9UlWPAns4PLHXHDIJ11jt1q6XAnuB51fVfa3pfuD57fhE4J6hYftanbTZfBD4A+CpVj4O+E5VHWjl4bX7w3Xd2h9r/aXN4mTgQeCq9ojFR5Icg3u15lRV3Qv8BfAtBsn3Y8DNuE9rMUy6N7tnLyiTcK0qyY8A/wT8XlV9d7itBh+t78fra24kuQDYX1U3b3Qs0owsAS8DPlxVLwW+x9O3NwLu1Zov7VbbCxn8gOkE4Bi88qcF5N68tZmEa0VJjmSQgH+iqj7Vqh9YvnWx/b2/1d8LnDQ0fGerkzaTnwNen+R/gH9kcHvjhxjc9rXU+gyv3R+u69b+XODhPgOWxtgH7Kuqva28m0FS7l6teXUO8N9V9WBVPQl8isHe7T6tRTDp3uyevaBMwjVSe57qSuCOqvrLoabPAMufzHgxcN1Q/a+3T3c8G3hs6HYbaVOoqj+sqp1V9UIGH/RzQ1W9BbgReEPr1l3Xy+v9Da2/P7XWplFV9wP3JHlxq3o1cDvu1Zpf3wLOTvKc9r3I8pp2n9YimHRv/jxwbpLt7S6Rc1ud5lzcpzRKkpcDXwJu5elnZ/+IwXPhnwR+HPgm8KaqeqT9R/nXDG4Z+z7w1qq6qffApTVK8krgvVV1QZKfYHBlfAdwC/CrVfVEkqOBjzP4TIRHgDdX1Tc2KmZplCRnMPigwaOAbwBvZfBDdvdqzaUk7wd+mcFvarkF+C0Gz8G6T2tuJLkGeCVwPPAAg085/zQT7s1JfpPB9+AAH6iqq/o8D60Pk3BJkiRJknri7eiSJEmSJPXEJFySJEmSpJ6YhEuSJEmS1BOTcEmSJEmSemISLkmSJElST0zCJUlagyTHJflK+3N/knuHyketcY6rk1w0ov6sJLtWGLMvybFJdiR5xzOIe2eS69rxOUkqyWuH2q9P8vIkn2nncneSx4bO7azlGIbGnJPk0+34oiR/MmlckiRtVSbhkiStQVU9XFVnVNUZwN8Au5bLVfWDceOTLK0y996qeveYKXYAEyfhwHuAK4bK9wB/PCKG17dzewdw49C57R0z/3XAL7Xf1yxJksYwCZckaQpJTknylaHyJUne147/PcmuJDcB72xdzktyc5KvL1+R7lxZfl6SPUm+muRvgbRxlwMvblenL0/yD0kuGHrda5Oc34ktwEXAnqHqLwNPJHnVLM6/qgr4EvCLs5hPkqRFZxIuSdL62lZVZ1bVB1v5JOCngdcBVyR5Vqf/+xlciT4d+BxwQqu/BLizXZ2+BLgS+A2AJNvbnNd35joF2D/iSv0HgPdNfWZPuwn4+RnOJ0nSwjIJlyRpfV3bKX+yqp6qqjsZ3Bp+aqf9FcDVAFV1HfD4CvPeAJye5DjgLW3eg50+LwAe7A6sqhuAZyc5e43nUGPq9vP0DwskSdIqTMIlSZrOAQ79/7T7bPT3OuVuQjsqwR2r3QZ+NfArDK6IXzWi2/+OiGfZn7L2q+EPA9uHyjuAh4bKR7
fXkiRJY5iES5I0nfuBE5Jsbx9Odv6Y/m/MwIsY3Jp+V6f9iwwSa5K8DvjRVv/40PGyq4DfB55oV9a77gROHhVEVX0O+DHg9DHxAvwb8GstpiUGV95vHGp/EXDbGuaRJGnLMwmXJGkKVfV/wJ8xeC76X4Hbxwy5t/X9Z+DtI57XvhQ4J8ltwAXAt9vrPADcnOTWJJe3um8DX2f0VXCq6rvAPUlGJuIt7p1j4gW4DDgtyX8y+GC3O4BrhtpfBXx2DfNIkrTlZXA3myRJmjdJjgFuBV5SVSOfHU/yRuD0qrpsnWI4AfhoVZ27HvNLkrRovBIuSdIcSnIegyvSu1ZKwJvdwL51DOUk4L3rOL8kSQvFK+GSJEmSJPXEK+GSJEmSJPXEJFySJEmSpJ6YhEuSJEmS1BOTcEmSJEmSemISLkmSJElST0zCJUmSJEnqyf8DVpg+08em6FkAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "# Create a map centered on this data log.\n", + "center = [pose['latitude'].median(), pose['longitude'].median()]\n", + "# print center\n", + "zoom = 17\n", + "m = Map(center=center, zoom=zoom, height='1300px')\n", + "if sensor_name in data:\n", + " m += io_blank\n", + " m += io # Add image overlay\n", + "if sensor_name not in data:\n", + " m += pl # Add vehicle trail, but only if there isn't heatmap data to look at\n", + "\n", + "# Make a figure and axes with dimensions as desired.\n", + "fig = pyplot.figure(figsize=(15, 3))\n", + "ax1 = fig.add_axes([0.05, 0.80, 0.9, 0.15])\n", + "\n", + "if sensor_name in data:\n", + " # Set the colormap and norm to correspond to the data for which\n", + " # the colorbar will be used. \n", + " cmap = matplotlib.cm.jet\n", + " norm = matplotlib.colors.Normalize(vmin=data_min, vmax=NORMALIZER)\n", + " cb1 = matplotlib.colorbar.ColorbarBase(ax1, cmap=cmap,\n", + " norm=norm,\n", + " orientation='horizontal')\n", + " cb1.set_label(sensor_units)\n", + "\n", + "png_filename = './platypus_data_{:s}.png'.format(uuid.uuid4())\n", + "pyplot.savefig(png_filename)\n", + "# pyplot.show()\n", + "m" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.12" + }, + "widgets": { + "state": { + "353ccbbc0ff94b5bbe0afcc012b3fd5c": { + "views": [ + { + "cell_index": 6 + } + ] + } + }, + "version": "1.2.0" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/notebooks/README.md b/notebooks/README.md new file mode 100644 index 0000000..c1785d6 --- /dev/null +++ b/notebooks/README.md @@ -0,0 +1,17 @@ +# Platypus Analytics Notebooks + +This directory contains example IPython/Jupyter notebooks that demonstrate various useful data analysis operations. + +# Usage + +If you do not already have it installed, install [Jupyter](https://jupyter.readthedocs.io/en/latest/install.html#alternative-for-experienced-python-users-installing-jupyter-with-pip): +``` +$ pip install jupyter +``` + +Then, simply browse to this directory and run: +``` +$ jupyter notebook +``` + +This should create a Jupyter instance showing the notebooks in this directory. diff --git a/setup.py b/setup.py index 8d8033a..e6409b1 100644 --- a/setup.py +++ b/setup.py @@ -35,11 +35,18 @@ ] }, install_requires=[ + 'pillow', 'pandas', 'pymongo', 'pyserial', 'six', - 'utm' + 'scipy', + 'utm', + 'matplotlib', + 'jupyter', + 'ipyleaflet', + 'sklearn', + 'flask' ], test_suite="tests", ) diff --git a/src/platypus/io/insitu_logs.py b/src/platypus/io/insitu_logs.py new file mode 100644 index 0000000..7d6459f --- /dev/null +++ b/src/platypus/io/insitu_logs.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python +""" +Module for handling the import of various logfiles into numpy arrays. +Copyright 2015. Platypus LLC. All rights reserved. 
+""" +import collections +import datetime +import logging +import itertools +import pandas + +logger = logging.getLogger(__name__) + + +def merge_files(filename_list): + """ + + :param: filename_list: list of full path filename strings + :return: One result will all the dataframes merged + :rtype: {str: pandas.DataFrame} + """ + + logfile_result_list = [load(filename) for filename in filename_list] + if len(logfile_result_list) == 1: + return logfile_result_list[0] + #all_data_types = set() + #for i in range(1, len(logfile_result_list)): + # all_data_types = all_data_types.union(set(logfile_result_list[i].keys())) + all_data_types = {key for log_dict in logfile_result_list for key in log_dict.keys()} + print all_data_types + + merged_dataframe_dict = dict() + + for data_type in all_data_types: + for i in range(len(logfile_result_list)): + if data_type in logfile_result_list[i]: + first_log_index = i + break + merged_dataframe_dict[data_type] = logfile_result_list[first_log_index][data_type] + for i in range(first_log_index + 1, len(logfile_result_list)): + if data_type in logfile_result_list[i]: + merged_dataframe_dict[data_type] = merged_dataframe_dict[data_type].combine_first(logfile_result_list[i][data_type]).dropna(how='any') + return merged_dataframe_dict + + + + +def load(filename): + """ + Loads a log from an in-situ log file (htm or csv) + + Attempts to auto-detect format from the file. + + :param filename: path to a log file + :type filename: string + :returns: a dict containing the data from this logfile + :rtype: {str: numpy.recarray} + """ + return pandas.read_csv(filename) diff --git a/src/platypus/io/logs.py b/src/platypus/io/logs.py index 371ecc1..09dca13 100644 --- a/src/platypus/io/logs.py +++ b/src/platypus/io/logs.py @@ -57,10 +57,10 @@ """ _REGEX_ES2_V4_0_0 = re.compile( - r"^ES2: \[e, (?P[\d\.]+), (?P[\d\.]+)\]") + r"^ES2: \[e, (?P[\d\.]+), (?P[\d\.]+)\]") """ Defines a regular expression that represents a pose record of the form: -'ES2: [e, , ]' +'ES2: [e, , ]' This format is used in v4.0.0 vehicle log entries. """ @@ -76,7 +76,7 @@ _DATA_FIELDS_v4_1_0 = { 'BATTERY': ('voltage', 'm0_current', 'm1_current'), - 'ES2': ('ec', 'temp'), + 'ES2': ('ec', 'temperature'), 'ATLAS_DO': ('do',), 'ATLAS_PH': ('ph',), } @@ -86,7 +86,7 @@ _DATA_FIELDS_v4_2_0 = { 'BATTERY': ('voltage', 'm0_current', 'm1_current'), - 'ES2': ('ec', 'temp'), + 'ES2': ('ec', 'temperature'), 'ATLAS_DO': ('do',), 'ATLAS_PH': ('ph',), } @@ -94,8 +94,62 @@ Defines dataframe field names for known data types in v4.2.0 logfiles. """ +_DATA_FIELDS_v4_3_0 = { + 'BATTERY': ('voltage',), + 'EC_DECAGON': ('ec',), + 'T_DECAGON': ('temperature',), + 'DO_ATLAS': ('do',), + 'PH_ATLAS': ('ph',), +} +""" +Defines dataframe field names for known data types in v4.2.0 logfiles. 
+""" -def read_v4_2_0(logfile): + +def merge_files(filename_list): + """ + + :param: filename_list: list of full path filename strings + :return: One result will all the dataframes merged + :rtype: {str: pandas.DataFrame} + """ + logfile_result_list = [load(filename) for filename in filename_list] + if len(logfile_result_list) == 1: + return logfile_result_list[0] + #all_data_types = set() + #for i in range(1, len(logfile_result_list)): + # all_data_types = all_data_types.union(set(logfile_result_list[i].keys())) + all_data_types = {key for log_dict in logfile_result_list for key in log_dict.keys()} + print all_data_types + + merged_dataframe_dict = dict() + + for data_type in all_data_types: + for i in range(len(logfile_result_list)): + if data_type in logfile_result_list[i]: + first_log_index = i + break + merged_dataframe_dict[data_type] = logfile_result_list[first_log_index][data_type] + for i in range(first_log_index + 1, len(logfile_result_list)): + if data_type in logfile_result_list[i]: + merged_dataframe_dict[data_type] = merged_dataframe_dict[data_type].combine_first(logfile_result_list[i][data_type]).dropna(how='any') + return merged_dataframe_dict + + +def read_around_sampler(logfile, pump_duration_seconds=4*60): + """ + Reads text logs from a Platypus vehicle server logfile, particularly focusing on data following a sampler jar activation + + :param logfile: the logfile as an iterable + :param pump_duration_seconds: an integer, the number of seconds to extract data + :return: a dict containing the data from this logfile + :rtype: {int: pandas.DataFrame}, where int key is jar number (1-4) + """ + #TODO + return + + +def read_v4_3_0(logfile): """ Reads text logs from a Platypus vehicle server logfile. @@ -142,10 +196,100 @@ def read_v4_2_0(logfile): v['p'][2], zone, hemi ]) - elif k == 'sensor': - raw_data[v['type']].append( - [timestamp] + v['data'] + elif k == 'sensor': + try: + + + raw_data[v['type']].append([timestamp] + [v['data']]) + except: + # do nothing + None + else: + pass + + # Convert the list data to pandas DataFrames and return them. + # For known types, clean up and label the data. + data = {} + + for k, v in six.viewitems(raw_data): + if k == 'pose': + data['pose'] = add_ll_to_pose_dataframe( + remove_outliers_from_pose_dataframe( + pandas.DataFrame(v, columns=('time', + 'easting', 'northing', + 'altitude', 'zone', 'hemi')) + .set_index('time') ) + ) + elif k in _DATA_FIELDS_v4_3_0: + data[k] = (pandas.DataFrame( + v, columns=('time',) + _DATA_FIELDS_v4_3_0[k]) + .set_index('time')) + else: + print 'other' + # For sensor types that we don't know how to handle, + # provide an unlabeled data frame. + data[k] = (pandas.DataFrame(v) + .rename(columns={0: 'time'}, copy=False) + .set_index('time')) + return data + + +def read_v4_2_0(logfile): + """ + Reads text logs from a Platypus vehicle server logfile. + + :param logfile: the logfile as an iterable + :type logfile: python file-like + :returns: a dict containing the data from this logfile + :rtype: {str: pandas.DataFrame} + """ + raw_data = collections.defaultdict(list) + start_time = datetime.datetime.utcfromtimestamp(0) + + for line in logfile: + # Extract each line fron the logfile and convert the timestamp. + time_offset_ms, level, message = line.split('\t', 2) + + # Compute the timestamp for each log entry. + time_offset = datetime.timedelta(milliseconds=int(time_offset_ms)) + timestamp = start_time + time_offset + + # Try to parse the log as a JSON object. 
+ try: + entry = json.loads(message) + except ValueError as e: + raise ValueError( + "Aborted after invalid JSON log message '{:s}': {:s}" + .format(message, e)) + + # If the line is a datetime, compute subsequent timestamps from this. + # We assume that "date" and "time" are always together in the entry. + if 'date' in entry: + timestamp = datetime.datetime.utcfromtimestamp( + entry['time'] / 1000.) + start_time = timestamp - time_offset + + # Extract appropriate data from each entry. + for k, v in six.viewitems(entry): + if k == 'pose': + zone = int(v['zone'][:-5]) + hemi = v['zone'].endswith('North') + raw_data[k].append([ + timestamp, + v['p'][0], + v['p'][1], + v['p'][2], + zone, hemi + ]) + elif k == 'sensor': + try: + + + raw_data[v['type']].append([timestamp] + v['data']) + except: + # ignore malformed sensor entries + pass + else: + pass @@ -160,7 +304,7 @@ def read_v4_2_0(logfile): pandas.DataFrame(v, columns=('time', 'easting', 'northing', 'altitude', 'zone', 'hemi')) - .set_index('time') + .set_index('time') ) ) elif k in _DATA_FIELDS_v4_2_0: @@ -173,7 +317,6 @@ def read_v4_2_0(logfile): data[k] = (pandas.DataFrame(v) .rename(columns={0: 'time'}, copy=False) .set_index('time')) - return data @@ -309,7 +452,7 @@ def read_v4_0_0(logfile, filename): data_sensors['es2'] = [] data_sensors['es2'].append([timestamp, float(m_es2.group('ec')), - float(m_es2.group('temp'))]) + float(m_es2.group('temperature'))]) continue m_sensor = _REGEX_SENSOR_V4_0_0.match(message) @@ -362,6 +505,18 @@ def read_v4_0_0(logfile, filename): return data +def load_v4_3_0(filename, *args, **kwargs): + """ + Loads a log from a v4.3.0 server from a filename. + + :param filename: path to a log file + :type filename: string + :returns: a dict containing the data from this logfile + :rtype: {str: pandas.DataFrame} + """ + with open(filename, 'r') as logfile: + return read_v4_3_0(logfile) + def load_v4_2_0(filename, *args, **kwargs): """ Loads a log from a v4.2.0 server from a filename. @@ -422,7 +577,7 @@ def read(logfile, filename=None): # Depending on the format of the first line, pick an appropriate loader. if len(components[1]) == 1: # Version 4.2.0 files have a single-character log-level. - return read_v4_2_0(logfile) + return read_v4_3_0(logfile) else: try: # Version 4.1.0 logs have JSON messages. diff --git a/src/platypus/util/conversions.py b/src/platypus/util/conversions.py index 8a61cbd..762c996 100644 --- a/src/platypus/util/conversions.py +++ b/src/platypus/util/conversions.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python """ Module containing utility conversion functions. Copyright 2015. Platypus LLC. All rights reserved.
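
For quick reference, the sketch below (not part of the patch itself) shows how the pieces added above fit together: loading or merging vehicle logs with `platypus.io.logs.load` / `platypus.io.logs.merge_files`, attaching the nearest vehicle pose to each sensor reading, and writing one CSV per sensor in the style of the Data_Exporter_Independent notebook. The log filenames are placeholders, and only functions introduced in this patch plus standard pandas calls are assumed.

```python
# Minimal usage sketch, assuming the load/merge_files functions added in this
# patch and the {'pose': DataFrame, '<SENSOR>': DataFrame} layout they return.
# The filenames below are placeholders for real Platypus vehicle logs.
import os

import pandas
import platypus.io.logs

log_filenames = ['platypus_20180619_093422.txt', 'platypus_20180619_112857.txt']

# Load a single log, or merge several overlapping logs into one dict of DataFrames.
if len(log_filenames) == 1:
    data = platypus.io.logs.load(log_filenames[0])
else:
    data = platypus.io.logs.merge_files(log_filenames)

# De-duplicate the pose stream, as in the Data_Exporter_Independent notebook.
pose = data['pose'].copy()
pose['time'] = pose.index
position = pose[['time', 'latitude', 'longitude']].drop_duplicates(subset='time', keep='first')
position = position[['latitude', 'longitude']]

# Attach the nearest position to each sensor reading and write one CSV per sensor.
for sensor_name in data:
    if sensor_name in ('pose', 'BATTERY'):
        continue
    sensor_data = data[sensor_name].copy()
    sensor_position = position.reindex(sensor_data.index, method='nearest')
    output = pandas.concat((sensor_position, sensor_data), axis=1)
    output.to_csv(os.path.splitext(log_filenames[0])[0] + '_' + sensor_name + '.csv')
```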