This repository has been archived by the owner on Aug 22, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 4
/
publish.py
executable file
·122 lines (90 loc) · 4 KB
/
publish.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function
from __future__ import unicode_literals
import os
import shutil
import sqlite3
import tempfile
import argparse
import datetime
import subprocess
import arrow
import numpy as np
import pandas as pd
import utils
# Column dtypes applied when the exported positions CSV is loaded with pandas.
# The built-in ``str`` / ``float`` are used instead of ``np.str`` / ``np.float``:
# those NumPy names were plain aliases of the built-ins, were deprecated in
# NumPy 1.20 and removed in 1.24, so referencing them breaks import on current
# NumPy while the resulting dtypes are exactly the same.
POSITION_DTYPES = {
    'vehicle_id': str,
    'timestamp': str,
    'speed': float,
    'route_id': str,
    'trip_id': str,
    'latitude': float,
    'longitude': float,
}

# Not referenced by the code visible in this file; kept because other modules
# may import it.
SHAPE_DIST_CACHE = {}

# Default output prefix. The file name ``<YYYY-MM-DD>.csv`` is appended to it
# verbatim, so it must end with a path separator to denote a directory.
OUTPUT_PATH = './data/vehicle_positions/'
class Trip:
    """One trip whose headsign is fetched from the ``trips`` table on creation.

    Instance attributes:
        trip_id: the identifier passed to the constructor.
        conn: the database connection used for the lookup.
        headsign: ``trip_headsign`` of the matching row, or ``''`` when no
            row matches ``trip_id``.
    """

    HEADSIGN_SQL = 'SELECT trips.trip_headsign FROM trips WHERE trips.trip_id = ?'

    def __init__(self, trip_id, conn):
        """Store the arguments and immediately resolve ``self.headsign``."""
        self.trip_id = trip_id
        self.conn = conn
        self._set_headsign()

    def _set_headsign(self):
        """Run ``HEADSIGN_SQL`` for ``self.trip_id`` and store the result."""
        cursor = self.conn.cursor()
        cursor.execute(self.HEADSIGN_SQL, (self.trip_id,))
        row = cursor.fetchone()
        # No matching row: fall back to an empty headsign rather than failing.
        if row is None:
            self.headsign = ''
        else:
            self.headsign = row[0]
def process_positions(positions):
    """Drop positions without a route or trip and attach each trip's headsign.

    Args:
        positions: DataFrame of vehicle positions with at least ``route_id``
            and ``trip_id`` columns.

    Returns:
        A new DataFrame containing only the rows whose ``route_id`` and
        ``trip_id`` are both present, with an added ``trip_headsign`` column
        (``''`` for trips that have no row in the ``trips`` table).
    """
    print('Processing {} vehicle positions.'.format(len(positions)))

    # NaN compares unequal to everything, itself included, so a ``!= NaN``
    # test keeps every row; notnull() is the actual missing-value check.
    # copy() makes the filtered frame independent, so adding a column below
    # is a plain assignment rather than a write into a slice of the input.
    has_ids = positions.route_id.notnull() & positions.trip_id.notnull()
    positions = positions[has_ids].copy()

    trip_ids = positions.trip_id.unique()

    # sqlite3's connection context manager only commits or rolls back; it
    # does not close the connection, so close it explicitly.
    conn = sqlite3.connect(utils.GTFS_DB)
    try:
        headsigns = {trip_id: Trip(trip_id, conn).headsign for trip_id in trip_ids}
    finally:
        conn.close()

    # One pass over the rows instead of one boolean mask per distinct trip.
    positions['trip_headsign'] = [headsigns[trip_id] for trip_id in positions.trip_id]
    return positions
def get_positions(db_path, capmetricsd_path, date=None):
    """Export a 24-hour window of vehicle positions with capmetricsd and load it.

    The window ends at midnight (America/Chicago) of the current day when
    ``date`` is None, otherwise at midnight of ``date`` moved forward by
    ``replace(days=1)``; it starts one ``replace(days=-1)`` step earlier.

    Args:
        db_path: database path handed to capmetricsd.
        capmetricsd_path: path of the capmetricsd executable.
        date: optional date-like object exposing ``replace``, ``year``,
            ``month`` and ``day``; None selects the window ending today.

    Returns:
        Tuple ``(positions, tempdir)``: the DataFrame read from the exported
        CSV and the temporary directory that holds it. The caller is
        responsible for removing ``tempdir``.

    Raises:
        Exception: if capmetricsd exits with a non-zero status. The temporary
            directory is removed before the error propagates.
    """
    # Fetch vehicle positions for the date (in local time)
    if date is None:
        date = arrow.now()
    else:
        # NOTE(review): plural ``replace(days=...)`` is the relative-shift
        # form of older arrow releases; newer ones spell it ``shift`` —
        # confirm against the arrow version this project pins.
        date = date.replace(days=1)
    date = arrow.now().replace(year=date.year, month=date.month, day=date.day, hour=0, minute=0, second=0, tzinfo='America/Chicago')
    day_before = date.replace(days=-1)
    print('Fetching positions from {} to {}.'.format(day_before.isoformat(), date.isoformat()))

    tempdir = tempfile.mkdtemp()
    try:
        path = os.path.join(tempdir, 'output.csv')
        args = [capmetricsd_path, 'get', db_path, path, str(day_before.timestamp), str(date.timestamp)]
        print(args)
        code = subprocess.call(args)
        if int(code) != 0:
            raise Exception('Error getting data from capmetricsd: {}'.format(' '.join(args)))
        return pd.read_csv(path, dtype=POSITION_DTYPES), tempdir
    except BaseException:
        # On failure the caller never receives ``tempdir`` and so cannot
        # clean it up; remove it here, then let the original error continue.
        shutil.rmtree(tempdir, ignore_errors=True)
        raise
def save_vehicle_positions(db_path, capmetricsd_path, output, date=None):
    """Fetch, process and write one day of vehicle positions as a CSV file.

    Args:
        db_path: database path handed to capmetricsd.
        capmetricsd_path: path of the capmetricsd executable.
        output: prefix the file name is appended to verbatim; the result is
            ``'<output><YYYY-MM-DD>.csv'``, so a directory needs a trailing
            path separator.
        date: day to export; None means the day before the current one.
    """
    positions, tempdir = get_positions(db_path, capmetricsd_path, date)
    try:
        positions = process_positions(positions)

        if date is None:
            date = arrow.now().replace(days=-1)
        day = date.strftime('%Y-%m-%d')

        positions.to_csv('{}{}.csv'.format(output, day), index=False)
    finally:
        # Remove the export directory even when processing or writing fails.
        shutil.rmtree(tempdir)
def save_range_vehicle_positions(db_path, capmetricsd_path, output, start, end):
    """Save one CSV per day for every day from ``start`` through ``end``.

    Days are derived by stepping back from ``end`` in whole-day increments
    and are processed oldest first. Nothing is saved when ``end`` lies
    before ``start``.

    Args:
        db_path: database path handed to capmetricsd.
        capmetricsd_path: path of the capmetricsd executable.
        output: output prefix forwarded to ``save_vehicle_positions``.
        start: first day of the range.
        end: last day of the range (inclusive).
    """
    num_days = (end - start).days + 1
    # Count the offsets down so the list is already in oldest-first order.
    days = [end - datetime.timedelta(days=back) for back in range(num_days - 1, -1, -1)]

    print('Saving data from {} to {}.'.format(start.isoformat(), end.isoformat()))

    for day in days:
        print('Saving data for {}'.format(day.isoformat()))
        save_vehicle_positions(db_path, capmetricsd_path, output, day)
if __name__ == '__main__':
    # Script entry point: parse the command line, load the GTFS data through
    # ``utils`` and export positions using the default date.
    cli = argparse.ArgumentParser(
        description='Grab CapMetrics data from BoltDB and write it as a CSV file.',
    )
    cli.add_argument(
        '-d', '--db',
        required=True,
        type=str,
        help='Path to a BoltDB database.',
    )
    cli.add_argument(
        '-c', '--capmetricsd',
        required=True,
        type=str,
        help='Path to the capmetricsd binary.',
    )
    cli.add_argument(
        '-O', '--output',
        type=str,
        default=OUTPUT_PATH,
        help='File to write data to.',
    )
    options = cli.parse_args()

    utils.load_gtfs_data()
    save_vehicle_positions(options.db, options.capmetricsd, options.output)