-
Notifications
You must be signed in to change notification settings - Fork 1
/
run_modal_split_in_train_stations.py
232 lines (208 loc) · 14.1 KB
/
run_modal_split_in_train_stations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
from utils_mtmc.get_mtmc_files import get_etappen
import numpy as np
from pathlib import Path
import pandas as pd
def run_modal_split_in_train_stations():
    """Compute the modal split of the trips going through the train station of Bern.

    The trip legs ("Etappen" in German) of the Mobility and Transport Microcensus (MTMC) 2015 are read,
    reduced to the trips going through the train station, summarised by main transport mode before and
    after the station, turned into weighted shares and finally saved as CSV files.
    """
    # Only the columns needed for the analysis are read from the raw MTMC data
    needed_columns = ['HHNR',  # ID of the person
                      'WP',  # weight of the person
                      'WEGNR',  # ID of the trip
                      'ETNR',  # ID of the trip leg
                      'f52900',  # Activity at destination of the trip leg
                      'f51300',  # Transport mode of the trip leg
                      'Z_X',  # X-coordinate of the destination of the trip leg
                      'Z_Y',  # Y-coordinate of the destination of the trip leg
                      'Z_Str']  # Street name of the destination of the trip leg
    all_trip_legs = get_etappen(2015, selected_columns=needed_columns)
    # Keep only the trip legs belonging to a trip that goes through the train station
    trip_legs_through_station = define_trips_going_through_the_railway_station(all_trip_legs)
    # Main transport mode from the start of the trip to the station and from the station to the end of the trip
    main_modes_in_and_out = define_the_main_transport_mode_per_trip(trip_legs_through_station)
    # Weighted share of each combination of transport modes
    shares = compute_weighted_average(main_modes_in_and_out)
    # Write the results as CSV files
    save_as_csv(shares)
def save_as_csv(output, folder_path_output=Path('../data/output/')):
    """Save the modal split as two CSV files, one with French and one with German labels.

    The DataFrame passed by the caller is not modified: all transformations are done on copies.

    :param output: DataFrame indexed by 'f51300' (tuples containing the main transport mode before ("in")
        and after ("out") the station, labelled in French) with a column 'WP' containing the shares.
    :param folder_path_output: Folder in which 'modal_split_in_Bern_station_FR.csv' and
        'modal_split_in_Bern_station_DE.csv' are written. It is created if it does not exist yet.
    :return: None
    """
    column_in = 'Moyen de transport principal in'
    column_out = 'Moyen de transport principal out'
    folder_path_output = Path(folder_path_output)
    folder_path_output.mkdir(parents=True, exist_ok=True)
    # Define the combination of transport means as a column (is an index) and rename the column with percentages.
    # Both calls return new objects, so the caller's DataFrame stays untouched.
    table = output.reset_index(level=0).rename(columns={'WP': 'Parts'})
    # Split the pair of transport means in two columns. Pairs are padded with None so that trips with only
    # one part (or an empty table) do not make the split fail.
    padded_pairs = [tuple(pair) + (None, None) for pair in table['f51300']]
    table[column_in] = [pair[0] for pair in padded_pairs]
    table[column_out] = [pair[1] for pair in padded_pairs]
    table = table.drop(columns='f51300')  # Remove the column with the pair of transport means
    table = table.sort_values(by='Parts', ascending=False)  # Sort by percentages
    table = table[[column_in, column_out, 'Parts']]  # Reorder columns
    # Save the output as a CSV file (French)
    table.to_csv(folder_path_output / 'modal_split_in_Bern_station_FR.csv', index=False, encoding='utf-8-sig')
    # Translate the output to German
    dict_fr_de = {'Autres': 'übrige',
                  'Train': 'Eisenbahn',
                  'Transports publics routiers': 'öffentl. Strassenverkehr',
                  'Transport individuel motorisé': 'motorisierter Individualverkehr',
                  'Vélo (incl. vélo électrique)': 'Velo (inkl. E-Bike)',
                  'A pied': 'zu Fuss'}
    # Replace the transport modes by German words (on a new object, no in-place change of a slice)
    table_de = table.replace({column_in: dict_fr_de, column_out: dict_fr_de})
    table_de = table_de.rename(columns={'Parts': 'Anteile',
                                        column_in: 'Verkehrsmittel in',
                                        column_out: 'Verkehrsmittel out'})  # Rename columns
    # Save the output as a CSV file in German
    table_de.to_csv(folder_path_output / 'modal_split_in_Bern_station_DE.csv', index=False, encoding='utf-8-sig')
def compute_weighted_average(groups_in_and_out):
    """Compute the weighted share of each combination of main transport modes.

    :param groups_in_and_out: DataFrame with one row per trip, a column 'WP' (weight) and a column 'f51300'
        (combination of main transport modes, used as grouping key).
    :return: DataFrame indexed by 'f51300' with a single column 'WP' containing, for each combination, the
        sum of its weights divided by the sum of all weights. If the weights sum to zero, the division
        yields NaN/inf values instead of raising.
    """
    sum_of_all_weights = groups_in_and_out['WP'].sum()
    # Vectorised group sum restricted to the weights: no Python function is called per group and columns
    # other than 'WP' cannot break the aggregation
    weights_per_combination = groups_in_and_out.groupby(['f51300'])[['WP']].sum()
    return weights_per_combination / sum_of_all_weights
def define_the_main_transport_mode_per_trip(df_etappen):
    """Define the main transport mode of each trip before ("in") and after ("out") the station.

    The transport mode of each trip leg is first recoded as a priority (1 being the top priority). For each
    trip and each part of the trip (column 'through_railway_station'), the mode with the top priority is the
    main transport mode. The main modes are then grouped in broader categories (labelled in French).

    The DataFrame passed by the caller is not modified: the recoding is done on a copy, and without chained
    in-place calls on a column, which do not update the DataFrame under pandas copy-on-write.

    :param df_etappen: DataFrame of trip legs with the columns 'HHNR', 'WEGNR', 'through_railway_station',
        'WP' and 'f51300' (code of the transport mode).
    :return: DataFrame indexed by ('HHNR', 'WEGNR') with the columns 'WP' (weight found on the first row of
        the trip) and 'f51300' (tuple of the main transport modes, ordered by 'through_railway_station').
    """
    # Transport mode -> order of priority, with one being the top priority
    priority_of_mode = {17: 1,  # plane
                        9: 2,  # train
                        10: 3,  # PostAuto / CarPostal / PostBus
                        16: 4,  # boat
                        12: 5,  # tram
                        11: 6,  # bus
                        18: 7,  # other public transport?
                        14: 8,  # autocar
                        7: 9,  # car as driver
                        8: 9,  # car as passenger
                        15: 10,  # truck
                        13: 11,  # taxi
                        5: 12,  # motorbike
                        6: 12,  # motorbike as passenger
                        4: 13,  # small motorbikes
                        3: 14,  # cyclomoteur
                        2: 15,  # bikes
                        20: 15,  # Ebikes
                        21: 15,  # Ebikes
                        1: 16,  # walking
                        19: 17,  # rollers, trottinettes, skateboards, ...
                        95: 95}  # other
    # Priority of the main transport mode -> group of transport modes
    group_of_priority = {1: 'Autres',  # plane -> other
                         2: 'Train',  # train -> train
                         3: 'Transports publics routiers',  # PostAuto -> road PT
                         4: 'Autres',  # boat -> other
                         5: 'Transports publics routiers',  # tram -> road PT
                         6: 'Transports publics routiers',  # bus -> road PT
                         7: 'Autres',  # other public transport -> other
                         8: 'Transports publics routiers',  # autocar -> road PT
                         9: 'Transport individuel motorisé',  # car -> TIM
                         10: 'Autres',  # truck -> other
                         11: 'Autres',  # taxi -> other
                         12: 'Transport individuel motorisé',  # moto -> TIM
                         13: 'Transport individuel motorisé',  # small motorbikes -> TIM
                         14: 'Transport individuel motorisé',  # cyclomoter -> TIM
                         15: 'Vélo (incl. vélo électrique)',  # bikes and Ebikes -> bike
                         16: 'A pied',  # walking -> walking
                         17: 'Autres',  # rollers, trottinettes, skateboards, ... -> other
                         95: 'Autres'}  # other -> other
    # Recode each value exactly once (keys and values of the first mapping overlap, e.g. 9 -> 2 and 2 -> 15).
    # Codes that are not in the mapping are kept unchanged.
    trip_legs = df_etappen.assign(f51300=df_etappen['f51300'].map(lambda mode: priority_of_mode.get(mode, mode)))
    # Define the main transport mode by trip in and out
    groups_trip_legs = trip_legs.groupby(['HHNR', 'WEGNR', 'through_railway_station']).agg(
        {'WP': lambda weights: weights.iloc[0],
         'f51300': 'min'})
    # Group main transport mode (values that are not in the mapping are kept unchanged)
    groups_trip_legs['f51300'] = groups_trip_legs['f51300'].map(
        lambda priority: group_of_priority.get(priority, priority))
    # Consider the transport mode as a tuple: transport modes in and out the station
    groups_in_and_out = groups_trip_legs.groupby(['HHNR', 'WEGNR']).agg(
        {'WP': lambda weights: weights.iloc[0],
         'f51300': lambda modes: tuple(modes)})
    return groups_in_and_out
def define_trips_going_through_the_railway_station(df_etappen):
    """Keep only the trip legs of the trips going through the railway station.

    The DataFrame is changed in place by the helper functions (new column 'through_railway_station') and
    here (column 'f52900' removed). The returned DataFrame is the filtered selection.

    :param df_etappen: DataFrame of trip legs.
    :return: The trip legs whose 'through_railway_station' value is greater than 0.
    """
    # Flag the trip legs ending in the station
    define_trip_legs_going_through_the_railway_station(df_etappen)
    # Flag the other trip legs of the same trips, before and after the station
    define_trip_legs_before_and_after_going_through_the_railway_station(df_etappen)
    # The activity at destination is not useful anymore
    df_etappen.drop(columns='f52900', inplace=True)
    # Remove trips not going through the station
    is_part_of_a_trip_through_station = df_etappen['through_railway_station'] > 0
    return df_etappen[is_part_of_a_trip_through_station]
def define_trip_legs_before_and_after_going_through_the_railway_station(df_etappen):
    """Flag the trip legs located before and after a stop at the railway station, within the same trip.

    :param df_etappen: DataFrame of trip legs, already containing the column 'through_railway_station'.
        It is changed in place by the two helper functions.
    :return: None
    """
    # The stops at the station are selected once, before any other trip leg gets flagged
    ends_in_station = df_etappen['through_railway_station'] == 1
    stops_at_station = df_etappen.loc[ends_in_station, ['HHNR', 'WEGNR', 'ETNR']]
    # Add 1 in the new column when the trip leg is before the railway station and in the same trip
    define_trip_legs_before_going_through_the_railway_station(df_etappen, stops_at_station)
    # Add 2 in the new column if the trip leg is after the railway station and in the same trip
    define_trip_legs_after_going_through_the_railway_station(df_etappen, stops_at_station)
def define_trip_legs_after_going_through_the_railway_station(df_etappen, df_trips_including_a_stop_at_station):
    """Flag with 2 the trip legs following a stop at the railway station, within the same trip.

    Starting from each stop at the station, the following trip legs of the same person and trip are flagged
    as long as the activity at destination (column 'f52900') is 1 ("changing transport mode"). The first
    trip leg with another activity at destination is still flagged, and then the walk stops.

    :param df_etappen: DataFrame of trip legs with the columns 'HHNR', 'WEGNR', 'ETNR', 'f52900' and
        'through_railway_station'. The column 'through_railway_station' is changed in place.
    :param df_trips_including_a_stop_at_station: DataFrame with the columns 'HHNR', 'WEGNR' and 'ETNR' of
        the trip legs ending in the station.
    :return: None
    """
    # Index every trip leg once instead of scanning the whole DataFrame at each step.
    # If a key appears several times, the first occurrence is used.
    trip_legs = {}
    for label, person_id, trip_id, leg_number, activity in zip(df_etappen.index,
                                                               df_etappen['HHNR'],
                                                               df_etappen['WEGNR'],
                                                               df_etappen['ETNR'],
                                                               df_etappen['f52900']):
        trip_legs.setdefault((person_id, trip_id, leg_number), (label, activity))
    labels_of_trip_legs_after = []
    # For all stops at the railway station...
    for person_id, trip_id, trip_leg_number in zip(df_trips_including_a_stop_at_station['HHNR'],
                                                   df_trips_including_a_stop_at_station['WEGNR'],
                                                   df_trips_including_a_stop_at_station['ETNR']):
        while True:
            trip_leg_number += 1
            next_trip_leg = trip_legs.get((person_id, trip_id, trip_leg_number))
            if next_trip_leg is None:
                break  # No further trip leg in this trip
            label, activity_at_destination_of_the_trip_leg = next_trip_leg
            # The trip leg is part of the trip, whatever the activity at destination is...
            labels_of_trip_legs_after.append(label)
            # ...but the trip only goes on if the activity is "changing transport mode"
            if activity_at_destination_of_the_trip_leg != 1:
                break
    # Only the flag column is written and it is never read above, so one assignment at the end is enough
    if labels_of_trip_legs_after:
        df_etappen.loc[labels_of_trip_legs_after, 'through_railway_station'] = 2
def define_trip_legs_before_going_through_the_railway_station(df_etappen, df_trips_including_a_stop_at_station):
    """Flag with 1 the trip legs preceding a stop at the railway station, within the same trip.

    Starting from each stop at the station, the previous trip legs of the same person and trip are flagged
    as long as their activity at destination (column 'f52900') is 1 ("changing transport mode"). The walk
    stops at the first trip leg with another activity at destination (which is not flagged), when a trip
    leg is missing, or once trip leg number 1 has been handled: trip leg numbers below 1 are never looked up.

    :param df_etappen: DataFrame of trip legs with the columns 'HHNR', 'WEGNR', 'ETNR', 'f52900' and
        'through_railway_station'. The column 'through_railway_station' is changed in place.
    :param df_trips_including_a_stop_at_station: DataFrame with the columns 'HHNR', 'WEGNR' and 'ETNR' of
        the trip legs ending in the station.
    :return: None
    """
    # Index every trip leg once instead of scanning the whole DataFrame at each step.
    # If a key appears several times, the first occurrence is used.
    trip_legs = {}
    for label, person_id, trip_id, leg_number, activity in zip(df_etappen.index,
                                                               df_etappen['HHNR'],
                                                               df_etappen['WEGNR'],
                                                               df_etappen['ETNR'],
                                                               df_etappen['f52900']):
        trip_legs.setdefault((person_id, trip_id, leg_number), (label, activity))
    labels_of_trip_legs_before = []
    # For all stops at the railway station...
    for person_id, trip_id, trip_leg_number in zip(df_trips_including_a_stop_at_station['HHNR'],
                                                   df_trips_including_a_stop_at_station['WEGNR'],
                                                   df_trips_including_a_stop_at_station['ETNR']):
        # Walk backwards and really stop before reaching a trip leg number below 1
        while trip_leg_number > 1:
            trip_leg_number -= 1
            previous_trip_leg = trip_legs.get((person_id, trip_id, trip_leg_number))
            if previous_trip_leg is None:
                break  # No such trip leg in this trip
            label, activity_at_destination_of_the_trip_leg = previous_trip_leg
            # If activity at destination of the trip leg is not "changing transport mode", the trip leg does
            # not lead to the station: stop without flagging it
            if activity_at_destination_of_the_trip_leg != 1:
                break
            labels_of_trip_legs_before.append(label)
    # Only the flag column is written and it is never read above, so one assignment at the end is enough
    if labels_of_trip_legs_before:
        df_etappen.loc[labels_of_trip_legs_before, 'through_railway_station'] = 1
def define_trip_legs_going_through_the_railway_station(df_etappen):
    """Flag the trip legs ending in the train station, based on the coordinates of their destination.

    A new column 'through_railway_station' is added to the DataFrame (1 if the destination lies strictly
    inside the bounding box of the station and is not on one of the manually excluded streets, 0 otherwise).
    The unique points of the station are saved for visualization, the columns 'Z_X', 'Z_Y' and 'Z_Str' are
    then removed and the size of the sample is printed.

    :param df_etappen: DataFrame of trip legs with the columns 'HHNR', 'Z_X', 'Z_Y' and 'Z_Str'.
        It is changed in place.
    :return: None
    """
    # Bounding box of the train station
    # NOTE(review): the bounds look like longitude/latitude in degrees - to be confirmed
    x_min, x_max = 7.4369, 7.4406
    y_min, y_max = 46.9474, 46.9497
    x_in_box = (df_etappen['Z_X'] > x_min) & (df_etappen['Z_X'] < x_max)
    y_in_box = (df_etappen['Z_Y'] > y_min) & (df_etappen['Z_Y'] < y_max)
    # 1 if the trip leg ends in the train station based on coordinates, 0 otherwise
    df_etappen['through_railway_station'] = np.where(x_in_box & y_in_box, 1, 0)
    # Manual corrections: streets inside the bounding box that are not part of the train station
    streets_outside_the_station = ('Bollwerk',  # Bollwerk 4 is not the train station
                                   'PARKTERRASSE',  # The park above the train station
                                   'BUBENBERGPLATZ',  # Bubenbergplatz 8 and 10 are not in the train station
                                   'LAUPENSTRASSE',  # Laupenstrasse 2 is a cinema
                                   'SCHANZENSTR.')  # Schanzenstrasse 1 is not in the train station
    for street in streets_outside_the_station:
        manual_correction_streets(df_etappen, street_name=street)
    # Save the unique points defining the train station for visualization
    saving_unique_points(df_etappen)
    # The X-Y coordinates and the street name are not useful anymore
    df_etappen.drop(columns=['Z_X', 'Z_Y', 'Z_Str'], inplace=True)
    ends_in_station = df_etappen['through_railway_station'] == 1
    print('Number of trip legs in the sample:', int(ends_in_station.sum()))
    print('Number of persons in the sample:', df_etappen.loc[ends_in_station, 'HHNR'].nunique())
def saving_unique_points(df_etappen, folder_path_output=Path('../data/output/')):
    """Save the unique destination points flagged as being in the train station, for visualization.

    :param df_etappen: DataFrame of trip legs with the columns 'Z_X', 'Z_Y', 'Z_Str' and
        'through_railway_station'. It is not modified.
    :param folder_path_output: Folder in which 'unique_points_train_station.csv' is written. It is created
        if it does not exist yet.
    :return: None
    """
    # Keep only the points in the train station, without person identification, transport mode and activity:
    # only the coordinates and the street name are saved
    in_train_station = df_etappen['through_railway_station'] == 1
    df_train_station_only = df_etappen.loc[in_train_station, ['Z_X', 'Z_Y', 'Z_Str']]
    # Make sure the output folder exists instead of failing when writing the file
    folder_path_output = Path(folder_path_output)
    folder_path_output.mkdir(parents=True, exist_ok=True)
    # Save every point only once
    df_train_station_only.drop_duplicates().to_csv(folder_path_output / 'unique_points_train_station.csv',
                                                   index=False, sep=';', encoding='iso-8859-15')
def manual_correction_streets(df_etappen, street_name):
    """Unflag the trip legs ending on a given street: they are not considered as ending in the station.

    :param df_etappen: DataFrame of trip legs with the columns 'Z_Str' and 'through_railway_station'.
        The column 'through_railway_station' is changed in place.
    :param street_name: Street name, compared exactly (case-sensitive) with the column 'Z_Str'.
    :return: None
    """
    on_this_street = df_etappen['Z_Str'].eq(street_name)
    flagged_as_station = df_etappen['through_railway_station'].eq(1)
    df_etappen.loc[on_this_street & flagged_as_station, 'through_railway_station'] = 0
# Run the whole analysis only when this file is executed as a script (not when it is imported)
if __name__ == '__main__':
    run_modal_split_in_train_stations()