Skip to content

Commit

Permalink
Gtfs conditionally required (#253)
Browse files Browse the repository at this point in the history
* remove trailing separator in header

* - make only one of route_short_name and route_long_name required
- implement the optionality of agency if there only is one

* add the remaining stop types and the proper restrictions

---------

authored-by: Tobias Kohl <[email protected]>
  • Loading branch information
Royal2Flush authored Dec 27, 2024
1 parent b43b440 commit 2da717f
Show file tree
Hide file tree
Showing 68 changed files with 751 additions and 62 deletions.
97 changes: 75 additions & 22 deletions src/main/java/org/matsim/pt2matsim/gtfs/GtfsFeedImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -210,14 +210,22 @@ protected void loadAgencies() throws IOException {
String[] line = reader.readNext();
while(line != null) {
l++;
String agencyId = line[col.get(GtfsDefinitions.AGENCY_ID)];
String agencyId = col.containsKey(GtfsDefinitions.AGENCY_ID) ?
line[col.get(GtfsDefinitions.AGENCY_ID)] :
line[col.get(GtfsDefinitions.AGENCY_NAME)];
AgencyImpl agency = new AgencyImpl(agencyId, line[col.get(GtfsDefinitions.AGENCY_NAME)], line[col.get(GtfsDefinitions.AGENCY_URL)], line[col.get(GtfsDefinitions.AGENCY_TIMEZONE)]);
agencies.put(agencyId, agency);

line = reader.readNext();
}

reader.close();

if (this.agencies.isEmpty()) {
throw new IllegalArgumentException("agencies file must contain at least one agency!");
} else if (this.agencies.size() > 1 && !col.containsKey(GtfsDefinitions.AGENCY_ID)) {
throw new IllegalArgumentException("agencies file has more than one entry but no id column!");
}
} catch (ArrayIndexOutOfBoundsException e) {
throw new RuntimeException("Line " + l + " in agency.txt is empty or malformed.");
} catch (CsvValidationException e) {
Expand Down Expand Up @@ -248,35 +256,65 @@ protected void loadStops() throws IOException {
while(line != null) {
l++;
String stopId = line[col.get(GtfsDefinitions.STOP_ID)];
StopImpl stop = new StopImpl(stopId, line[col.get(GtfsDefinitions.STOP_NAME)], Double.parseDouble(line[col.get(GtfsDefinitions.STOP_LON)]), Double.parseDouble(line[col.get(GtfsDefinitions.STOP_LAT)]));
stops.put(stopId, stop);

// location type
if(col.get(GtfsDefinitions.LOCATION_TYPE) != null) {
if(line[col.get(GtfsDefinitions.LOCATION_TYPE)].equals("0")) {
stop.setLocationType(GtfsDefinitions.LocationType.STOP);
GtfsDefinitions.LocationType locationType = col.containsKey(GtfsDefinitions.LOCATION_TYPE) && !line[col.get(GtfsDefinitions.LOCATION_TYPE)].isEmpty() ?
GtfsDefinitions.LocationType.values()[Integer.parseInt(line[col.get(GtfsDefinitions.LOCATION_TYPE)])] :
GtfsDefinitions.LocationType.STOP;

String parentStation = null;
if (col.containsKey(GtfsDefinitions.PARENT_STATION)) {
if (line[col.get(GtfsDefinitions.PARENT_STATION)].isEmpty()) {
if (locationType.index == 2 || locationType.index == 3 || locationType.index == 4) {
throw new IllegalArgumentException("stop " + stopId + " has no parent but its type requires one");
}
} else {
if (locationType.index == 1) {
throw new IllegalArgumentException("stop " + stopId + " has a parent but its type forbids one");
} else {
parentStation = line[col.get(GtfsDefinitions.PARENT_STATION)];
}
}
if(line[col.get(GtfsDefinitions.LOCATION_TYPE)].equals("1")) {
stop.setLocationType(GtfsDefinitions.LocationType.STATION);
} else {
if (locationType.index == 1 || locationType.index == 2 || locationType.index == 3) {
throw new IllegalArgumentException("the dataset has no parent_station column but the type of stop " + stopId + " requires one");
}
}

// parent station
if(col.get(GtfsDefinitions.PARENT_STATION) != null && !line[col.get(GtfsDefinitions.PARENT_STATION)].isEmpty()) {
stop.setParentStation(line[col.get(GtfsDefinitions.PARENT_STATION)]);
}


StopImpl stop = new StopImpl(stopId, line[col.get(GtfsDefinitions.STOP_NAME)], locationType, parentStation);

if (col.containsKey(GtfsDefinitions.STOP_LON) && col.containsKey(GtfsDefinitions.STOP_LAT) && !line[col.get(GtfsDefinitions.STOP_LON)].isEmpty() && !line[col.get(GtfsDefinitions.STOP_LAT)].isEmpty()) {
stop.setLocation(Double.parseDouble(line[col.get(GtfsDefinitions.STOP_LON)]), Double.parseDouble(line[col.get(GtfsDefinitions.STOP_LAT)]));
} else if (locationType.index == 0 || locationType.index == 1 || locationType.index == 2) {
throw new IllegalArgumentException("stop " + stopId + " has no Coord but its type requires one!");
} // in case of type 3 or 4 we can set it via the parent later

stops.put(stopId, stop);
line = reader.readNext();
}

reader.close();
} catch (ArrayIndexOutOfBoundsException e) {
throw new RuntimeException("Line " + l + " in stops.txt is empty or malformed.");
} catch (CsvValidationException e) {
throw new RuntimeException(e);
}

for (Stop stop : stops.values()) {
setStopCoordFromParentRecursively(stop);
}

log.info("... stops.txt loaded");
}

/**
 * Makes sure the given stop has a coordinate, deriving it from its chain of
 * parent stations if necessary.
 * <p>
 * A stop that already has a coordinate is left untouched. Otherwise the
 * parent station ids are followed upwards through {@code stops} until an
 * ancestor with a coordinate is found. That coordinate is then assigned to the
 * stop and to every coordinate-less ancestor visited on the way.
 * <p>
 * The walk is done iteratively (despite the method name) so that malformed
 * feeds produce a descriptive exception instead of a
 * {@link NullPointerException} or {@link StackOverflowError}.
 *
 * @param stop the stop whose coordinate should be ensured
 * @throws IllegalArgumentException if a coordinate-less stop in the chain has no
 *                                  parent, references a parent id that is not in
 *                                  {@code stops}, or the chain is cyclic
 */
private void setStopCoordFromParentRecursively(Stop stop) {
	if (stop.getCoord() != null) {
		return;
	}

	// 1) walk upwards until an ancestor with a coordinate is found
	Stop ancestor = stop;
	int steps = 0;
	while (ancestor.getCoord() == null) {
		String parentId = ancestor.getParentStationId();
		if (parentId == null) {
			throw new IllegalArgumentException("stop " + ancestor.getId() + " has no Coord and no parent to derive it from!");
		}
		Stop parent = stops.get(parentId);
		if (parent == null) {
			throw new IllegalArgumentException("stop " + ancestor.getId() + " references unknown parent station " + parentId);
		}
		// a chain of distinct stops can never be longer than the number of stops
		if (++steps > stops.size()) {
			throw new IllegalArgumentException("stop " + stop.getId() + " is part of a cyclic parent station chain");
		}
		ancestor = parent;
	}

	double x = ancestor.getCoord().getX();
	double y = ancestor.getCoord().getY();

	// 2) hand the coordinate down to every coordinate-less stop of the chain
	Stop current = stop;
	while (current.getCoord() == null) {
		Stop next = stops.get(current.getParentStationId());
		((StopImpl) current).setLocation(x, y);
		current = next;
	}
}

/**
* Reads all services and puts them in {@link #services}
Expand Down Expand Up @@ -442,7 +480,16 @@ protected void loadRoutes() throws IOException {
CSVReader reader = createCSVReader(root + GtfsDefinitions.Files.ROUTES.fileName);
String[] header = reader.readNext();
Map<String, Integer> col = getIndices(header, GtfsDefinitions.Files.ROUTES.columns, GtfsDefinitions.Files.ROUTES.optionalColumns);

if (!col.containsKey(GtfsDefinitions.ROUTE_SHORT_NAME) && !col.containsKey(GtfsDefinitions.ROUTE_LONG_NAME)) {
throw new IllegalArgumentException("at least one of 'route_short_name' or 'route_long_name' is required but the dataset has neither column!");
}
Agency defaultAgency = null;
if (this.agencies.size() > 1 && !col.containsKey(GtfsDefinitions.AGENCY_ID)) {
throw new IllegalArgumentException("there is no column 'agency_id' in the routes file but there are multiple agencies in the agency file");
} else if (this.agencies.size() == 1) {
defaultAgency = this.agencies.values().stream().findAny().get();
}

String[] line = reader.readNext();
while(line != null) {
l++;
Expand All @@ -455,12 +502,18 @@ protected void loadRoutes() throws IOException {
extendedRouteType = ExtendedRouteType.Unknown;
}
String routeId = line[col.get(GtfsDefinitions.ROUTE_ID)];
String shortName = line[col.get(GtfsDefinitions.ROUTE_SHORT_NAME)];
String longName = line[col.get(GtfsDefinitions.ROUTE_LONG_NAME)];
String shortName = col.containsKey(GtfsDefinitions.ROUTE_SHORT_NAME) ?
line[col.get(GtfsDefinitions.ROUTE_SHORT_NAME)] :
line[col.get(GtfsDefinitions.ROUTE_LONG_NAME)];
String longName = col.containsKey(GtfsDefinitions.ROUTE_LONG_NAME) ?
line[col.get(GtfsDefinitions.ROUTE_LONG_NAME)] :
line[col.get(GtfsDefinitions.ROUTE_SHORT_NAME)];

Agency agency = this.agencies.get(line[col.get(GtfsDefinitions.AGENCY_ID)]);
Agency agency = col.containsKey(GtfsDefinitions.AGENCY_ID) ?
this.agencies.get(line[col.get(GtfsDefinitions.AGENCY_ID)]) :
defaultAgency;
if (agency == null) {
throw new RuntimeException("Line " + l + " in routes.txt references unknown agency id " + line[col.get(GtfsDefinitions.AGENCY_ID)]);
throw new IllegalArgumentException("Line " + l + " in routes.txt references unknown agency id " + line[col.get(GtfsDefinitions.AGENCY_ID)]);
}
Route newGtfsRoute = new RouteImpl(routeId, shortName, longName, agency, extendedRouteType);
routes.put(line[col.get(GtfsDefinitions.ROUTE_ID)], newGtfsRoute);
Expand Down
25 changes: 20 additions & 5 deletions src/main/java/org/matsim/pt2matsim/gtfs/lib/GtfsDefinitions.java
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ public final class GtfsDefinitions {
*/
public enum Files {
AGENCY("Agency", "agency.txt",
new String[]{AGENCY_ID, AGENCY_NAME, AGENCY_URL, AGENCY_TIMEZONE},
new String[]{AGENCY_LANG, AGENCY_PHONE, AGENCY_FARE_URL, AGENCY_EMAIL}),
new String[]{AGENCY_NAME, AGENCY_URL, AGENCY_TIMEZONE},
new String[]{AGENCY_ID, AGENCY_LANG, AGENCY_PHONE, AGENCY_FARE_URL, AGENCY_EMAIL}),

STOPS("Stop", "stops.txt",
new String[]{STOP_ID, STOP_LON, STOP_LAT, STOP_NAME},
Expand All @@ -132,8 +132,8 @@ public enum Files {

ROUTES("Route",
"routes.txt",
new String[]{ROUTE_ID, AGENCY_ID, ROUTE_SHORT_NAME, ROUTE_LONG_NAME, ROUTE_TYPE},
new String[]{ROUTE_DESC, ROUTE_URL, ROUTE_COLOR, ROUTE_TEXT_COLOR}),
new String[]{ROUTE_ID, ROUTE_TYPE},
new String[]{AGENCY_ID, ROUTE_SHORT_NAME, ROUTE_LONG_NAME, ROUTE_DESC, ROUTE_URL, ROUTE_COLOR, ROUTE_TEXT_COLOR}),

TRIPS("Trip",
"trips.txt",
Expand Down Expand Up @@ -420,9 +420,24 @@ public static ExtendedRouteType getExtendedRouteType(RouteType routeType) {
* The location type field can have the following values:<br/>
* 0 or blank: Stop. A location where passengers board or disembark from a transit vehicle<br/>
* 1: Station. A physical structure or area that contains one or more stop<br/>
* 2: Entrance/Exit. A location where passengers can enter or exit a station from the street. If an entrance/exit belongs to multiple stations, it may be linked by pathways to both, but the data provider must pick one of them as parent.<br/>
* 3: Generic Node. A location within a station, not matching any other location_type, that may be used to link together pathways defined in pathways.txt.<br/>
* 4: Boarding Area. A specific location on a platform, where passengers can board and/or alight vehicles.
*/
public enum LocationType {
	// NOTE: the declaration order matches the numeric code, i.e. ordinal() == index.
	/** 0 or blank: a location where passengers board or disembark. */
	STOP(0, "stop"),
	/** 1: a physical structure or area that contains one or more stops. */
	STATION(1, "station"),
	/** 2: a location where passengers can enter or exit a station from the street. */
	ENTRANCE(2, "entrance / exit"),
	/** 3: a location within a station that does not match any other type. */
	NODE(3, "generic node"),
	/** 4: a specific location on a platform where passengers can board and/or alight. */
	BOARDING_AREA(4, "boarding area");

	/** Numeric code of this type as used in the location type column. */
	public final int index;
	/** Human readable label of this type. */
	public final String name;

	LocationType(int index, String name) {
		this.index = index;
		this.name = name;
	}
}

public enum FareTransferType {
Expand Down
40 changes: 15 additions & 25 deletions src/main/java/org/matsim/pt2matsim/gtfs/lib/StopImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,33 +30,25 @@ public class StopImpl implements Stop {

private final String id;
private final String name;
private final double lon;
private final double lat;
/** optional **/
private GtfsDefinitions.LocationType locationType = null;
/** optional **/
private String parentStationId = null;

private String parentStationId;
private final GtfsDefinitions.LocationType locationType;

private double lon;
private double lat;
private Coord coord;
private final Collection<Trip> trips = new HashSet<>();

public StopImpl(String id, String name, double lon, double lat) {
public StopImpl(String id, String name, GtfsDefinitions.LocationType locationType, String parentStationId) {
this.id = id;
this.lon = lon;
this.lat = lat;
this.coord = new Coord(lon, lat);
this.name = name;
}

public StopImpl(String id, String name, double lon, double lat, GtfsDefinitions.LocationType locationType, String parentStationId) {
this.id = id;
this.lon = lon;
this.lat = lat;
this.coord = new Coord(lon, lat);
this.name = name;
this.locationType = locationType;
this.parentStationId = parentStationId;
}

/**
 * Creates a stop whose position is already known.
 *
 * @param id              id of the stop
 * @param name            name of the stop
 * @param lon             longitude, used as first argument of the {@code Coord}
 * @param lat             latitude, used as second argument of the {@code Coord}
 * @param locationType    location type of the stop
 * @param parentStationId id of the parent station, may be {@code null}
 */
public StopImpl(String id, String name, double lon, double lat, GtfsDefinitions.LocationType locationType, String parentStationId) {
	this(id, name, locationType, parentStationId);
	// assign directly instead of calling the public, overridable setLocation()
	// from a constructor
	this.lon = lon;
	this.lat = lat;
	this.coord = new Coord(lon, lat);
}

@Override
public String getId() {
Expand Down Expand Up @@ -98,12 +90,10 @@ public Coord getCoord() {
return coord;
}

public void setLocationType(GtfsDefinitions.LocationType type) {
this.locationType = type;
}

public void setParentStation(String id) {
this.parentStationId = id;
/**
 * Sets the position of this stop. Stores both raw values and a
 * {@code Coord} built as {@code (lon, lat)}.
 *
 * @param lon longitude of the stop
 * @param lat latitude of the stop
 */
public void setLocation(double lon, double lat) {
	this.coord = new Coord(lon, lat);
	this.lat = lat;
	this.lon = lon;
}

public void addTrip(Trip trip) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ void compareShapes() {

@Test
void statistics() {
Assertions.assertEquals(6, feed.getStops().size());
Assertions.assertEquals(12, feed.getStops().size());
Assertions.assertEquals(6, feed.getStops().values().stream().filter(s -> s.getLocationType().index==0).count());
Assertions.assertEquals(3, feed.getRoutes().size());
Assertions.assertEquals(4, feed.getServices().size());
Assertions.assertEquals(3, feed.getShapes().size());
Expand Down
74 changes: 74 additions & 0 deletions src/test/java/org/matsim/pt2matsim/gtfs/GtfsMinimalCaseTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/* *********************************************************************** *
* project: org.matsim.*
* *********************************************************************** *
* *
* copyright : (C) 2016 by the members listed in the COPYING, *
* LICENSE and WARRANTY file. *
* email : info at matsim dot org *
* *
* *********************************************************************** *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* See also COPYING, LICENSE and WARRANTY file *
* *
* *********************************************************************** */

package org.matsim.pt2matsim.gtfs;

import static org.junit.jupiter.api.Assertions.*;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.matsim.core.utils.geometry.transformations.TransformationFactory;
import org.matsim.pt2matsim.gtfs.lib.GtfsDefinitions;

/**
 * Checks that GTFS datasets which only do the bare minimum (i.e. they do not meet the
 * conditions of the "conditionally required" fields and therefore omit those fields)
 * are still converted correctly.
 *
 * @author Tobias Kohl / Senozon
 */
class GtfsMinimalCaseTest {

	/**
	 * Every route must be linked to an agency; name, url and time zone of that
	 * agency are checked via three different routes.
	 * NOTE(review): presumably the fixture omits the agency_id column - confirm against the test data.
	 */
	@Test
	void noAgencyId() {
		GtfsFeed feed = new GtfsFeedImpl("test/gtfs-feed-min/noAgencyId");

		feed.getRoutes().values().forEach(
				route -> assertNotNull(route.getAgency(), "no agency in route " + route.getId()));

		assertEquals("pt2matsim", feed.getRoutes().get("lineA").getAgency().getAgencyName());
		assertEquals("https://github.com/matsim-org/pt2matsim", feed.getRoutes().get("lineB").getAgency().getAgencyUrl());
		assertEquals("Europe/Zurich", feed.getRoutes().get("lineC").getAgency().getAgencyTimeZone());
	}

	/**
	 * Short and long name of lineA are expected to be identical ("Bus Line A").
	 * NOTE(review): presumably the fixture has no route_short_name column - confirm against the test data.
	 */
	@Test
	void noShortName() {
		GtfsFeed feed = new GtfsFeedImpl("test/gtfs-feed-min/noShortName");

		assertEquals("Bus Line A", feed.getRoutes().get("lineA").getShortName());
		assertEquals("Bus Line A", feed.getRoutes().get("lineA").getLongName());
		assertEquals(GtfsDefinitions.RouteType.BUS, feed.getRoutes().get("lineA").getRouteType());
		assertEquals("P2M", feed.getRoutes().get("lineB").getAgency().getId());
	}

	/**
	 * Short and long name of lineA are expected to be identical ("Line A").
	 * NOTE(review): presumably the fixture has no route_long_name column - confirm against the test data.
	 */
	@Test
	void noLongName() {
		GtfsFeed feed = new GtfsFeedImpl("test/gtfs-feed-min/noLongName");

		assertEquals("Line A", feed.getRoutes().get("lineA").getShortName());
		assertEquals("Line A", feed.getRoutes().get("lineA").getLongName());
		assertEquals(GtfsDefinitions.RouteType.BUS, feed.getRoutes().get("lineA").getRouteType());
		assertEquals("P2M", feed.getRoutes().get("lineB").getAgency().getId());
	}

	/** Loading the "noNameAtAll" feed must be rejected with an {@link IllegalArgumentException}. */
	@Test
	void noNameAtAll() {
		assertThrows(IllegalArgumentException.class, () -> new GtfsFeedImpl("test/gtfs-feed-min/noNameAtAll"));
	}

	/** Loading the "multipleAgencies" feed must be rejected with an {@link IllegalArgumentException}. */
	@Test
	void multipleAgencies() {
		assertThrows(IllegalArgumentException.class, () -> new GtfsFeedImpl("test/gtfs-feed-min/multipleAgencies"));
	}
}
2 changes: 1 addition & 1 deletion test/gtfs-feed-cal/routes.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
route_id,route_short_name,route_long_name,route_type,agency_id,
route_id,route_short_name,route_long_name,route_type,agency_id
lineA,Line A,Bus Line A,3,S42
lineB,Line B,Tram Line B,0,P2M
lineC,Line C,Something else,907,P2M
2 changes: 2 additions & 0 deletions test/gtfs-feed-min/agency.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
agency_id,agency_name,agency_url,agency_timezone
P2M,pt2matsim,https://github.com/matsim-org/pt2matsim,Europe/Zurich
5 changes: 5 additions & 0 deletions test/gtfs-feed-min/calendar.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date
WEEK,1,1,1,1,1,0,0,20181001,20181007
EXPR,0,0,0,0,0,0,0,20181001,20181007
WEND,0,0,0,0,0,1,1,20181001,20181007
EMPT,1,0,1,1,0,1,0,20181001,20181007
6 changes: 6 additions & 0 deletions test/gtfs-feed-min/calendar_dates.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
service_id,date,exception_type
WEEK,20181006,2
WEEK,20181007,2
EXPR,20181005,1
EMPT,20181002,1
EMPT,20181001,2
4 changes: 4 additions & 0 deletions test/gtfs-feed-min/frequencies.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
trip_id,start_time,end_time,headway_secs,exact_times
routeA1,08:00:00,14:00:01,10800,0
routeA2,09:00:00,15:00:01,10800,0
routeB,07:00:00,16:00:01,10800,0
3 changes: 3 additions & 0 deletions test/gtfs-feed-min/multipleAgencies/agency.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
agency_id,agency_name,agency_url,agency_timezone
P2M,pt2matsim,https://github.com/matsim-org/pt2matsim,Europe/Zurich
S42,Service 42,https://google.com,Europe/Berlin
5 changes: 5 additions & 0 deletions test/gtfs-feed-min/multipleAgencies/calendar.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday,start_date,end_date
WEEK,1,1,1,1,1,0,0,20181001,20181007
EXPR,0,0,0,0,0,0,0,20181001,20181007
WEND,0,0,0,0,0,1,1,20181001,20181007
EMPT,1,0,1,1,0,1,0,20181001,20181007
6 changes: 6 additions & 0 deletions test/gtfs-feed-min/multipleAgencies/calendar_dates.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
service_id,date,exception_type
WEEK,20181006,2
WEEK,20181007,2
EXPR,20181005,1
EMPT,20181002,1
EMPT,20181001,2
4 changes: 4 additions & 0 deletions test/gtfs-feed-min/multipleAgencies/frequencies.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
trip_id,start_time,end_time,headway_secs,exact_times
routeA1,08:00:00,14:00:01,10800,0
routeA2,09:00:00,15:00:01,10800,0
routeB,07:00:00,16:00:01,10800,0
Loading

0 comments on commit 2da717f

Please sign in to comment.