Skip to content

Commit

Permalink
implement 30 day removal of old trips
Browse files Browse the repository at this point in the history
  • Loading branch information
kylerchin committed Jan 7, 2025
1 parent 9eb6493 commit 5a4ac37
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 24 deletions.
5 changes: 1 addition & 4 deletions src/alpenrose/single_fetch_time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -346,10 +346,7 @@ pub fn make_reqwest_for_url(

return Some(
client
.request(
reqwest::Method::POST,
url,
)
.request(reqwest::Method::POST, url)
.multipart(form)
.build()
.unwrap(),
Expand Down
3 changes: 1 addition & 2 deletions src/maple/gtfs_handlers/convex_hull.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use geo::Coord;

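// Convex hull of the input points, delegated to `convex_hull()` on a multipoint.
// NOTE(review): the geo crate documents its hull as QuickHull rather than Graham scan — confirm.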
pub fn convex_hull(input: &Vec<(f64, f64)>) -> geo::Polygon {
let pnts = input
let pnts = input
.iter()
.filter(|coords| {
let (x, y) = coords;
Expand All @@ -23,5 +23,4 @@ pub fn convex_hull(input: &Vec<(f64, f64)>) -> geo::Polygon {
let hull = multipoint.convex_hull();

hull

}
4 changes: 3 additions & 1 deletion src/maple/gtfs_handlers/hull_from_gtfs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ pub fn hull_from_gtfs(gtfs: &gtfs_structures::Gtfs) -> Option<Polygon> {
.iter()
.filter(|(_, stop)| stop.longitude.is_some() && stop.latitude.is_some())
.filter(|(_, stop)| !is_null_island(stop.latitude.unwrap(), stop.longitude.unwrap()))
.filter(|(_, stop)| stop.longitude.unwrap().is_finite() && stop.latitude.unwrap().is_finite())
.filter(|(_, stop)| {
stop.longitude.unwrap().is_finite() && stop.latitude.unwrap().is_finite()
})
.map(|(_, stop)| Point::new(stop.longitude.unwrap(), stop.latitude.unwrap()))
.collect::<Vec<Point>>();

Expand Down
5 changes: 5 additions & 0 deletions src/maple/gtfs_process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use catenary::models::{
};
use catenary::postgres_tools::CatenaryPostgresPool;
use catenary::route_id_transform;
use catenary::schedule_filtering::minimum_day_filter;
use chrono::NaiveDate;
use diesel::ExpressionMethods;
use diesel_async::RunQueryDsl;
Expand Down Expand Up @@ -98,6 +99,10 @@ pub async fn gtfs_process_feed(
_ => gtfs,
};

let today = chrono::Utc::now().naive_utc().date();

let gtfs = minimum_day_filter(gtfs, today - chrono::Duration::days(30));

println!(
"Finished reading GTFS for {}, took {:?}ms",
feed_id, gtfs.read_duration
Expand Down
7 changes: 5 additions & 2 deletions src/maple/transitland_download.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,17 @@ async fn try_to_download(
match response {
Ok(response) => Ok(response),
Err(error) => {
println!("Error with downloading {}: {}, {:?}, trying again", feed_id, url, error);
println!(
"Error with downloading {}: {}, {:?}, trying again",
feed_id, url, error
);

//trying again with a different client

let client = reqwest::ClientBuilder::new()
.user_agent("Catenary Maple")
.timeout(Duration::from_secs(60 * 3))
.connect_timeout(Duration::from_secs(20))
.connect_timeout(Duration::from_secs(20))
.build()
.unwrap();

Expand Down
69 changes: 55 additions & 14 deletions src/schedule_filtering/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::collections::BTreeSet;
use std::collections::BTreeMap;
use chrono::prelude::*;
use gtfs_structures::*;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::collections::HashMap;

pub fn include_only_route_types(gtfs: Gtfs, route_types: Vec<gtfs_structures::RouteType>) -> Gtfs {
Expand Down Expand Up @@ -67,22 +68,24 @@ pub fn include_only_route_types(gtfs: Gtfs, route_types: Vec<gtfs_structures::Ro
gtfs
}

fn minimum_day_filter(gtfs: Gtfs, naive_date: chrono::NaiveDate) -> Gtfs {
pub fn minimum_day_filter(gtfs: Gtfs, naive_date: chrono::NaiveDate) -> Gtfs {
let mut gtfs = gtfs;

let mut throwout_calendar_list = BTreeSet::new();
let mut routes_to_trips: HashMap<String, Vec<String>> = HashMap::new();
let mut trips_removed: BTreeSet<String> = BTreeSet::new();

let mut shapes_to_trips: HashMap<String, Vec<String>> = HashMap::new();

for (calendar_id, calendar) in &gtfs.calendar {
if calendar.end_date < naive_date {
let contains_any_exceptions_greater_than_or_equal_to_date =
match gtfs.calendar_dates.get(calendar_id) {
Some(calendar_dates) => {
calendar_dates.iter().any(|calendar_date| calendar_date.date >= naive_date)
},
None => false
};
let contains_any_exceptions_greater_than_or_equal_to_date =
match gtfs.calendar_dates.get(calendar_id) {
Some(calendar_dates) => calendar_dates
.iter()
.any(|calendar_date| calendar_date.date >= naive_date),
None => false,
};

if !contains_any_exceptions_greater_than_or_equal_to_date {
throwout_calendar_list.insert(calendar_id.clone());
Expand All @@ -91,15 +94,23 @@ fn minimum_day_filter(gtfs: Gtfs, naive_date: chrono::NaiveDate) -> Gtfs {
}

for (trip_id, trip) in &gtfs.trips {
routes_to_trips.entry(trip.route_id.clone())
.and_modify(|x| x.push(trip_id.clone()))
.or_insert(vec![trip_id.clone()]);
routes_to_trips
.entry(trip.route_id.clone())
.and_modify(|x| x.push(trip_id.clone()))
.or_insert(vec![trip_id.clone()]);

if let Some(shape_id) = &trip.shape_id {
shapes_to_trips
.entry(shape_id.clone())
.and_modify(|x| x.push(trip_id.clone()))
.or_insert(vec![trip_id.clone()]);
}

if throwout_calendar_list.contains(&trip.service_id) {
trips_removed.insert(trip_id.clone());
}
}

let mut throwout_routes_list: BTreeSet<String> = BTreeSet::new();

for (route_id, trip_ids) in routes_to_trips {
Expand All @@ -110,6 +121,14 @@ fn minimum_day_filter(gtfs: Gtfs, naive_date: chrono::NaiveDate) -> Gtfs {
}
}

for (shape_id, trip_ids) in shapes_to_trips {
let mark_for_deletion = trip_ids.iter().all(|x| trips_removed.contains(x));

if mark_for_deletion {
gtfs.shapes.remove(&shape_id);
}
}

for route_id in throwout_routes_list {
gtfs.routes.remove(&route_id);
}
Expand All @@ -128,6 +147,7 @@ fn minimum_day_filter(gtfs: Gtfs, naive_date: chrono::NaiveDate) -> Gtfs {

#[cfg(test)]
mod tests {

use super::*;

#[tokio::test]
Expand All @@ -142,4 +162,25 @@ mod tests {
println!("ends with");
gtfs.print_stats();
}

/// Fetches the Amtrak GTFS feed and runs `minimum_day_filter` with a cutoff
/// ten days before today's UTC date, printing feed statistics before and
/// after filtering.
///
/// This test needs network access and panics (via `unwrap`) if the feed
/// cannot be downloaded or parsed.
#[tokio::test]
async fn filter_amtrak() {
    // Take the date straight from the UTC timestamp instead of rebuilding it
    // with the deprecated, panicking `NaiveDate::from_ymd`.
    let today = chrono::Utc::now().naive_utc().date();

    let gtfs =
        gtfs_structures::Gtfs::from_url_async("https://content.amtrak.com/content/gtfs/GTFS.zip")
            .await
            .unwrap();

    println!("amtk starts with");
    gtfs.print_stats();

    let gtfs = minimum_day_filter(gtfs, today - chrono::Duration::days(10));

    println!("amtk ends with");
    gtfs.print_stats();
}
}
2 changes: 1 addition & 1 deletion transitland-atlas
Submodule transitland-atlas updated 186 files

0 comments on commit 5a4ac37

Please sign in to comment.