diff --git a/docs/getting-started/using-strava-api.md b/docs/getting-started/using-strava-api.md
index 7bdf961..a96672e 100644
--- a/docs/getting-started/using-strava-api.md
+++ b/docs/getting-started/using-strava-api.md
@@ -41,4 +41,8 @@ Then you are all set to download data from the Strava API. When you start one of
 
 When you first start this program and use the Strava API as a data source, it will download the metadata for all your activities. Then it will start to download all the time series data for each activity. Strava has a rate limiting, so after the first 200 activities it will crash and you will have to wait for 15 minutes until you can try again and it will download the next batch.
 
-Therefore it is recommended to use a Strava export in order to get started quicker. For this go to the [Strava account download page](https://www.strava.com/athlete/delete_your_account) and download all your data. You will get a ZIP file. Unpack the files into `Playground/Strava Export`. These will be picked up there. Activities from Strava will only be downloaded after importing all these, and only the ones after the last one in the export will be downloaded. This way you can get started much quicker.
\ No newline at end of file
+Therefore it is recommended to use a Strava export in order to get started quicker. For this go to the [Strava account download page](https://www.strava.com/athlete/delete_your_account) and download all your data. You will get a ZIP file. Unpack the files into `Playground/Strava Export`. These will be picked up there. Activities from Strava will only be downloaded after importing all these, and only the ones after the last one in the export will be downloaded. This way you can get started much quicker.
+
+## Skip Strava download
+
+If you don't want to download new activities from Strava, use `--skip-strava` to have the webserver start right away.
\ No newline at end of file
diff --git a/geo_activity_playground/__main__.py b/geo_activity_playground/__main__.py
index 3a63626..218e9ad 100644
--- a/geo_activity_playground/__main__.py
+++ b/geo_activity_playground/__main__.py
@@ -16,6 +16,7 @@
 from geo_activity_playground.explorer.tile_visits import compute_tile_visits
 from geo_activity_playground.explorer.video import explorer_video_main
 from geo_activity_playground.importers.directory import import_from_directory
+from geo_activity_playground.importers.strava_api import download_missing_calories
 from geo_activity_playground.importers.strava_api import import_from_strava_api
 from geo_activity_playground.webui.app import webui_main
 
@@ -68,7 +69,7 @@ def main() -> None:
     subparser = subparsers.add_parser("serve", help="Launch webserver")
     subparser.set_defaults(
         func=lambda options: webui_main(
-            make_activity_repository(options.basedir),
+            make_activity_repository(options.basedir, options.skip_strava),
             host=options.host,
             port=options.port,
         )
@@ -79,10 +80,11 @@
     subparser.add_argument(
         "--port", default=5000, type=int, help="the port to run listen on"
     )
+    subparser.add_argument("--skip-strava", action=argparse.BooleanOptionalAction)
 
     subparser = subparsers.add_parser("cache", help="Cache stuff")
     subparser.set_defaults(
-        func=lambda options: make_activity_repository(options.basedir)
+        func=lambda options: make_activity_repository(options.basedir, False)
     )
 
     options = parser.parse_args()
@@ -96,18 +98,23 @@
     options.func(options)
 
 
-def make_activity_repository(basedir: pathlib.Path) -> ActivityRepository:
+def make_activity_repository(
+    basedir: pathlib.Path, skip_strava: bool
+) -> ActivityRepository:
     os.chdir(basedir)
     apply_cache_migrations()
     config = get_config()
 
+    if "strava" in config and not skip_strava:
+        download_missing_calories()
+
     repository = ActivityRepository()
 
     if pathlib.Path("Activities").exists():
         import_from_directory(repository, config.get("prefer_metadata_from_file", True))
     if pathlib.Path("Strava Export").exists():
         import_from_strava_checkout(repository)
-    if "strava" in config:
+    if "strava" in config and not skip_strava:
         import_from_strava_api(repository)
 
     embellish_time_series(repository)
diff --git a/geo_activity_playground/importers/strava_api.py b/geo_activity_playground/importers/strava_api.py
index e2bbca5..8e2006f 100644
--- a/geo_activity_playground/importers/strava_api.py
+++ b/geo_activity_playground/importers/strava_api.py
@@ -4,7 +4,6 @@
 import logging
 import pathlib
 import pickle
-import sys
 import time
 from typing import Any
 
@@ -138,7 +137,8 @@ def try_import_strava(repository: ActivityRepository) -> None:
             time_series = download_strava_time_series(activity.id, client)
         except ObjectNotFound as e:
             logger.error(
-                f"The activity {activity.id} with name “{activity.name}” cannot be found. Perhaps it is a manual activity without a time series. Ignoring. {e=}"
+                f"The activity {activity.id} with name “{activity.name}” cannot be found. "
+                f"Perhaps it is a manual activity without a time series. Ignoring. {e=}"
             )
             continue
         time_series.name = activity.id
@@ -150,6 +150,8 @@ def try_import_strava(repository: ActivityRepository) -> None:
             time_series["time"] = new_time
             time_series.to_parquet(time_series_path)
 
+        detailed_activity = get_detailed_activity(activity.id, client)
+
         if len(time_series) > 0 and "latitude" in time_series.columns:
             repository.add_activity(
                 {
@@ -161,7 +163,7 @@
                     "start": activity.start_date,
                     "elapsed_time": activity.elapsed_time,
                     "equipment": gear_names[activity.gear_id],
-                    "calories": activity.calories,
+                    "calories": detailed_activity.calories,
                 }
             )
         limit_exceeded = False
@@ -189,3 +191,43 @@ def download_strava_time_series(activity_id: int, client: Client) -> pd.DataFram
     df = pd.DataFrame(columns)
 
     return df
+
+
+def get_detailed_activity(activity_id: int, client: Client):
+    detailed_activity_path = pathlib.Path(
+        f"Cache/Detailed Activities/{activity_id}.pickle"
+    )
+    if detailed_activity_path.exists():
+        with open(detailed_activity_path, "rb") as f:
+            return pickle.load(f)
+
+    detailed_activity = client.get_activity(activity_id)
+
+    detailed_activity_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(detailed_activity_path, "wb") as f:
+        pickle.dump(detailed_activity, f)
+
+    return detailed_activity
+
+
+def download_missing_calories() -> None:
+    activity_meta_path = pathlib.Path("Cache/activities.parquet")
+    if not activity_meta_path.exists():
+        return
+
+    activity_meta = pd.read_parquet(activity_meta_path)
+    activity_meta.index = activity_meta["id"]
+
+    client = Client(access_token=get_current_access_token())
+
+    try:
+        for activity in tqdm(
+            client.get_activities(after="2000-01-01T00:00:00Z"),
+            desc="Downloading calories from Strava",
+        ):
+            calories = get_detailed_activity(activity.id, client).calories
+            activity_meta.loc[activity.id, "calories"] = calories
+    except RateLimitExceeded:
+        pass
+    finally:
+        activity_meta.to_parquet(activity_meta_path)