GH-17: Download calories from Strava

martin-ueding · Feb 3, 2024 · 7c410e6 · 7c410e6
1 parent cce73cd
commit 7c410e6
Show file tree

Hide file tree

Showing 3 changed files with 61 additions and 8 deletions.
diff --git a/docs/getting-started/using-strava-api.md b/docs/getting-started/using-strava-api.md
@@ -41,4 +41,8 @@ Then you are all set to download data from the Strava API. When you start one of
 
 When you first start this program and use the Strava API as a data source, it will download the metadata for all your activities. Then it will start to download all the time series data for each activity. Strava has a rate limiting, so after the first 200 activities it will crash and you will have to wait for 15 minutes until you can try again and it will download the next batch.
 
-Therefore it is recommended to use a Strava export in order to get started quicker. For this go to the [Strava account download page](https://www.strava.com/athlete/delete_your_account) and download all your data. You will get a ZIP file. Unpack the files into `Playground/Strava Export`. These will be picked up there. Activities from Strava will only be downloaded after importing all these, and only the ones after the last one in the export will be downloaded. This way you can get started much quicker.
+Therefore it is recommended to use a Strava export in order to get started more quickly. To do so, go to the [Strava account download page](https://www.strava.com/athlete/delete_your_account) and download all your data. You will get a ZIP file. Unpack the files into `Playground/Strava Export`; they will be picked up from there. Activities will only be downloaded from the Strava API after all of these have been imported, and only those after the last activity in the export will be downloaded. This way you can get started much more quickly.
+
+## Skip Strava download
+
+If you don't want to download new activities from Strava, pass `--skip-strava` to the `serve` command. The Strava download is then skipped and the webserver starts right away.
diff --git a/geo_activity_playground/__main__.py b/geo_activity_playground/__main__.py
@@ -16,6 +16,7 @@
 from geo_activity_playground.explorer.tile_visits import compute_tile_visits
 from geo_activity_playground.explorer.video import explorer_video_main
 from geo_activity_playground.importers.directory import import_from_directory
+from geo_activity_playground.importers.strava_api import download_missing_calories
 from geo_activity_playground.importers.strava_api import import_from_strava_api
 from geo_activity_playground.webui.app import webui_main
 
@@ -68,7 +69,7 @@ def main() -> None:
     subparser = subparsers.add_parser("serve", help="Launch webserver")
     subparser.set_defaults(
         func=lambda options: webui_main(
-            make_activity_repository(options.basedir),
+            make_activity_repository(options.basedir, options.skip_strava),
             host=options.host,
             port=options.port,
         )
@@ -79,10 +80,11 @@ def main() -> None:
     subparser.add_argument(
         "--port", default=5000, type=int, help="the port to run listen on"
     )
+    subparser.add_argument("--skip-strava", action=argparse.BooleanOptionalAction)
 
     subparser = subparsers.add_parser("cache", help="Cache stuff")
     subparser.set_defaults(
-        func=lambda options: make_activity_repository(options.basedir)
+        func=lambda options: make_activity_repository(options.basedir, False)
     )
 
     options = parser.parse_args()
@@ -96,18 +98,23 @@ def main() -> None:
     options.func(options)
 
 
-def make_activity_repository(basedir: pathlib.Path) -> ActivityRepository:
+def make_activity_repository(
+    basedir: pathlib.Path, skip_strava: bool
+) -> ActivityRepository:
     os.chdir(basedir)
     apply_cache_migrations()
     config = get_config()
 
+    if "strava" in config and not skip_strava:
+        download_missing_calories()
+
     repository = ActivityRepository()
 
     if pathlib.Path("Activities").exists():
         import_from_directory(repository, config.get("prefer_metadata_from_file", True))
     if pathlib.Path("Strava Export").exists():
         import_from_strava_checkout(repository)
-    if "strava" in config:
+    if "strava" in config and not skip_strava:
         import_from_strava_api(repository)
 
     embellish_time_series(repository)

diff --git a/geo_activity_playground/importers/strava_api.py b/geo_activity_playground/importers/strava_api.py
@@ -4,7 +4,6 @@
 import logging
 import pathlib
 import pickle
-import sys
 import time
 from typing import Any
 
@@ -138,7 +137,8 @@ def try_import_strava(repository: ActivityRepository) -> None:
                     time_series = download_strava_time_series(activity.id, client)
                 except ObjectNotFound as e:
                     logger.error(
-                        f"The activity {activity.id} with name “{activity.name}” cannot be found. Perhaps it is a manual activity without a time series. Ignoring. {e=}"
+                        f"The activity {activity.id} with name “{activity.name}” cannot be found. "
+                        f"Perhaps it is a manual activity without a time series. Ignoring. {e=}"
                     )
                     continue
                 time_series.name = activity.id
@@ -150,6 +150,8 @@ def try_import_strava(repository: ActivityRepository) -> None:
                 time_series["time"] = new_time
                 time_series.to_parquet(time_series_path)
 
+            detailed_activity = get_detailed_activity(activity.id)
+
             if len(time_series) > 0 and "latitude" in time_series.columns:
                 repository.add_activity(
                     {
@@ -161,7 +163,7 @@ def try_import_strava(repository: ActivityRepository) -> None:
                         "start": activity.start_date,
                         "elapsed_time": activity.elapsed_time,
                         "equipment": gear_names[activity.gear_id],
-                        "calories": activity.calories,
+                        "calories": detailed_activity.calories,
                     }
                 )
         limit_exceeded = False
@@ -189,3 +191,43 @@ def download_strava_time_series(activity_id: int, client: Client) -> pd.DataFram
 
     df = pd.DataFrame(columns)
     return df
+
+
+def get_detailed_activity(activity_id: int, client: Client):
+    detailed_activity_path = pathlib.Path(
+        f"Cache/Detailed Activities/{activity_id}.pickle"
+    )
+    if detailed_activity_path.exists():
+        with open(detailed_activity_path, "rb") as f:
+            return pickle.load(f)
+
+    detailed_activity = client.get_activity(activity_id)
+
+    detailed_activity_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(detailed_activity_path, "wb") as f:
+        pickle.dump(detailed_activity, f)
+
+    return detailed_activity
+
+
+def download_missing_calories() -> None:
+    activity_meta_path = pathlib.Path("Cache/activities.parquet")
+    if not activity_meta_path.exists():
+        return
+
+    activity_meta = pd.read_parquet(activity_meta_path)
+    activity_meta.index = activity_meta["id"]
+
+    client = Client(access_token=get_current_access_token())
+
+    try:
+        for activity in tqdm(
+            client.get_activities(after="2000-01-01T00:00:00Z"),
+            desc="Downloading calories from Strava",
+        ):
+            calories = get_detailed_activity(activity.id, client).calories
+            activity_meta.loc[activity.id, "calories"] = calories
+    except RateLimitExceeded:
+        pass
+    finally:
+        activity_meta.to_parquet(activity_meta_path)