Skip to content

Commit

Permalink
GH-17: Download calories from Strava
Browse files Browse the repository at this point in the history
  • Loading branch information
martin-ueding committed Feb 3, 2024
1 parent cce73cd commit 7c410e6
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 8 deletions.
6 changes: 5 additions & 1 deletion docs/getting-started/using-strava-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,8 @@ Then you are all set to download data from the Strava API. When you start one of

When you first start this program and use the Strava API as a data source, it will download the metadata for all your activities. Then it will start to download the time series data for each activity. Strava enforces rate limiting, so after the first 200 activities the program will crash and you will have to wait for 15 minutes before you can try again; the next run will then download the next batch.

Therefore it is recommended to use a Strava export in order to get started quicker. For this go to the [Strava account download page](https://www.strava.com/athlete/delete_your_account) and download all your data. You will get a ZIP file. Unpack the files into `Playground/Strava Export`. These will be picked up there. Activities from Strava will only be downloaded after importing all these, and only the ones after the last one in the export will be downloaded. This way you can get started much quicker.
Therefore it is recommended to use a Strava export in order to get started quicker. For this go to the [Strava account download page](https://www.strava.com/athlete/delete_your_account) and download all your data. You will get a ZIP file. Unpack the files into `Playground/Strava Export`. These will be picked up there. Activities from Strava will only be downloaded after importing all these, and only the ones after the last one in the export will be downloaded. This way you can get started much quicker.

## Skip Strava download

If you don't want to download new activities from Strava, use `--skip-strava` to have the webserver start right away.
15 changes: 11 additions & 4 deletions geo_activity_playground/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from geo_activity_playground.explorer.tile_visits import compute_tile_visits
from geo_activity_playground.explorer.video import explorer_video_main
from geo_activity_playground.importers.directory import import_from_directory
from geo_activity_playground.importers.strava_api import download_missing_calories
from geo_activity_playground.importers.strava_api import import_from_strava_api
from geo_activity_playground.webui.app import webui_main

Expand Down Expand Up @@ -68,7 +69,7 @@ def main() -> None:
subparser = subparsers.add_parser("serve", help="Launch webserver")
subparser.set_defaults(
func=lambda options: webui_main(
make_activity_repository(options.basedir),
make_activity_repository(options.basedir, options.skip_strava),
host=options.host,
port=options.port,
)
Expand All @@ -79,10 +80,11 @@ def main() -> None:
subparser.add_argument(
"--port", default=5000, type=int, help="the port to run listen on"
)
subparser.add_argument("--skip-strava", action=argparse.BooleanOptionalAction)

subparser = subparsers.add_parser("cache", help="Cache stuff")
subparser.set_defaults(
func=lambda options: make_activity_repository(options.basedir)
func=lambda options: make_activity_repository(options.basedir, False)
)

options = parser.parse_args()
Expand All @@ -96,18 +98,23 @@ def main() -> None:
options.func(options)


def make_activity_repository(basedir: pathlib.Path) -> ActivityRepository:
def make_activity_repository(
basedir: pathlib.Path, skip_strava: bool
) -> ActivityRepository:
os.chdir(basedir)
apply_cache_migrations()
config = get_config()

if "strava" in config and not skip_strava:
download_missing_calories()

repository = ActivityRepository()

if pathlib.Path("Activities").exists():
import_from_directory(repository, config.get("prefer_metadata_from_file", True))
if pathlib.Path("Strava Export").exists():
import_from_strava_checkout(repository)
if "strava" in config:
if "strava" in config and not skip_strava:
import_from_strava_api(repository)

embellish_time_series(repository)
Expand Down
48 changes: 45 additions & 3 deletions geo_activity_playground/importers/strava_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import logging
import pathlib
import pickle
import sys
import time
from typing import Any

Expand Down Expand Up @@ -138,7 +137,8 @@ def try_import_strava(repository: ActivityRepository) -> None:
time_series = download_strava_time_series(activity.id, client)
except ObjectNotFound as e:
logger.error(
f"The activity {activity.id} with name “{activity.name}” cannot be found. Perhaps it is a manual activity without a time series. Ignoring. {e=}"
f"The activity {activity.id} with name “{activity.name}” cannot be found."
f"Perhaps it is a manual activity without a time series. Ignoring. {e=}"
)
continue
time_series.name = activity.id
Expand All @@ -150,6 +150,8 @@ def try_import_strava(repository: ActivityRepository) -> None:
time_series["time"] = new_time
time_series.to_parquet(time_series_path)

detailed_activity = get_detailed_activity(activity.id)

if len(time_series) > 0 and "latitude" in time_series.columns:
repository.add_activity(
{
Expand All @@ -161,7 +163,7 @@ def try_import_strava(repository: ActivityRepository) -> None:
"start": activity.start_date,
"elapsed_time": activity.elapsed_time,
"equipment": gear_names[activity.gear_id],
"calories": activity.calories,
"calories": detailed_activity.calories,
}
)
limit_exceeded = False
Expand Down Expand Up @@ -189,3 +191,43 @@ def download_strava_time_series(activity_id: int, client: Client) -> pd.DataFram

df = pd.DataFrame(columns)
return df


def get_detailed_activity(activity_id: int, client: Client):
    """Return the detailed activity for ``activity_id``, cached on disk.

    The result of ``client.get_activity(activity_id)`` is pickled to
    ``Cache/Detailed Activities/<activity_id>.pickle`` (relative to the
    current working directory). If that file already exists it is unpickled
    and returned, and ``client`` is not used at all.

    Args:
        activity_id: Identifier of the activity; also used as the cache file
            name.
        client: Object providing ``get_activity``; only called on a cache
            miss.

    Returns:
        Whatever ``client.get_activity`` returned, either freshly fetched or
        restored from the cache file.
    """
    cache_file = pathlib.Path(f"Cache/Detailed Activities/{activity_id}.pickle")

    if not cache_file.exists():
        # Cache miss: fetch once, then persist so later calls skip the request.
        fetched = client.get_activity(activity_id)
        cache_file.parent.mkdir(parents=True, exist_ok=True)
        with cache_file.open("wb") as sink:
            pickle.dump(fetched, sink)
        return fetched

    # Cache hit. NOTE: unpickling is only safe because this file is written
    # by this very function; never point it at untrusted data.
    with cache_file.open("rb") as source:
        return pickle.load(source)


def download_missing_calories() -> None:
    """Fill the ``calories`` column of the cached activity metadata from Strava.

    Reads ``Cache/activities.parquet`` (relative to the current working
    directory) and returns immediately when that file does not exist. For
    every activity listed by the Strava client that is already present in the
    metadata, the calories of its detailed activity are written into the
    ``calories`` column. The table is written back to the same file in all
    cases, so progress made before a rate limit or another error is kept.

    Activities returned by Strava whose id is not in the metadata are skipped:
    assigning to them via ``.loc`` would append a new row that is empty except
    for ``calories``, and would spend a detail request on an activity that is
    not part of the cache.
    """
    activity_meta_path = pathlib.Path("Cache/activities.parquet")
    if not activity_meta_path.exists():
        return

    activity_meta = pd.read_parquet(activity_meta_path)
    # Index by activity id so rows can be addressed directly with ``.loc``.
    activity_meta.index = activity_meta["id"]

    client = Client(access_token=get_current_access_token())

    try:
        for activity in tqdm(
            client.get_activities(after="2000-01-01T00:00:00Z"),
            desc="Downloading calories from Strava",
        ):
            # Only update rows that already exist; see the docstring.
            if activity.id not in activity_meta.index:
                continue
            calories = get_detailed_activity(activity.id, client).calories
            activity_meta.loc[activity.id, "calories"] = calories
    except RateLimitExceeded:
        # Best effort: keep what was collected so far and let a later run
        # continue, but do not hide the reason for stopping early.
        logger.warning(
            "Strava rate limit exceeded while downloading calories. "
            "Saving partial progress; the remaining activities will be "
            "handled on a later run."
        )
    finally:
        activity_meta.to_parquet(activity_meta_path)

0 comments on commit 7c410e6

Please sign in to comment.