Skip to content

create-public-files-0.0.1-car #32

create-public-files-0.0.1-car

create-public-files-0.0.1-car #32

---
name: create-public-files
run-name: create-public-files-${{ inputs.version }}-${{ inputs.mode }}
on:
workflow_dispatch:
inputs:
version:
required: true
description: Version of data
default: '0.0.1'
type: string
mode:
required: true
description: Mode of travel
default: 'car'
type: choice
options:
- car
- bicycle
- foot
override_years:
required: false
description: |
Comma-separated list of OSM data years to run e.g. 2020,2023.
Will run all (see params.yaml) if null
type: string
override_states:
required: false
description: |
Comma-separated state codes to run e.g. 01,06.
Will run all (see params.yaml) if null
type: string
override_datasets:
required: false
description: |
Comma-separated list datasets to publish e.g. times,metadata.
Will run all (see params.yaml) if null
type: string
override_geographies:
required: false
description: |
Comma-separated geographies to limit run e.g. county,tract.
Will run all (see params.yaml) if null
type: string
env:
AWS_DEFAULT_REGION: us-east-1
# See: https://github.com/aws/aws-cli/issues/5262#issuecomment-705832151
AWS_EC2_METADATA_DISABLED: true
PYTHONUNBUFFERED: "1"
jobs:
setup-jobs:
runs-on: ubuntu-24.04
outputs:
years: ${{ steps.create-year-jobs.outputs.param }}
states: ${{ steps.create-state-jobs.outputs.param }}
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Create year jobs
id: create-year-jobs
uses: ./.github/actions/parse-gh-input
with:
param_path: '.input.year'
param_override: '${{ inputs.override_years }}'
- name: Create state jobs
id: create-state-jobs
uses: ./.github/actions/parse-gh-input
with:
param_path: '.input.state'
param_override: '${{ inputs.override_states }}'
create-files:
runs-on: ubuntu-24.04
needs: setup-jobs
strategy:
# Don't fail all chunks if one fails
fail-fast: false
matrix:
year: ${{ fromJSON(needs.setup-jobs.outputs.years) }}
state: ${{ fromJSON(needs.setup-jobs.outputs.states) }}
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Setup Cloudflare credentials
uses: ./.github/actions/setup-cloudflare-s3
with:
CLOUDFLARE_S3_API_ACCESS_KEY_ID: ${{ secrets.CLOUDFLARE_S3_API_ACCESS_KEY_ID }}
CLOUDFLARE_S3_API_SECRET_ACCESS_KEY: ${{ secrets.CLOUDFLARE_S3_API_SECRET_ACCESS_KEY }}
- name: Remove unnecessary software and increase swap space
uses: ./.github/actions/prep-disk-and-swap
with:
swap_override: 50000
- name: Install uv
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
cache-suffix: "site-data"
cache-dependency-glob: |
pyproject.toml
uv.lock
- name: Install Python dependencies
id: install-python-dependencies
shell: bash
run: |
sudo apt-get install libgeos-dev
uv python install
uv venv
uv pip install ".[site,data]"
- name: Create file jobs per dataset
id: create-dataset-jobs
uses: ./.github/actions/parse-gh-input
with:
param_path: '.output.dataset'
param_override: '${{ inputs.override_datasets }}'
- name: Create file jobs per geography
id: create-geo-jobs
uses: ./.github/actions/parse-gh-input
with:
param_path: '.input.census.geography.all'
param_override: '${{ inputs.override_geographies }}'
- name: Setup tmate session
uses: mxschmitt/action-tmate@v3
with:
limit-access-to-actor: true
- name: Create public files
working-directory: 'data'
shell: bash
run: |
geographies='${{ steps.create-geo-jobs.outputs.param }}'
geographies_array=($(echo "$geographies" | jq -r '.[]'))
datasets='${{ steps.create-dataset-jobs.outputs.param }}'
datasets_array=($(echo "$datasets" | jq -r '.[]'))
for geo in "${geographies_array[@]}"; do
for dataset in "${datasets_array[@]}"; do
uv run ./src/create_public_files.py \
--dataset "$dataset" --version ${{ inputs.version }} \
--mode ${{ inputs.mode }} --year ${{ matrix.year }} \
--geography "$geo" --state ${{ matrix.state }}
done
done