-
Notifications
You must be signed in to change notification settings - Fork 0
163 lines (139 loc) · 4.69 KB
/
create-public-files.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
---
# Manually dispatched workflow. The run name embeds the chosen data version
# and travel mode so individual runs are easy to tell apart in the UI.
name: create-public-files
run-name: create-public-files-${{ inputs.version }}-${{ inputs.mode }}

on:
  workflow_dispatch:
    inputs:
      # Required inputs.
      version:
        description: Version of data
        type: string
        required: true
        default: '0.0.1'
      mode:
        description: Mode of travel
        type: choice
        options:
          - car
          - bicycle
          - foot
        required: true
        default: car

      # Optional comma-separated overrides. Per their descriptions, leaving
      # one empty runs every value listed in params.yaml.
      override_years:
        description: |
          Comma-separated list of OSM data years to run e.g. 2020,2023.
          Will run all (see params.yaml) if null
        type: string
        required: false
      override_states:
        description: |
          Comma-separated state codes to run e.g. 01,06.
          Will run all (see params.yaml) if null
        type: string
        required: false
      override_datasets:
        description: |
          Comma-separated list datasets to publish e.g. times,metadata.
          Will run all (see params.yaml) if null
        type: string
        required: false
      override_geographies:
        description: |
          Comma-separated geographies to limit run e.g. county,tract.
          Will run all (see params.yaml) if null
        type: string
        required: false
# Environment shared by every job and step in this workflow.
env:
  AWS_DEFAULT_REGION: us-east-1
  # Disables the AWS EC2 instance-metadata lookup. Background:
  # https://github.com/aws/aws-cli/issues/5262#issuecomment-705832151
  # Quoted because environment values are always delivered as strings.
  AWS_EC2_METADATA_DISABLED: 'true'
  PYTHONUNBUFFERED: '1'
jobs:
  # Turns the year/state selection into the two lists that drive the matrix of
  # the create-files job.
  setup-jobs:
    runs-on: ubuntu-24.04
    outputs:
      years: ${{ steps.create-year-jobs.outputs.param }}
      states: ${{ steps.create-state-jobs.outputs.param }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # parse-gh-input is a repo-local action. Its `param` output is decoded
      # with fromJSON() by the matrix below, so it must be a JSON array.
      # NOTE(review): presumably param_path selects a key in params.yaml and
      # a non-empty param_override replaces it -- confirm in the action.
      - name: Create year jobs
        id: create-year-jobs
        uses: ./.github/actions/parse-gh-input
        with:
          param_path: '.input.year'
          param_override: '${{ inputs.override_years }}'

      - name: Create state jobs
        id: create-state-jobs
        uses: ./.github/actions/parse-gh-input
        with:
          param_path: '.input.state'
          param_override: '${{ inputs.override_states }}'

  # One job per (year, state) pair. Each job loops over every requested
  # geography and dataset and runs create_public_files.py for each combination.
  create-files:
    runs-on: ubuntu-24.04
    needs: setup-jobs
    strategy:
      # Don't fail all chunks if one fails
      fail-fast: false
      matrix:
        year: ${{ fromJSON(needs.setup-jobs.outputs.years) }}
        state: ${{ fromJSON(needs.setup-jobs.outputs.states) }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Cloudflare credentials
        uses: ./.github/actions/setup-cloudflare-s3
        with:
          CLOUDFLARE_S3_API_ACCESS_KEY_ID: ${{ secrets.CLOUDFLARE_S3_API_ACCESS_KEY_ID }}
          CLOUDFLARE_S3_API_SECRET_ACCESS_KEY: ${{ secrets.CLOUDFLARE_S3_API_SECRET_ACCESS_KEY }}

      # NOTE(review): the unit of swap_override is defined by the repo-local
      # action (looks like MB) -- confirm there before changing it.
      - name: Remove unnecessary software and increase swap space
        uses: ./.github/actions/prep-disk-and-swap
        with:
          swap_override: 50000

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          enable-cache: true
          cache-suffix: "site-data"
          cache-dependency-glob: |
            pyproject.toml
            uv.lock

      - name: Install Python dependencies
        id: install-python-dependencies
        shell: bash
        run: |
          # Refresh the package index first so the install does not depend on
          # whatever index snapshot the runner image shipped with, and pass
          # --yes so the install never waits on a confirmation prompt.
          sudo apt-get update
          sudo apt-get install --yes libgeos-dev
          uv python install
          uv venv
          uv pip install ".[site,data]"

      - name: Create file jobs per dataset
        id: create-dataset-jobs
        uses: ./.github/actions/parse-gh-input
        with:
          param_path: '.output.dataset'
          param_override: '${{ inputs.override_datasets }}'

      - name: Create file jobs per geography
        id: create-geo-jobs
        uses: ./.github/actions/parse-gh-input
        with:
          param_path: '.input.census.geography.all'
          param_override: '${{ inputs.override_geographies }}'

      # Interactive debugging aid. It used to run unconditionally, which makes
      # every matrix job wait on an SSH session. It now only starts when the
      # run has debug logging enabled (runner.debug is set to 1 in that case).
      - name: Setup tmate session
        if: ${{ runner.debug == '1' }}
        uses: mxschmitt/action-tmate@v3
        with:
          limit-access-to-actor: true

      # Workflow inputs, matrix values and step outputs are handed to the
      # script through environment variables rather than being expanded into
      # the script text, so their content cannot be interpreted as shell code.
      - name: Create public files
        working-directory: 'data'
        shell: bash
        env:
          JOB_GEOGRAPHIES: ${{ steps.create-geo-jobs.outputs.param }}
          JOB_DATASETS: ${{ steps.create-dataset-jobs.outputs.param }}
          JOB_VERSION: ${{ inputs.version }}
          JOB_MODE: ${{ inputs.mode }}
          JOB_YEAR: ${{ matrix.year }}
          JOB_STATE: ${{ matrix.state }}
        run: |
          # Decode the JSON arrays one element per line. A jq failure (for
          # example malformed JSON) stops the step here.
          geographies=$(jq -r '.[]' <<<"$JOB_GEOGRAPHIES")
          datasets=$(jq -r '.[]' <<<"$JOB_DATASETS")

          # mapfile keeps each line as exactly one array element (no word
          # splitting or glob expansion). The emptiness guard keeps an empty
          # JSON array from becoming a single empty element.
          geographies_array=()
          datasets_array=()
          if [[ -n "$geographies" ]]; then
            mapfile -t geographies_array <<<"$geographies"
          fi
          if [[ -n "$datasets" ]]; then
            mapfile -t datasets_array <<<"$datasets"
          fi

          for geo in "${geographies_array[@]}"; do
            for dataset in "${datasets_array[@]}"; do
              uv run ./src/create_public_files.py \
                --dataset "$dataset" --version "$JOB_VERSION" \
                --mode "$JOB_MODE" --year "$JOB_YEAR" \
                --geography "$geo" --state "$JOB_STATE"
            done
          done