Skip to content

Commit 3f85aed

Browse files
authored
Merge pull request #30 from Clay-foundation/freddie-model-with-context
Model with context
2 parents 8b57503 + 71cdd70 commit 3f85aed

8 files changed

+1473
-93
lines changed

Diff for: configs/naip-multilabel-contextual.yaml

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
dataloader:
2+
_target_: earthtext.datamodules.chipmultilabel.ChipMultilabelModule
3+
metadata_file: /opt/data/california-naip-chips/california-naip-chips-100k.parquet
4+
neighbor_embeddings_folder: /opt/data/california-naip-chips/california-naip-chips-100k-neighbours/npy
5+
neighborhood_radius: 8 # max 8; radius r yields a (2r+1) x (2r+1) grid of neighbor embeddings (17 x 17 at r=8, 11 x 11 at r=5)
6+
get_osm_strlabels: True
7+
get_osm_ohearea: True
8+
get_osm_ohecount: True
9+
get_osm_ohelength: True
10+
embeddings_normalization: True
11+
osmvector_normalization: False
12+
# multilabel_threshold_osm_ohecount: 1
13+
multilabel_threshold_osm_ohearea: 1
14+
batch_size: 16
15+
pin_memory: True
16+
num_workers: 4
17+
18+
model:
19+
_target_: earthtext.models.multilabel.MultisizeContextualCNN
20+
input_dim: 768
21+
output_dim: 140
22+
layers_spec: [512, 256, 128]
23+
activation_fn: 'relu'
24+
# channel_specific: True
25+
# osm_codeset: 'naip'

Diff for: notebooks/models/04e - multilabel classification w context v1 NAIP.ipynb

+963
Large diffs are not rendered by default.

Diff for: notebooks/naip/00c - neighbours 3D embedding arrays.ipynb

+183-41
Large diffs are not rendered by default.

Diff for: notebooks/naip/00d - neighbours dataloader.ipynb

+113-31
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,10 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 19,
5+
"execution_count": 1,
66
"id": "08a04217-5d99-41e2-b2b3-31c286a26936",
77
"metadata": {},
8-
"outputs": [
9-
{
10-
"name": "stdout",
11-
"output_type": "stream",
12-
"text": [
13-
"The autoreload extension is already loaded. To reload it, use:\n",
14-
" %reload_ext autoreload\n"
15-
]
16-
}
17-
],
8+
"outputs": [],
189
"source": [
1910
"import geopandas as gpd\n",
2011
"import pandas as pd\n",
@@ -114,27 +105,27 @@
114105
},
115106
{
116107
"cell_type": "code",
117-
"execution_count": 8,
108+
"execution_count": 5,
118109
"id": "12ecddb4-8a5b-48e0-9281-f0d128740222",
119110
"metadata": {},
120111
"outputs": [
121112
{
122113
"name": "stderr",
123114
"output_type": "stream",
124115
"text": [
125-
"\u001b[32m2024-06-03 21:59:01.002\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m98\u001b[0m - \u001b[1musing embeddings found in metadata file\u001b[0m\n",
126-
"\u001b[32m2024-06-03 21:59:01.004\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m111\u001b[0m - \u001b[1mread train split with 72268 chip files (out of 72268)\u001b[0m\n",
127-
"\u001b[32m2024-06-03 21:59:01.004\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m115\u001b[0m - \u001b[1mremoving chip IDs with no associated neighbors .npy files\u001b[0m\n",
128-
"\u001b[32m2024-06-03 21:59:01.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m123\u001b[0m - \u001b[1mmax cache size is -1\u001b[0m\n",
129-
"\u001b[32m2024-06-03 21:59:02.923\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m98\u001b[0m - \u001b[1musing embeddings found in metadata file\u001b[0m\n",
130-
"\u001b[32m2024-06-03 21:59:02.925\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m111\u001b[0m - \u001b[1mread val split with 14770 chip files (out of 14770)\u001b[0m\n",
131-
"\u001b[32m2024-06-03 21:59:02.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m115\u001b[0m - \u001b[1mremoving chip IDs with no associated neighbors .npy files\u001b[0m\n",
132-
"\u001b[32m2024-06-03 21:59:03.333\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m123\u001b[0m - \u001b[1mmax cache size is -1\u001b[0m\n",
133-
"\u001b[32m2024-06-03 21:59:04.614\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m98\u001b[0m - \u001b[1musing embeddings found in metadata file\u001b[0m\n",
134-
"\u001b[32m2024-06-03 21:59:04.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m111\u001b[0m - \u001b[1mread test split with 17202 chip files (out of 17202)\u001b[0m\n",
135-
"\u001b[32m2024-06-03 21:59:04.617\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m115\u001b[0m - \u001b[1mremoving chip IDs with no associated neighbors .npy files\u001b[0m\n",
136-
"\u001b[32m2024-06-03 21:59:05.033\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m123\u001b[0m - \u001b[1mmax cache size is -1\u001b[0m\n",
137-
"\u001b[32m2024-06-03 21:59:05.035\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.chipmultilabel\u001b[0m:\u001b[36mcompute\u001b[0m:\u001b[36m41\u001b[0m - \u001b[1mreading means and stddevs from /opt/data/california-naip-chips/california-naip-chips-100k_metadata_embeddings_meansstdevs.pkl\u001b[0m\n"
116+
"\u001b[32m2024-06-04 17:10:23.099\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m100\u001b[0m - \u001b[1musing embeddings found in metadata file\u001b[0m\n",
117+
"\u001b[32m2024-06-04 17:10:23.101\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m113\u001b[0m - \u001b[1mread train split with 72268 chip files (out of 72268)\u001b[0m\n",
118+
"\u001b[32m2024-06-04 17:10:23.101\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m117\u001b[0m - \u001b[1mremoving chip IDs with no associated neighbors .npy files\u001b[0m\n",
119+
"\u001b[32m2024-06-04 17:10:23.523\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m125\u001b[0m - \u001b[1mmax cache size is -1\u001b[0m\n",
120+
"\u001b[32m2024-06-04 17:10:24.827\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m100\u001b[0m - \u001b[1musing embeddings found in metadata file\u001b[0m\n",
121+
"\u001b[32m2024-06-04 17:10:24.830\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m113\u001b[0m - \u001b[1mread val split with 14770 chip files (out of 14770)\u001b[0m\n",
122+
"\u001b[32m2024-06-04 17:10:24.830\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m117\u001b[0m - \u001b[1mremoving chip IDs with no associated neighbors .npy files\u001b[0m\n",
123+
"\u001b[32m2024-06-04 17:10:25.228\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m125\u001b[0m - \u001b[1mmax cache size is -1\u001b[0m\n",
124+
"\u001b[32m2024-06-04 17:10:26.502\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m100\u001b[0m - \u001b[1musing embeddings found in metadata file\u001b[0m\n",
125+
"\u001b[32m2024-06-04 17:10:26.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m113\u001b[0m - \u001b[1mread test split with 17202 chip files (out of 17202)\u001b[0m\n",
126+
"\u001b[32m2024-06-04 17:10:26.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m117\u001b[0m - \u001b[1mremoving chip IDs with no associated neighbors .npy files\u001b[0m\n",
127+
"\u001b[32m2024-06-04 17:10:26.908\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m125\u001b[0m - \u001b[1mmax cache size is -1\u001b[0m\n",
128+
"\u001b[32m2024-06-04 17:10:26.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.chipmultilabel\u001b[0m:\u001b[36mcompute\u001b[0m:\u001b[36m41\u001b[0m - \u001b[1mreading means and stddevs from /opt/data/california-naip-chips/california-naip-chips-100k_metadata_embeddings_meansstdevs.pkl\u001b[0m\n"
138129
]
139130
}
140131
],
@@ -147,7 +138,7 @@
147138
},
148139
{
149140
"cell_type": "code",
150-
"execution_count": 12,
141+
"execution_count": 6,
151142
"id": "5fa5944f-b902-4fab-a017-19b7e020b903",
152143
"metadata": {},
153144
"outputs": [
@@ -157,7 +148,7 @@
157148
"(17, 17, 768)"
158149
]
159150
},
160-
"execution_count": 12,
151+
"execution_count": 6,
161152
"metadata": {},
162153
"output_type": "execute_result"
163154
}
@@ -184,7 +175,7 @@
184175
},
185176
{
186177
"cell_type": "code",
187-
"execution_count": 15,
178+
"execution_count": 7,
188179
"id": "a85712fa-17f0-4690-a0c2-487be97ea799",
189180
"metadata": {},
190181
"outputs": [],
@@ -194,7 +185,7 @@
194185
},
195186
{
196187
"cell_type": "code",
197-
"execution_count": 16,
188+
"execution_count": 8,
198189
"id": "3fa30ac9-0376-4494-b048-19f8ffc0d41f",
199190
"metadata": {},
200191
"outputs": [],
@@ -204,7 +195,7 @@
204195
},
205196
{
206197
"cell_type": "code",
207-
"execution_count": 18,
198+
"execution_count": 9,
208199
"id": "4ff9295c-4ff8-4b31-9465-70a03ff23b51",
209200
"metadata": {},
210201
"outputs": [
@@ -214,12 +205,103 @@
214205
"torch.Size([16, 17, 17, 768])"
215206
]
216207
},
217-
"execution_count": 18,
208+
"execution_count": 9,
209+
"metadata": {},
210+
"output_type": "execute_result"
211+
}
212+
],
213+
"source": [
214+
"batch['embedding'].shape"
215+
]
216+
},
217+
{
218+
"cell_type": "markdown",
219+
"id": "a75faa90-5b40-4c8f-be84-376b6893b0fd",
220+
"metadata": {},
221+
"source": [
222+
"---"
223+
]
224+
},
225+
{
226+
"cell_type": "markdown",
227+
"id": "ec2a0ec5-ab77-4712-bd15-dc7bb561300d",
228+
"metadata": {},
229+
"source": [
230+
"Repeat the shape checks with a smaller neighborhood radius."
231+
]
232+
},
233+
{
234+
"cell_type": "code",
235+
"execution_count": 10,
236+
"id": "ad97b784-56bd-497e-9f41-107887db6499",
237+
"metadata": {},
238+
"outputs": [
239+
{
240+
"name": "stderr",
241+
"output_type": "stream",
242+
"text": [
243+
"\u001b[32m2024-06-04 17:11:50.681\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m100\u001b[0m - \u001b[1musing embeddings found in metadata file\u001b[0m\n",
244+
"\u001b[32m2024-06-04 17:11:50.684\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m113\u001b[0m - \u001b[1mread train split with 72268 chip files (out of 72268)\u001b[0m\n",
245+
"\u001b[32m2024-06-04 17:11:50.684\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m117\u001b[0m - \u001b[1mremoving chip IDs with no associated neighbors .npy files\u001b[0m\n",
246+
"\u001b[32m2024-06-04 17:11:51.103\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m125\u001b[0m - \u001b[1mmax cache size is -1\u001b[0m\n",
247+
"\u001b[32m2024-06-04 17:11:52.474\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m100\u001b[0m - \u001b[1musing embeddings found in metadata file\u001b[0m\n",
248+
"\u001b[32m2024-06-04 17:11:52.478\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m113\u001b[0m - \u001b[1mread val split with 14770 chip files (out of 14770)\u001b[0m\n",
249+
"\u001b[32m2024-06-04 17:11:52.478\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m117\u001b[0m - \u001b[1mremoving chip IDs with no associated neighbors .npy files\u001b[0m\n",
250+
"\u001b[32m2024-06-04 17:11:52.876\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m125\u001b[0m - \u001b[1mmax cache size is -1\u001b[0m\n",
251+
"\u001b[32m2024-06-04 17:11:54.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m100\u001b[0m - \u001b[1musing embeddings found in metadata file\u001b[0m\n",
252+
"\u001b[32m2024-06-04 17:11:54.289\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m113\u001b[0m - \u001b[1mread test split with 17202 chip files (out of 17202)\u001b[0m\n",
253+
"\u001b[32m2024-06-04 17:11:54.290\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m117\u001b[0m - \u001b[1mremoving chip IDs with no associated neighbors .npy files\u001b[0m\n",
254+
"\u001b[32m2024-06-04 17:11:54.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.components.chipmultilabel\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m125\u001b[0m - \u001b[1mmax cache size is -1\u001b[0m\n",
255+
"\u001b[32m2024-06-04 17:11:54.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mearthtext.datamodules.chipmultilabel\u001b[0m:\u001b[36mcompute\u001b[0m:\u001b[36m41\u001b[0m - \u001b[1mreading means and stddevs from /opt/data/california-naip-chips/california-naip-chips-100k_metadata_embeddings_meansstdevs.pkl\u001b[0m\n"
256+
]
257+
}
258+
],
259+
"source": [
260+
"conf.dataloader.neighborhood_radius = 5\n",
261+
"dl = hydra.utils.instantiate(conf.dataloader)"
262+
]
263+
},
264+
{
265+
"cell_type": "code",
266+
"execution_count": 11,
267+
"id": "7765d322-4822-43fb-a8cf-9cf8a6741b7b",
268+
"metadata": {},
269+
"outputs": [
270+
{
271+
"data": {
272+
"text/plain": [
273+
"(11, 11, 768)"
274+
]
275+
},
276+
"execution_count": 11,
277+
"metadata": {},
278+
"output_type": "execute_result"
279+
}
280+
],
281+
"source": [
282+
"dl.train_dataset[0]['embedding'].shape"
283+
]
284+
},
285+
{
286+
"cell_type": "code",
287+
"execution_count": 12,
288+
"id": "c27ef4fe-0bd6-453a-bbb6-43e273a87cf2",
289+
"metadata": {},
290+
"outputs": [
291+
{
292+
"data": {
293+
"text/plain": [
294+
"torch.Size([16, 11, 11, 768])"
295+
]
296+
},
297+
"execution_count": 12,
218298
"metadata": {},
219299
"output_type": "execute_result"
220300
}
221301
],
222302
"source": [
303+
"dltrain = dl.train_dataloader()\n",
304+
"batch = next(iter(dltrain))\n",
223305
"batch['embedding'].shape"
224306
]
225307
}

Diff for: src/earthtext/datamodules/chipmultilabel.py

+4
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ def __init__(self,
143143
embeddings_folder: str = None,
144144
patch_embeddings_folder: str = None,
145145
neighbor_embeddings_folder: str = None,
146+
neighborhood_radius: int = None,
146147
multilabel_threshold_osm_ohecount = None,
147148
multilabel_threshold_osm_ohearea = None,
148149
get_osm_strlabels = False,
@@ -170,6 +171,7 @@ def __init__(self,
170171
embeddings_folder = embeddings_folder,
171172
patch_embeddings_folder = patch_embeddings_folder,
172173
neighbor_embeddings_folder = neighbor_embeddings_folder,
174+
neighborhood_radius = neighborhood_radius,
173175
chip_transforms = chip_transforms,
174176
get_osm_strlabels = get_osm_strlabels,
175177
get_osm_ohecount = get_osm_ohecount,
@@ -192,6 +194,7 @@ def __init__(self,
192194
embeddings_folder = embeddings_folder,
193195
patch_embeddings_folder = patch_embeddings_folder,
194196
neighbor_embeddings_folder = neighbor_embeddings_folder,
197+
neighborhood_radius = neighborhood_radius,
195198
chip_transforms=chip_transforms,
196199
get_osm_strlabels = get_osm_strlabels,
197200
get_osm_ohecount = get_osm_ohecount,
@@ -214,6 +217,7 @@ def __init__(self,
214217
embeddings_folder = embeddings_folder,
215218
patch_embeddings_folder = patch_embeddings_folder,
216219
neighbor_embeddings_folder = neighbor_embeddings_folder,
220+
neighborhood_radius = neighborhood_radius,
217221
chip_transforms = chip_transforms,
218222
get_osm_strlabels = get_osm_strlabels,
219223
get_osm_ohecount = get_osm_ohecount,

0 commit comments

Comments
 (0)