Skip to content

Commit c5c942a

Browse files
author
trchudley
committed
batch processing script
1 parent 24e0451 commit c5c942a

9 files changed

+200
-40
lines changed

README.md

+2
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ Easy filtering of ocean/mélange, allowing for assessment of calving fronts and
4444
</a>
4545
</p>
4646

47+
An example batch download and coregistration script is also included in the `batch` directory as a jumping-off point for large-scale projects.
4748

4849
# Cite
4950

@@ -409,6 +410,7 @@ If you have requested multiple variables, the output of the `.pdt.terrain()` is
409410

410411
| Version | Date | Notes |
411412
| ------- | ---- | ----- |
413+
| 0.4 | November 2023 | Made batch processing script available as an example. |
412414
| 0.3 | September 2023 | Aligned search function with the new PGC-provided GeoParquet files |
413415
| 0.2 | August 2023 | Update `load.mosaic()` function to include the new ArcticDEM mosaic v4.1 |
414416
| 0.1 | May 2023 | Initial release |
+128
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
"""
2+
This script automatically downloads and coregisters a complete history of ArcticDEM or
3+
REMA strips within a given AOI. The strips will be created within a named directory in
4+
the current working directory.
5+
6+
The script will download the 2 m ArcticDEM or REMA mosaic for coregistration, and then
7+
loop through the strip record, downloading and coregistering against stable ground
8+
identified within the BedMachine mask.
9+
10+
Note: this will only work in regions of Greenland and Antarctica where bare rock
11+
is exposed. If you require coregistration in other regions, please drop me an email -
12+
I would be happy to help.
13+
14+
Tom Chudley | [email protected]
15+
Durham University
16+
November 2023
17+
"""
18+
19+
import os
20+
import rioxarray as rxr
21+
import numpy as np
22+
import pdemtools as pdt
23+
24+
# ----------------------------------------------------------------------------------- #
25+
# EDIT THIS SECTION TO SPECIFY DEPENDENT FILES, THE REGION, AND BOUNDS OF INTEREST
26+
# ----------------------------------------------------------------------------------- #
27+
28+
# Filepath to ArcticDEM parquet file -- download from https://data.pgc.umn.edu/elev/dem/setsm/ArcticDEM/indexes/ArcticDEM_Strip_Index_latest_gpqt.zip
29+
index_fpath = ".../ArcticDEM_Strip_Index_s2s041_gpqt.parquet"
30+
31+
# Filepath to BedMachine v5 netcdf file -- download from https://data.pgc.umn.edu/elev/dem/setsm/ArcticDEM/indexes/ArcticDEM_Strip_Index_latest_gpqt.zip
32+
bm_fpath = ".../data/bedmachine_5/BedMachineGreenland-v5.nc"
33+
34+
# The name of your study area - this will be used to name output files and directories
35+
region = "isunguatasermia"
36+
37+
# "arcticdem" or "rema"
38+
dataset = "arcticdem"
39+
40+
# the bounds of your study area, in EPSG:3413 for ArcticDEM or EPSG:3031 for REMA
41+
xmin, ymin, xmax, ymax = -247000, -2500000, -212000, -2487000
42+
43+
# Parameters with which to filter ArcticDEM dataset. Note that you may wish to add
44+
# more or less - feel free to modify the search function at line 73.
45+
dates = "20010101/20231231"
46+
baseline_max_hours = 24
47+
min_aoi_frac = 0.1
48+
49+
# ----------------------------------------------------------------------------------- #
50+
# END OF EDITABLE PARAMETER SECTION
51+
# ----------------------------------------------------------------------------------- #
52+
53+
# define AOI
54+
bounds = (xmin, ymin, xmax, ymax)
55+
56+
print(f"Downloading data for {region}:")
57+
58+
# Create output directory
59+
outdir = f"{region}_data"
60+
if not os.path.exists(outdir):
61+
os.mkdir(outdir)
62+
63+
# Get reference DEM (ArcticDEM mosaic) if it doesn't already exist
64+
reference_dem_fpath = os.path.join(outdir, f"{region}_arcticdem_mosaic_2m.tif")
65+
66+
if not os.path.exists(reference_dem_fpath):
67+
print("\nDownloading reference DEM...")
68+
69+
reference_dem = pdt.load.mosaic(
70+
dataset=dataset, # must be `arcticdem` or `rema`
71+
resolution=2, # must be 2, 10, or 32
72+
bounds=bounds, # (xmin, ymin, xmax, ymax) or shapely geometry
73+
version="v4.1", # optional: desired version (defaults to most recent)
74+
)
75+
reference_dem.rio.to_raster(
76+
reference_dem_fpath, compress="ZSTD", predictor=3, zlevel=1
77+
)
78+
79+
else:
80+
print("\nLoading reference DEM...")
81+
reference_dem = pdt.load.from_fpath(
82+
os.path.join(outdir, f"{region}_arcticdem_mosaic_2m.tif"), bounds=bounds
83+
)
84+
85+
reference_dem = reference_dem.squeeze()
86+
bedrock_mask = pdt.data.bedrock_mask_from_bedmachine(bm_fpath, reference_dem)
87+
88+
# Search for DEM strips
89+
print("\nSearching for DEM strips...")
90+
gdf = pdt.search(
91+
index_fpath,
92+
bounds,
93+
dates=dates,
94+
# months=[6, 7, 8, 9],
95+
# years=[2019],
96+
baseline_max_hours=baseline_max_hours,
97+
# sensors=["WV03", "WV02", "WV01"],
98+
# accuracy=2,
99+
min_aoi_frac=min_aoi_frac,
100+
)
101+
gdf = gdf.sort_values("acqdate1")
102+
103+
n_strips = len(gdf)
104+
105+
print(f"{n_strips} strips found")
106+
107+
i = 0
108+
109+
print("\nDownloading DEM strips...")
110+
for _, row in gdf.iterrows():
111+
date = row.acqdate1.date()
112+
date_str = date.strftime("%Y%m%d")
113+
dem_id = row.dem_id
114+
115+
out_fpath = os.path.join(outdir, f"{date_str}_{dem_id}_coreg.tif")
116+
117+
if not os.path.exists(out_fpath):
118+
print(f"\nDownloading {i}/{n_strips} {os.path.basename(out_fpath)}...")
119+
dem = pdt.load.from_search(row, bounds=bounds, bitmask=True)
120+
dem.compute() # rioxarray uses lazy evaluation, so we can force the download using the `.compute()` function.
121+
dem = dem.rio.pad_box(*bounds, constant_values=np.nan)
122+
dem = dem.pdt.coregister(reference_dem, bedrock_mask, max_horiz_offset=50)
123+
dem.rio.to_raster(out_fpath, compress="ZSTD", predictor=3, zlevel=1)
124+
del dem
125+
126+
i += 1
127+
128+
print("Finished")

notebooks/get_icebergs.ipynb

+3-3
Large diffs are not rendered by default.

notebooks/mosaic_and_terrain.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@
229229
},
230230
"outputs": [],
231231
"source": [
232-
"bedmachine_fpath = '/Users/pgwc34/Library/CloudStorage/OneDrive-DurhamUniversity/data/bedmachine_5/BedMachineGreenland-v5.nc'\n",
232+
"bedmachine_fpath = '.../BedMachineGreenland-v5.nc'\n",
233233
"\n",
234234
"geoid = pdt.data.geoid_from_bedmachine(bedmachine_fpath, dem)"
235235
]

notebooks/strip_search_and_dem_difference.ipynb

+10-4
Large diffs are not rendered by default.

src/pdemtools/_accessor.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,9 @@ def coregister(
8282
reference: DataArray,
8383
stable_mask: Optional[DataArray] = None,
8484
return_stats: Optional[bool] = False,
85-
max_horiz_offset: float = 15,
85+
max_horiz_offset: float = 50,
8686
rmse_step_thresh: float = -0.001,
8787
max_iterations: int = 5,
88-
8988
) -> DataArray:
9089
"""
9190
Coregisters the scene against a reference DEM based on the Nuth and Kääb (2011)
@@ -111,9 +110,10 @@ def coregister(
111110
residuals
112111
"""
113112

114-
if geospatial_match(self._obj, reference) == False:
113+
check_match = geospatial_match(self._obj, reference, return_info=True)
114+
if not check_match == True:
115115
raise ValueError(
116-
"Input DEM and reference DEM do not share geospatial information"
116+
f"Input DEM and reference DEM do not share geospatial information: {check_match}. Consider padding (`.rio.pad_box`) or reprojecting (`.rio.repoject_match`) DEMs."
117117
)
118118

119119
if stable_mask is None:

src/pdemtools/_coreg.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def coregisterdems(
2525
dem2,
2626
mask,
2727
res,
28-
max_horiz_offset=15,
28+
max_horiz_offset=50,
2929
rmse_step_thresh=-0.001,
3030
max_iterations=5,
3131
):
@@ -73,7 +73,10 @@ def coregisterdems(
7373

7474
# Break loop if conditions reached
7575
if np.any(np.abs(pn[1:]) > max_horiz_offset):
76-
print(f"Maximum horizontal offset ({max_horiz_offset}) exceeded")
76+
print(
77+
f"Maximum horizontal offset ({max_horiz_offset}) exceeded."
78+
"Consider raising the threshold if offsets are large."
79+
)
7780
return_meddz = True
7881
break
7982

@@ -122,7 +125,7 @@ def coregisterdems(
122125
if rmse_step > rmse_step_thresh or np.isnan(d0):
123126
print(
124127
"RMSE step in this iteration ({:.5f}) is above threshold ({}), "
125-
"stopping and returning values of prior iteration".format(
128+
"stopping and returning values of prior iteration. ".format(
126129
rmse_step, rmse_step_thresh
127130
)
128131
)
@@ -278,7 +281,7 @@ def interp2_gdal(X, Y, Z, Xi, Yi, interp_str, extrapolate=False, oob_val=np.nan)
278281
def dtype_np2gdal(dtype_np):
279282
# TODO: Write docstring.
280283

281-
if dtype_np == bool: #np.bool:
284+
if dtype_np == bool: # np.bool:
282285
promote_dtype = np.uint8
283286
elif dtype_np == np.int8:
284287
promote_dtype = np.int16

src/pdemtools/_utils.py

+18-9
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@ def get_resolution(xds: DataArray) -> float:
2525
return abs(resolutions[0])
2626

2727

28-
def geospatial_match(rxd_1: DataArray, rxd_2: DataArray) -> bool:
28+
def geospatial_match(
29+
rxd_1: DataArray, rxd_2: DataArray, return_info: bool = False
30+
) -> bool:
2931
"""Check whether two (rio)xarray DataArrays or Datasets have the same geospatial
3032
information.
3133
@@ -38,17 +40,24 @@ def geospatial_match(rxd_1: DataArray, rxd_2: DataArray) -> bool:
3840
:rtype: bool
3941
"""
4042

41-
matches = [
42-
rxd_1.rio.shape == rxd_2.rio.shape,
43-
rxd_1.rio.resolution() == rxd_2.rio.resolution(),
44-
rxd_1.rio.bounds() == rxd_2.rio.bounds(),
45-
rxd_1.rio.crs == rxd_2.rio.crs,
46-
]
43+
failed = []
4744

48-
if all(matches):
45+
if not rxd_1.rio.shape == rxd_2.rio.shape:
46+
failed.append("shape")
47+
if not rxd_1.rio.resolution() == rxd_2.rio.resolution():
48+
failed.append("resolution")
49+
if not rxd_1.rio.bounds() == rxd_2.rio.bounds():
50+
failed.append("bounds")
51+
if not rxd_1.rio.crs == rxd_2.rio.crs:
52+
failed.append("crs")
53+
54+
if len(failed) == 0:
4955
return True
5056
else:
51-
return False
57+
if return_info = True:
58+
return failed
59+
else:
60+
return False
5261

5362

5463
def clip(

src/pdemtools/load.py

+27-15
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from ._utils import clip
1919

2020
# arctic and rema valid versions for STAC retrival
21-
VERSIONS = {'arcticdem': ['v3.0', 'v4.1'], 'rema': ['v2.0']}
21+
VERSIONS = {"arcticdem": ["v3.0", "v4.1"], "rema": ["v2.0"]}
2222

2323
# filenames of mosaic indexes in ./src/pdemtools/mosaic_index directory
2424
ARCTICDEM_V3_INDEX_FNAME = "ArcticDEM_Mosaic_Index_v3_gpkg.gpkg"
@@ -263,37 +263,47 @@ def mosaic(
263263
# sanity check that datset and versioning is correct versioning is valid for selected dataset
264264
dataset = dataset.lower()
265265
if dataset not in VERSIONS.keys():
266-
raise ValueError(f'Dataset must be one of {VERSIONS.keys}. Currently `{dataset}`.')
266+
raise ValueError(
267+
f"Dataset must be one of {VERSIONS.keys}. Currently `{dataset}`."
268+
)
267269
if version == None: # pick the most recent dataset
268270
version = VERSIONS[dataset][-1]
269-
else:
271+
else:
270272
if version not in VERSIONS[dataset]:
271-
raise ValueError(f'Version of {dataset} must be one of {VERSIONS[dataset]}. Currently `{version}`')
273+
raise ValueError(
274+
f"Version of {dataset} must be one of {VERSIONS[dataset]}. Currently `{version}`"
275+
)
272276

273277
# get resolution as str
274278
if type(resolution) == int:
275279
resolution = f"{resolution}m"
276280

277281
# check if valid resolution
278282
if resolution not in VALID_MOSAIC_RES:
279-
raise ValueError(f"Resolution must be one of {VALID_MOSAIC_RES}. Currently `{resolution}`")
283+
raise ValueError(
284+
f"Resolution must be one of {VALID_MOSAIC_RES}. Currently `{resolution}`"
285+
)
280286

281287
# Sanitise shapely geometry to bounds tuple
282288
if type(bounds) == Polygon:
283289
bounds = bounds.bounds
284290

285291
# get dataset version
286-
if dataset == "arcticdem" and version == 'v3.0':
292+
if dataset == "arcticdem" and version == "v3.0":
287293
layer = ARCTICDEM_V3_INDEX_2M_LAYER_NAME
288-
elif dataset == 'arcticdem' and version =='v4.1':
294+
elif dataset == "arcticdem" and version == "v4.1":
289295
layer = ARCTICDEM_V4_INDEX_2M_LAYER_NAME
290-
elif dataset == "rema" and version == 'v2.0':
296+
elif dataset == "rema" and version == "v2.0":
291297
layer = REMA_V2_INDEX_2M_LAYER_NAME
292298
else:
293-
raise ValueError("Cannot retrive internal index filepath for specified dataset and version.")
299+
raise ValueError(
300+
"Cannot retrive internal index filepath for specified dataset and version."
301+
)
294302

295303
# Load tiles that intersect with AOI
296-
tiles = gpd.read_file(_get_index_fpath(dataset, version=version), layer=layer, bbox=bounds)
304+
tiles = gpd.read_file(
305+
_get_index_fpath(dataset, version=version), layer=layer, bbox=bounds
306+
)
297307

298308
if len(tiles) < 1:
299309
raise ValueError(
@@ -339,14 +349,16 @@ def _get_index_fpath(
339349
"""
340350

341351
# get dataset version
342-
if dataset == "arcticdem" and version == 'v3.0':
352+
if dataset == "arcticdem" and version == "v3.0":
343353
fname = ARCTICDEM_V3_INDEX_FNAME
344-
elif dataset == 'arcticdem' and version =='v4.1':
354+
elif dataset == "arcticdem" and version == "v4.1":
345355
fname = ARCTICDEM_V4_INDEX_FNAME
346-
elif dataset == "rema" and version == 'v2.0':
356+
elif dataset == "rema" and version == "v2.0":
347357
fname = REMA_V2_INDEX_FNAME
348358
else:
349-
raise ValueError("Cannot retrive internal index filepath for specified dataset and version.")
359+
raise ValueError(
360+
"Cannot retrive internal index filepath for specified dataset and version."
361+
)
350362

351363
return resources.files("pdemtools.mosaic_index").joinpath(fname)
352364

@@ -362,7 +374,7 @@ def _aws_link(
362374
`PREFIX`, construct the filepath of the relevant ArcticDEM or REMA mosaic tile.
363375
"""
364376
# Construct appropriate suffix, considering ArcticDEM v3.0's alternate naming scheme
365-
if dataset == "arcticdem" and version=="v3.0":
377+
if dataset == "arcticdem" and version == "v3.0":
366378
suffix = f"_{resolution}_{version}_reg_dem.tif"
367379
else:
368380
suffix = f"_{resolution}_{version}_dem.tif"

0 commit comments

Comments
 (0)