Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Also a convenient [flat file](https://en.wikipedia.org/wiki/Flat-file_database)
[![Codefresh build status](https://g.codefresh.io/api/badges/pipeline/weiji14_marketplace/weiji14%2Fdeepbedmap%2Fdeepbedmap?type=cf-1)](https://g.codefresh.io/public/accounts/weiji14_marketplace/pipelines/weiji14/deepbedmap/deepbedmap)
[![Dependabot Status](https://api.dependabot.com/badges/status?host=github&repo=weiji14/deepbedmap)](https://dependabot.com)

![DeepBedMap DEM over entire Antarctic continent, EPSG:3031 projection](https://user-images.githubusercontent.com/23487320/55309232-739ef600-545d-11e9-9d09-688dfa1209fb.png)
![DeepBedMap DEM over entire Antarctic continent, EPSG:3031 projection](https://user-images.githubusercontent.com/23487320/60510321-6e0cb280-9ccf-11e9-8096-d2a32eb28e6c.png)

![DeepBedMap Pipeline](https://yuml.me/diagram/scruffy;dir:LR/class/[Data|Highres/Lowres/Misc]->[Preprocessing|data_prep.ipynb],[Preprocessing]->[Model-Training|srgan_train.ipynb],[Model-Training]->[Inference|deepbedmap.ipynb])

Expand Down
166 changes: 144 additions & 22 deletions data_prep.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1191,14 +1191,17 @@
" window_bounds: list,\n",
" padding: int = 0, # in projected coordinate system units\n",
" out_shape: tuple = None,\n",
" gapfill_raster_filepath: str = None,\n",
" gapfiller=None,\n",
") -> np.ndarray:\n",
" \"\"\"\n",
" Reads in raster and tiles them selectively.\n",
" Tiles will go according to list of window_bounds.\n",
" Reads in a raster and tiles them selectively according to\n",
" list of window_bounds in the form of (xmin, ymin, xmax, ymax).\n",
" Output shape can be set to e.g. (16,16) to resample input raster to\n",
" some desired shape/resolution.\n",
"\n",
" Gaps in the main raster can be filled by passing in an argument to gapfiller which\n",
" can either a raster filepath (str) or a number (float).\n",
"\n",
" >>> xr.DataArray(\n",
" ... data=np.flipud(m=np.diag(v=np.arange(8))).astype(dtype=np.float32),\n",
" ... coords={\"y\": np.linspace(7, 0, 8), \"x\": np.linspace(0, 7, 8)},\n",
Expand Down Expand Up @@ -1266,37 +1269,156 @@
" nan_grid_indexes = np.argwhere(mask.any(axis=(-3, -2, -1))).ravel()\n",
"\n",
" # Replace pixels from another raster if available, else raise error\n",
" if gapfill_raster_filepath is not None:\n",
" if gapfiller is not None:\n",
" print(f\"gapfilling ... \", end=\"\")\n",
" with xr.open_rasterio(gapfill_raster_filepath, chunks={}) as dataset2:\n",
" daarray_list2 = [\n",
" dataset2.interp_like(daarray_list[idx].squeeze(), method=\"linear\")\n",
" for idx in nan_grid_indexes\n",
" ]\n",
" daarray_stack2 = dask.array.ma.masked_values(\n",
" x=dask.array.stack(seq=daarray_list2), value=dataset2.nodatavals\n",
" )\n",
"\n",
" fill_tiles = (\n",
" dask.array.ma.getdata(daarray_stack2).compute().astype(dtype=np.float32)\n",
" )\n",
" mask2 = dask.array.ma.getmaskarray(daarray_stack2).compute()\n",
"\n",
" for i, array2 in enumerate(fill_tiles):\n",
" idx = nan_grid_indexes[i]\n",
" np.copyto(dst=out_tiles[idx], src=array2, where=mask[idx])\n",
" # assert not (mask[idx] & mask2[i]).any() # Ensure no NANs after gapfill\n",
" if isinstance(gapfiller, str): # gapfill using another raster\n",
" with xr.open_rasterio(gapfiller, chunks={}) as dataset2:\n",
" daarray_list2 = [\n",
" dataset2.interp_like(\n",
" daarray_list[idx].squeeze(), method=\"linear\"\n",
" )\n",
" for idx in nan_grid_indexes\n",
" ]\n",
" daarray_stack2 = dask.array.ma.masked_values(\n",
" x=dask.array.stack(seq=daarray_list2), value=dataset2.nodatavals\n",
" )\n",
"\n",
" fill_tiles = (\n",
" dask.array.ma.getdata(daarray_stack2)\n",
" .compute()\n",
" .astype(dtype=np.float32)\n",
" )\n",
" # mask2 = dask.array.ma.getmaskarray(daarray_stack2).compute()\n",
"\n",
" for i, array2 in enumerate(fill_tiles):\n",
" idx = nan_grid_indexes[i]\n",
" np.copyto(dst=out_tiles[idx], src=array2, where=mask[idx])\n",
" # assert not (mask[idx] & mask2[i]).any() # Ensure no NANs after gapfill\n",
"\n",
" elif isinstance(gapfiller, float): # gapfill using just a number\n",
" np.copyto(\n",
" dst=out_tiles,\n",
" src=np.full_like(a=out_tiles, fill_value=gapfiller),\n",
" where=mask,\n",
" )\n",
"\n",
" else:\n",
" for i in nan_grid_indexes:\n",
" daarray_list[i].plot()\n",
" plt.show()\n",
" print(f\"WARN: Tiles have missing data, try pass in gapfill_raster_filepath\")\n",
" print(\n",
" f\"WARN: Tiles have missing data, try passing in an input to 'gapfiller'\"\n",
" )\n",
"\n",
" print(\"done!\")\n",
" return out_tiles"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"def selective_tile_old(\n",
" filepath: str,\n",
" window_bounds: list,\n",
" padding: int = 0, # in projected coordinate system units\n",
" out_shape: tuple = None,\n",
" gapfill_raster_filepath: str = None,\n",
") -> np.ndarray:\n",
" \"\"\"\n",
" Reads in raster and tiles them selectively.\n",
" Tiles will go according to list of window_bounds.\n",
" Output shape can be set to e.g. (16,16) to resample input raster to\n",
" some desired shape/resolution.\n",
"\n",
" >>> xr.DataArray(\n",
" ... data=np.flipud(m=np.diag(v=np.arange(8))).astype(dtype=np.float32),\n",
" ... coords={\"y\": np.linspace(7, 0, 8), \"x\": np.linspace(0, 7, 8)},\n",
" ... dims=[\"y\", \"x\"],\n",
" ... ).to_netcdf(path=\"/tmp/tmp_st.nc\", mode=\"w\")\n",
" >>> selective_tile_old(\n",
" ... filepath=\"/tmp/tmp_st.nc\",\n",
" ... window_bounds=[(0.5, 0.5, 2.5, 2.5), (2.5, 1.5, 4.5, 3.5)],\n",
" ... )\n",
" Tiling: /tmp/tmp_st.nc ... done!\n",
" array([[[[0., 2.],\n",
" [1., 0.]]],\n",
" <BLANKLINE>\n",
" <BLANKLINE>\n",
" [[[3., 0.],\n",
" [0., 0.]]]], dtype=float32)\n",
" >>> os.remove(\"/tmp/tmp_st.nc\")\n",
" \"\"\"\n",
" array_list = []\n",
"\n",
" with rasterio.open(filepath) as dataset:\n",
" print(f\"Tiling: {filepath} ... \", end=\"\")\n",
" for window_bound in window_bounds:\n",
"\n",
" if padding > 0:\n",
" window_bound = (\n",
" window_bound[0] - padding, # minx\n",
" window_bound[1] - padding, # miny\n",
" window_bound[2] + padding, # maxx\n",
" window_bound[3] + padding, # maxy\n",
" )\n",
"\n",
" window = rasterio.windows.from_bounds(\n",
" *window_bound, transform=dataset.transform, precision=None\n",
" ).round_offsets()\n",
"\n",
" # Read the raster according to the crop window\n",
" array = dataset.read(\n",
" indexes=list(range(1, dataset.count + 1)),\n",
" masked=True,\n",
" window=window,\n",
" out_shape=out_shape,\n",
" )\n",
" assert array.ndim == 3 # check that we have shape like (1, height, width)\n",
" assert array.shape[0] == 1 # channel-first (assuming only 1 channel)\n",
" assert not 0 in array.shape # ensure no empty dimensions (invalid window)\n",
"\n",
" try:\n",
" assert not array.mask.any() # check that there are no NAN values\n",
" except AssertionError:\n",
" # Replace pixels from another raster if available, else raise error\n",
" if gapfill_raster_filepath is not None:\n",
" with rasterio.open(gapfill_raster_filepath) as dataset2:\n",
" window2 = rasterio.windows.from_bounds(\n",
" *window_bound, transform=dataset2.transform, precision=None\n",
" ).round_offsets()\n",
"\n",
" array2 = dataset2.read(\n",
" indexes=list(range(1, dataset2.count + 1)),\n",
" masked=True,\n",
" window=window2,\n",
" out_shape=array.shape[1:],\n",
" )\n",
"\n",
" np.copyto(\n",
" dst=array, src=array2, where=array.mask\n",
" ) # fill in gaps where mask is True\n",
"\n",
" # assert not array.mask.any() # ensure no NAN values after gapfill\n",
" else:\n",
" plt.imshow(array.data[0, :, :])\n",
" plt.show()\n",
" print(\n",
" f\"WARN: Tile has missing data, try passing in gapfill_raster_filepath\"\n",
" )\n",
"\n",
" # assert array.shape[1] == array.shape[2] # check that height==width\n",
" array_list.append(array.data.astype(dtype=np.float32))\n",
" print(\"done!\")\n",
"\n",
" return np.stack(arrays=array_list)"
]
},
{
"cell_type": "code",
"execution_count": 24,
Expand Down
159 changes: 137 additions & 22 deletions data_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,14 +601,17 @@ def selective_tile(
window_bounds: list,
padding: int = 0, # in projected coordinate system units
out_shape: tuple = None,
gapfill_raster_filepath: str = None,
gapfiller=None,
) -> np.ndarray:
"""
Reads in raster and tiles them selectively.
Tiles will go according to list of window_bounds.
Reads in a raster and tiles them selectively according to
list of window_bounds in the form of (xmin, ymin, xmax, ymax).
Output shape can be set to e.g. (16,16) to resample input raster to
some desired shape/resolution.

Gaps in the main raster can be filled by passing in an argument to gapfiller which
can either a raster filepath (str) or a number (float).

>>> xr.DataArray(
... data=np.flipud(m=np.diag(v=np.arange(8))).astype(dtype=np.float32),
... coords={"y": np.linspace(7, 0, 8), "x": np.linspace(0, 7, 8)},
Expand Down Expand Up @@ -676,37 +679,149 @@ def selective_tile(
nan_grid_indexes = np.argwhere(mask.any(axis=(-3, -2, -1))).ravel()

# Replace pixels from another raster if available, else raise error
if gapfill_raster_filepath is not None:
if gapfiller is not None:
print(f"gapfilling ... ", end="")
with xr.open_rasterio(gapfill_raster_filepath, chunks={}) as dataset2:
daarray_list2 = [
dataset2.interp_like(daarray_list[idx].squeeze(), method="linear")
for idx in nan_grid_indexes
]
daarray_stack2 = dask.array.ma.masked_values(
x=dask.array.stack(seq=daarray_list2), value=dataset2.nodatavals
)

fill_tiles = (
dask.array.ma.getdata(daarray_stack2).compute().astype(dtype=np.float32)
)
mask2 = dask.array.ma.getmaskarray(daarray_stack2).compute()

for i, array2 in enumerate(fill_tiles):
idx = nan_grid_indexes[i]
np.copyto(dst=out_tiles[idx], src=array2, where=mask[idx])
# assert not (mask[idx] & mask2[i]).any() # Ensure no NANs after gapfill
if isinstance(gapfiller, str): # gapfill using another raster
with xr.open_rasterio(gapfiller, chunks={}) as dataset2:
daarray_list2 = [
dataset2.interp_like(
daarray_list[idx].squeeze(), method="linear"
)
for idx in nan_grid_indexes
]
daarray_stack2 = dask.array.ma.masked_values(
x=dask.array.stack(seq=daarray_list2), value=dataset2.nodatavals
)

fill_tiles = (
dask.array.ma.getdata(daarray_stack2)
.compute()
.astype(dtype=np.float32)
)
# mask2 = dask.array.ma.getmaskarray(daarray_stack2).compute()

for i, array2 in enumerate(fill_tiles):
idx = nan_grid_indexes[i]
np.copyto(dst=out_tiles[idx], src=array2, where=mask[idx])
# assert not (mask[idx] & mask2[i]).any() # Ensure no NANs after gapfill

elif isinstance(gapfiller, float): # gapfill using just a number
np.copyto(
dst=out_tiles,
src=np.full_like(a=out_tiles, fill_value=gapfiller),
where=mask,
)

else:
for i in nan_grid_indexes:
daarray_list[i].plot()
plt.show()
print(f"WARN: Tiles have missing data, try pass in gapfill_raster_filepath")
print(
f"WARN: Tiles have missing data, try passing in an input to 'gapfiller'"
)

print("done!")
return out_tiles


# %%
def selective_tile_old(
filepath: str,
window_bounds: list,
padding: int = 0, # in projected coordinate system units
out_shape: tuple = None,
gapfill_raster_filepath: str = None,
) -> np.ndarray:
"""
Reads in raster and tiles them selectively.
Tiles will go according to list of window_bounds.
Output shape can be set to e.g. (16,16) to resample input raster to
some desired shape/resolution.

>>> xr.DataArray(
... data=np.flipud(m=np.diag(v=np.arange(8))).astype(dtype=np.float32),
... coords={"y": np.linspace(7, 0, 8), "x": np.linspace(0, 7, 8)},
... dims=["y", "x"],
... ).to_netcdf(path="/tmp/tmp_st.nc", mode="w")
>>> selective_tile_old(
... filepath="/tmp/tmp_st.nc",
... window_bounds=[(0.5, 0.5, 2.5, 2.5), (2.5, 1.5, 4.5, 3.5)],
... )
Tiling: /tmp/tmp_st.nc ... done!
array([[[[0., 2.],
[1., 0.]]],
<BLANKLINE>
<BLANKLINE>
[[[3., 0.],
[0., 0.]]]], dtype=float32)
>>> os.remove("/tmp/tmp_st.nc")
"""
array_list = []

with rasterio.open(filepath) as dataset:
print(f"Tiling: {filepath} ... ", end="")
for window_bound in window_bounds:

if padding > 0:
window_bound = (
window_bound[0] - padding, # minx
window_bound[1] - padding, # miny
window_bound[2] + padding, # maxx
window_bound[3] + padding, # maxy
)

window = rasterio.windows.from_bounds(
*window_bound, transform=dataset.transform, precision=None
).round_offsets()

# Read the raster according to the crop window
array = dataset.read(
indexes=list(range(1, dataset.count + 1)),
masked=True,
window=window,
out_shape=out_shape,
)
assert array.ndim == 3 # check that we have shape like (1, height, width)
assert array.shape[0] == 1 # channel-first (assuming only 1 channel)
assert not 0 in array.shape # ensure no empty dimensions (invalid window)

try:
assert not array.mask.any() # check that there are no NAN values
except AssertionError:
# Replace pixels from another raster if available, else raise error
if gapfill_raster_filepath is not None:
with rasterio.open(gapfill_raster_filepath) as dataset2:
window2 = rasterio.windows.from_bounds(
*window_bound, transform=dataset2.transform, precision=None
).round_offsets()

array2 = dataset2.read(
indexes=list(range(1, dataset2.count + 1)),
masked=True,
window=window2,
out_shape=array.shape[1:],
)

np.copyto(
dst=array, src=array2, where=array.mask
) # fill in gaps where mask is True

# assert not array.mask.any() # ensure no NAN values after gapfill
else:
plt.imshow(array.data[0, :, :])
plt.show()
print(
f"WARN: Tile has missing data, try passing in gapfill_raster_filepath"
)

# assert array.shape[1] == array.shape[2] # check that height==width
array_list.append(array.data.astype(dtype=np.float32))
print("done!")

return np.stack(arrays=array_list)


# %%
geodataframe = gpd.read_file("model/train/tiles_3031.geojson")
filepaths = geodataframe.grid_name.unique()
Expand Down
Loading