141 changes: 77 additions & 64 deletions data_prep.ipynb

Large diffs are not rendered by default.

56 changes: 36 additions & 20 deletions data_prep.py
@@ -30,6 +30,7 @@
 import shutil
 import sys
 import tarfile
+import tempfile
 import urllib
 import yaml
 import zipfile
@@ -306,6 +307,7 @@ def ascii_to_xyz(pipeline_file: str) -> pd.DataFrame:
         )
         for f in files
     )
+    df.dropna(axis="index", inplace=True)  # drop rows with NaN values
     df.reset_index(drop=True, inplace=True)  # reset index after concatenation

     ## Advanced table read with conversions
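Reviewer note: the new dropna works together with the existing reset_index because pd.concat keeps each input file's original index. A minimal pandas-only sketch (toy frames, not the real survey data):

```python
# Minimal sketch: concatenation repeats index labels and dropna leaves gaps,
# so the index must be rebuilt afterwards.
import numpy as np
import pandas as pd

df1 = pd.DataFrame({"x": [1.0, 2.0], "y": [3.0, 4.0], "z": [5.0, np.nan]})
df2 = pd.DataFrame({"x": [6.0, 7.0], "y": [8.0, 9.0], "z": [10.0, 11.0]})

df = pd.concat(objs=[df1, df2])  # index is now 0, 1, 0, 1
df.dropna(axis="index", inplace=True)  # drops the NaN row, leaving index 0, 0, 1
df.reset_index(drop=True, inplace=True)  # clean 0, 1, 2 index again
print(df)
```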
@@ -354,19 +356,32 @@ def ascii_to_xyz(pipeline_file: str) -> pd.DataFrame:
 # ![Clean XYZ Table to Raster Grid via interpolation function](https://yuml.me/diagram/scruffy;dir:LR/class/[Clean-XYZ-Table|*.xyz]->[Interpolation-Function],[Interpolation-Function]->[Raster-Grid|*.tif/*.nc])

 # %%
-def get_region(xyz_data: pd.DataFrame) -> str:
+def get_region(xyz_data: pd.DataFrame, round_increment: int = 250) -> str:
     """
-    Gets the bounding box region of an xyz pandas.DataFrame in string
-    format xmin/xmax/ymin/ymax rounded to 5 decimal places.
-    Used for the -R 'region of interest' parameter in GMT.
-
-    >>> xyz_data = pd.DataFrame(np.random.RandomState(seed=42).rand(30).reshape(10, 3))
+    Gets an extended bounding box region for points in an xyz pandas.DataFrame with
+    columns x, y, and z. The coordinates will be rounded to values specified by the
+    round_increment parameter. Implementation uses gmt.info with the -I (increment)
+    setting, see also https://gmt.soest.hawaii.edu/doc/latest/gmtinfo.html#i
+
+    The output region is returned in a string format 'xmin/xmax/ymin/ymax' directly
+    usable as the -R 'region of interest' parameter in GMT. Indeed, the rounding is
+    specifically optimized to give grid dimensions for fastest results in programs like
+    GMT surface.
+
+    >>> xyz_data = pd.DataFrame(
+    ...     10000 * np.random.RandomState(seed=42).rand(30).reshape(10, 3),
+    ...     columns=["x", "y", "z"],
+    ... )
     >>> get_region(xyz_data=xyz_data)
-    '0.05808/0.83244/0.02058/0.95071'
+    '-250/9500/0/9750'
     """
-    xmin, ymin, _ = xyz_data.min(axis="rows")
-    xmax, ymax, _ = xyz_data.max(axis="rows")
-    return f"{xmin:.5f}/{xmax:.5f}/{ymin:.5f}/{ymax:.5f}"
+    assert (xyz_data.columns == pd.Index(data=["x", "y", "z"], dtype="object")).all()
+
+    with tempfile.NamedTemporaryFile(suffix=".csv") as tmpfile:
+        xyz_data.to_csv(tmpfile.name, header=False, index=False)
+        region = gmt.info(fname=tmpfile.name, I=f"s{round_increment}").strip()[2:]
+
+    return region


 # %%
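Reviewer note: to poke at the new rounding outside the function, a rough standalone sketch follows. It assumes the PyGMT version pinned in this repo, where the module is imported as gmt and gmt.info(fname=..., I=...) returns the raw '-R...' region string as shown in the diff above; the CSV path is made up.

```python
# Sketch under the assumptions above: dump points to CSV, then ask GMT for a
# region rounded via -Is250 (increments tuned for gmt surface grid sizes).
import numpy as np
import pandas as pd
import gmt  # assumption: PyGMT as pinned in this repo, imported as `gmt`

xyz_data = pd.DataFrame(
    10000 * np.random.RandomState(seed=42).rand(30).reshape(10, 3),
    columns=["x", "y", "z"],
)
xyz_data.to_csv("/tmp/points.csv", header=False, index=False)  # hypothetical path
raw = gmt.info(fname="/tmp/points.csv", I="s250")  # e.g. "-R-250/9500/0/9750"
print(raw.strip()[2:])  # strip the leading "-R" to get '-250/9500/0/9750'
```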
@@ -381,18 +396,19 @@ def xyz_to_grid(
     """
     Performs interpolation of x, y, z point data to a raster grid.

-    >>> xyz_data = 1000*pd.DataFrame(np.random.RandomState(seed=42).rand(60).reshape(20, 3))
+    >>> xyz_data = pd.DataFrame(
+    ...     600 * np.random.RandomState(seed=42).rand(60).reshape(20, 3),
+    ...     columns=["x", "y", "z"],
+    ... )
     >>> region = get_region(xyz_data=xyz_data)
     >>> grid = xyz_to_grid(xyz_data=xyz_data, region=region, spacing=250)
     >>> grid.to_array().shape
-    (1, 5, 5)
+    (1, 4, 4)
     >>> grid.to_array().values
-    array([[[403.17618 , 544.92535 , 670.7824  , 980.75055 , 961.47723 ],
-            [379.0757  , 459.26407 , 314.38297 , 377.78555 , 546.0469  ],
-            [450.67664 , 343.26    ,  88.391594, 260.10492 , 452.3337  ],
-            [586.09906 , 469.74008 , 216.8168  , 486.9802  , 642.2116  ],
-            [451.4794  , 652.7244  , 325.77896 , 879.8973  , 916.7921  ]]],
-          dtype=float32)
+    array([[[135.95927, 294.8152 , 615.32513, 706.4135 ],
+            [224.46773, 191.75693, 298.87057, 521.85254],
+            [242.41916, 149.34332, 430.84137, 603.4236 ],
+            [154.60516, 194.24237, 447.6185 , 645.13574]]], dtype=float32)
     """
     ## Preprocessing with blockmedian
     with gmt.helpers.GMTTempFile(suffix=".txt") as tmpfile:
@@ -414,7 +430,7 @@ def xyz_to_grid(
         region=region,
         spacing=f"{spacing}+e",
         T=tension,
-        V="",
+        V="n",  # normal verbosity: produce only fatal error messages
         M=f"{mask_cell_radius}c",
     )

@@ -435,7 +451,7 @@ def xyz_to_grid(
     grid_dict[name] = xyz_to_grid(
         xyz_data=xyz_data, region=region, outfile=f"highres/{name}.nc"
     )
-    print(f"done! {grid_dict[name].to_array().shape}")
+    print(f"done! {grid_dict[name].z.shape}")

 # %% [markdown]
 # ### 2.3 Plot raster grids
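Reviewer note on the print change: grid_dict[name].z.shape reports the bare 2-D grid, whereas Dataset.to_array() stacks data variables into an extra leading dimension. A synthetic xarray-only sketch:

```python
# Synthetic sketch: a Dataset with a single "z" variable, as the gmt surface
# output is assumed to be here.
import numpy as np
import xarray as xr

ds = xr.Dataset({"z": (("y", "x"), np.zeros(shape=(4, 4)))})
print(ds.to_array().shape)  # (1, 4, 4) -- extra "variable" dimension
print(ds.z.shape)  # (4, 4) -- just the grid, which is what gets printed now
```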
477 changes: 214 additions & 263 deletions deepbedmap.ipynb

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions deepbedmap.py
@@ -108,11 +108,10 @@ def get_image_with_bounds(filepaths: list, indexers: dict = None) -> xr.DataArray


 # %%
-test_filepaths = ["highres/2007tx", "highres/2010tr", "highres/istarxx"]
+test_filepaths = ["highres/2007tx"]  # , "highres/2010tr", "highres/istarxx"]
 groundtruth = get_image_with_bounds(
     filepaths=[f"{t}.nc" for t in test_filepaths],
-    indexers={"y": slice(0, -1), "x": slice(0, -1)},  # for 2007tx, 2010tr and istarxx
-    # indexers={"y": slice(1, -1), "x": slice(1, -1)},  # for 2007tx only
+    # indexers={"y": slice(0, -1), "x": slice(0, -1)},  # for 2007tx, 2010tr and istarxx
 )

 # %% [markdown]
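Reviewer note: the indexers dict that was swapped for a comment trims the grid edges; assuming it is forwarded to xarray's isel (the body of get_image_with_bounds is not shown in this hunk), the two slice variants behave like this toy example:

```python
# Toy sketch: slice(0, -1) drops the last row/column, slice(1, -1) also drops
# the first. Array and dimension names are illustrative only.
import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(25).reshape(5, 5), dims=("y", "x"))
print(da.isel(y=slice(0, -1), x=slice(0, -1)).shape)  # (4, 4)
print(da.isel(y=slice(1, -1), x=slice(1, -1)).shape)  # (3, 3)
```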
12 changes: 8 additions & 4 deletions features/environment.py
@@ -84,10 +84,13 @@ def _quick_download_lowres_misc_datasets():
     print("done!")


-def _download_deepbedmap_model_weights_from_comet():
+def _download_deepbedmap_model_weights_from_comet(experiment_key: str = "latest"):
     """
-    Download latest neural network model weights from Comet.ML
-    Uses their Python REST API class at https://www.comet.ml/docs/python-sdk/API/
+    Download DeepBedMap's Generator neural network model weights from Comet.ML
+    By default, the model weights from the latest experimental run are downloaded
+    Passing in an alternative experiment_key hash will download that one instead
+
+    Uses Comet.ML's Python REST API class at https://www.comet.ml/docs/python-sdk/API/
     Requires the COMET_REST_API_KEY environment variable to be set in the .env file
     """
     comet_api = comet_ml.API(
@@ -99,7 +102,8 @@ def _download_deepbedmap_model_weights_from_comet():
     df = pd.io.json.json_normalize(data=project.data["experiments"].values())

     # Get the key to the latest DeepBedMap experiment on Comet ML
-    experiment_key = df.loc[df["start_server_timestamp"].idxmax()].experiment_key
+    if experiment_key == "latest":
+        experiment_key = df.loc[df["start_server_timestamp"].idxmax()].experiment_key
     experiment = comet_api.get(
         workspace="weiji14", project="deepbedmap", experiment=experiment_key
     )
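Reviewer note: the new parameter uses "latest" as a sentinel, resolved against the experiment table's newest start timestamp; any other hash passes through unchanged. A self-contained sketch of just that resolution step, with made-up experiment metadata and a hypothetical helper name:

```python
# Sentinel-resolution logic in isolation; any non-"latest" key passes through.
import pandas as pd

def resolve_experiment_key(df: pd.DataFrame, experiment_key: str = "latest") -> str:
    if experiment_key == "latest":
        # pick the experiment whose server-side start timestamp is newest
        experiment_key = df.loc[df["start_server_timestamp"].idxmax()].experiment_key
    return experiment_key

df = pd.DataFrame(
    {
        "experiment_key": ["aaa111", "bbb222"],
        "start_server_timestamp": [1_560_000_000, 1_570_000_000],
    }
)
print(resolve_experiment_key(df=df))  # bbb222, the newer run
print(resolve_experiment_key(df=df, experiment_key="aaa111"))  # aaa111, verbatim
```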
6 changes: 4 additions & 2 deletions features/steps/test_data_prep.py
@@ -45,7 +45,8 @@ def process_data_through_pipeline_and_get_output(context, pipeline_file):
 @when("interpolate the xyz data table to {output_file}")
 def interpolate_xyz_data_to_grid(context, output_file):
     region = context.data_prep.get_region(context.xyz_data)
-    context.outfile = os.path.join("highres", output_file)
+    context.outfile = os.path.join("/tmp/highres", output_file)
+    os.makedirs(os.path.dirname(context.outfile), exist_ok=True)
     context.data_prep.xyz_to_grid(
         xyz_data=context.xyz_data, region=region, outfile=context.outfile
     )
@@ -60,7 +61,8 @@ def open_raster_grid_to_check(context):
 @given("a big {dataset_type} raster grid {raster_grid}")
 def get_a_raster_grid(context, dataset_type, raster_grid):
     context.raster_grid = raster_grid
-    context.filepath = os.path.join(dataset_type, raster_grid)
+    context.filepath = os.path.join("/tmp", dataset_type, raster_grid)
+    os.makedirs(os.path.dirname(context.filepath), exist_ok=True)
     url = (
         f"https://github.com/weiji14/deepbedmap/releases/download/v0.7.0/{raster_grid}"
     )
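Reviewer note: both steps now write under /tmp and pre-create the parent directory, which keeps repeated test runs from failing on a missing folder. The idiom in isolation (hypothetical path):

```python
# Ensure-parent-directory idiom; exist_ok=True makes re-runs idempotent.
import os

outfile = os.path.join("/tmp/highres", "2007tx.nc")  # hypothetical output file
os.makedirs(os.path.dirname(outfile), exist_ok=True)  # creates /tmp/highres
```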
2 changes: 1 addition & 1 deletion misc/README.md
@@ -6,6 +6,6 @@ Filename|Location|Resolution|Literature Citation|Data Citation
 ---|---|---|---|---
 Arthern_accumulation_bedmap2_grid1.tif|Antarctica|1000m|[Arthern2006Accumulation](https://doi.org/10.1029/2004JD005667)|
 lisa750_2013182_2017120_0000_0400_vv_v1.tif|Antarctica|750m|[Fahnestock2019LISA](https://doi.org/10.1016/j.rse.2015.11.023)|[DOI](https://doi.org/10.7265/nxpc-e997)
-3 *.dbf files|Antarctica|nan|[Mouginot2017MEASURES](https://doi.org/10.1126/science.1235798)|[DOI](https://doi.org/10.5067/AXE4121732AD)
+3 *.shx files|Antarctica|nan|[Mouginot2017MEASURES](https://doi.org/10.1126/science.1235798)|[DOI](https://doi.org/10.5067/AXE4121732AD)
 2 *.tif files|Antarctica|100m|[Noh2018REMA](https://doi.org/10.1016/j.isprsjprs.2017.12.008)|[DOI](https://doi.org/10.7910/DVN/SAIK8B)
 MEaSUREs_IceFlowSpeed_450m.tif|Antarctica|450m|[Rignot2011MEASURES](https://doi.org/10.1126/science.1208336)|[DOI](https://doi.org/10.5067/D7GK8F5J8M8R)