-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Meteobase ASCII read function (#159)
* add reader for meteobase zip files * add test for reading zipfile from meteobase * Update __init__.py add meteobase * Update meteobase.py fix | character * Update meteobase.py fix if statement * Update meteobase.py fix redefining built-in 'type' * fix test * Revert "fix test" This reverts commit e651df1. * fix test using os :)) * Create Meteobase_ASCII_test.zip * Revert "fix test" This reverts commit cb4c60c. * Update meteobase.py codacy fix * fix hour bug get_datetime_from_fname function only search name with removed date * rename test_015 to test_017 * Update meteobase.py use zeros instead of empty * Replace np.datetime64 by pd.Timestamp --------- Co-authored-by: Ruben Caljé <[email protected]>
- Loading branch information
1 parent
187472c
commit 8d6031f
Showing
4 changed files
with
273 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,14 @@ | ||
from . import ahn, bgt, brp, geotop, jarkus, knmi, regis, rws, waterboard, webservices | ||
from . import ( | ||
ahn, | ||
bgt, | ||
brp, | ||
geotop, | ||
jarkus, | ||
knmi, | ||
meteobase, | ||
regis, | ||
rws, | ||
waterboard, | ||
webservices, | ||
) | ||
from .regis import get_regis |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,252 @@ | ||
import re | ||
from pandas import Timestamp | ||
from enum import Enum | ||
from io import FileIO | ||
from pathlib import Path | ||
from typing import Dict, List, Optional, Tuple, Union | ||
from zipfile import ZipFile | ||
|
||
import numpy as np | ||
from xarray import DataArray | ||
|
||
|
||
class MeteobaseType(Enum): | ||
"""Enum class to couple folder names to observation type (from in LEESMIJ.txt)""" | ||
|
||
NEERSLAG = "Neerslagradargegevens in Arc/Info-formaat." | ||
MAKKINK = "Verdampingsgegevens volgens Makkink." | ||
PENMAN = "Verdampingsgegevens volgens Penman-Monteith." | ||
EVAPOTRANSPIRATIE = "Actuele evapotranspiratie volgens SATDATA 3.0." | ||
VERDAMPINGSTEKORT = "Verdampingstekort (Epot - Eact) volgens SATDATA 3.0." | ||
|
||
|
||
def read_leesmij(fo: FileIO) -> Dict[str, Dict[str, str]]: | ||
"""Read LEESMIJ.TXT file | ||
Parameters | ||
---------- | ||
fo : FileIO | ||
File object | ||
Returns | ||
------- | ||
Dict[str, Dict[str, str]] | ||
Dicionary with metadata per observation type | ||
""" | ||
meta = {} # meta dict | ||
submeta = {} # 1 meta dict per gegevens | ||
line = str(fo.readline(), encoding="utf-8") | ||
while line: | ||
if any(x for x in [e.value for e in MeteobaseType] if x in line): | ||
mtype = line.strip() | ||
submeta["type"] = mtype | ||
meta_idx = MeteobaseType(mtype).name | ||
elif ":" in line: # regel met metadata | ||
l1, l2 = line.split(":") | ||
if "coordinaat" in l1: | ||
submeta[l1] = float(l2.strip()) | ||
else: | ||
submeta[l1] = l2.strip() | ||
elif len(line) == 2: # lege regel | ||
meta[meta_idx] = submeta # sla submeta op in meta | ||
submeta = {} | ||
line = str(fo.readline(), encoding="utf-8") | ||
return meta | ||
|
||
|
||
def get_timestamp_from_fname(fname: str) -> Timestamp: | ||
"""Get the Timestamp from a filename (with some assumptions about the formatting)""" | ||
datestr = re.search("([0-9]{8})", fname) # assumes YYYYMMDD | ||
if datestr is not None: | ||
match = datestr.group(0) | ||
year = int(match[0:4]) | ||
month = int(match[4:6]) | ||
day = int(match[6:8]) | ||
|
||
hour = 0 | ||
fname_wo_date = fname.replace(match, "") | ||
hourstr = re.search("(_[0-9]{2})", fname_wo_date) # assumes _HH | ||
if hourstr is not None: | ||
match = hourstr.group(0) | ||
hour = int(match.replace("_", "")) | ||
|
||
dtime = Timestamp(year=year, month=month, day=day, hour=hour) | ||
return dtime | ||
|
||
|
||
def read_ascii(fo: FileIO) -> Union[np.ndarray, dict]: | ||
"""Read Esri ASCII raster format file | ||
Parameters | ||
---------- | ||
fo : FileIO | ||
File object | ||
Returns | ||
------- | ||
Union[np.ndarray, dict] | ||
Numpy array with data and header meta | ||
""" | ||
# read file | ||
lines = fo.readlines() | ||
|
||
# extract header | ||
meta = {} | ||
for line in lines[0:6]: | ||
l1, l2 = str(line, encoding="utf-8").split() | ||
if l1.lower() in ("ncols", "nrows", "nodata_value"): | ||
l2 = int(l2) | ||
elif l1.lower() in ( | ||
"xllcorner", | ||
"yllcorner", | ||
"cellsize", | ||
"xllcenter", | ||
"yllcenter", | ||
): | ||
l2 = float(l2) | ||
else: | ||
raise ValueError(f"Found unknown key '{l1}' in ASCII header") | ||
|
||
meta[l1.lower()] = l2 | ||
|
||
# extract data | ||
data = np.array([x.split() for x in lines[6:]], dtype=float) | ||
|
||
return data, meta | ||
|
||
|
||
def get_xy_from_ascii_meta( | ||
meta: Dict[str, Union[int, float]] | ||
) -> Tuple[np.ndarray, np.ndarray]: | ||
"""Get the xy coordinates Esri ASCII raster format header | ||
Parameters | ||
---------- | ||
meta : dict | ||
dictonary with the following keys and value types: | ||
{cellsize: int, | ||
nrows: int, | ||
ncols: int, | ||
xllcorner/xllcenter: float, | ||
yllcorner/yllcenter: float} | ||
Returns | ||
------- | ||
Tuple[np.ndarray, np.ndarray] | ||
Tuple with the the x and y coordinates as numpy array | ||
""" | ||
|
||
if "xllcorner" in meta.keys(): | ||
xstart = meta["xllcorner"] + meta["cellsize"] / 2 | ||
elif "xllcenter" in meta.keys(): | ||
xstart = meta["xllcenter"] | ||
|
||
x = np.linspace( | ||
xstart, | ||
xstart + meta["cellsize"] * meta["ncols"], | ||
meta["ncols"], | ||
endpoint=False, | ||
) | ||
|
||
if "yllcorner" in meta.keys(): | ||
ystart = meta["yllcorner"] + meta["cellsize"] / 2 | ||
elif "yllcenter" in meta.keys(): | ||
ystart = meta["yllcenter"] | ||
|
||
y = np.linspace( | ||
ystart, | ||
ystart + meta["cellsize"] * meta["nrows"], | ||
meta["nrows"], | ||
endpoint=False, | ||
) | ||
return x, y | ||
|
||
|
||
def read_meteobase_ascii( | ||
zfile: ZipFile, foldername: str, meta: Dict[str, str] | ||
) -> DataArray: | ||
"""Read list of .asc files in a meteobase zipfile | ||
Parameters | ||
---------- | ||
zfile : ZipFile | ||
meteobase zipfile | ||
foldername : str | ||
foldername where specific observation type is stored | ||
meta : Dict[str, str] | ||
relevant metadata for DataArray | ||
Returns | ||
------- | ||
DataArray | ||
""" | ||
fnames = [x for x in zfile.namelist() if f"{foldername}/" in x] | ||
if meta["Bestandsformaat"] == ".ASC (Arc/Info-raster)": | ||
times = [] | ||
for i, fname in enumerate(fnames): | ||
data_array = None | ||
with zfile.open(fname) as fo: | ||
data, ascii_meta = read_ascii(fo) | ||
if data_array is None: | ||
meta = meta | ascii_meta | ||
data_array = np.zeros( | ||
shape=(len(fnames), ascii_meta["nrows"], ascii_meta["ncols"]), | ||
dtype=float, | ||
) | ||
data_array[i] = data | ||
|
||
times.append(get_timestamp_from_fname(fname)) | ||
|
||
x, y = get_xy_from_ascii_meta(ascii_meta) | ||
|
||
da = DataArray( | ||
data_array, | ||
dims=["time", "y", "x"], | ||
coords=dict( | ||
time=times, | ||
x=x, | ||
y=y, | ||
), | ||
attrs=meta, | ||
name=foldername, | ||
) | ||
return da | ||
|
||
else: | ||
raise ValueError(f"Can't read bestandsformaat '{meta['Bestandsformaat']}'") | ||
|
||
|
||
def read_meteobase( | ||
path: Union[Path, str], meteobase_type: Optional[str] = None | ||
) -> List[DataArray]: | ||
"""Read Meteobase zipfile with ASCII data | ||
Parameters | ||
---------- | ||
path : Union[Path,str] | ||
Path to meteobase .zipfile | ||
meteobase_type : Optional[str], optional | ||
Must be one of 'NEERSLAG', 'MAKKINK', 'PENMAN', 'EVAPOTRANSPIRATIE', | ||
'VERDAMPINGSTEKORT', by default None which reads all data from the | ||
zipfile. | ||
Returns | ||
------- | ||
List[DataArray] | ||
""" | ||
|
||
with ZipFile(Path(path)) as zfile: | ||
with zfile.open("LEESMIJ.TXT") as fo: | ||
meta = read_leesmij(fo) | ||
|
||
if meteobase_type is None: | ||
meteo_basetype = list(meta.keys()) | ||
|
||
da_list = [] | ||
for mb_type in meteo_basetype: | ||
da = read_meteobase_ascii(zfile, mb_type.upper(), meta[mb_type.upper()]) | ||
da_list.append(da) | ||
|
||
return da_list |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
import nlmod | ||
from pathlib import Path | ||
|
||
data_path = Path(__file__).parent / "data" | ||
|
||
|
||
def test_read_meteobase() -> None: | ||
_ = nlmod.read.meteobase.read_meteobase(data_path / "Meteobase_ASCII_test.zip") |