see changelog
barronh committed Aug 14, 2024
1 parent ad39cf0 commit 6978598
Showing 9 changed files with 4,653 additions and 594 deletions.
29 changes: 29 additions & 0 deletions CHANGELOG.md
@@ -0,0 +1,29 @@
Change Log
==========

Not all changes are listed, but notable changes are itemized for ease of review.

* v0.9.0: Overhauled xdr for faster and clearer processing.
  Added bin format capability for population.
  Added robustness fixes for CMAQ.
  Added updates to DescribeCoverage.csv.
* v0.8.5: Added simple control over cache warning.
* v0.8.4: Added support for Subset 9.0 CMAQ and Grid 1.0 xdr formats.
  Updated keys to rely on descriptions (via DescribeCoverage).
  Added utilities for basic polygon/cmaq intersections for HMS.
* v0.8.3: Added xdr Polygon 1.0 format capability, added package
  DescribeCoverage in data module and restructured utilities.
* v0.8.2: Added xdr CALIPSO 1.0 format capability.
* v0.8.1: Added xdr Point 1.0 format capability.
* v0.8.0: Restructuring module code and adding CMAQ pairing.
* v0.7.0: Added offline descriptions for review of space/time coverage.
* v0.7.0: Added TEMPO options for screening
* v0.6.0: Added latitude longitude grid pass thru support.
* v0.5.1: Added convenience function for opening many IOAPI files at once.
* v0.5.1: Updated TEMPO proxy naming.
* v0.4.6: Added support for legacy TLS servers (e.g., ofmpub and maple)
* v0.4.5: Updated TEMPO proxy naming
* v0.4.4: Added explicit Pandora support
* v0.4.3: updated to work with CMAQ EQUATES data (must exclude grid=False)
* v0.4.3: updated to support GDTYP=7 (equatorial mercator)
* v0.4.2: updated to support GDTYP=2 (equatorial mercator)
25 changes: 0 additions & 25 deletions README.md
@@ -89,28 +89,3 @@ keys = rsigapi.keys(offline=False) # slow and likely too many options
print(len(keys))
# 3875
```

## Change Log

Not all changes are listed, but notable changes are itemized for ease of review.

* v0.8.5: Added simple control over cache warning.
* v0.8.4: Added support for Subset 9.0 CMAQ and Grid 1.0 xdr formats.
  Updated keys to rely on descriptions (via DescribeCoverage).
  Added utilities for basic polygon/cmaq intersections for HMS.
* v0.8.3: Added xdr Polygon 1.0 format capability, added package
  DescribeCoverage in data module and restructured utilities.
* v0.8.2: Added xdr CALIPSO 1.0 format capability.
* v0.8.1: Added xdr Point 1.0 format capability.
* v0.8.0: Restructuring module code and adding CMAQ pairing.
* v0.7.0: Added offline descriptions for review of space/time coverage.
* v0.7.0: Added TEMPO options for screening
* v0.6.0: Added latitude longitude grid pass thru support.
* v0.5.1: Added convenience function for opening many IOAPI files at once.
* v0.5.1: Updated TEMPO proxy naming.
* v0.4.6: Added support for legacy TLS servers (e.g., ofmpub and maple)
* v0.4.5: Updated TEMPO proxy naming
* v0.4.4: Added explicit Pandora support
* v0.4.3: updated to work with CMAQ EQUATES data (must exclude grid=False)
* v0.4.3: updated to support GDTYP=7 (equatorial mercator)
* v0.4.2: updated to support GDTYP=2 (equatorial mercator)
136 changes: 136 additions & 0 deletions examples/maps/plot_mortality.py
@@ -0,0 +1,136 @@
"""
Plot PM25 Mortality
===================
Get CMAQ EQUATES PM25 and county population from RSIG and estimate attributable mortality.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
from shapely import polygons
import geopandas as gpd
import pycno
import pyrsig


datakey = 'cmaq.equates.conus.aconc.PM25'
ckey = 'PM25'

# %%
# Concentration Response Function Formulation
# -------------------------------------------
#
# * Krewski 2009 PM25
#   * all-cause mortality risk 1.06 per 10 micrograms/m3
#   * 30+ year-old population
# * Simplifying Assumptions
#   * Baseline mortality incidence is spatially uniform
#     * [GBD IHME](vizhub.healthdata.org/gbd-results) for 2019 in the US
#     * all-cause deaths = 2853165.03
#     * population = 329996155.19
#   * Age distribution is spatially uniform
#     * Age distribution from the US Census ACSST5Y2020
#     * US population 30 years or older: 201686433
#     * US population total: 329824950
beta = np.log(1.06) / 10
y0 = 2853165.03 / 329996155.19
f_pop = 201686433 / 329824950
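# Illustrative check (not part of the original script): with these constants,
# a county with C = 10 micrograms/m3 and P = 100,000 residents yields roughly
# y0 * P * f_pop * (1 - exp(-beta * 10))
#   = 0.00865 * 100000 * 0.611 * 0.0566 ~= 30 attributable deaths per year.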


bdates = pd.date_range('2019-01-01', '2019-12-31', freq='1d')
print('**WARNING**: using 1 day/month as representative of monthly average')
bdates = bdates[15::30]


# %%
# Retrieve Conc from RSIG (or cache)
# ----------------------------------
dss = []
for bdate in bdates:
    ds = pyrsig.to_ioapi(datakey, bbox=(-180, 0, 0, 90), bdate=bdate)
    dss.append(ds[[ckey]].mean(('TSTEP', 'LAY'), keep_attrs=True))

attrs = {k: v for k, v in ds.attrs.items()}
ds = xr.concat(dss, dim='TSTEP')
ds.attrs.update(attrs)
units = ds[ckey].units.strip()

# %%
# Retrieve Pop from RSIG (or cache)
# ---------------------------------
#
# Population is available for the Pacific, Gulf, and Atlantic coasts and is
# returned as a geopandas.GeoDataFrame. These populations are at the county
# scale, which limits the spatial resolution of the analysis to the size of
# counties.
popdf = pyrsig.to_dataframe(
    'landuse.atlantic.population_iclus1',
    bdate='2010-01-01T000000Z', edate='2090-01-01T235959Z'
)
popdf.crs = 4326

# %%
# Convert Conc to GeoDataFrame
# ----------------------------
#
# Converting Conc to a GeoDataFrame for compatibility with population.
df = ds.mean('TSTEP').to_dataframe()

# Create a GeoDataFrame in grid cell units
cell_off = np.array([[[-.5, -.5], [.5, -.5], [.5, .5], [-.5, .5]]])
xy = df.reset_index()[['COL', 'ROW']].values[:, None, :]
geom = polygons(xy + cell_off)
cgdf = gpd.GeoDataFrame(df, geometry=geom, crs=ds.crs_proj4)
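# Added note: COL/ROW coordinates are grid-cell centers, so the +/-0.5
# offsets above trace each cell's corners, and the polygons can be
# georeferenced directly with the IOAPI projection string in ds.crs_proj4.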


# %%
# Area Weight Conc at Population
# ------------------------------
#
# Create area weighted concentrations at the population polygon level.
# 1. Create intersection
# 2. Calculate intersection fraction from each grid cell
# 3. Weight concentration by area
intxdf = gpd.overlay(cgdf.reset_index(), popdf.to_crs(cgdf.crs).reset_index())
intxdf['intx_area'] = intxdf.geometry.area
intx_total_area = intxdf[['index', 'intx_area']].groupby('index').sum()
intxdf['overlapped_area'] = intx_total_area.loc[intxdf['index']].values
area_factor = intxdf['intx_area'] / intxdf['overlapped_area']
# Calculate area weighted concentration as C and use 2010 population as P
intxdf[f'areawt_{ckey}'] = intxdf[ckey] * area_factor
finaldf = intxdf.groupby('index').agg(
    C=(f'areawt_{ckey}', 'sum'),
    P=('BC_2010POP', 'first'),
)
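# Optional sanity check (illustrative, not part of the original script):
# within each population polygon, the area factors should sum to ~1.
# frac = intxdf.groupby('index')['intx_area'].sum() / intx_total_area['intx_area']
# assert np.allclose(frac, 1.0)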

# Convert to geodataframe pulling geometry from original pop dataframe
pgeom = popdf.loc[finaldf.index, 'geometry']
finaldf = gpd.GeoDataFrame(finaldf, geometry=pgeom, crs=4326)

F = finaldf['F'] = 1 - np.exp(-beta * finaldf['C'])
M = finaldf['M'] = y0 * finaldf['P'] * f_pop
finaldf['M_i'] = M * F
M_i = float(finaldf['M_i'].sum())
P = float(finaldf['P'].sum())
M = float(M.sum())
C_p = float(finaldf.eval('C * P').sum() / P)

# Now make the plot
gskw = dict(left=0.05, right=0.95)
fig, axx = plt.subplots(1, 2, figsize=(8, 4), gridspec_kw=gskw)
lkw = dict(label=f'{ckey} [{units}]')
finaldf.plot('C', ax=axx[0], legend=True, legend_kwds=lkw)
tprops = dict(
    x=0.95, y=0.05, horizontalalignment='right', backgroundcolor='w'
)
tprops['transform'] = axx[0].transAxes
axx[0].text(s='$\\overline{C^p}$ = ' f'{C_p:.2f}', **tprops)
lkw = dict(label='Population [#]')
finaldf.plot('P', ax=axx[1], legend=True, legend_kwds=lkw)
tprops['transform'] = axx[1].transAxes
axx[1].text(s='$\\sum{P}$ = ' f'{P:.0f}', **tprops)
pycno.cno().drawstates(ax=axx)
fig.suptitle(
    f'$M_i = f_a y_0 P_x F_x$ = {f_pop:.2f} * {y0:.5f} * P$_x$ * (1 - exp(-{beta:.6f} C$_x$)) = {M_i:.0f}'
)
fig.savefig(f'{ckey}_mortality.png')
2 changes: 1 addition & 1 deletion examples/timeseries/plot_elpaso.py
@@ -59,7 +59,7 @@

# Configure axes
ax.set(ylabel='AirNow NO2 ppb', ylim=(0, 10))
tax.set(ylim=(0.3e15, 4e15), ylabel='TropOMI NO2 molecules/cm$^2$')
tax.set(ylim=(0.3e15, 4e15), ylabel='TEMPO NO2 molecules/cm$^2$')

plt.show()
# Or save out figure
30 changes: 26 additions & 4 deletions pyrsig/__init__.py
@@ -1,5 +1,5 @@
__all__ = ['RsigApi', 'RsigGui', 'open_ioapi', 'open_mfioapi', 'cmaq']
__version__ = '0.8.5'
__version__ = '0.9.0'

from . import cmaq
from .cmaq import open_ioapi, open_mfioapi
@@ -13,6 +13,12 @@
)
_nolonlats_prefixes = ('cmaq', 'regridded')
_noregrid_prefixes = ('cmaq', 'regridded')
_shpxdrprefixes = ['hms.']
_shpbinprefixes = [
'landuse.atlantic.population_iclus',
'landuse.gulf.population_iclus',
'landuse.pacific.population_iclus',
]


def _actionf(msg, action, ErrorTyp=None):
@@ -705,18 +711,26 @@ def to_dataframe(
"""
from . import xdr
assert backend in {'ascii', 'xdr'}
if key.startswith('hms.'):
from . import bin
assert backend in {'ascii', 'xdr', 'bin'}
if any([key.startswith(pfx) for pfx in _shpxdrprefixes]):
backend = 'xdr'
elif any([key.startswith(pfx) for pfx in _shpbinprefixes]):
backend = 'bin'

outpath = self.get_file(
backend, key=key, bdate=bdate, edate=edate, bbox=bbox,
grid=False, verbose=verbose, corners=corners,
compress=1
)
if backend == 'ascii':
df = pd.read_csv(outpath, delimiter='\t', na_values=[-9999., -999])
else:
elif backend == 'xdr':
df = xdr.from_xdrfile(outpath, na_values=[-9999., -999])
elif backend == 'bin':
df = bin.from_binfile(outpath)
else:
raise KeyError(f'format {backend} unknown; use xdr, bin or ascii')

if withmeta:
metapath = self.get_file(
@@ -1099,3 +1113,11 @@ def check(self, action='return'):
_actionf('bdate is later than edate', action)

return iswe & issn & isbe


# Add easy access defaults
_defapi = RsigApi()
descriptions = _defapi.descriptions
to_dataframe = _defapi.to_dataframe
to_ioapi = _defapi.to_ioapi
to_netcdf = _defapi.to_netcdf
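The module-level defaults make the most common calls available without explicitly constructing an `RsigApi`. A minimal sketch, assuming `descriptions()` returns the DescribeCoverage catalog as a DataFrame; the data keys are taken from this commit's examples:

```python
import pyrsig

# Catalog of available coverages (backed by DescribeCoverage)
desc = pyrsig.descriptions()

# Gridded CMAQ PM25 as an IOAPI-like xarray Dataset
ds = pyrsig.to_ioapi(
    'cmaq.equates.conus.aconc.PM25', bdate='2019-07-15', bbox=(-90, 30, -80, 40)
)

# County-scale population polygons via the new bin backend
popdf = pyrsig.to_dataframe(
    'landuse.atlantic.population_iclus1',
    bdate='2010-01-01T000000Z', edate='2090-01-01T235959Z'
)
```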
50 changes: 50 additions & 0 deletions pyrsig/bin.py
@@ -0,0 +1,50 @@
__all__ = ['from_binfile']


def from_binfile(path):
    """
    Arguments
    ---------
    path : str
        Path to binary file whose first line is
            prefix shxn shpn dbfn
        Where shxn is the length of the shx file
        Where shpn is the length of the shp file
        Where dbfn is the length of the dbf file

    Returns
    -------
    df : geopandas.GeoDataFrame
    """
    import tempfile
    import gzip
    import geopandas as gpd

    if path.endswith('.gz'):
        bf = gzip.open(path)
    else:
        bf = open(path, 'rb')
    # First line gives the shapefile stem and byte lengths of each component
    _l = bf.readline().decode().strip()
    prefix, shxn, shpn, dbfn = _l.split()
    shxn = int(shxn)
    shpn = int(shpn)
    dbfn = int(dbfn)
    # Byte offsets of the shx, shp, and dbf payloads within the stream
    shxs = bf.tell()
    shxe = shxs + shxn
    shps = shxe
    shpe = shps + shpn
    dbfs = shpe
    dbfe = dbfs + dbfn
    with tempfile.TemporaryDirectory() as td:
        filestem = f'{td}/{prefix}'
        bf.seek(shxs, 0)
        # Write the three components to a temporary shapefile set and read it
        with open(filestem + '.shx', 'wb') as shxf:
            shxf.write(bf.read(shxe - shxs))
        with open(filestem + '.shp', 'wb') as shpf:
            shpf.write(bf.read(shpe - shps))
        with open(filestem + '.dbf', 'wb') as dbff:
            dbff.write(bf.read(dbfe - dbfs))
        outdf = gpd.read_file(filestem + '.shp')

    outdf.crs = 4326
    return outdf
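For a payload that has already been downloaded, `from_binfile` can be called directly. A minimal sketch; the file name is hypothetical and stands for any population `.bin` response saved by `RsigApi.get_file` with the bin format:

```python
from pyrsig import bin

# Hypothetical cached download; gzip is handled when the path ends in .gz
popdf = bin.from_binfile('landuse.atlantic.population_iclus1.bin.gz')
print(popdf.crs)               # EPSG:4326, assigned by from_binfile
print(popdf.columns.tolist())  # attribute columns from the embedded dbf
```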
47 changes: 32 additions & 15 deletions pyrsig/cmaq/__init__.py
@@ -125,7 +125,7 @@ def save_ioapi(ds, path, format='NETCDF3_CLASSIC', **kwds):
return odds.to_netcdf(path, format=format, **kwds)


def open_ioapi(path, metapath=None, earth_radius=6370000.):
def open_ioapi(path, metapath=None, earth_radius=6370000., **kwds):
"""
Open an IOAPI file, add coordinate data, and optionally add RSIG metadata.
@@ -138,15 +138,17 @@ def open_ioapi(path, metapath=None, earth_radius=6370000.):
added as metadata global property.
earth_radius : float
Assumed radius of the earth. 6370000 is the WRF default.
kwds : mappable
Passed to xr.open_dataset
Returns
-------
ds : xarray.Dataset
Dataset with IOAPI metadata
"""
import xarray as xr

f = xr.open_dataset(path, engine='netcdf4')
kwds.setdefault('engine', 'netcdf4')
f = xr.open_dataset(path, **kwds)
f = add_ioapi_meta(
f, path=path, metapath=metapath, earth_radius=earth_radius
)
@@ -176,24 +178,39 @@ def add_ioapi_meta(ds, metapath=None, earth_radius=6370000., path=''):
import numpy as np
import warnings
f = ds
lvls = f.attrs['VGLVLS']
tflag = f['TFLAG'].astype('i').values[:, 0, :]
yyyyjjj = tflag[:, 0]
yyyyjjj = np.where(yyyyjjj < 1, 1970001, yyyyjjj)
HHMMSS = tflag[:, 1]
tstrs = []
for j, t in zip(yyyyjjj, HHMMSS):
tstrs.append(f'{j:07d}T{t:06d}')

try:
tflag = f['TFLAG'].astype('i').values[:, 0, :]
yyyyjjj = tflag[:, 0]
yyyyjjj = np.where(yyyyjjj < 1, 1970001, yyyyjjj)
HHMMSS = tflag[:, 1]
tstrs = []
for j, t in zip(yyyyjjj, HHMMSS):
tstrs.append(f'{j:07d}T{t:06d}')

time = pd.to_datetime(tstrs, format='%Y%jT%H%M%S')
f.coords['TSTEP'] = time
except Exception:
pass

f.coords['LAY'] = (lvls[:-1] + lvls[1:]) / 2.
f.coords['ROW'] = np.arange(f.attrs['NROWS']) + 0.5
f.coords['COL'] = np.arange(f.attrs['NCOLS']) + 0.5
if 'VGLVLS' in f.attrs:
lvls = f.attrs['VGLVLS']
if len(lvls) > 1:
f.coords['LAY'] = (lvls[:-1] + lvls[1:]) / 2.
else:
f.coords['LAY'] = [np.mean(lvls)]

nrs = [f.sizes.get('ROW', None), f.attrs.get('NROWS', None)]
for nr in nrs:
if nr is not None:
f.coords['ROW'] = np.arange(f.attrs['NROWS']) + 0.5
break

ncs = [f.sizes.get('COL', None), f.attrs.get('NCOLS', None)]
for nc in ncs:
if nc is not None:
f.coords['COL'] = np.arange(f.attrs['NCOLS']) + 0.5
break

try:
proj4str = get_proj4(f.attrs, earth_radius=earth_radius)
f.attrs['crs_proj4'] = proj4str
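With the new `**kwds` passthrough, `open_ioapi` only sets a default `engine='netcdf4'` and forwards everything else to `xr.open_dataset`. A minimal sketch; the file name is hypothetical:

```python
import pyrsig

# Any xr.open_dataset keyword can now be forwarded, e.g. disabling CF decoding
ds = pyrsig.open_ioapi('CMAQ_EQUATES_PM25_20190715.nc', decode_cf=False)
print(ds.attrs.get('crs_proj4'))
```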