see changelog
barronh committed Aug 14, 2024
1 parent ad39cf0 commit 6978598
Showing 9 changed files with 4,653 additions and 594 deletions.
29 changes: 29 additions & 0 deletions CHANGELOG.md
@@ -0,0 +1,29 @@
Change Log
==========

Not all changes are listed, but notable changes are itemized for ease of review.

* v0.9.0: Overhauled xdr for faster and clearer processing.
  Added bin format capability for population.
  Added robustness fixes for CMAQ.
  Added updates to DescribeCoverage.csv.
* v0.8.5: Added simple control over cache warning.
* v0.8.4: Added support for Subset 9.0 CMAQ and Grid 1.0 xdr formats.
  Updated keys to rely on descriptions (via DescribeCoverage).
  Added utilities for basic polygon/cmaq intersections for HMS.
* v0.8.3: Added xdr Polygon 1.0 format capability, added package
  DescribeCoverage in data module and restructured utilities.
* v0.8.2: Added xdr CALIPSO 1.0 format capability.
* v0.8.1: Added xdr Point 1.0 format capability.
* v0.8.0: Restructuring module code and adding CMAQ pairing.
* v0.7.0: Added offline descriptions for review of space/time coverage.
* v0.7.0: Added TEMPO options for screening
* v0.6.0: Added latitude longitude grid pass thru support.
* v0.5.1: Added convenience function for opening many IOAPI files at once.
* v0.5.1: Updated TEMPO proxy naming.
* v0.4.6: Added support for legacy TLS servers (e.g., ofmpub and maple)
* v0.4.5: Updated TEMPO proxy naming
* v0.4.4: Added explicit Pandora support
* v0.4.3: updated to work with CMAQ EQUATES data (must exclude grid=False)
* v0.4.3: updated to support GDTYP=7 (equatorial mercator)
* v0.4.2: updated to support GDTYP=2 (equatorial mercator)
25 changes: 0 additions & 25 deletions README.md
@@ -89,28 +89,3 @@ keys = rsigapi.keys(offline=False) # slow and likely too many options
print(len(keys))
# 3875
```

## Change Log

Not all changes are listed, but notable changes are itemized for ease of review.

* v0.8.5: Added simple control over cache warning.
* v0.8.4: Added support for Subset 9.0 CMAQ and Grid 1.0 xdr formats.
  Updated keys to rely on descriptions (via DescribeCoverage).
  Added utilities for basic polygon/cmaq intersections for HMS.
* v0.8.3: Added xdr Polygon 1.0 format capability, added package
  DescribeCoverage in data module and restructured utilities.
* v0.8.2: Added xdr CALIPSO 1.0 format capability.
* v0.8.1: Added xdr Point 1.0 format capability.
* v0.8.0: Restructuring module code and adding CMAQ pairing.
* v0.7.0: Added offline descriptions for review of space/time coverage.
* v0.7.0: Added TEMPO options for screening
* v0.6.0: Added latitude longitude grid pass thru support.
* v0.5.1: Added convenience function for opening many IOAPI files at once.
* v0.5.1: Updated TEMPO proxy naming.
* v0.4.6: Added support for legacy TLS servers (e.g., ofmpub and maple)
* v0.4.5: Updated TEMPO proxy naming
* v0.4.4: Added explicit Pandora support
* v0.4.3: updated to work with CMAQ EQUATES data (must exclude grid=False)
* v0.4.3: updated to support GDTYP=7 (equatorial mercator)
* v0.4.2: updated to support GDTYP=2 (equatorial mercator)
136 changes: 136 additions & 0 deletions examples/maps/plot_mortality.py
@@ -0,0 +1,136 @@
"""
Plot PM25 Mortality
===================
Get CMAQ EQUATES PM25 and county population from RSIG and estimate attributable mortality.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
from shapely import polygons
import geopandas as gpd
import pycno
import pyrsig


datakey = 'cmaq.equates.conus.aconc.PM25'
ckey = 'PM25'

# %%
# Concentration Response Function Formulation
# -------------------------------------------
#
# * Krewski 2009 PM25
#   * all-cause mortality risk 1.06 per 10 micrograms/m3
#   * 30+ year-old population
# * Simplifying Assumptions
#   * Baseline mortality incidence is spatially uniform
#     * [GBD IHME](vizhub.healthdata.org/gbd-results) for 2019 in the US
#     * all-cause deaths = 2853165.03
#     * population = 329996155.19
#   * Age distribution is spatially uniform
#     * Age distribution from the US Census ACSST5Y2020
#     * US population 30 years or older: 201686433
#     * US population total: 329824950
beta = np.log(1.06) / 10
y0 = 2853165.03 / 329996155.19
f_pop = 201686433 / 329824950
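# Illustrative check (not part of the original script): with these constants,
# a county with C = 10 micrograms/m3 and P = 100,000 residents yields roughly
# y0 * P * f_pop * (1 - exp(-beta * 10))
#   = 0.00865 * 100000 * 0.611 * 0.0566 ~= 30 attributable deaths per year.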


bdates = pd.date_range('2019-01-01', '2019-12-31', freq='1d')
print('**WARNING**: using 1 day/month as representative of monthly average')
bdates = bdates[15::30]


# %%
# Retrieve Conc from RSIG (or cache)
# ----------------------------------
dss = []
for bdate in bdates:
    ds = pyrsig.to_ioapi(datakey, bbox=(-180, 0, 0, 90), bdate=bdate)
    dss.append(ds[[ckey]].mean(('TSTEP', 'LAY'), keep_attrs=True))

attrs = {k: v for k, v in ds.attrs.items()}
ds = xr.concat(dss, dim='TSTEP')
ds.attrs.update(attrs)
units = ds[ckey].units.strip()

# %%
# Retrieve Pop from RSIG (or cache)
# ---------------------------------
#
# Population is available for the Pacific, Gulf, and Atlantic coasts and is
# returned as a geopandas.GeoDataFrame. These populations are at the county
# scale, which limits the spatial resolution of the analysis to the size of
# counties.
popdf = pyrsig.to_dataframe(
    'landuse.atlantic.population_iclus1',
    bdate='2010-01-01T000000Z', edate='2090-01-01T235959Z'
)
popdf.crs = 4326

# %%
# Convert Conc to GeoDataFrame
# ----------------------------
#
# Converting Conc to a GeoDataFrame for compatibility with population.
df = ds.mean('TSTEP').to_dataframe()

# Create a GeoDataFrame in grid cell units
cell_off = np.array([[[-.5, -.5], [.5, -.5], [.5, .5], [-.5, .5]]])
xy = df.reset_index()[['COL', 'ROW']].values[:, None, :]
geom = polygons(xy + cell_off)
cgdf = gpd.GeoDataFrame(df, geometry=geom, crs=ds.crs_proj4)
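# Added note: COL/ROW coordinates are grid-cell centers, so the +/-0.5
# offsets above trace each cell's corners, and the polygons can be
# georeferenced directly with the IOAPI projection string in ds.crs_proj4.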


# %%
# Area Weight Conc at Population
# ------------------------------
#
# Create area weighted concentrations at the population polygon level.
# 1. Create intersection
# 2. Calculate intersection fraction from each grid cell
# 3. Weight concentration by area
intxdf = gpd.overlay(cgdf.reset_index(), popdf.to_crs(cgdf.crs).reset_index())
intxdf['intx_area'] = intxdf.geometry.area
intx_total_area = intxdf[['index', 'intx_area']].groupby('index').sum()
intxdf['overlapped_area'] = intx_total_area.loc[intxdf['index']].values
area_factor = intxdf['intx_area'] / intxdf['overlapped_area']
# Calculate area weighted concentration as C and use 2010 population as P
intxdf[f'areawt_{ckey}'] = intxdf[ckey] * area_factor
finaldf = intxdf.groupby('index').agg(
    C=(f'areawt_{ckey}', 'sum'),
    P=('BC_2010POP', 'first'),
)
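# Optional sanity check (illustrative, not part of the original script):
# within each population polygon, the area factors should sum to ~1.
# frac = intxdf.groupby('index')['intx_area'].sum() / intx_total_area['intx_area']
# assert np.allclose(frac, 1.0)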

# Convert to geodataframe pulling geometry from original pop dataframe
pgeom = popdf.loc[finaldf.index, 'geometry']
finaldf = gpd.GeoDataFrame(finaldf, geometry=pgeom, crs=4326)

F = finaldf['F'] = 1 - np.exp(-beta * finaldf['C'])
M = finaldf['M'] = y0 * finaldf['P'] * f_pop
finaldf['M_i'] = M * F
M_i = float(finaldf['M_i'].sum())
P = float(finaldf['P'].sum())
M = float(M.sum())
C_p = float(finaldf.eval('C * P').sum() / P)

# Now make the plot
gskw = dict(left=0.05, right=0.95)
fig, axx = plt.subplots(1, 2, figsize=(8, 4), gridspec_kw=gskw)
lkw = dict(label=f'{ckey} [{units}]')
finaldf.plot('C', ax=axx[0], legend=True, legend_kwds=lkw)
tprops = dict(
    x=0.95, y=0.05, horizontalalignment='right', backgroundcolor='w'
)
tprops['transform'] = axx[0].transAxes
axx[0].text(s='$\\overline{C^p}$ = ' f'{C_p:.2f}', **tprops)
lkw = dict(label='Population [#]')
finaldf.plot('P', ax=axx[1], legend=True, legend_kwds=lkw)
tprops['transform'] = axx[1].transAxes
axx[1].text(s='$\\sum{P}$ = ' f'{P:.0f}', **tprops)
pycno.cno().drawstates(ax=axx)
fig.suptitle(
    f'$M_i = f_a y_0 P_x F_x$ = {f_pop:.2f} * {y0:.5f} * P$_x$ * (1 - exp(-{beta:.6f} C$_x$)) = {M_i:.0f}'
)
fig.savefig(f'{ckey}_mortality.png')
2 changes: 1 addition & 1 deletion examples/timeseries/plot_elpaso.py
@@ -59,7 +59,7 @@

# Configure axes
ax.set(ylabel='AirNow NO2 ppb', ylim=(0, 10))
tax.set(ylim=(0.3e15, 4e15), ylabel='TropOMI NO2 molecules/cm$^2$')
tax.set(ylim=(0.3e15, 4e15), ylabel='TEMPO NO2 molecules/cm$^2$')

plt.show()
# Or save out figure
30 changes: 26 additions & 4 deletions pyrsig/__init__.py
@@ -1,5 +1,5 @@
__all__ = ['RsigApi', 'RsigGui', 'open_ioapi', 'open_mfioapi', 'cmaq']
__version__ = '0.8.5'
__version__ = '0.9.0'

from . import cmaq
from .cmaq import open_ioapi, open_mfioapi
@@ -13,6 +13,12 @@
)
_nolonlats_prefixes = ('cmaq', 'regridded')
_noregrid_prefixes = ('cmaq', 'regridded')
_shpxdrprefixes = ['hms.']
_shpbinprefixes = [
'landuse.atlantic.population_iclus',
'landuse.gulf.population_iclus',
'landuse.pacific.population_iclus',
]


def _actionf(msg, action, ErrorTyp=None):
@@ -705,18 +711,26 @@ def to_dataframe(
"""
from . import xdr
assert backend in {'ascii', 'xdr'}
if key.startswith('hms.'):
from . import bin
assert backend in {'ascii', 'xdr', 'bin'}
if any([key.startswith(pfx) for pfx in _shpxdrprefixes]):
backend = 'xdr'
elif any([key.startswith(pfx) for pfx in _shpbinprefixes]):
backend = 'bin'

outpath = self.get_file(
backend, key=key, bdate=bdate, edate=edate, bbox=bbox,
grid=False, verbose=verbose, corners=corners,
compress=1
)
if backend == 'ascii':
df = pd.read_csv(outpath, delimiter='\t', na_values=[-9999., -999])
else:
elif backend == 'xdr':
df = xdr.from_xdrfile(outpath, na_values=[-9999., -999])
elif backend == 'bin':
df = bin.from_binfile(outpath)
else:
raise KeyError(f'format {backend} unknown; use xdr, bin or ascii')

if withmeta:
metapath = self.get_file(
@@ -1099,3 +1113,11 @@ def check(self, action='return'):
_actionf('bdate is later than edate', action)

return iswe & issn & isbe


# Add easy access defaults
_defapi = RsigApi()
descriptions = _defapi.descriptions
to_dataframe = _defapi.to_dataframe
to_ioapi = _defapi.to_ioapi
to_netcdf = _defapi.to_netcdf
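The module-level defaults make the most common calls available without explicitly constructing an `RsigApi`. A minimal sketch, assuming `descriptions()` returns the DescribeCoverage catalog as a DataFrame; the data keys are taken from this commit's examples:

```python
import pyrsig

# Catalog of available coverages (backed by DescribeCoverage)
desc = pyrsig.descriptions()

# Gridded CMAQ PM25 as an IOAPI-like xarray Dataset
ds = pyrsig.to_ioapi(
    'cmaq.equates.conus.aconc.PM25', bdate='2019-07-15', bbox=(-90, 30, -80, 40)
)

# County-scale population polygons via the new bin backend
popdf = pyrsig.to_dataframe(
    'landuse.atlantic.population_iclus1',
    bdate='2010-01-01T000000Z', edate='2090-01-01T235959Z'
)
```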
50 changes: 50 additions & 0 deletions pyrsig/bin.py
@@ -0,0 +1,50 @@
__all__ = ['from_binfile']


def from_binfile(path):
    """
    Arguments
    ---------
    path : str
        Path to binary file whose first line is
            prefix shxn shpn dbfn
        Where shxn is the length of the shx file
        Where shpn is the length of the shp file
        Where dbfn is the length of the dbf file

    Returns
    -------
    df : geopandas.GeoDataFrame
    """
    import tempfile
    import gzip
    import geopandas as gpd

    if path.endswith('.gz'):
        bf = gzip.open(path)
    else:
        bf = open(path, 'rb')
    # First line gives the shapefile stem and byte lengths of each component
    _l = bf.readline().decode().strip()
    prefix, shxn, shpn, dbfn = _l.split()
    shxn = int(shxn)
    shpn = int(shpn)
    dbfn = int(dbfn)
    # Byte offsets of the shx, shp, and dbf payloads within the stream
    shxs = bf.tell()
    shxe = shxs + shxn
    shps = shxe
    shpe = shps + shpn
    dbfs = shpe
    dbfe = dbfs + dbfn
    with tempfile.TemporaryDirectory() as td:
        filestem = f'{td}/{prefix}'
        bf.seek(shxs, 0)
        # Write the three components to a temporary shapefile set and read it
        with open(filestem + '.shx', 'wb') as shxf:
            shxf.write(bf.read(shxe - shxs))
        with open(filestem + '.shp', 'wb') as shpf:
            shpf.write(bf.read(shpe - shps))
        with open(filestem + '.dbf', 'wb') as dbff:
            dbff.write(bf.read(dbfe - dbfs))
        outdf = gpd.read_file(filestem + '.shp')

    outdf.crs = 4326
    return outdf
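For a payload that has already been downloaded, `from_binfile` can be called directly. A minimal sketch; the file name is hypothetical and stands for any population `.bin` response saved by `RsigApi.get_file` with the bin format:

```python
from pyrsig import bin

# Hypothetical cached download; gzip is handled when the path ends in .gz
popdf = bin.from_binfile('landuse.atlantic.population_iclus1.bin.gz')
print(popdf.crs)               # EPSG:4326, assigned by from_binfile
print(popdf.columns.tolist())  # attribute columns from the embedded dbf
```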
47 changes: 32 additions & 15 deletions pyrsig/cmaq/__init__.py
@@ -125,7 +125,7 @@ def save_ioapi(ds, path, format='NETCDF3_CLASSIC', **kwds):
return odds.to_netcdf(path, format=format, **kwds)


def open_ioapi(path, metapath=None, earth_radius=6370000.):
def open_ioapi(path, metapath=None, earth_radius=6370000., **kwds):
"""
Open an IOAPI file, add coordinate data, and optionally add RSIG metadata.
@@ -138,15 +138,17 @@ def open_ioapi(path, metapath=None, earth_radius=6370000.):
added as metadata global property.
earth_radius : float
Assumed radius of the earth. 6370000 is the WRF default.
kwds : mappable
Passed to xr.open_dataset
Returns
-------
ds : xarray.Dataset
Dataset with IOAPI metadata
"""
import xarray as xr

f = xr.open_dataset(path, engine='netcdf4')
kwds.setdefault('engine', 'netcdf4')
f = xr.open_dataset(path, **kwds)
f = add_ioapi_meta(
f, path=path, metapath=metapath, earth_radius=earth_radius
)
@@ -176,24 +178,39 @@ def add_ioapi_meta(ds, metapath=None, earth_radius=6370000., path=''):
import numpy as np
import warnings
f = ds
lvls = f.attrs['VGLVLS']
tflag = f['TFLAG'].astype('i').values[:, 0, :]
yyyyjjj = tflag[:, 0]
yyyyjjj = np.where(yyyyjjj < 1, 1970001, yyyyjjj)
HHMMSS = tflag[:, 1]
tstrs = []
for j, t in zip(yyyyjjj, HHMMSS):
tstrs.append(f'{j:07d}T{t:06d}')

try:
tflag = f['TFLAG'].astype('i').values[:, 0, :]
yyyyjjj = tflag[:, 0]
yyyyjjj = np.where(yyyyjjj < 1, 1970001, yyyyjjj)
HHMMSS = tflag[:, 1]
tstrs = []
for j, t in zip(yyyyjjj, HHMMSS):
tstrs.append(f'{j:07d}T{t:06d}')

time = pd.to_datetime(tstrs, format='%Y%jT%H%M%S')
f.coords['TSTEP'] = time
except Exception:
pass

f.coords['LAY'] = (lvls[:-1] + lvls[1:]) / 2.
f.coords['ROW'] = np.arange(f.attrs['NROWS']) + 0.5
f.coords['COL'] = np.arange(f.attrs['NCOLS']) + 0.5
if 'VGLVLS' in f.attrs:
lvls = f.attrs['VGLVLS']
if len(lvls) > 1:
f.coords['LAY'] = (lvls[:-1] + lvls[1:]) / 2.
else:
f.coords['LAY'] = [np.mean(lvls)]

nrs = [f.sizes.get('ROW', None), f.attrs.get('NROWS', None)]
for nr in nrs:
if nr is not None:
f.coords['ROW'] = np.arange(f.attrs['NROWS']) + 0.5
break

ncs = [f.sizes.get('COL', None), f.attrs.get('NCOLS', None)]
for nc in ncs:
if nc is not None:
f.coords['COL'] = np.arange(f.attrs['NCOLS']) + 0.5
break

try:
proj4str = get_proj4(f.attrs, earth_radius=earth_radius)
f.attrs['crs_proj4'] = proj4str
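With the new `**kwds` passthrough, `open_ioapi` only sets a default `engine='netcdf4'` and forwards everything else to `xr.open_dataset`. A minimal sketch; the file name is hypothetical:

```python
import pyrsig

# Any xr.open_dataset keyword can now be forwarded, e.g. disabling CF decoding
ds = pyrsig.open_ioapi('CMAQ_EQUATES_PM25_20190715.nc', decode_cf=False)
print(ds.attrs.get('crs_proj4'))
```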