Skip to content

Commit

Permalink
Merge pull request #12 from edgi-govdata-archiving/more-readable-presets-for-make_data_sets
Browse files Browse the repository at this point in the history

Separate presets, construction in `make_data_sets`
  • Loading branch information
ericnost committed Oct 21, 2020
2 parents 85463a4 + 2a8b034 commit b0a288e
Showing 1 changed file with 213 additions and 162 deletions.
375 changes: 213 additions & 162 deletions make_data_sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,217 @@

from ECHO_modules.DataSet import DataSet


# Registry of every preset that `make_data_sets()` knows how to build.
# Each key is a preset name; each value is a mapping of the keyword arguments
# handed to the `DataSet` constructor when that preset is created.
PRESETS = {
    "RCRA Violations": {
        "idx_field": "ID_NUMBER",
        "base_table": "RCRA_VIOLATIONS",
        "table_name": "RCRA_VIOLATIONS_MVIEW",
        "echo_type": "RCRA",
        "date_field": "DATE_VIOLATION_DETERMINED",
        "date_format": "%m/%d/%Y",
        "agg_type": "count",
        "agg_col": "VIOL_DETERMINED_BY_AGENCY",
        "unit": "violations",
    },
    "RCRA Inspections": {
        "idx_field": "ID_NUMBER",
        "base_table": "RCRA_EVALUATIONS",
        "table_name": "RCRA_EVALUATIONS_MVIEW",
        "echo_type": "RCRA",
        "date_field": "EVALUATION_START_DATE",
        "date_format": "%m/%d/%Y",
        "agg_type": "count",
        "agg_col": "EVALUATION_AGENCY",
        "unit": "inspections",
    },
    "RCRA Penalties": {
        "echo_type": "RCRA",
        "base_table": "RCRA_ENFORCEMENTS",
        "table_name": "RCRA_ENFORCEMENTS_MVIEW",
        "idx_field": "ID_NUMBER",
        "date_field": "ENFORCEMENT_ACTION_DATE",
        "date_format": "%m/%d/%Y",
        "agg_type": "sum",
        "agg_col": "FMP_AMOUNT",
        "unit": "dollars",
    },
    "ICIS EPA Inspections": {
        "echo_type": "AIR",
        "base_table": "ICIS_FEC_EPA_INSPECTIONS",
        "table_name": "AIR_INSPECTIONS_MVIEW",
        "idx_field": "REGISTRY_ID",
        "date_field": "ACTUAL_END_DATE",
        "date_format": "%m/%d/%Y",
        "agg_type": "count",
        "agg_col": "ACTIVITY_TYPE_DESC",
        "unit": "inspections",
    },
    "CAA Violations": {
        "echo_type": "AIR",
        "base_table": "ICIS-AIR_VIOLATION_HISTORY",
        "table_name": "AIR_VIOLATIONS_MVIEW",
        "idx_field": "PGM_SYS_ID",
        "date_field": "Date",
        "date_format": "%m-%d-%Y",
        "agg_type": "count",
        "agg_col": "AGENCY_TYPE_DESC",
        "unit": "violations",
    },
    "CAA Penalties": {
        "echo_type": "AIR",
        "base_table": "ICIS-AIR_FORMAL_ACTIONS",
        "table_name": "AIR_FORMAL_ACTIONS_MVIEW",
        "idx_field": "PGM_SYS_ID",
        "date_field": "SETTLEMENT_ENTERED_DATE",
        "date_format": "%m/%d/%Y",
        "agg_type": "sum",
        "agg_col": "PENALTY_AMOUNT",
        "unit": "dollars",
    },
    "CAA Inspections": {
        "echo_type": "AIR",
        "base_table": "ICIS-AIR_FCES_PCES",
        "table_name": "AIR_COMPLIANCE_MVIEW",
        "idx_field": "PGM_SYS_ID",
        "date_field": "ACTUAL_END_DATE",
        "date_format": "%m-%d-%Y",
        "agg_type": "count",
        "agg_col": "STATE_EPA_FLAG",
        "unit": "inspections",
    },
    "Combined Air Emissions": {
        "echo_type": ["GHG", "TRI"],
        "base_table": "POLL_RPT_COMBINED_EMISSIONS",
        "table_name": "COMBINED_AIR_EMISSIONS_MVIEW",
        "idx_field": "REGISTRY_ID",
        "date_field": "REPORTING_YEAR",
        "date_format": "%Y",
    },
    "Greenhouse Gas Emissions": {
        "echo_type": "GHG",
        "base_table": "POLL_RPT_COMBINED_EMISSIONS",
        "table_name": "GREENHOUSE_GASES_MVIEW",
        "idx_field": "REGISTRY_ID",
        "date_field": "REPORTING_YEAR",
        "date_format": "%Y",
        "agg_type": "sum",
        "agg_col": "ANNUAL_EMISSION",
        "unit": "metric tons of CO2 equivalent",
    },
    "Toxic Releases": {
        "echo_type": "TRI",
        "base_table": "POLL_RPT_COMBINED_EMISSIONS",
        "table_name": "TOXIC_RELEASES_MVIEW",
        "idx_field": "REGISTRY_ID",
        "date_field": "REPORTING_YEAR",
        "date_format": "%Y",
    },
    "CWA Violations": {
        "echo_type": "NPDES",
        "base_table": "NPDES_QNCR_HISTORY",
        "table_name": "WATER_QUARTERLY_VIOLATIONS_MVIEW",
        "idx_field": "NPDES_ID",
        "date_field": "YEARQTR",
        "date_format": "%Y",
        "agg_type": "sum",
        "agg_col": "NUME90Q",
        "unit": "effluent violations",
    },
    "CWA Inspections": {
        "echo_type": "NPDES",
        "base_table": "NPDES_INSPECTIONS",
        "table_name": "CLEAN_WATER_INSPECTIONS_MVIEW",
        "idx_field": "NPDES_ID",
        "date_field": "ACTUAL_END_DATE",
        "date_format": "%m/%d/%Y",
        "agg_type": "count",
        "agg_col": "STATE_EPA_FLAG",
        "unit": "inspections",
    },
    "CWA Penalties": {
        "echo_type": "NPDES",
        "base_table": "NPDES_FORMAL_ENFORCEMENT_ACTIONS",
        "table_name": "CLEAN_WATER_ENFORCEMENT_ACTIONS_MVIEW",
        "idx_field": "NPDES_ID",
        "date_field": "SETTLEMENT_ENTERED_DATE",
        "date_format": "%m/%d/%Y",
        "agg_type": "sum",
        "agg_col": "FED_PENALTY_ASSESSED_AMT",
        "unit": "dollars",
    },
    "SDWA Site Visits": {
        "echo_type": "SDWA",
        "base_table": "SDWA_SITE_VISITS",
        "table_name": "SDWA_SITE_VISITS_MVIEW",
        "idx_field": "PWSID",
        "date_field": "SITE_VISIT_DATE",
        "date_format": "%m/%d/%Y",
    },
    "SDWA Enforcements": {
        "echo_type": "SDWA",
        "base_table": "SDWA_ENFORCEMENTS",
        "table_name": "SDWA_ENFORCEMENTS_MVIEW",
        "idx_field": "PWSID",
        "date_field": "ENFORCEMENT_DATE",
        "date_format": "%m/%d/%Y",
    },
    "SDWA Public Water Systems": {
        "echo_type": "SDWA",
        "base_table": "SDWA_PUB_WATER_SYSTEMS",
        "table_name": "SDWA_PUB_WATER_SYSTEMS_MVIEW",
        "idx_field": "PWSID",
        "date_field": "FISCAL_YEAR",
        "date_format": "%Y",
    },
    "SDWA Violations": {
        "echo_type": "SDWA",
        "base_table": "SDWA_VIOLATIONS",
        "table_name": "SDWA_VIOLATIONS_MVIEW",
        "idx_field": "PWSID",
        "date_field": "FISCAL_YEAR",
        "date_format": "%Y",
    },
    "SDWA Serious Violators": {
        "echo_type": "SDWA",
        "base_table": "SDWA_SERIOUS_VIOLATORS",
        "table_name": "SDWA_SERIOUS_VIOLATORS_MVIEW",
        "idx_field": "PWSID",
        "date_field": "FISCAL_YEAR",
        "date_format": "%Y",
    },
    # Disabled preset, kept for reference (note it has no `base_table`):
    # "SDWA Return to Compliance": {
    #     "echo_type": "SDWA",
    #     "table_name": "SDWA_RETURN_TO_COMPLIANCE",
    #     "idx_field": "PWSID",
    #     "date_field": "FISCAL_YEAR",
    #     "date_format": "%Y",
    # },
}


def make_data_sets( data_set_list = None ):
"""
Create DataSet objects from a list of preset configurations. This takes a
Expand Down Expand Up @@ -54,165 +265,5 @@ def make_data_sets( data_set_list = None ):
),
}
"""
data_sets = {}
ds_name = 'RCRA Violations'
if ( data_set_list is None or ds_name in data_set_list ):
ds = DataSet( name=ds_name, idx_field='ID_NUMBER',
base_table='RCRA_VIOLATIONS',
table_name='RCRA_VIOLATIONS_MVIEW', echo_type="RCRA",
date_field='DATE_VIOLATION_DETERMINED',
date_format='%m/%d/%Y', agg_type = "count",
agg_col="VIOL_DETERMINED_BY_AGENCY",
unit="violations")
# For possible later use in assessing state v federal )
data_sets[ ds.name ] = ds
ds_name = 'RCRA Inspections'
if ( data_set_list is None or ds_name in data_set_list ):
ds = DataSet( name=ds_name, idx_field='ID_NUMBER',
base_table='RCRA_EVALUATIONS',
table_name='RCRA_EVALUATIONS_MVIEW', echo_type="RCRA",
date_field='EVALUATION_START_DATE', date_format='%m/%d/%Y',
agg_type = "count", agg_col="EVALUATION_AGENCY",
unit="inspections")
# For possible later use in assessing state v federal )
data_sets[ ds.name ] = ds
ds_name = 'RCRA Penalties'
if ( data_set_list is None or ds_name in data_set_list ):
ds = DataSet( name=ds_name, echo_type="RCRA",
base_table='RCRA_ENFORCEMENTS',
table_name='RCRA_ENFORCEMENTS_MVIEW', idx_field='ID_NUMBER',
date_field='ENFORCEMENT_ACTION_DATE', date_format='%m/%d/%Y',
agg_type = "sum", agg_col="FMP_AMOUNT", unit="dollars")
data_sets[ ds.name ] = ds
ds_name = 'ICIS EPA Inspections'
if ( data_set_list is None or ds_name in data_set_list ):
ds = DataSet( name=ds_name, echo_type="AIR",
base_table='ICIS_FEC_EPA_INSPECTIONS',
table_name='AIR_INSPECTIONS_MVIEW', idx_field='REGISTRY_ID',
date_field='ACTUAL_END_DATE', date_format='%m/%d/%Y',
agg_type = "count", agg_col="ACTIVITY_TYPE_DESC",
unit="inspections" )
data_sets[ ds.name ] = ds
ds_name = 'CAA Violations'
if ( data_set_list is None or ds_name in data_set_list ):
ds = DataSet( name=ds_name, echo_type="AIR",
base_table='ICIS-AIR_VIOLATION_HISTORY',
table_name='AIR_VIOLATIONS_MVIEW', idx_field='PGM_SYS_ID',
date_field='Date', date_format='%m-%d-%Y',
agg_type = "count", agg_col="AGENCY_TYPE_DESC",
unit="violations")
# For possible later use in assessing state v federal )
data_sets[ ds.name ] = ds
ds_name = 'CAA Penalties'
if ( data_set_list is None or ds_name in data_set_list ):
ds = DataSet( name=ds_name, echo_type="AIR",
base_table='ICIS-AIR_FORMAL_ACTIONS',
table_name='AIR_FORMAL_ACTIONS_MVIEW', idx_field='PGM_SYS_ID',
date_field='SETTLEMENT_ENTERED_DATE', date_format='%m/%d/%Y',
agg_type = "sum", agg_col="PENALTY_AMOUNT", unit="dollars")
data_sets[ ds.name ] = ds
ds_name = 'CAA Inspections'
if ( data_set_list is None or ds_name in data_set_list ):
ds = DataSet( name=ds_name, echo_type="AIR",
base_table='ICIS-AIR_FCES_PCES',
table_name='AIR_COMPLIANCE_MVIEW', idx_field='PGM_SYS_ID',
date_field='ACTUAL_END_DATE', date_format='%m-%d-%Y',
agg_type = "count", agg_col="STATE_EPA_FLAG",
unit="inspections")
# For possible later use in assessing state v federal )
data_sets[ ds.name ] = ds
ds_name = 'Combined Air Emissions'
if ( data_set_list is None or ds_name in data_set_list ):
ds = DataSet( name=ds_name, echo_type=["GHG","TRI"],
base_table='POLL_RPT_COMBINED_EMISSIONS',
table_name='COMBINED_AIR_EMISSIONS_MVIEW',
idx_field='REGISTRY_ID', date_field='REPORTING_YEAR',
date_format='%Y' )
data_sets[ ds.name ] = ds
ds_name = 'Greenhouse Gas Emissions'
if ( data_set_list is None or ds_name in data_set_list ):
ds = DataSet( name=ds_name, echo_type="GHG",
base_table='POLL_RPT_COMBINED_EMISSIONS',
table_name='GREENHOUSE_GASES_MVIEW', idx_field='REGISTRY_ID',
date_field='REPORTING_YEAR', date_format='%Y',
agg_type = "sum", agg_col="ANNUAL_EMISSION",
unit="metric tons of CO2 equivalent")
data_sets[ ds.name ] = ds
ds_name = 'Toxic Releases'
if ( data_set_list is None or ds_name in data_set_list ):
ds = DataSet( name=ds_name, echo_type="TRI",
base_table='POLL_RPT_COMBINED_EMISSIONS',
table_name='TOXIC_RELEASES_MVIEW', idx_field='REGISTRY_ID',
date_field='REPORTING_YEAR', date_format='%Y' )
data_sets[ ds.name ] = ds
ds_name = 'CWA Violations'
if ( data_set_list is None or ds_name in data_set_list ):
ds = DataSet( name=ds_name, echo_type="NPDES",
base_table='NPDES_QNCR_HISTORY',
table_name='WATER_QUARTERLY_VIOLATIONS_MVIEW',
idx_field='NPDES_ID', date_field='YEARQTR', date_format='%Y',
agg_type = "sum", agg_col="NUME90Q",
unit="effluent violations")
data_sets[ ds.name ] = ds
ds_name = 'CWA Inspections'
if ( data_set_list is None or ds_name in data_set_list ):
ds = DataSet( name=ds_name, echo_type="NPDES",
base_table='NPDES_INSPECTIONS',
table_name='CLEAN_WATER_INSPECTIONS_MVIEW',
idx_field='NPDES_ID', date_field='ACTUAL_END_DATE',
date_format='%m/%d/%Y', agg_type = "count",
agg_col="STATE_EPA_FLAG", unit="inspections")
# For possible later use in assessing state v federal
data_sets[ ds.name ] = ds
ds_name = 'CWA Penalties'
if ( data_set_list is None or ds_name in data_set_list ):
ds = DataSet( name=ds_name, echo_type="NPDES",
base_table='NPDES_FORMAL_ENFORCEMENT_ACTIONS',
table_name='CLEAN_WATER_ENFORCEMENT_ACTIONS_MVIEW',
idx_field='NPDES_ID', date_field='SETTLEMENT_ENTERED_DATE',
date_format='%m/%d/%Y', agg_type = "sum",
agg_col="FED_PENALTY_ASSESSED_AMT", unit="dollars")
data_sets[ ds.name ] = ds
ds_name = 'SDWA Site Visits'
if ( data_set_list is None or ds_name in data_set_list ):
ds = DataSet( name=ds_name, echo_type="SDWA",
base_table='SDWA_SITE_VISITS',
table_name='SDWA_SITE_VISITS_MVIEW', idx_field='PWSID',
date_field='SITE_VISIT_DATE', date_format='%m/%d/%Y' )
data_sets[ ds.name ] = ds
ds_name = 'SDWA Enforcements'
if ( data_set_list is None or ds_name in data_set_list ):
ds = DataSet( name=ds_name, echo_type="SDWA",
base_table='SDWA_ENFORCEMENTS',
table_name='SDWA_ENFORCEMENTS_MVIEW', idx_field='PWSID',
date_field='ENFORCEMENT_DATE', date_format='%m/%d/%Y' )
data_sets[ ds.name ] = ds
ds_name = 'SDWA Public Water Systems'
if ( data_set_list is None or ds_name in data_set_list ):
ds = DataSet( name=ds_name, echo_type="SDWA",
base_table='SDWA_PUB_WATER_SYSTEMS',
table_name='SDWA_PUB_WATER_SYSTEMS_MVIEW', idx_field='PWSID',
date_field='FISCAL_YEAR', date_format='%Y' )
data_sets[ ds.name ] = ds
ds_name = 'SDWA Violations'
if ( data_set_list is None or ds_name in data_set_list ):
ds = DataSet( name=ds_name, echo_type="SDWA",
base_table='SDWA_VIOLATIONS',
table_name='SDWA_VIOLATIONS_MVIEW', idx_field='PWSID',
date_field='FISCAL_YEAR', date_format='%Y' )
data_sets[ ds.name ] = ds
ds_name = 'SDWA Serious Violators'
if ( data_set_list is None or ds_name in data_set_list ):
ds = DataSet( name=ds_name, echo_type="SDWA",
base_table='SDWA_SERIOUS_VIOLATORS',
table_name='SDWA_SERIOUS_VIOLATORS_MVIEW', idx_field='PWSID',
date_field='FISCAL_YEAR', date_format='%Y' )
data_sets[ ds.name ] = ds
# ds_name = 'RCRA Violations'
# if ( data_set_list is None or ds_name in data_set_list ):
# ds = DataSet( name='SDWA Return to Compliance', echo_type="SDWA",
# table_name='SDWA_RETURN_TO_COMPLIANCE', idx_field='PWSID',
# date_field='FISCAL_YEAR', date_format='%Y' )
# data_sets[ ds.name ] = ds
return data_sets

return {name: DataSet(name=name, **PRESETS[name])
for name in data_set_list or PRESETS.keys()}

0 comments on commit b0a288e

Please sign in to comment.