Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Separate presets, construction in make_data_sets #12

Merged
merged 2 commits into from
Oct 21, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
375 changes: 213 additions & 162 deletions make_data_sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,217 @@

from ECHO_modules.DataSet import DataSet


# These are all the presets that `make_data_sets()` can construct.
# The keys of this dictionary are the preset names and the values are
# dictionaries of the constructor arguments for `DataSet` that should be used
# when creating one based on the preset.
#
# Not every preset supplies `agg_type`, `agg_col`, and `unit`; presets that
# omit them pass nothing for those arguments.
# NOTE(review): presumably `DataSet` defines defaults for them -- confirm in
# ECHO_modules.DataSet.
PRESETS = {
    "RCRA Violations": dict(
        idx_field="ID_NUMBER",
        base_table="RCRA_VIOLATIONS",
        table_name="RCRA_VIOLATIONS_MVIEW",
        echo_type="RCRA",
        date_field="DATE_VIOLATION_DETERMINED",
        date_format="%m/%d/%Y",
        agg_type="count",
        agg_col="VIOL_DETERMINED_BY_AGENCY",
        unit="violations",
    ),

    "RCRA Inspections": dict(
        idx_field="ID_NUMBER",
        base_table="RCRA_EVALUATIONS",
        table_name="RCRA_EVALUATIONS_MVIEW",
        echo_type="RCRA",
        date_field="EVALUATION_START_DATE",
        date_format="%m/%d/%Y",
        agg_type="count",
        agg_col="EVALUATION_AGENCY",
        unit="inspections",
    ),

    "RCRA Penalties": dict(
        echo_type="RCRA",
        base_table="RCRA_ENFORCEMENTS",
        table_name="RCRA_ENFORCEMENTS_MVIEW",
        idx_field="ID_NUMBER",
        date_field="ENFORCEMENT_ACTION_DATE",
        date_format="%m/%d/%Y",
        agg_type="sum",
        agg_col="FMP_AMOUNT",
        unit="dollars",
    ),

    "ICIS EPA Inspections": dict(
        echo_type="AIR",
        base_table="ICIS_FEC_EPA_INSPECTIONS",
        table_name="AIR_INSPECTIONS_MVIEW",
        idx_field="REGISTRY_ID",
        date_field="ACTUAL_END_DATE",
        date_format="%m/%d/%Y",
        agg_type="count",
        agg_col="ACTIVITY_TYPE_DESC",
        unit="inspections",
    ),

    "CAA Violations": dict(
        echo_type="AIR",
        base_table="ICIS-AIR_VIOLATION_HISTORY",
        table_name="AIR_VIOLATIONS_MVIEW",
        idx_field="PGM_SYS_ID",
        date_field="Date",
        date_format="%m-%d-%Y",
        agg_type="count",
        agg_col="AGENCY_TYPE_DESC",
        unit="violations",
    ),

    "CAA Penalties": dict(
        echo_type="AIR",
        base_table="ICIS-AIR_FORMAL_ACTIONS",
        table_name="AIR_FORMAL_ACTIONS_MVIEW",
        idx_field="PGM_SYS_ID",
        date_field="SETTLEMENT_ENTERED_DATE",
        date_format="%m/%d/%Y",
        agg_type="sum",
        agg_col="PENALTY_AMOUNT",
        unit="dollars",
    ),

    "CAA Inspections": dict(
        echo_type="AIR",
        base_table="ICIS-AIR_FCES_PCES",
        table_name="AIR_COMPLIANCE_MVIEW",
        idx_field="PGM_SYS_ID",
        date_field="ACTUAL_END_DATE",
        date_format="%m-%d-%Y",
        agg_type="count",
        agg_col="STATE_EPA_FLAG",
        unit="inspections",
    ),

    # The only preset whose `echo_type` is a list rather than a single string.
    "Combined Air Emissions": dict(
        echo_type=["GHG", "TRI"],
        base_table="POLL_RPT_COMBINED_EMISSIONS",
        table_name="COMBINED_AIR_EMISSIONS_MVIEW",
        idx_field="REGISTRY_ID",
        date_field="REPORTING_YEAR",
        date_format="%Y",
    ),

    "Greenhouse Gas Emissions": dict(
        echo_type="GHG",
        base_table="POLL_RPT_COMBINED_EMISSIONS",
        table_name="GREENHOUSE_GASES_MVIEW",
        idx_field="REGISTRY_ID",
        date_field="REPORTING_YEAR",
        date_format="%Y",
        agg_type="sum",
        agg_col="ANNUAL_EMISSION",
        unit="metric tons of CO2 equivalent",
    ),

    "Toxic Releases": dict(
        echo_type="TRI",
        base_table="POLL_RPT_COMBINED_EMISSIONS",
        table_name="TOXIC_RELEASES_MVIEW",
        idx_field="REGISTRY_ID",
        date_field="REPORTING_YEAR",
        date_format="%Y",
    ),

    "CWA Violations": dict(
        echo_type="NPDES",
        base_table="NPDES_QNCR_HISTORY",
        table_name="WATER_QUARTERLY_VIOLATIONS_MVIEW",
        idx_field="NPDES_ID",
        date_field="YEARQTR",
        date_format="%Y",
        agg_type="sum",
        agg_col="NUME90Q",
        unit="effluent violations",
    ),

    "CWA Inspections": dict(
        echo_type="NPDES",
        base_table="NPDES_INSPECTIONS",
        table_name="CLEAN_WATER_INSPECTIONS_MVIEW",
        idx_field="NPDES_ID",
        date_field="ACTUAL_END_DATE",
        date_format="%m/%d/%Y",
        agg_type="count",
        agg_col="STATE_EPA_FLAG",
        unit="inspections",
    ),

    "CWA Penalties": dict(
        echo_type="NPDES",
        base_table="NPDES_FORMAL_ENFORCEMENT_ACTIONS",
        table_name="CLEAN_WATER_ENFORCEMENT_ACTIONS_MVIEW",
        idx_field="NPDES_ID",
        date_field="SETTLEMENT_ENTERED_DATE",
        date_format="%m/%d/%Y",
        agg_type="sum",
        agg_col="FED_PENALTY_ASSESSED_AMT",
        unit="dollars",
    ),

    "SDWA Site Visits": dict(
        echo_type="SDWA",
        base_table="SDWA_SITE_VISITS",
        table_name="SDWA_SITE_VISITS_MVIEW",
        idx_field="PWSID",
        date_field="SITE_VISIT_DATE",
        date_format="%m/%d/%Y",
    ),

    "SDWA Enforcements": dict(
        echo_type="SDWA",
        base_table="SDWA_ENFORCEMENTS",
        table_name="SDWA_ENFORCEMENTS_MVIEW",
        idx_field="PWSID",
        date_field="ENFORCEMENT_DATE",
        date_format="%m/%d/%Y",
    ),

    "SDWA Public Water Systems": dict(
        echo_type="SDWA",
        base_table="SDWA_PUB_WATER_SYSTEMS",
        table_name="SDWA_PUB_WATER_SYSTEMS_MVIEW",
        idx_field="PWSID",
        date_field="FISCAL_YEAR",
        date_format="%Y",
    ),

    "SDWA Violations": dict(
        echo_type="SDWA",
        base_table="SDWA_VIOLATIONS",
        table_name="SDWA_VIOLATIONS_MVIEW",
        idx_field="PWSID",
        date_field="FISCAL_YEAR",
        date_format="%Y",
    ),

    "SDWA Serious Violators": dict(
        echo_type="SDWA",
        base_table="SDWA_SERIOUS_VIOLATORS",
        table_name="SDWA_SERIOUS_VIOLATORS_MVIEW",
        idx_field="PWSID",
        date_field="FISCAL_YEAR",
        date_format="%Y",
    ),

    # Intentionally disabled. Kept (commented out) as a note that this table
    # exists in the database and that this is its configuration. Unlike the
    # active presets above, it has no `base_table` entry.
    # "SDWA Return to Compliance": dict(
    #     echo_type="SDWA",
    #     table_name="SDWA_RETURN_TO_COMPLIANCE",
    #     idx_field="PWSID",
    #     date_field="FISCAL_YEAR",
    #     date_format="%Y",
    # ),
}


def make_data_sets(data_set_list=None):
    """
    Create DataSet objects from the preset configurations in `PRESETS`.

    Each selected preset is built as ``DataSet(name=<preset name>, **<preset
    arguments>)``, so `PRESETS` is the single place where the constructor
    arguments live.

    Parameters
    ----------
    data_set_list : container of str, optional
        Names of the presets to construct. When ``None`` (the default), every
        preset in `PRESETS` is constructed. Names that do not match a preset
        are ignored, and an empty container yields an empty result.

    Returns
    -------
    dict
        The constructed `DataSet` objects keyed by their ``name`` attribute,
        in the order the presets are declared in `PRESETS`.
    """
    data_sets = {}
    for preset_name, preset_args in PRESETS.items():
        # Compare against None explicitly: an empty selection must build
        # nothing, whereas `data_set_list or PRESETS` would build everything.
        if data_set_list is None or preset_name in data_set_list:
            ds = DataSet(name=preset_name, **preset_args)
            data_sets[ds.name] = ds
    return data_sets