Skip to content

Commit

Permalink
update recipes and scripts to include gini dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
semio committed Dec 22, 2016
1 parent caf1c89 commit 492ed47
Show file tree
Hide file tree
Showing 6 changed files with 683 additions and 7 deletions.
48 changes: 48 additions & 0 deletions etl/recipes/recipe_gw_gini.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# recipe for dataset
info:
id: gw-gini
base: # the datasets for ingredients
- &d1 ddf--gapminder--gini

config:
# the path to search recipe files to include
recipes_dir: ./
# the path to search dictionary files
dictionary_dir: ./translation_dictionaries
# the path to search ddf datasets
ddf_dir: /Users/semio/src/work/Gapminder


# define your ingredients and procedures below
ingredients:
# Ingredients have 5 parameters: id, dataset, key, value, filter
# in which:
# id: the name of ingredient, will be used in cooking procedures
# dataset: which dataset is this ingredient from
# key: the key to filter in ddf--index
# value: the value to filter in ddf--index
# filter: (optional) more keyword filters
#
# below are some examples of ingredients:
#
- id: gini-datapoints
dataset: *d1
key: "geo,time"
value:
- gapminder_gini
- id: gini-concepts
dataset: *d1
key: concept
value: "*"
filter:
concept:
- gapminder_gini
#
# - id: gw-concepts-aqua
# dataset: *d2
# key: concept
# value: "*"
# filter:
# concept: &CONC_FILTER
# - agricultural_water_withdrawal_percent_of_total
# - desalinated_water_produced_billion_cu_meters
3 changes: 3 additions & 0 deletions etl/recipes/recipe_main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ include:
- recipe_gw_co2.yaml
- recipe_gw_pop.yaml
- recipe_wdi.yaml
- recipe_gw_gini.yaml
# - recipe_bp.yaml
# - recipe_cdiac.yaml
# - recipe_cme.yaml
Expand Down Expand Up @@ -57,6 +58,7 @@ cooking:
- pop-datapoints-final
- co2-datapoints-final
- wdi-datapoints-final
- gini-datapoints
# - bp-datapoints-final
# - cdiac-datapoints-final
# - cme-datapoints-final
Expand Down Expand Up @@ -94,5 +96,6 @@ cooking:
- gdp-concepts-final
- pop-concepts-final
- co2-concepts-final
- gini-concepts
options:
deep: true
18 changes: 14 additions & 4 deletions etl/scripts/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,19 @@ def concepts_tag_column():

concs['tags'] = concs['tags'].fillna('_none')

# remove concepts from dont panic poverty
concs = concs.drop(['sg_population', 'sg_gini', 'sg_gdp_p_cap_const_ppp2011_dollar'])

concs.to_csv(os.path.join(out_dir, 'ddf--concepts.csv'), encoding='utf8')


def remove_yearly_co2_emissions_tonnes():
"""remove datapoints for yearly_co2_emissions_tonnes"""
def remove_unneeded_dps():
"""remove some datapoints"""
# FIXME: remove this function when chef is ready for this kind of task.
os.remove(os.path.join(out_dir, 'ddf--datapoints--yearly_co2_emissions_tonnes--by--geo--time.csv'))
os.remove(os.path.join(out_dir, 'ddf--datapoints--sg_gdp_p_cap_const_ppp2011_dollar--by--geo--time.csv'))
os.remove(os.path.join(out_dir, 'ddf--datapoints--sg_population--by--geo--time.csv'))
os.remove(os.path.join(out_dir, 'ddf--datapoints--sg_gini--by--geo--time.csv'))


def apply_patches():
Expand All @@ -69,11 +75,15 @@ def apply_patches():
'ddf--concepts.1.csv',
'ddf--concepts.2.csv',
'ddf--concepts.3.csv',
'ddf--concepts.4.csv'
'ddf--concepts.4.csv',
'ddf--concepts.5.csv'
],
'ddf--entities--tag.csv': [
'ddf--entities--tag.0.csv'
],
'ddf--index.csv' : [
'ddf--inedx.0.csv'
]
}
# apply the patches
for f, ps in patches.items():
Expand All @@ -92,7 +102,7 @@ def apply_patches():
def do_all_changes():
print("applying patches to DDF...")
concepts_tag_column()
remove_yearly_co2_emissions_tonnes()
remove_unneeded_dps()
apply_patches()


Expand Down
3 changes: 0 additions & 3 deletions etl/scripts/patches/ddf--concepts.1.csv
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,6 @@
"","life_expectancy_at_birth_data_from_ihme","Life expectancy at birth (IHME)","measure","","https://docs.google.com/spreadsheet/pub?key=0ArfEDsV3bBwCdG9jSHA0WklHU0dqUnBCVUpVOXFzQUE","","[""linear"", ""log""]","","","","Life expectancy at birth, the data is taken from Institute for health metrics and evaluation","alternative_demography_indicators"
"...","...","...","...","...","...","...","...","...","...","...","...","..."
"","newborn_deaths","Newborn deaths","measure","","https://docs.google.com/spreadsheet/pub?key=1iTvolhgIMta3MmsoPkGnewJiaybev3MvR9e_7mxZ48o","","[""linear"", ""log""]","","","","Neonatal deaths Children dying within the first 28 days.","newborn_infants"
":","sg_population","sg_population","measure","","http://www.gapminder.org/news/data-sources-dont-panic-end-poverty","","[""linear"", ""log""]","","","","","_none"
"","sg_gini","sg_gini","measure","","http://www.gapminder.org/news/data-sources-dont-panic-end-poverty","","[""linear""]","","","","","_none"
"->","sg_gdp_p_cap_const_ppp2011_dollar","sg_gdp_p_cap_const_ppp2011_dollar","measure","","http://www.gapminder.org/news/data-sources-dont-panic-end-poverty","{""palette"": {""3"": ""#F77481"", ""2"": ""#E1CE00"", ""1"": ""#B4DE79"", ""0"": ""#62CCE3""}}->{""palette"": {""2"": ""#E1CE00"", ""1"": ""#B4DE79"", ""3"": ""#F77481"", ""0"": ""#62CCE3""}}","[""log"", ""linear""]","","","exp","","_none"
":","description","Description","string","","","","","","","","","_none"
":","indicator_url","Indicator url","string","","","","","","","","","_none"
":","name","Name","string","","https://github.com/open-numbers/ddf--gapminder--dim_geo_countries_and_groups/blob/master/ddf--list--geo--country.csv","","[""ordinal""]","","","","","_none"
Expand Down
6 changes: 6 additions & 0 deletions etl/scripts/patches/ddf--concepts.5.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
@@,concept,name,...,color,scales,drill_up,...
...,...,...,...,...,...,...,...
,child_mortality_0_5_year_olds_dying_per_1000_born,"Child mortality rate",...,,"[""log"", ""linear""]",,...
->,gapminder_gini,"Gapminder gini",...,,"NULL->[""linear""]",,...
,life_expectancy_years,"Life expectancy",...,,"[""linear"", ""log""]",,...
...,...,...,...,...,...,...,...
Loading

0 comments on commit 492ed47

Please sign in to comment.