Skip to content

Commit

Permalink
update script
Browse files Browse the repository at this point in the history
  • Loading branch information
semio committed Jan 22, 2024
1 parent 4deabc0 commit 3ceacfa
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 1 deletion.
25 changes: 25 additions & 0 deletions etl/notebooks/economy_entity_domain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""a script to produce the data in economy_entity_domain.xlsx"""

import pandas as pd
import polars as pl

# Workbook that holds the economy group classification (relative to this
# script's working directory).
input_file = "../source/CLASS.xlsx"

# The "Groups" sheet is read with pandas and then converted to a polars
# DataFrame for the de-duplication step below.
data = pl.from_pandas(pd.read_excel(input_file, sheet_name="Groups"))

# De-duplicate on the (GroupCode, GroupName) pair as a whole.
# `pl.col([...]).unique()` would de-duplicate each column independently,
# which can yield columns of different lengths (or misaligned rows) whenever
# codes and names are not strictly one-to-one.  A frame-level `unique` keeps
# each code next to its own name; `keep="first"` + `maintain_order=True`
# preserves the order of first appearance in the sheet.
groups = data.select(["GroupCode", "GroupName"]).unique(
    keep="first", maintain_order=True
)

groups.write_csv('./groups.csv')

# Manual follow-up: copy the contents of groups.csv into
# economy_entity_domain.xlsx and use VLOOKUP there to update the data.
2 changes: 1 addition & 1 deletion etl/scripts/etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
# Relative directories used by the ETL script; being relative, they resolve
# against the current working directory at run time.
source_dir = '../source/'
output_dir = '../../'

# Paths of the input files, all located under source_dir.
# NOTE(review): this diff view lists two assignments to data_csv (the commit
# reports 1 addition & 1 deletion); presumably the first is the removed line
# and the second the added one -- if both were present, the later assignment
# ('WDIData.csv') is the one that takes effect. Confirm against the real file.
data_csv = os.path.join(source_dir, 'WDICSV.csv')
data_csv = os.path.join(source_dir, 'WDIData.csv')
country_csv = os.path.join(source_dir, 'WDICountry.csv')
series_csv = os.path.join(source_dir, 'WDISeries.csv')
groups_xls = os.path.join(source_dir, 'CLASS.xlsx')
Expand Down

0 comments on commit 3ceacfa

Please sign in to comment.