def load_data():
    """Load data from the CSV files referendum/regions/departments.

    Returns
    -------
    tuple of pd.DataFrame
        ``(referendum, regions, departments)``, in that order.
    """
    # Paths are relative to the current working directory.
    # The referendum file is ';'-separated; the other two use the
    # default ',' separator.
    referendum = pd.read_csv("./data/referendum.csv", sep=';')
    regions = pd.read_csv("./data/regions.csv")
    departments = pd.read_csv("./data/departments.csv")
    return referendum, regions, departments


def merge_regions_and_departments(regions, departments):
    """Merge regions and departments in one DataFrame.

    The columns in the final DataFrame should be:
    ['code_reg', 'name_reg', 'code_dep', 'name_dep']

    Parameters
    ----------
    regions : pd.DataFrame
        Must contain the columns 'code' and 'name'.
    departments : pd.DataFrame
        Must contain the columns 'region_code', 'code' and 'name'.

    Returns
    -------
    pd.DataFrame
        One row per department whose 'region_code' matches a region
        'code' (inner join).
    """
    # Select and rename columns *by name* before merging, instead of
    # overwriting ``merged.columns`` positionally: the result no longer
    # depends on the number or order of columns in the input files.
    reg = regions[['code', 'name']].rename(
        columns={'code': 'code_reg', 'name': 'name_reg'})
    dep = departments[['region_code', 'code', 'name']].rename(
        columns={'code': 'code_dep', 'name': 'name_dep'})
    merged = reg.merge(dep, left_on='code_reg', right_on='region_code')
    return merged[['code_reg', 'name_reg', 'code_dep', 'name_dep']]


def merge_referendum_and_areas(referendum, regions_and_departments):
    """Merge referendum and regions_and_departments in one DataFrame.

    Department codes on both sides are converted to strings and
    left-padded with zeros to at least two characters (e.g. ``1`` ->
    ``"01"``) so that they can be compared. The join is an inner join, so
    referendum rows whose department code has no match in
    ``regions_and_departments`` are dropped.

    Neither input DataFrame is modified.

    Parameters
    ----------
    referendum : pd.DataFrame
        Must contain the column 'Department code'.
    regions_and_departments : pd.DataFrame
        Must contain the column 'code_dep'.

    Returns
    -------
    pd.DataFrame
        All columns of ``referendum`` (with the normalised
        'Department code') followed by those of
        ``regions_and_departments``.
    """
    dep_col = "Department code"
    # ``assign`` returns new frames: the callers' DataFrames must not be
    # altered as a side effect of the merge.
    votes = referendum.assign(
        **{dep_col: referendum[dep_col].astype(str).str.zfill(2)})
    areas = regions_and_departments.assign(
        code_dep=regions_and_departments["code_dep"].astype(str).str.zfill(2))
    return votes.merge(areas, left_on=dep_col, right_on="code_dep")


def compute_referendum_result_by_regions(referendum_and_areas):
    """Return a table with the absolute count for each region.

    The return DataFrame should be indexed by `code_reg` and have columns:
    ['name_reg', 'Registered', 'Abstentions', 'Null', 'Choice A', 'Choice B']
    """
    count_cols = ["Registered", "Abstentions", "Null", "Choice A", "Choice B"]
    # Aggregate only the count columns: summing the whole frame would
    # also "sum" (concatenate) text columns such as town names, or fail
    # on them, depending on the pandas version.
    totals = referendum_and_areas.groupby(
        ["code_reg", "name_reg"])[count_cols].sum()
    # Keep 'code_reg' as the index and move 'name_reg' back to a column.
    return totals.reset_index("name_reg")[["name_reg"] + count_cols]


def plot_referendum_map(referendum_result_by_regions):
    """Plot a map with the results from the referendum.

    * Load the geographic data from ``./data/regions.geojson``.
    * Merge it with ``referendum_result_by_regions`` (geojson 'code'
      against 'code_reg').
    * Display 'Choice A' over the expressed ballots
      ('Choice A' + 'Choice B').
    * Return a gpd.GeoDataFrame with a column 'ratio' containing the
      results.
    """
    geo_regions = gpd.read_file("./data/regions.geojson")
    # 'code_reg' may be the index of the results table; pandas accepts an
    # index level name as a merge key.
    merged = pd.merge(geo_regions,
                      referendum_result_by_regions,
                      left_on="code",
                      right_on="code_reg")
    expressed = merged["Choice A"] + merged["Choice B"]
    merged["ratio"] = merged["Choice A"] / expressed
    merged = gpd.GeoDataFrame(merged)
    merged.plot(column="ratio")
    plt.show()
    return merged