|
| 1 | +--- |
| 2 | +title: "Graph development" |
| 3 | +format: |
| 4 | + html: |
| 5 | + code-fold: true |
| 6 | +jupyter: python3 |
| 7 | +--- |
| 8 | + |
| 9 | +```{python} |
import os

# Move the working directory up one level so the relative "data/..." paths
# used in the cells below resolve from the parent directory.
# The change is relative: re-running this cell moves up one more level.
os.chdir(os.pardir)
| 12 | +``` |
| 13 | + |
| 14 | +```{python} |
| 15 | +import polars as pl |
| 16 | +import pandas as pd |
| 17 | +import geopandas as gpd |
| 18 | +``` |
| 19 | + |
| 20 | +```{python} |
# Read the 2012 road layer and the PUMA polygon layer.
roads_df = gpd.read_file("data/interim/roads_2012_06.gpkg", engine="pyogrio")
puma = gpd.read_file("data/interim/puma_06.gpkg", engine="pyogrio")

# Isolate the single PUMA with geo_id "0600101" as an independent,
# zero-indexed frame.
tmp_puma = (
    puma.loc[puma["geo_id"] == "0600101"]
    .copy()
    .reset_index(drop=True)
)

# Keep only the parts of the road geometries that fall inside that PUMA.
clipped = roads_df.clip(tmp_puma["geometry"])
| 25 | +``` |
| 26 | + |
| 27 | +```{python} |
# Draw the PUMA outline first, then overlay the clipped roads on the same axes.
base = tmp_puma.plot(edgecolor="black", color="white")
clipped.plot(ax=base, color="red", marker="o", markersize=5)
| 30 | +``` |
| 31 | + |
| 32 | +```{python} |
# Total length of the clipped road geometries
# (expressed in the units of the layer's CRS).
clipped.geometry.length.sum()
| 34 | +``` |
| 35 | + |
| 36 | +```{python} |
# Road length per PUMA for the 2012 road layer.
#
# An explicitly typed empty frame seeds the result so the schema is fixed
# even if no geo_id matches the "06" prefix.
# NOTE: the column is spelled "leangth" (sic); it is kept as-is because the
# other cells build and concatenate frames with this exact name.
empty_df = [
    pl.Series("year", [], dtype=pl.Int64),
    pl.Series("puma_id", [], dtype=pl.String),
    pl.Series("leangth", [], dtype=pl.Float64),
]
frames = [pl.DataFrame(empty_df)]

puma2 = puma[puma["geo_id"].str.startswith("06")]
for pum in puma2["geo_id"]:
    # Select the polygon row(s) for this geo_id and clip the roads to them.
    tmp = puma.loc[puma["geo_id"] == pum]
    clipped = roads_df.clip(tmp["geometry"])
    frames.append(
        pl.DataFrame(
            {
                "year": 2012,
                "puma_id": pum,
                "leangth": clipped.length.sum(),
            }
        )
    )

# Concatenate once at the end instead of re-copying the growing frame on
# every iteration.
df = pl.concat(frames, how="vertical")
print(df)
| 56 | +``` |
| 57 | + |
| 58 | +```{python} |
def process(roads, state_id, pumas_df, year):
    """Compute the total clipped road length for each PUMA of one state.

    Args:
        roads: Road layer exposing ``.clip`` (a GeoDataFrame in this
            notebook); it is clipped once per PUMA.
        state_id: Prefix used to select PUMAs; only rows of ``pumas_df``
            whose ``geo_id`` starts with this string are processed.
        pumas_df: PUMA polygon layer with ``geo_id`` and ``geometry`` columns.
        year: Value stored in the ``year`` column of every output row.

    Returns:
        A polars DataFrame with columns ``year`` (Int64), ``puma_id``
        (String) and ``leangth`` (Float64), one row per selected ``geo_id``
        in the order they appear in ``pumas_df``. The frame is empty when no
        ``geo_id`` matches ``state_id``.

    Note:
        The column name ``leangth`` (sic) is kept because the surrounding
        notebook cells concatenate against that exact schema.
    """
    # Typed empty frame: fixes the output schema even when nothing matches.
    schema_seed = pl.DataFrame(
        [
            pl.Series("year", [], dtype=pl.Int64),
            pl.Series("puma_id", [], dtype=pl.String),
            pl.Series("leangth", [], dtype=pl.Float64),
        ]
    )
    frames = [schema_seed]

    pumas = pumas_df[pumas_df["geo_id"].str.startswith(state_id)]
    for geo_id in pumas["geo_id"]:
        # Clip the roads to the polygon row(s) carrying this geo_id.
        puma_rows = pumas.loc[pumas["geo_id"] == geo_id]
        clipped = roads.clip(puma_rows["geometry"])
        frames.append(
            pl.DataFrame(
                {
                    "year": year,
                    "puma_id": geo_id,
                    "leangth": clipped.length.sum(),
                }
            )
        )
        print("\033[0;35mINFO: \033[0m" + f"Finished processing roads for {geo_id}")

    # Single concatenation instead of re-copying the growing frame per PUMA.
    return pl.concat(frames, how="vertical")
| 80 | +``` |
| 81 | + |
| 82 | +```{python} |
# Run the per-PUMA length computation for the 2012 layer loaded above
# and display the resulting frame.
temp = process(roads=roads_df, state_id="06", pumas_df=puma, year=2012)
temp
| 85 | +``` |
| 86 | + |
| 87 | + |
| 88 | +```{python} |
# Build the per-PUMA road-length table for every year from 2012 to 2018.
#
# NOTE: the column is spelled "leangth" (sic); it must match the frames
# returned by process(), so it is kept as-is.
empty_df = [
    pl.Series("year", [], dtype=pl.Int64),
    pl.Series("puma_id", [], dtype=pl.String),
    pl.Series("leangth", [], dtype=pl.Float64),
]
data = pl.DataFrame(empty_df)

for year in range(2012, 2019):
    # Seed with an empty layer so a year without matching files still yields
    # a (empty) GeoDataFrame with the expected columns.
    road_parts = [gpd.GeoDataFrame(columns=["linear_id", "year", "geometry"])]

    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for file in sorted(os.listdir("data/shape_files/")):
        if not file.startswith(f"roads_{year}_06"):
            continue

        gdf = gpd.read_file(f"data/shape_files/{file}", engine="pyogrio")
        gdf = gdf.rename(columns={"LINEARID": "linear_id"})
        # NOTE(review): county_id is a fixed placeholder for every file —
        # confirm whether it should be derived from the file name.
        gdf["county_id"] = "01063"
        # Tag rows with the year being processed (previously hard-coded to
        # 2012 for every iteration).
        gdf["year"] = year
        # NOTE(review): set_crs(..., allow_override=True) relabels the CRS
        # without transforming coordinates — confirm the source files really
        # are in EPSG:3857, otherwise to_crs is needed.
        gdf = gdf[["year", "linear_id", "county_id", "geometry"]].set_crs(
            3857, allow_override=True
        )
        road_parts.append(gdf)
        print("\033[0;36mINFO: \033[0m" + f"Finished processing roads for {file}")

    # One concatenation per year instead of re-copying the layer per file.
    roads_df = pd.concat(road_parts, ignore_index=True)

    data = pl.concat([data, process(roads_df, "06", puma, year)], how="vertical")
| 107 | +``` |
| 108 | + |
| 109 | +```{python} |
# Persist the combined per-year, per-PUMA table.
output_path = "data/processed/roads_final_06.parquet"
data.write_parquet(output_path)
| 111 | +``` |
0 commit comments