@@ -54,32 +54,33 @@ def process_acs(self):
54
54
]
55
55
acs = pl .DataFrame (empty_df ).clear ()
56
56
57
- for file in os .listdir ("data/raw" ):
58
- if file .startswith ("acs" ):
59
- original = pl .read_parquet (f"data/raw/{ file } " )
60
- for sex in [1 , 2 , 3 ]:
61
- for race in ["RACAIAN" ,"RACASN" ,"RACBLK" ,"RACNUM" ,"RACWHT" ,"RACSOR" ,"HISP" ,"ALL" ,]:
62
- df = original
63
- if not sex == 3 :
64
- df = df .filter (pl .col ("SEX" ) == sex )
65
- if not race == "ALL" :
66
- df = df .filter (pl .col (race ) == 1 )
67
- df = df .filter (pl .col ("JWMNP" ) > 0 )
68
- df = df .select ("year" , "state" , "PUMA" , "PWGTP" , "JWMNP" )
69
- df = df .with_columns (total_time = (pl .col ("PWGTP" ) * pl .col ("JWMNP" )))
70
- df = df .group_by ("year" , "state" , "PUMA" ).agg (
71
- pl .col ("PWGTP" , "total_time" ).sum ())
72
- df = df .select ("year" ,"state" , "PUMA" , "PWGTP" ,
73
- (pl .col ("total_time" ) / pl .col ("PWGTP" )).alias ("avg_time" ),
74
- )
75
- df = df .with_columns (
76
- sex = pl .lit (sex ),
77
- race = pl .lit (race ),
78
- )
79
- acs = pl .concat ([acs , df ], how = "vertical" )
80
- acs .write_parquet ("data/interim/acs.parquet" )
81
- if self .debug :
82
- print ("\033 [0;36mINFO: \033 [0m" + "Finished processing acs" )
57
+ if not os .path .exists ("data/processed/acs.parquet" ):
58
+ for file in os .listdir ("data/raw" ):
59
+ if file .startswith ("acs" ):
60
+ original = pl .read_parquet (f"data/raw/{ file } " )
61
+ for sex in [1 , 2 , 3 ]:
62
+ for race in ["RACAIAN" ,"RACASN" ,"RACBLK" ,"RACNUM" ,"RACWHT" ,"RACSOR" ,"HISP" ,"ALL" ,]:
63
+ df = original
64
+ if not sex == 3 :
65
+ df = df .filter (pl .col ("SEX" ) == sex )
66
+ if not race == "ALL" :
67
+ df = df .filter (pl .col (race ) == 1 )
68
+ df = df .filter (pl .col ("JWMNP" ) > 0 )
69
+ df = df .select ("year" , "state" , "PUMA" , "PWGTP" , "JWMNP" )
70
+ df = df .with_columns (total_time = (pl .col ("PWGTP" ) * pl .col ("JWMNP" )))
71
+ df = df .group_by ("year" , "state" , "PUMA" ).agg (
72
+ pl .col ("PWGTP" , "total_time" ).sum ())
73
+ df = df .select ("year" ,"state" , "PUMA" , "PWGTP" ,
74
+ (pl .col ("total_time" ) / pl .col ("PWGTP" )).alias ("avg_time" ),
75
+ )
76
+ df = df .with_columns (
77
+ sex = pl .lit (sex ),
78
+ race = pl .lit (race ),
79
+ )
80
+ acs = pl .concat ([acs , df ], how = "vertical" )
81
+ acs .write_parquet ("data/processed/acs.parquet" )
82
+ if self .debug :
83
+ print ("\033 [0;36mINFO: \033 [0m" + "Finished processing acs" )
83
84
84
85
def process_roads (self ):
85
86
roads = gpd .GeoDataFrame (columns = ['linear_id' , 'year' , 'geometry' ])
0 commit comments