|
| 1 | +##============================================================================== |
| 2 | +## INITIALIZE |
| 3 | +##============================================================================== |
| 4 | +## Remove all objects from the current workspace, then run garbage collection (resetting the memory usage statistics) |
| 5 | +rm(list=ls()) |
| 6 | +gc(reset=TRUE) |
| 7 | + |
| 8 | +## Load libraries (data.table, MASS) & source the project functions found in CODE/functions/ |
| 9 | +geneorama::loadinstall_libraries(c("data.table", "MASS")) |
| 10 | +geneorama::sourceDir("CODE/functions/") |
| 11 | +## Import shift function from geneorama under a local name; it is used below to look up each license's previous inspection. NOTE(review): this global `shift` takes precedence over any `shift` exported by data.table -- presumably intentional; confirm |
| 12 | +shift <- geneorama::shift |
| 13 | + |
| 14 | +##============================================================================== |
| 15 | +## LOAD CACHED RDS FILES |
| 16 | +##============================================================================== |
| 17 | +foodInspect <- readRDS("DATA/13_food_inspections.Rds") |
| 18 | + |
| 19 | +## Apply row filter to remove invalid data (filter_foodInspect is not defined in this script; presumably sourced from CODE/functions/) |
| 20 | +foodInspect <- filter_foodInspect(foodInspect) |
| 21 | + |
| 22 | +## Drop the Violations column from the food inspections; violations are captured in the |
| 23 | +## violation matrix data, which is joined in by Inspection_ID further below |
| 24 | +foodInspect$Violations <- NULL |
| 25 | + |
| 26 | +## Import violation matrix which lists violations by categories: |
| 27 | +## Critical, serious, and minor violations |
| 28 | +violation_dat <- readRDS("DATA/21_food_inspection_violation_matrix.Rds") |
| 29 | + |
| 30 | +##============================================================================== |
| 31 | +## CALCULATE FEATURES |
| 32 | +##============================================================================== |
| 33 | + |
| 34 | +## Facility_Type_Clean: Anything that is not "restaurant" or "grocery" is "other" (via the project helper categorize, called with ignore.case = TRUE) |
| 35 | +foodInspect[ , Facility_Type_Clean := |
| 36 | + categorize(x = Facility_Type, |
| 37 | + primary = list(Restaurant = "restaurant", |
| 38 | + Grocery_Store = "grocery"), |
| 39 | + ignore.case = TRUE)] |
| 40 | +## Join in the violation matrix on Inspection_ID. NOTE(review): with merge's defaults only inspections present in both tables are kept -- presumably intended; confirm |
| 41 | +foodInspect <- merge(x = foodInspect, |
| 42 | + y = violation_dat, |
| 43 | + by = "Inspection_ID") |
| 44 | +## Create 1/0 pass / fail flags from an exact match of Results against "Pass" / "Fail" |
| 45 | +foodInspect[ , pass_flag := ifelse(Results=="Pass",1, 0)] |
| 46 | +foodInspect[ , fail_flag := ifelse(Results=="Fail",1, 0)] |
| 47 | +## Set key (License, then Inspection_Date) to ensure that records are treated CHRONOLOGICALLY within each license... |
| 48 | +setkey(foodInspect, License, Inspection_Date) |
| 49 | +## Then find previous info by "shifting" the columns (grouped by License); the last argument (0) is presumably the fill value used where no previous record exists -- confirm against geneorama::shift |
| 50 | +foodInspect[ , pastFail := shift(fail_flag, -1, 0), by = License] |
| 51 | +foodInspect[ , pastCritical := shift(criticalCount, -1, 0), by = License] |
| 52 | +foodInspect[ , pastSerious := shift(seriousCount, -1, 0), by = License] |
| 53 | +foodInspect[ , pastMinor := shift(minorCount, -1, 0), by = License] |
| 54 | + |
| 55 | +## Calculate time since last inspection as the date difference divided by 365 (assumes Inspection_Date is a Date so the difference is in days, giving years -- TODO confirm). |
| 56 | +## If the time is NA, this means it's the first inspection; add an indicator |
| 57 | +## variable (firstRecord) to indicate that it's the first inspection. First-inspection rows are then given timeSinceLast = 2, and all values are capped at 2. |
| 58 | +foodInspect[i = TRUE , |
| 59 | + j = timeSinceLast := as.numeric( |
| 60 | + Inspection_Date - shift(Inspection_Date, -1, NA)) / 365, |
| 61 | + by = License] |
| 62 | +foodInspect[ , firstRecord := 0] |
| 63 | +foodInspect[is.na(timeSinceLast), firstRecord := 1] |
| 64 | +foodInspect[is.na(timeSinceLast), timeSinceLast := 2] |
| 65 | +foodInspect[ , timeSinceLast := pmin(timeSinceLast, 2)] |
| 66 | + |
| 67 | +##============================================================================== |
| 68 | +## SAVE RDS (table is re-keyed by Inspection_ID, then written to DATA/23_food_insp_features.Rds) |
| 69 | +##============================================================================== |
| 70 | +setkey(foodInspect, Inspection_ID) |
| 71 | +saveRDS(foodInspect, file.path("DATA/23_food_insp_features.Rds")) |
| 72 | + |
| 73 | + |
| 74 | + |
0 commit comments