-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_processing.Rmd
56 lines (45 loc) · 1.74 KB
/
data_processing.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
---
title: "Watts Up CA: Data Processing"
author: "Claire Boyd, Kathryn Link-Oberstar, Megan Moore, Eshan Prasher"
date: "`r Sys.Date()`"
output: pdf_document
description: This file reads in monthly energy consumption and cost data for every US state since 1990 and extracts monthly residential consumption for the the state of California, and returns an excel file with year-month consumtpion data for the state of California from 1990 to 2023.
---
```{r}
suppressMessages({
library(readxl)
library(dplyr)
library(tidyverse)
library(openxlsx)
})
```
```{r}
# Load the Energy Consumption Data
pathname <- "./data/electricity_consumption_data/"
files <- list.files(path=pathname, pattern = "\\.(xls|xlsx)$")
print(files)
combined_data <- data.frame()
# Convert Files to df, collapse the 3 header columns into 1 header
for (file in files) {
full_path <- file.path(pathname, file)
data <- read_excel(full_path, col_names = FALSE)
new_headers <- apply(data[1:3, ], 2, function(x) paste(na.omit(x), collapse = " "))
names(data) <- new_headers
data <- data[-(1:3), ]
data <- data.frame(lapply(data, function(x) {
if(is.factor(x)) as.character(x) else x
}), stringsAsFactors = FALSE)
combined_data <- bind_rows(combined_data, data)
}
# Make Month and Year Numeric
combined_data$Month <- as.numeric(combined_data$Month)
combined_data$Year <- as.numeric(combined_data$Year)
# Filter data to exclude NA and only include CA
combined_data <- combined_data %>% filter(!is.na(Year) & !is.na(Month) & combined_data$State == 'CA')
# Sort by year and month
sorted_data <- combined_data %>%
arrange(Year, Month)
# Write to Excel
output_path <- "./data/sorted_electricity_consumption_data.xlsx"
cat('\n', 'Data processed and written to', output_path)
```