Commit c6e3d66

update based on Alison's email

This just fixed the figure -- where Wind was plotted instead of Temp

ericward-noaa committed Feb 26, 2021
1 parent a7b089c commit c6e3d66
Showing 38 changed files with 1,211 additions and 24 deletions.
6 changes: 3 additions & 3 deletions Lab-intro-to-stan/fitting-models-with-stan.Rmd
@@ -1,7 +1,7 @@
---
output:
-  pdf_document: default
   html_document: default
+  pdf_document: default
---
```{r stan-setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, comment = NA, cache = TRUE, tidy.opts = list(width.cutoff = 60), tidy = TRUE, fig.align = "center", out.width = "80%", warning=FALSE, message=FALSE)
@@ -99,11 +99,11 @@ One of the other useful things we can do is look at the predicted values of our
```{r stan-fig-lm, fig.cap='Data and predicted values for the linear regression model.'}
plot(apply(pars$pred, 2, mean),
main = "Predicted values", lwd = 2,
-  ylab = "Wind", ylim = c(min(pars$pred), max(pars$pred)), type = "l"
+  ylab = "Temp", ylim = c(min(pars$pred), max(pars$pred)), type = "l"
)
lines(apply(pars$pred, 2, quantile, 0.025))
lines(apply(pars$pred, 2, quantile, 0.975))
-points(Wind, col = "red")
+points(Temp, col = "red")
```

### Burn-in and thinning {#sec-stan-burn}
@@ -0,0 +1,6 @@
base
atsar
StanHeaders
ggplot2
rstan
loo
12 binary files not shown.
@@ -0,0 +1,6 @@
base
atsar
StanHeaders
ggplot2
rstan
loo
12 binary files not shown.
78 changes: 78 additions & 0 deletions Lab-intro-to-stan/hw_week7.Rmd
@@ -0,0 +1,78 @@
## Problems {#sec-week7-problems}

For the second part of the homework, we'll use data from the Pacific Decadal Oscillation (PDO) to ask questions about identifying regimes. This dataset can be accessed via the `rsoi` package. First, let's grab the data.

```{r read-data}
library(dplyr)
# install the data package if needed: install.packages("rsoi")
pdo <- rsoi::download_pdo()
# Oct-Dec observations are assigned to the following water year
pdo$water_year <- pdo$Year
winter <- which(pdo$Month %in% c("Oct", "Nov", "Dec"))
pdo$water_year[winter] <- pdo$water_year[winter] + 1
pdo <- dplyr::group_by(pdo, water_year) %>%
  dplyr::summarize(winter_pdo = mean(PDO[which(Month %in% c("Oct", "Nov", "Dec", "Jan", "Feb"))])) %>%
  dplyr::select(winter_pdo, water_year) %>%
  dplyr::rename(year = water_year)
```
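The water-year shift above (October-December observations count toward the following year's winter average) can be sketched language-agnostically; `water_year` here is a hypothetical helper, not part of the lab code:

```python
def water_year(year, month):
    """Map a calendar (year, month) to its water year.

    Oct-Dec belong to the following water year, so one winter_pdo
    value averages Oct-Feb around a single year boundary.
    """
    return year + 1 if month in ("Oct", "Nov", "Dec") else year

# Oct 2020 - Feb 2021 all fall in water year 2021
print([water_year(2020, m) for m in ("Oct", "Nov", "Dec")])  # [2021, 2021, 2021]
print(water_year(2021, "Feb"))  # 2021
```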

1. Identifying regimes using Hidden Markov Models (HMMs)

a. Start by fitting a 2-state HMM to the annual indices of winter PDO. You're welcome to use any package / method you like ('depmixS4' would be a good choice if you're unsure). Assume Gaussian errors.

b. Try to fit the model 10-20 times. Does the likelihood seem reasonably stable?

c. Change the model to a 3-state model. Using AIC as a model selection metric, does the 3-state model perform better (lower AIC) compared to the 2-state model? What about a 1-state model?

d. What is the transition matrix for the best model? What are the persistence probabilities (e.g. probabilities of being in the same state)?

e. Plot the probability of being in the various states from your best model (e.g. the probability of being in state 1 over time).

f. Plot the time series of predicted values from the model.

g. If you include time-varying parameters (e.g. year) in the means of each state, or in the state transition probabilities, does the model do any better?
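For the AIC comparison above, recall AIC = 2k - 2 log L, where k is the number of free parameters. A minimal sketch of the bookkeeping (the parameter-count convention and the log-likelihood values are illustrative assumptions, not output from a fitted model):

```python
def aic(log_lik, k):
    """Akaike information criterion: 2k - 2 * log-likelihood."""
    return 2 * k - 2 * log_lik

def hmm_n_params(n_states):
    # One convention for a Gaussian HMM's free-parameter count:
    # a mean and sd per state, (n_states - 1) free transition
    # probabilities per row, and (n_states - 1) free initial probabilities.
    return 2 * n_states + n_states * (n_states - 1) + (n_states - 1)

# hypothetical log-likelihoods for 1-, 2-, and 3-state fits
for n_states, ll in [(1, -160.0), (2, -148.5), (3, -146.0)]:
    print(n_states, aic(ll, hmm_n_params(n_states)))
```

With these illustrative numbers the 2-state model wins (AIC 311.0 vs 324.0 and 320.0): extra states must buy enough likelihood to offset their added parameters.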

2. Bayesian MARSS modelling

```{r}
library(rstan)
data(neon_barc, package = "atsalibrary")
data <- neon_barc
data$indx <- seq(1, nrow(data))
n_forecast <- 7
n_lag_o2 <- 1
n_lag_temp <- 1
last_obs <- nrow(data)
create_stan_data <- function(data, last_obs, n_forecast, n_lag_o2,
                             n_lag_temp) {
  # test data: the n_forecast rows after the last training observation
  o2_test <- dplyr::filter(data, indx %in% seq(last_obs + 1,
                                               (last_obs + n_forecast)))
  temp_test <- dplyr::filter(data, indx %in% seq(last_obs + 1,
                                                 (last_obs + n_forecast)))
  # training data: non-missing observations up to last_obs
  o2_train <- dplyr::filter(data, indx <= last_obs, !is.na(oxygen))
  o2_x <- o2_train$indx
  o2_y <- o2_train$oxygen
  o2_sd <- o2_train$oxygen_sd
  n_o2 <- nrow(o2_train)
  temp_train <- dplyr::filter(data, indx <= last_obs, !is.na(temperature))
  temp_x <- temp_train$indx
  temp_y <- temp_train$temperature
  temp_sd <- temp_train$temperature_sd
  n_temp <- nrow(temp_train)
  stan_data <- list(n = last_obs, n_lag_o2 = n_lag_o2, n_lag_temp = n_lag_temp,
                    n_forecast = n_forecast, n_o2 = n_o2, o2_x = o2_x, o2_y = o2_y,
                    o2_sd = o2_sd, n_temp = n_temp, temp_x = temp_x, temp_y = temp_y,
                    temp_sd = temp_sd)
  return(list(o2_train = o2_train, temp_train = temp_train,
              stan_data = stan_data, o2_test = o2_test, temp_test = temp_test))
}
m <- stan_model(file = "model_02.stan")
```
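The train/test split inside `create_stan_data()` is purely index-based: rows 1..last_obs train the model and the next n_forecast rows are held out. A minimal sketch of that logic (`split_indices` is a hypothetical helper; note that with `last_obs` set to the number of rows, as in the chunk above, the held-out set is empty):

```python
def split_indices(n_rows, last_obs, n_forecast):
    # training rows: indices up to last_obs
    train = [i for i in range(1, n_rows + 1) if i <= last_obs]
    # test rows: the n_forecast indices after last_obs, if they exist
    test = [i for i in range(1, n_rows + 1)
            if last_obs < i <= last_obs + n_forecast]
    return train, test

print(split_indices(10, 7, 3))  # ([1, 2, 3, 4, 5, 6, 7], [8, 9, 10])
```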





45 changes: 24 additions & 21 deletions Lab-intro-to-stan/model_02.stan
@@ -22,23 +22,23 @@ parameters {
vector[n+n_forecast-1] o2_devs;
vector[n+n_forecast-1] temp_devs;
//cov_matrix[2] Sigma;
-  real log_o2_sd_proc;
-  real log_temp_sd_proc;
real o2_x0[n_lag_o2]; // initial conditions
real<lower=-1,upper=1> o2_b[n_lag_o2];
+  real<lower=0.05, upper=0.25> o2_sd_proc;
+  real<lower=0.3,upper=0.6> temp_sd_proc;
+  real<lower=2> o2_df;
+  real<lower=2> temp_df;
real temp_x0[n_lag_temp]; // initial conditions
real<lower=-1,upper=1> temp_b[n_lag_temp];
}
transformed parameters {
-  real o2_sd_proc;
-  real temp_sd_proc;
vector[n+n_forecast] o2_pred;
vector[n_forecast] o2_forecast;
vector[n+n_forecast] temp_pred;
vector[n_forecast] temp_forecast;
vector[n_lag_temp] temp_b_trans;
vector[n_lag_o2] o2_b_trans;

-  o2_sd_proc = exp(log_o2_sd_proc);
-  temp_sd_proc = exp(log_temp_sd_proc);
// predictions for first states
for(t in 1:n_lag_o2) {
o2_pred[t] = o2_x0[t];
@@ -50,18 +50,16 @@ transformed parameters {
for(i in (1+n_lag_o2):(n+n_forecast)) {
o2_pred[i] = 0;
for(k in 1:n_lag_o2) {
-      o2_pred[i] = o2_pred[i] + o2_b[k]*o2_pred[i-k];
+      o2_pred[i] += o2_b[k]*o2_pred[i-k];
}
-    o2_pred[i] = o2_pred[i] + o2_sd_proc*o2_devs[i-1];
-    if(o2_pred[i] < 0) o2_pred[i] = 0;
+    o2_pred[i] += o2_sd_proc*o2_devs[i-1];
}
for(i in (1+n_lag_temp):(n+n_forecast)) {
temp_pred[i] = 0;
for(k in 1:n_lag_temp) {
-      temp_pred[i] = temp_pred[i] + temp_b[k]*temp_pred[i-k];
+      temp_pred[i] += temp_b[k]*temp_pred[i-k];
}
-    temp_pred[i] = temp_pred[i] + temp_sd_proc*temp_devs[i-1];
-    if(temp_pred[i] < 0) temp_pred[i] = 0;
+    temp_pred[i] += temp_sd_proc*temp_devs[i-1];
}

// this is redundant but easier to work with output -- creates object o2_forecast
@@ -74,16 +74,21 @@ }
}
model {
// initial conditions
-  o2_x0 ~ normal(0,1);
-  o2_b ~ normal(0,1);
+  o2_x0 ~ normal(7,3);
+  o2_b ~ normal(1,1);
// coefficients
-  temp_x0 ~ normal(0,1);
-  temp_b ~ normal(0,1);
-  log_o2_sd_proc ~ normal(-2,1);
-  log_temp_sd_proc ~ normal(-2,1);
+  temp_x0 ~ normal(22,3);
+  temp_b ~ normal(1,1);
+  o2_sd_proc ~ student_t(3,0,2);
+  temp_sd_proc ~ student_t(3,0,2);
//o2_sd_obs ~ student_t(3,0,2);
//temp_sd_obs ~ student_t(3,0,2);
+  // df parameters
+  o2_df ~ student_t(3,2,2);
+  temp_df ~ student_t(3,2,2);
// process standard deviations
-  o2_devs ~ std_normal();
-  temp_devs ~ std_normal();
+  o2_devs ~ student_t(o2_df,0,1);
+  temp_devs ~ student_t(temp_df,0,1);

// likelihood
for(t in 1:n_o2) {
121 changes: 121 additions & 0 deletions Lab-intro-to-stan/model_03.stan
@@ -0,0 +1,121 @@
data {
int<lower=0> n; // size of training set (with NAs)
int<lower=0> n_o2; // number of observations
int o2_x[n_o2];
real o2_y[n_o2];
real o2_z[n_o2];
real o2_sd[n_o2];
int<lower=0> n_temp; // number of observations
int temp_x[n_temp];
real temp_y[n_temp];
real temp_sd[n_temp];
int n_forecast;
int n_lag_o2;
int n_lag_temp;
}
transformed data {
vector[2] zeros;
matrix[2,2] identity;
int lag_o2;
int lag_temp;
zeros = rep_vector(0, 2);
identity = diag_matrix(rep_vector(1.0,2));

lag_o2 = 1;
lag_temp = 1;
if(n_lag_o2 > 1) lag_o2 = 1;
if(n_lag_temp > 1) lag_temp = 1;
}
parameters {
vector[2] devs[n+n_forecast-1];
cov_matrix[2] Sigma;
matrix[2,2] ma_coefs;
//real<lower=2> nu;
real o2_depth; // o2 effect in observation model
real o2_x0[lag_o2]; // initial conditions
real temp_x0[lag_temp]; // initial conditions
real<lower=-2,upper=2> o2_b[lag_o2];
real<lower=-2,upper=2> temp_b[lag_temp];
real<lower=-2,upper=2> temp_on_o2_b[lag_o2];
real<lower=-2,upper=2> o2_on_temp_b[lag_temp];
real u_temp;
real u_o2;
}
transformed parameters {
vector[n+n_forecast] o2_pred;
vector[n_forecast] o2_forecast;
vector[n+n_forecast] temp_pred;
vector[n_forecast] temp_forecast;

// predictions for first states
for(t in 1:lag_o2) {
o2_pred[t] = o2_x0[t];
}
for(t in 1:lag_temp) {
temp_pred[t] = temp_x0[t];
}

for(i in (1+lag_o2):(n+n_forecast)) {
o2_pred[i] = u_o2;
if(n_lag_o2 > 0) {
for(k in 1:lag_o2) {
o2_pred[i] += o2_b[k]*o2_pred[i-k];// + temp_on_o2_b[k]*temp_pred[i-k];
}
} else {
o2_pred[i] += o2_pred[i-1];
}
o2_pred[i] += devs[i-1,1];
}
for(i in (1+lag_temp):(n+n_forecast)) {
temp_pred[i] = u_temp;
if(n_lag_temp > 0) {
for(k in 1:n_lag_temp) {
temp_pred[i] += temp_b[k]*temp_pred[i-k];// + o2_on_temp_b[k]*o2_pred[i-k];
}
} else {
temp_pred[i] += temp_pred[i-1]; // match the o2 branch: keep the drift u_temp
}
temp_pred[i] += devs[i-1,2];
}

// this is redundant but easier to work with output -- creates object o2_forecast
// containing forecast n_forecast time steps ahead
for(t in 1:n_forecast) {
o2_forecast[t] = o2_pred[n_o2+t];
temp_forecast[t] = temp_pred[n_temp+t];
}

}
model {
// initial conditions, centered on mean
o2_x0 ~ normal(7,3);
temp_x0 ~ normal(22,3);

// AR coefficients
o2_b ~ normal(1,1);
temp_b ~ normal(1,1);
// coefficients on each other
temp_on_o2_b ~ normal(0,1);
o2_on_temp_b ~ normal(0,1);
// optional
o2_depth ~ normal(0,1);
// df parameter
//nu ~ student_t(3,2,2);
devs[1] ~ multi_student_t(3, zeros, Sigma);
for(i in 2:(n+n_forecast-1)) {
devs[i] ~ multi_student_t(3, ma_coefs*devs[i-1], Sigma);
//devs[i] ~ multi_student_t(nu, zeros, Sigma);
}

// likelihood
for(t in 1:n_o2) {
o2_y[t] ~ normal(o2_pred[o2_x[t]], o2_sd[t]);
}
for(t in 1:n_temp) {
temp_y[t] ~ normal(temp_pred[temp_x[t]], temp_sd[t]);
}
}
generated quantities {

}
