Skip to content

Commit

Permalink
Merge pull request #78 from lanl/cv-testing
Browse files Browse the repository at this point in the history
Merge updates in cv-testing branch to prepare for 0.6 release
  • Loading branch information
rfiorella authored Aug 4, 2022
2 parents 85ac198 + 6ca69db commit cc05ad2
Show file tree
Hide file tree
Showing 17 changed files with 121 additions and 323 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ BugReports: https://github.com/lanl/NEONiso/issues
URL: https://github.com/lanl/NEONiso
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.0
RoxygenNote: 7.2.1
Imports:
dplyr,
zoo,
Expand Down
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ have some known issues (e.g., no correction is made for concentration dependence
of the analyzers yet), and any data produced from this function should be considered
provisional.
* Added capability to plot data used in carbon calibration regression in order
to help identify periods where calibration paramters seem to be okay, but
to help identify periods where calibration parameters seem to be okay, but
quality of calibrated data is degraded.
* Added cross-validation error estimates to carbon calibration routines.
* The calibrate_carbon_bymonth() function has been marked as deprecated, but will
Expand Down
27 changes: 14 additions & 13 deletions R/calibrate_ambient_carbon_Bowling2003.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#'
#' @author Rich Fiorella \email{rfiorella@@lanl.gov}
#'
#' Function called by `calibrate_carbon_bymoth()` to apply
#' Function called by `calibrate_carbon_bymonth()` to apply
#' gain and offset parameters to the ambient datasets (000_0x0_09m and
#' 000_0x0_30m). This function should generally not be used independently,
#' but should be used in coordination with
Expand Down Expand Up @@ -155,8 +155,8 @@ calibrate_ambient_carbon_Bowling2003 <- function(amb_data_list,
# extract 12CO2 and 13CO2 concentrations from the ambient data,
# set as NA, unless overwritten

mean12c <- max12c <- min12c <- cv5rmse12c <- loocv12c <- rep(NA, length(amb_delta$mean)) # placeholders for 12CO2 vecs
mean13c <- max13c <- min13c <- cv5rmse13c <- loocv13c <- rep(NA, length(amb_delta$mean)) # placeholders for 13CO2 vecs
mean12c <- max12c <- min12c <- cv5rmse12c <- cvloo12c <- rep(NA, length(amb_delta$mean)) # placeholders for 12CO2 vecs
mean13c <- max13c <- min13c <- cv5rmse13c <- cvloo13c <- rep(NA, length(amb_delta$mean)) # placeholders for 13CO2 vecs

for (i in seq_len(length(var_inds_in_calperiod))) {
# calculate calibrated 12CO2 concentrations
Expand All @@ -166,8 +166,8 @@ calibrate_ambient_carbon_Bowling2003 <- function(amb_data_list,
amb_12CO2$min[var_inds_in_calperiod[[i]]] + caldf$offset12C[i]
max12c[var_inds_in_calperiod[[i]]] <- caldf$gain12C[i] *
amb_12CO2$max[var_inds_in_calperiod[[i]]] + caldf$offset12C[i]
cv5rmse12c[var_inds_in_calperiod[[i]]] <- caldf$cv5rmse_12C[i]
loocv12c[var_inds_in_calperiod[[i]]] <- caldf$loocv_12C[i]
# cv5rmse12c[var_inds_in_calperiod[[i]]] <- caldf$cv5rmse_12C[i]
# cvloo12c[var_inds_in_calperiod[[i]]] <- caldf$cvloo_12C[i]

# calculate calibrated 13CO2 concentrations
mean13c[var_inds_in_calperiod[[i]]] <- caldf$gain13C[i] *
Expand All @@ -176,8 +176,8 @@ calibrate_ambient_carbon_Bowling2003 <- function(amb_data_list,
amb_13CO2$min[var_inds_in_calperiod[[i]]] + caldf$offset13C[i]
max13c[var_inds_in_calperiod[[i]]] <- caldf$gain13C[i] *
amb_13CO2$max[var_inds_in_calperiod[[i]]] + caldf$offset13C[i]
cv5rmse13c[var_inds_in_calperiod[[i]]] <- caldf$cv5rmse_13C[i]
loocv13c[var_inds_in_calperiod[[i]]] <- caldf$loocv_13C[i]
# cv5rmse13c[var_inds_in_calperiod[[i]]] <- caldf$cv5rmse_13C[i]
# cvloo13c[var_inds_in_calperiod[[i]]] <- caldf$cvloo_13C[i]

}

Expand All @@ -198,12 +198,13 @@ calibrate_ambient_carbon_Bowling2003 <- function(amb_data_list,
}

# calculate uncertainties:
amb_delta$CVcalUcrt <- round(abs(amb_delta$mean_cal) *
sqrt((cv5rmse12c/mean12c)^2 + (cv5rmse13c/mean13c)^2), 3)
amb_delta$LOOcalUcrt <- round(abs(amb_delta$mean_cal) *
sqrt((loocv12c/mean12c)^2 + (loocv13c/mean13c)^2), 3)
amb_co2$CVcalUcrt <- round(sqrt(cv5rmse12c^2 + cv5rmse13c^2), 3)
amb_co2$LOOcalUcrt <- round(sqrt(loocv12c^2 + loocv13c^2), 3)
# save this ucrt propagation for a later version.
#amb_delta$CVcalUcrt <- round(abs(amb_delta$mean_cal) *
# sqrt((cv5rmse12c/mean12c)^2 + (cv5rmse13c/mean13c)^2), 3)
#amb_delta$LOOcalUcrt <- round(abs(amb_delta$mean_cal) *
# sqrt((cvloo12c/mean12c)^2 + (cvloo13c/mean13c)^2), 3)
#amb_co2$CVcalUcrt <- round(sqrt(cv5rmse12c^2 + cv5rmse13c^2), 3)
#amb_co2$LOOcalUcrt <- round(sqrt(cvloo12c^2 + cvloo13c^2), 3)

# replace ambdf in amb_data_list, return amb_data_list
amb_data_list$dlta13CCo2 <- amb_delta
Expand Down
13 changes: 12 additions & 1 deletion R/calibrate_ambient_carbon_linreg.R
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,16 @@ calibrate_ambient_carbon_linreg <- function(amb_data_list,
# extract d13C and CO2 concentrations from the ambient data
d13C_ambdf$mean_cal <- d13C_ambdf$mean
co2_ambdf$mean_cal <- co2_ambdf$mean


# add columns to d13C_ambdf and co2_ambdf for uncertainty calculation
d13C_ambdf$cvloo <- d13C_ambdf$mean
d13C_ambdf$cv5rmse <- d13C_ambdf$mean
d13C_ambdf$cv5mae <- d13C_ambdf$mean

co2_ambdf$cvloo <- co2_ambdf$mean
co2_ambdf$cv5rmse <- co2_ambdf$mean
co2_ambdf$cv5mae <- co2_ambdf$mean

for (i in 1:length(var_inds_in_calperiod)) {

d13C_ambdf$mean_cal[var_inds_in_calperiod[[i]]] <- caldf$d13C_intercept[i] +
Expand All @@ -135,6 +144,8 @@ calibrate_ambient_carbon_linreg <- function(amb_data_list,
d13C_ambdf$max[var_inds_in_calperiod[[i]]] <- caldf$d13C_intercept[i] +
d13C_ambdf$max[var_inds_in_calperiod[[i]]] * caldf$d13C_slope[i]

# d13C_ambdf$cvloo[var_inds_in_calperiod[[i]]] <-

co2_ambdf$mean_cal[var_inds_in_calperiod[[i]]] <- caldf$co2_intercept[i] +
co2_ambdf$mean[var_inds_in_calperiod[[i]]] * caldf$co2_slope[i]

Expand Down
2 changes: 1 addition & 1 deletion R/calibrate_carbon.R
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ calibrate_carbon <- function(inname,

tmp_names <- names(ciso$reference)

print("correcting reference output df.,..")
print("correcting reference output df...")
# lapply seems to strip names from ciso$reference, so need to save above
# and reassign below.
ciso$reference <- lapply(names(ciso$reference),
Expand Down
3 changes: 0 additions & 3 deletions R/output_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -296,9 +296,6 @@ write_carbon_ambient_data <- function(outname, site, amb_data_list) {
for (i in 1:length(amb_data_list)) {
amb_data_subset <- amb_data_list[i]

print(names(amb_data_subset))
print(site)

co2_data_outloc <- rhdf5::H5Gcreate(fid,
paste0("/", site, "/dp01/data/isoCo2/",
names(amb_data_subset)))
Expand Down
66 changes: 53 additions & 13 deletions R/reference_data_regression.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ fit_carbon_regression <- function(ref_data, method, calibration_half_width,
offset13C = as.numeric(NA),
r2_12C = as.numeric(NA),
r2_13C = as.numeric(NA),
loocv_12C = as.numeric(NA),
loocv_13C = as.numeric(NA),
cvloo_12C = as.numeric(NA),
cvloo_13C = as.numeric(NA),
cv5mae_12C = as.numeric(NA),
cv5mae_13C = as.numeric(NA),
cv5rmse_12C = as.numeric(NA),
Expand All @@ -118,8 +118,8 @@ fit_carbon_regression <- function(ref_data, method, calibration_half_width,
offset13C = numeric(length = 2e5),
r2_12C = numeric(length = 2e5),
r2_13C = numeric(length = 2e5),
loocv_12C = numeric(length = 2e5),
loocv_13C = numeric(length = 2e5),
cvloo_12C = numeric(length = 2e5),
cvloo_13C = numeric(length = 2e5),
cv5mae_12C = numeric(length = 2e5),
cv5mae_13C = numeric(length = 2e5),
cv5rmse_12C = numeric(length = 2e5),
Expand Down Expand Up @@ -183,8 +183,8 @@ fit_carbon_regression <- function(ref_data, method, calibration_half_width,
out$r2_13C[i] <- summary(tmpmod13C)$r.squared

# extract leave-one-out CV value
out$loocv_12C[i] <- loocv(tmpmod12C)
out$loocv_13C[i] <- loocv(tmpmod13C)
out$cvloo_12C[i] <- loocv(tmpmod12C)
out$cvloo_13C[i] <- loocv(tmpmod13C)

# get cv5 values
tmp <- stats::formula(conc12CCO2_ref ~ conc12CCO2_obs)
Expand All @@ -206,8 +206,8 @@ fit_carbon_regression <- function(ref_data, method, calibration_half_width,
out$offset13C[i] <- NA
out$r2_12C[i] <- NA
out$r2_13C[i] <- NA
out$loocv_12C[i] <- NA
out$loocv_13C[i] <- NA
out$cvloo_12C[i] <- NA
out$cvloo_13C[i] <- NA
out$cv5mae_12C[i] <- NA
out$cv5mae_13C[i] <- NA
out$cv5rmse_12C[i] <- NA
Expand All @@ -225,9 +225,9 @@ fit_carbon_regression <- function(ref_data, method, calibration_half_width,
# re-order columns to ensure that they are consistent across methods
out <- out[, c("timeBgn", "timeEnd",
"gain12C", "offset12C", "r2_12C",
"loocv_12C", "cv5mae_12C", "cv5rmse_12C",
"cvloo_12C", "cv5mae_12C", "cv5rmse_12C",
"gain13C", "offset13C", "r2_13C",
"loocv_13C", "cv5mae_13C", "cv5rmse_13C")]
"cvloo_13C", "cv5mae_13C", "cv5rmse_13C")]

}
} else if (method == "linreg") {
Expand All @@ -245,18 +245,30 @@ fit_carbon_regression <- function(ref_data, method, calibration_half_width,
d13C_slope = as.numeric(NA),
d13C_intercept = as.numeric(NA),
d13C_r2 = as.numeric(NA),
d13C_cvloo = as.numeric(NA),
d13C_cv5mae = as.numeric(NA),
d13C_cv5rmse = as.numeric(NA),
co2_slope = as.numeric(NA),
co2_intercept = as.numeric(NA),
co2_r2 = as.numeric(NA))
co2_r2 = as.numeric(NA),
co2_cvloo = as.numeric(NA),
co2_cv5mae = as.numeric(NA),
co2_cv5rmse = as.numeric(NA))
} else {

# output dataframe giving valid time range, slopes, intercepts, rsquared.
out <- data.frame(d13C_slope = numeric(length = 2e5),
d13C_intercept = numeric(length = 2e5),
d13C_r2 = numeric(length = 2e5),
d13C_cvloo = numeric(length = 2e5),
d13C_cv5mae = numeric(length = 2e5),
d13C_cv5rmse = numeric(length = 2e5),
co2_slope = numeric(length = 2e5),
co2_intercept = numeric(length = 2e5),
co2_r2 = numeric(length = 2e5))
co2_r2 = numeric(length = 2e5),
co2_cvloo = numeric(length = 2e5),
co2_cv5mae = numeric(length = 2e5),
co2_cv5rmse = numeric(length = 2e5))

# get start and end days.
start_date <- as.Date(min(ref_data$timeBgn))
Expand Down Expand Up @@ -302,6 +314,7 @@ fit_carbon_regression <- function(ref_data, method, calibration_half_width,
tmpmod_co2 <- stats::lm(rtioMoleDryCo2Refe.mean ~ rtioMoleDryCo2.mean,
data = cal_subset)


out$d13C_slope[i] <- coef(tmpmod_d13)[[2]]
out$d13C_intercept[i] <- coef(tmpmod_d13)[[1]]
out$d13C_r2[i] <- summary(tmpmod_d13)$r.squared
Expand All @@ -310,6 +323,23 @@ fit_carbon_regression <- function(ref_data, method, calibration_half_width,
out$co2_intercept[i] <- coef(tmpmod_co2)[[1]]
out$co2_r2[i] <- summary(tmpmod_co2)$r.squared

# extract uncertainties:
# extract leave-one-out CV value
out$d13C_cvloo[i] <- loocv(tmpmod_d13)
out$co2_cvloo[i] <- loocv(tmpmod_co2)

# get cv5 values
tmp <- stats::formula(dlta13CCo2Refe.mean ~ dlta13CCo2.mean)
cv_d13C <- estimate_calibration_error(tmp, cal_subset)
tmp <- stats::formula(rtioMoleDryCo2Refe.mean ~ rtioMoleDryCo2.mean)
cv_co2 <- estimate_calibration_error(tmp, cal_subset)

# assign cv values:
out$d13C_cv5mae[i] <- cv_d13C$MAE
out$co2_cv5mae[i] <- cv_co2$MAE
out$d13C_cv5rmse[i] <- cv_d13C$RMSE
out$co2_cv5rmse[i] <- cv_co2$RMSE

} else {

out$d13C_slope[i] <- NA
Expand All @@ -320,6 +350,14 @@ fit_carbon_regression <- function(ref_data, method, calibration_half_width,
out$co2_intercept[i] <- NA
out$co2_r2[i] <- NA

# set uncertainty values to NA as well.
out$d13C_cvloo[i] <- NA
out$d13C_cv5mae[i] <- NA
out$d13C_cv5rmse[i] <- NA
out$co2_cvloo[i] <- NA
out$co2_cv5mae[i] <- NA
out$co2_cv5rmse[i] <- NA

}
}

Expand All @@ -333,7 +371,9 @@ fit_carbon_regression <- function(ref_data, method, calibration_half_width,
# re-order columns to ensure that they are consistent across methods
out <- out[, c("timeBgn", "timeEnd",
"d13C_slope", "d13C_intercept", "d13C_r2",
"co2_slope", "co2_intercept", "co2_r2")]
"d13C_cvloo", "d13C_cv5mae", "d13C_cv5rmse",
"co2_slope", "co2_intercept", "co2_r2",
"co2_cvloo", "co2_cv5mae", "co2_cv5rmse")]

}
}
Expand Down
6 changes: 0 additions & 6 deletions R/restructure_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,6 @@ ingest_data <- function(inname, analyte, name_fix = TRUE) {

ambi_by_height <- base::split(ambient, factor(ambient$verticalPosition))
refe_by_height <- base::split(reference, factor(reference$verticalPosition))

print(names(ambi_by_height))
print(names(refe_by_height))

#-------------------------
# RESTRUCTURE AMBIENT
Expand Down Expand Up @@ -144,9 +141,6 @@ ingest_data <- function(inname, analyte, name_fix = TRUE) {
names(refe_out) <- paste0(names(refe_out), "_09m")
}

print(names(refe_out))
print(names(ambi_out))

output <- list(ambi_out, refe_out, reference)
names(output) <- c("ambient", "reference", "refe_stacked")

Expand Down
22 changes: 16 additions & 6 deletions cran-comments.md
Original file line number Diff line number Diff line change
@@ -1,19 +1,29 @@
## Test environments
* local R installation: R 4.1.2
* local R installation: R 4.2.1
* GitHub Actions (ubuntu-18.04): devel, release, oldrel
* GitHub Actions (windows-latest): release
* GitHub Actions (macOS-latest): devel, release, oldrel
* win-builder: devel
* win-builder: devel, release
* R-hub (Windows Server 2022): devel
* R-hub (ubuntu-20.04.1 LTS): release
* R-hub (fedora): devel

## R CMD check results

* No errors, warnings, or notes in local environment.
* No errors, warnings, or notes in GitHub Actions, except for windows-latest where
R CMD check does not run due to an error in 'session info'
(Error: invalid version specification '<NA'>). This error does not seem to be related
to the package itself.

* No errors, warnings, or notes in GitHub Actions, except for macOS-latest (devel),
which failed to download R-devel due to a 404 HTTP error.

* No errors or warnings on win-builder.

* One error and one note issued on R-hub/Windows Server (devel): the error is about a
Bioconductor dependency not being available; the note is about a change in maintainer
(my email address has changed) and invalid URLs (the URLs are correct, however).
The same note is also issued on ubuntu-20.04.1 LTS.

* No errors, warnings, or notes on R-hub fedora.

* Auto-check will issue a warning about misspelled words in DESCRIPTION, but the words flagged are all correct.


Expand Down
38 changes: 0 additions & 38 deletions extract_data_for_Gabe.R

This file was deleted.

2 changes: 1 addition & 1 deletion man/calibrate_ambient_carbon_Bowling2003.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions setup_testing_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ library(rhdf5)
# exists in rbuildignore, so should not be bundled with package.
# requires install and restart w/ NEONiso.

master_file <- "/Volumes/GradSchoolBackup/NEON/DP4_00200_001/ONAQ/NEON.D15.ONAQ.DP4.00200.001.nsae.2019-06.basic.h5"
master_ccalB03_file <- "~/Desktop/NEONcal/210303_CisoEC/ONAQ/NEON.D15.ONAQ.DP4.00200.001.nsae.2019-06.basic.20201020T212232Z.calibrated.h5"
master_ccalLR_file <- "~/Desktop/NEONcal/210303_CisoLR/ONAQ/NEON.D15.ONAQ.DP4.00200.001.nsae.2019-06.basic.20201020T212232Z.calibrated.h5"
master_file <- "NEON/DP4_00200_001/ONAQ/NEON.D15.ONAQ.DP4.00200.001.nsae.2019-06.basic.h5"
master_ccalB03_file <- "210303_CisoEC/ONAQ/NEON.D15.ONAQ.DP4.00200.001.nsae.2019-06.basic.20201020T212232Z.calibrated.h5"
master_ccalLR_file <- "210303_CisoLR/ONAQ/NEON.D15.ONAQ.DP4.00200.001.nsae.2019-06.basic.20201020T212232Z.calibrated.h5"

#---------------------------------------
# extract co2 data for unit testing:
Expand Down Expand Up @@ -462,5 +462,5 @@ h5delete(fid, '/YELL/dp01/data/isoCo2/co2High_30m')

H5Fclose(fid)

system('/Users/rfiorella/opt/anaconda3/bin/h5repack inst/extdata/NEON.D12.YELL.DP4.00200.001.nsae.2020-11.basic.20210209T161116Z.h5 inst/extdata/NEON.D12.YELL.DP4.00200.001.nsae.2020-11.basic.packed.h5')
system('~/opt/anaconda3/bin/h5repack inst/extdata/NEON.D12.YELL.DP4.00200.001.nsae.2020-11.basic.20210209T161116Z.h5 inst/extdata/NEON.D12.YELL.DP4.00200.001.nsae.2020-11.basic.packed.h5')

Loading

0 comments on commit cc05ad2

Please sign in to comment.