cleanup

radiant-rstats · Jul 6, 2017 · 6abea7b · 6abea7b
1 parent 1440bf9
commit 6abea7b
Show file tree

Hide file tree

Showing 22 changed files with 3,110 additions and 2,746 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,7 +2,7 @@ Package: radiant.basics
 Type: Package
 Title: Basics Menu for Radiant: Business Analytics using R and Shiny
 Version: 0.8.3
-Date: 2017-6-29
+Date: 2017-7-5
 Authors@R: person("Vincent", "Nijs", , "[email protected]", c("aut", "cre"))
 Description: The Radiant Basics menu includes interfaces for probability calculation, central limit theorem simulation, comparing means and proportions, goodness-of-fit testing, cross-tabs, and correlation. The application extends the functionality in radiant.data.
 Depends:
@@ -13,9 +13,9 @@ Imports:
     gridExtra (>= 2.0.0),
     scales (>= 0.4.0),
     dplyr (>= 0.5),
-    tidyr (>= 0.4.1),
+    tidyr (>= 0.6),
     magrittr (>= 1.5),
-    shiny (>= 1.0.0),
+    shiny (>= 1.0.3),
     psych (>= 1.6.6),
     import (>= 1.1.0),
     methods

diff --git a/NEWS.md b/NEWS.md
@@ -4,8 +4,9 @@
 * Renamed methods `summary.correlation_` and `plot.correlation_` to `summary.correlation` and `plot.correlation`
 * Added `tab` argument to `goodness` and `cross_tabs` so a table object can be passed directly
 * Documentation updates
-* Code clean-up and various minor fixes and improvements
 * Scatter plots in _Correlation > Plot_ are now based on 1,000 data points by default. Use _R > Report_ to adjust (e.g., `plot(result, n = -1)`)
+* Fix for level ordering in goodness-of-fit expected-values plot
+* Code clean-up and various minor fixes and improvements
 
 # CHANGES IN radiant.basics 0.8.0
 

diff --git a/R/compare_means.R b/R/compare_means.R
@@ -32,102 +32,102 @@ compare_means <- function(dataset, var1, var2,
                           test = "t",
                           data_filter = "") {
 
-	vars <- c(var1, var2)
-	dat <- getdata(dataset, vars, filt = data_filter)
+  vars <- c(var1, var2)
+  dat <- getdata(dataset, vars, filt = data_filter)
   if (!is_string(dataset)) dataset <- deparse(substitute(dataset)) %>% set_attr("df", TRUE)
 
-	## in case : was used for var2
-	vars <- colnames(dat)
+  ## in case : was used for var2
+  vars <- colnames(dat)
 
   if (is.numeric(dat[[var1]])) {
-		dat %<>% gather_("variable", "values", vars)
-		dat[["variable"]] %<>% factor(levels = vars)
-		cname <- " "
-	} else {
+    dat %<>% gather_("variable", "values", vars)
+    dat[["variable"]] %<>% factor(levels = vars)
+    cname <- " "
+  } else {
     if (is.character(dat[[var1]])) dat[[var1]] <- as.factor(dat[[var1]])
-		colnames(dat) <- c("variable","values")
-		cname <- var1
+    colnames(dat) <- c("variable","values")
+    cname <- var1
   }
 
   ## needed with new tidyr
   dat$variable %<>% as.factor
 
-	## check there is variation in the data
+  ## check there is variation in the data
   if (any(summarise_all(dat, funs(does_vary)) == FALSE))
-  	return("Test could not be calculated (no variation). Please select another variable." %>%
-  	       add_class("compare_means"))
+    return("Test could not be calculated (no variation). Please select another variable." %>%
+           add_class("compare_means"))
 
-	## resetting option to independent if the number of observations is unequal
+  ## resetting option to independent if the number of observations is unequal
   ## summary on factor gives counts
   if (samples == "paired") {
     if (summary(dat[["variable"]]) %>% {max(.) != min(.)})
       samples <- "independent (obs. per level unequal)"
   }
 
-	levs <- levels(dat[["variable"]])
+  levs <- levels(dat[["variable"]])
 
   cmb <- combn(levs, 2) %>% t %>% as.data.frame
   rownames(cmb) <- cmb %>% apply(1, paste, collapse = ":")
   colnames(cmb) <- c("group1","group2")
 
-	if (!is_empty(comb)) {
-		if (all(comb %in% rownames(cmb))) {
-			cmb <- cmb[comb, ]
-		} else {
-			cmb <- cmb[1,]
-  	}
-	}
+  if (!is_empty(comb)) {
+    if (all(comb %in% rownames(cmb))) {
+      cmb <- cmb[comb, ]
+    } else {
+      cmb <- cmb[1,]
+    }
+  }
 
   res <- cmb
   res[ ,c("t.value","p.value", "df", "ci_low", "ci_high", "cis_low", "cis_high")] <- 0
 
   for (i in 1:nrow(cmb)) {
-  	sel <- cmb[i,]
+    sel <- cmb[i,]
 
-  	x <- filter_(dat, paste0("variable == '", sel[[1]], "'")) %>% .[["values"]]
-  	y <- filter_(dat, paste0("variable == '", sel[[2]], "'")) %>% .[["values"]]
+    x <- filter_(dat, paste0("variable == '", sel[[1]], "'")) %>% .[["values"]]
+    y <- filter_(dat, paste0("variable == '", sel[[2]], "'")) %>% .[["values"]]
 
-	  	res[i,c("t.value","p.value", "df", "ci_low", "ci_high")] <-
-	  	  t.test(x, y, paired = samples == "paired", alternative = alternative, conf.level = conf_lev) %>%
-	  	  tidy %>% .[1, c("statistic", "p.value","parameter", "conf.low", "conf.high")]
+      res[i,c("t.value","p.value", "df", "ci_low", "ci_high")] <-
+        t.test(x, y, paired = samples == "paired", alternative = alternative, conf.level = conf_lev) %>%
+        tidy %>% .[1, c("statistic", "p.value","parameter", "conf.low", "conf.high")]
 
-  	if (test != "t") {
-			  res[i,"p.value"] <-
-			    wilcox.test(x, y, paired = samples == "paired", alternative = alternative,
-			                conf.int = FALSE, conf.level = conf_lev) %>%
-			    tidy %>% .[1,"p.value"]
-		}
+    if (test != "t") {
+        res[i,"p.value"] <-
+          wilcox.test(x, y, paired = samples == "paired", alternative = alternative,
+                      conf.int = FALSE, conf.level = conf_lev) %>%
+          tidy %>% .[1,"p.value"]
+    }
 
-  	## bootstrap confidence intervals
-  	## seem almost identical, even with highly skewed data
-		# nr_x <- length(x)
-		# nr_y <- length(y)
+    ## bootstrap confidence intervals
+    ## seem almost identical, even with highly skewed data
+    # nr_x <- length(x)
+    # nr_y <- length(y)
 
-		# sim_ci <-
-		# 	replicate(1000, mean(sample(x, nr_x, replace = TRUE)) -
-		# 	                mean(sample(y, nr_y, replace = TRUE))) %>%
-		# 	quantile(probs = {(1-conf_lev)/2} %>% c(., 1 - .))
+    # sim_ci <-
+    #   replicate(1000, mean(sample(x, nr_x, replace = TRUE)) -
+    #                   mean(sample(y, nr_y, replace = TRUE))) %>%
+    #   quantile(probs = {(1-conf_lev)/2} %>% c(., 1 - .))
 
-		# res[i, c("cis_low", "cis_high")] <- sim_ci
+    # res[i, c("cis_low", "cis_high")] <- sim_ci
 
   }
   rm(x,y,sel)
 
-	if (adjust != "none")
-		res$p.value %<>% p.adjust(method = adjust)
+  if (adjust != "none")
+    res$p.value %<>% p.adjust(method = adjust)
 
-	## from http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
-	ci_calc <- function(se, n, conf.lev = .95)
-	 	se * qt(conf.lev/2 + .5, n - 1)
+  ## from http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
+  ci_calc <- function(se, n, conf.lev = .95)
+    se * qt(conf.lev/2 + .5, n - 1)
 
-	dat_summary <-
-	  dat %>%
-		group_by_("variable") %>%
+  dat_summary <-
+    dat %>%
+    group_by_("variable") %>%
     summarise_all(funs(mean = mean, n = length(.), sd, se = sd/sqrt(n),
-                   			ci = ci_calc(se, n, conf_lev))) %>%
+                        ci = ci_calc(se, n, conf_lev))) %>%
     rename_(.dots = setNames("variable", cname))
 
-	vars <- paste0(vars, collapse = ", ")
+  vars <- paste0(vars, collapse = ", ")
   as.list(environment()) %>% add_class("compare_means")
 }
 
@@ -154,20 +154,20 @@ compare_means <- function(dataset, var1, var2,
 #' @export
 summary.compare_means <- function(object, show = FALSE, dec = 3, ...) {
 
-	if (is.character(object)) return(object)
+  if (is.character(object)) return(object)
 
   cat(paste0("Pairwise mean comparisons (", object$test, "-test)\n"))
-	cat("Data      :", object$dataset, "\n")
-	if (object$data_filter %>% gsub("\\s","",.) != "")
-		cat("Filter    :", gsub("\\n","", object$data_filter), "\n")
-	cat("Variables :", object$vars, "\n")
-	cat("Samples   :", object$samples, "\n")
-	cat("Confidence:", object$conf_lev, "\n")
-	cat("Adjustment:", if (object$adjust == "bonf") "Bonferroni" else "None", "\n\n")
+  cat("Data      :", object$dataset, "\n")
+  if (object$data_filter %>% gsub("\\s","",.) != "")
+    cat("Filter    :", gsub("\\n","", object$data_filter), "\n")
+  cat("Variables :", object$vars, "\n")
+  cat("Samples   :", object$samples, "\n")
+  cat("Confidence:", object$conf_lev, "\n")
+  cat("Adjustment:", if (object$adjust == "bonf") "Bonferroni" else "None", "\n\n")
 
   object$dat_summary[,-1] %<>% round(dec)
   print(object$dat_summary %>% as.data.frame, row.names = FALSE)
-	cat("\n")
+  cat("\n")
 
   hyp_symbol <- c("two.sided" = "not equal to",
                   "less" = "<",
@@ -176,31 +176,31 @@ summary.compare_means <- function(object, show = FALSE, dec = 3, ...) {
   means <- object$dat_summary$mean
   names(means) <- object$dat_summary[[1]] %>% as.character
 
-	## determine lower and upper % for ci
-	ci_perc <- ci_label(object$alternative, object$conf_lev)
-
-	mod <- object$res
-	mod$`Alt. hyp.` <- paste(mod$group1,hyp_symbol,mod$group2," ")
-	mod$`Null hyp.` <- paste(mod$group1,"=",mod$group2, " ")
-	mod$diff <- { means[mod$group1 %>% as.character] - means[mod$group2 %>% as.character] } %>% round(dec)
-
-	if (show) {
-	  # mod <- mod[,c("Null hyp.", "Alt. hyp.", "diff", "t.value", "df", "ci_low", "ci_high", "p.value")]
-	  mod$se <- (mod$diff / mod$t.value) %>% round(dec)
-	  mod <- mod[,c("Null hyp.", "Alt. hyp.", "diff", "p.value", "se", "t.value", "df", "ci_low", "ci_high")]
-	  # mod <- mod[,c("Alt. hyp.", "Null hyp.", "diff", "t.value", "df", "ci_low", "ci_high", "cis_low", "cis_high", "p.value")]
-		if (!is.integer(mod[["df"]])) mod[["df"]] %<>% round(dec)
-		mod[,c("t.value", "ci_low","ci_high")] %<>% round(dec)
-	  mod <- rename_(mod, .dots = setNames(c("ci_low","ci_high"), ci_perc))
-	} else {
-	  mod <- mod[,c("Null hyp.", "Alt. hyp.", "diff", "p.value")]
-	}
-
-	mod$` ` <- sig_stars(mod$p.value)
-	mod$p.value <- round(mod$p.value, dec)
-	mod$p.value[ mod$p.value < .001 ] <- "< .001"
-	print(mod, row.names = FALSE, right = FALSE)
-	cat("\nSignif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n")
+  ## determine lower and upper % for ci
+  ci_perc <- ci_label(object$alternative, object$conf_lev)
+
+  mod <- object$res
+  mod$`Alt. hyp.` <- paste(mod$group1,hyp_symbol,mod$group2," ")
+  mod$`Null hyp.` <- paste(mod$group1,"=",mod$group2, " ")
+  mod$diff <- { means[mod$group1 %>% as.character] - means[mod$group2 %>% as.character] } %>% round(dec)
+
+  if (show) {
+    # mod <- mod[,c("Null hyp.", "Alt. hyp.", "diff", "t.value", "df", "ci_low", "ci_high", "p.value")]
+    mod$se <- (mod$diff / mod$t.value) %>% round(dec)
+    mod <- mod[,c("Null hyp.", "Alt. hyp.", "diff", "p.value", "se", "t.value", "df", "ci_low", "ci_high")]
+    # mod <- mod[,c("Alt. hyp.", "Null hyp.", "diff", "t.value", "df", "ci_low", "ci_high", "cis_low", "cis_high", "p.value")]
+    if (!is.integer(mod[["df"]])) mod[["df"]] %<>% round(dec)
+    mod[,c("t.value", "ci_low","ci_high")] %<>% round(dec)
+    mod <- rename_(mod, .dots = setNames(c("ci_low","ci_high"), ci_perc))
+  } else {
+    mod <- mod[,c("Null hyp.", "Alt. hyp.", "diff", "p.value")]
+  }
+
+  mod$` ` <- sig_stars(mod$p.value)
+  mod$p.value <- round(mod$p.value, dec)
+  mod$p.value[ mod$p.value < .001 ] <- "< .001"
+  print(mod, row.names = FALSE, right = FALSE)
+  cat("\nSignif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n")
 }
 
 #' Plot method for the compare_means function
@@ -223,59 +223,59 @@ summary.compare_means <- function(object, show = FALSE, dec = 3, ...) {
 #' @export
 plot.compare_means <- function(x, plots = "scatter", shiny = FALSE, custom = FALSE, ...) {
 
-	if (is.character(x)) return(x)
-	object <- x; rm(x)
-
-	dat <- object$dat
-	v1 <- colnames(dat)[1]
-	v2 <- colnames(dat)[-1]
-
-	## cname is equal to " " when the xvar is numeric
-	if (object$cname == " ") {
-		var1 <- v1
-		var2 <- v2
-	} else {
-		var1 <- object$var1
-		var2 <- object$var2
-	}
-
-	## from http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
-	plot_list <- list()
-	if ("bar" %in% plots) {
-		colnames(object$dat_summary)[1] <- "variable"
-		## use of `which` allows the user to change the order of the plots shown
-		plot_list[[which("bar" == plots)]] <-
-		  ggplot(object$dat_summary,
-		    aes_string(x = "variable", y = "mean", fill = "variable")) +
-		    geom_bar(stat = "identity")  +
-		 		geom_errorbar(width = .1, aes(ymin = mean - ci, ymax = mean + ci)) +
-		 		geom_errorbar(width = .05, aes(ymin = mean - se, ymax = mean + se), colour = "blue") +
-		 		theme(legend.position = "none") +
-		 		xlab(var1) + ylab(paste0(var2, " (mean)"))
-	}
-
-	## graphs on full data
-	if ("box" %in% plots) {
-		plot_list[[which("box" == plots)]] <-
-		  visualize(dat, xvar = v1, yvar = v2, type = "box", custom = TRUE) +
-			theme(legend.position = "none") + xlab(var1) + ylab(var2)
-	}
-
-	if ("density" %in% plots) {
-		plot_list[[which("density" == plots)]] <-
-		  visualize(dat, xvar = v2, type = "density", fill = v1, custom = TRUE) +
-	 		  xlab(var2) + guides(fill = guide_legend(title = var1))
-	}
-
-	if ("scatter" %in% plots) {
-		plot_list[[which("scatter" == plots)]] <-
-		  visualize(dat, xvar = v1, yvar = v2, type = "scatter", check = "jitter", alpha = .3, custom = TRUE) +
-	 		xlab(var1) + ylab(paste0(var2, " (mean)"))
+  if (is.character(x)) return(x)
+  object <- x; rm(x)
+
+  dat <- object$dat
+  v1 <- colnames(dat)[1]
+  v2 <- colnames(dat)[-1]
+
+  ## cname is equal to " " when the xvar is numeric
+  if (object$cname == " ") {
+    var1 <- v1
+    var2 <- v2
+  } else {
+    var1 <- object$var1
+    var2 <- object$var2
+  }
+
+  ## from http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
+  plot_list <- list()
+  if ("bar" %in% plots) {
+    colnames(object$dat_summary)[1] <- "variable"
+    ## use of `which` allows the user to change the order of the plots shown
+    plot_list[[which("bar" == plots)]] <-
+      ggplot(object$dat_summary,
+        aes_string(x = "variable", y = "mean", fill = "variable")) +
+        geom_bar(stat = "identity")  +
+        geom_errorbar(width = .1, aes(ymin = mean - ci, ymax = mean + ci)) +
+        geom_errorbar(width = .05, aes(ymin = mean - se, ymax = mean + se), colour = "blue") +
+        theme(legend.position = "none") +
+        xlab(var1) + ylab(paste0(var2, " (mean)"))
+  }
+
+  ## graphs on full data
+  if ("box" %in% plots) {
+    plot_list[[which("box" == plots)]] <-
+      visualize(dat, xvar = v1, yvar = v2, type = "box", custom = TRUE) +
+      theme(legend.position = "none") + xlab(var1) + ylab(var2)
+  }
+
+  if ("density" %in% plots) {
+    plot_list[[which("density" == plots)]] <-
+      visualize(dat, xvar = v2, type = "density", fill = v1, custom = TRUE) +
+        xlab(var2) + guides(fill = guide_legend(title = var1))
+  }
+
+  if ("scatter" %in% plots) {
+    plot_list[[which("scatter" == plots)]] <-
+      visualize(dat, xvar = v1, yvar = v2, type = "scatter", check = "jitter", alpha = .3, custom = TRUE) +
+      xlab(var1) + ylab(paste0(var2, " (mean)"))
   }
 
   if (custom)
     if (length(plot_list) == 1) return(plot_list[[1]]) else return(plot_list)
 
-	sshhr(gridExtra::grid.arrange(grobs = plot_list, ncol = 1)) %>%
- 	  { if (shiny) . else print(.) }
+  sshhr(gridExtra::grid.arrange(grobs = plot_list, ncol = 1)) %>%
+    { if (shiny) . else print(.) }
 }