Skip to content

Commit

Permalink
cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
vnijs committed Jul 6, 2017
1 parent 1440bf9 commit 6abea7b
Show file tree
Hide file tree
Showing 22 changed files with 3,110 additions and 2,746 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: radiant.basics
Type: Package
Title: Basics Menu for Radiant: Business Analytics using R and Shiny
Version: 0.8.3
Date: 2017-6-29
Date: 2017-7-5
Authors@R: person("Vincent", "Nijs", , "[email protected]", c("aut", "cre"))
Description: The Radiant Basics menu includes interfaces for probability calculation, central limit theorem simulation, comparing means and proportions, goodness-of-fit testing, cross-tabs, and correlation. The application extends the functionality in radiant.data.
Depends:
Expand All @@ -13,9 +13,9 @@ Imports:
gridExtra (>= 2.0.0),
scales (>= 0.4.0),
dplyr (>= 0.5),
tidyr (>= 0.4.1),
tidyr (>= 0.6),
magrittr (>= 1.5),
shiny (>= 1.0.0),
shiny (>= 1.0.3),
psych (>= 1.6.6),
import (>= 1.1.0),
methods
Expand Down
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
* Renamed methods `summary.correlation_` and `plot.correlation_` to `summary.correlation` and `plot.correlation`
* Added `tab` argument to `goodness` and `cross_tabs` so a table object can be passed directly
* Documentation updates
* Code clean-up and various minor fixes and improvements
* Scatter plots in _Correlation > Plot_ are now based on 1,000 data points by default. Use _R > Report_ to adjust (e.g., `plot(result, n = -1)`)
* Fix for level ordering in goodness-of-fit expected-values plot
* Code clean-up and various minor fixes and improvements

# CHANGES IN radiant.basics 0.8.0

Expand Down
274 changes: 137 additions & 137 deletions R/compare_means.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,102 +32,102 @@ compare_means <- function(dataset, var1, var2,
test = "t",
data_filter = "") {

vars <- c(var1, var2)
dat <- getdata(dataset, vars, filt = data_filter)
vars <- c(var1, var2)
dat <- getdata(dataset, vars, filt = data_filter)
if (!is_string(dataset)) dataset <- deparse(substitute(dataset)) %>% set_attr("df", TRUE)

## in case : was used for var2
vars <- colnames(dat)
## in case : was used for var2
vars <- colnames(dat)

if (is.numeric(dat[[var1]])) {
dat %<>% gather_("variable", "values", vars)
dat[["variable"]] %<>% factor(levels = vars)
cname <- " "
} else {
dat %<>% gather_("variable", "values", vars)
dat[["variable"]] %<>% factor(levels = vars)
cname <- " "
} else {
if (is.character(dat[[var1]])) dat[[var1]] <- as.factor(dat[[var1]])
colnames(dat) <- c("variable","values")
cname <- var1
colnames(dat) <- c("variable","values")
cname <- var1
}

## needed with new tidyr
dat$variable %<>% as.factor

## check there is variation in the data
## check there is variation in the data
if (any(summarise_all(dat, funs(does_vary)) == FALSE))
return("Test could not be calculated (no variation). Please select another variable." %>%
add_class("compare_means"))
return("Test could not be calculated (no variation). Please select another variable." %>%
add_class("compare_means"))

## resetting option to independent if the number of observations is unequal
## resetting option to independent if the number of observations is unequal
## summary on factor gives counts
if (samples == "paired") {
if (summary(dat[["variable"]]) %>% {max(.) != min(.)})
samples <- "independent (obs. per level unequal)"
}

levs <- levels(dat[["variable"]])
levs <- levels(dat[["variable"]])

cmb <- combn(levs, 2) %>% t %>% as.data.frame
rownames(cmb) <- cmb %>% apply(1, paste, collapse = ":")
colnames(cmb) <- c("group1","group2")

if (!is_empty(comb)) {
if (all(comb %in% rownames(cmb))) {
cmb <- cmb[comb, ]
} else {
cmb <- cmb[1,]
}
}
if (!is_empty(comb)) {
if (all(comb %in% rownames(cmb))) {
cmb <- cmb[comb, ]
} else {
cmb <- cmb[1,]
}
}

res <- cmb
res[ ,c("t.value","p.value", "df", "ci_low", "ci_high", "cis_low", "cis_high")] <- 0

for (i in 1:nrow(cmb)) {
sel <- cmb[i,]
sel <- cmb[i,]

x <- filter_(dat, paste0("variable == '", sel[[1]], "'")) %>% .[["values"]]
y <- filter_(dat, paste0("variable == '", sel[[2]], "'")) %>% .[["values"]]
x <- filter_(dat, paste0("variable == '", sel[[1]], "'")) %>% .[["values"]]
y <- filter_(dat, paste0("variable == '", sel[[2]], "'")) %>% .[["values"]]

res[i,c("t.value","p.value", "df", "ci_low", "ci_high")] <-
t.test(x, y, paired = samples == "paired", alternative = alternative, conf.level = conf_lev) %>%
tidy %>% .[1, c("statistic", "p.value","parameter", "conf.low", "conf.high")]
res[i,c("t.value","p.value", "df", "ci_low", "ci_high")] <-
t.test(x, y, paired = samples == "paired", alternative = alternative, conf.level = conf_lev) %>%
tidy %>% .[1, c("statistic", "p.value","parameter", "conf.low", "conf.high")]

if (test != "t") {
res[i,"p.value"] <-
wilcox.test(x, y, paired = samples == "paired", alternative = alternative,
conf.int = FALSE, conf.level = conf_lev) %>%
tidy %>% .[1,"p.value"]
}
if (test != "t") {
res[i,"p.value"] <-
wilcox.test(x, y, paired = samples == "paired", alternative = alternative,
conf.int = FALSE, conf.level = conf_lev) %>%
tidy %>% .[1,"p.value"]
}

## bootstrap confidence intervals
## seem almost identical, even with highly skewed data
# nr_x <- length(x)
# nr_y <- length(y)
## bootstrap confidence intervals
## seem almost identical, even with highly skewed data
# nr_x <- length(x)
# nr_y <- length(y)

# sim_ci <-
# replicate(1000, mean(sample(x, nr_x, replace = TRUE)) -
# mean(sample(y, nr_y, replace = TRUE))) %>%
# quantile(probs = {(1-conf_lev)/2} %>% c(., 1 - .))
# sim_ci <-
# replicate(1000, mean(sample(x, nr_x, replace = TRUE)) -
# mean(sample(y, nr_y, replace = TRUE))) %>%
# quantile(probs = {(1-conf_lev)/2} %>% c(., 1 - .))

# res[i, c("cis_low", "cis_high")] <- sim_ci
# res[i, c("cis_low", "cis_high")] <- sim_ci

}
rm(x,y,sel)

if (adjust != "none")
res$p.value %<>% p.adjust(method = adjust)
if (adjust != "none")
res$p.value %<>% p.adjust(method = adjust)

## from http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
ci_calc <- function(se, n, conf.lev = .95)
se * qt(conf.lev/2 + .5, n - 1)
## from http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
ci_calc <- function(se, n, conf.lev = .95)
se * qt(conf.lev/2 + .5, n - 1)

dat_summary <-
dat %>%
group_by_("variable") %>%
dat_summary <-
dat %>%
group_by_("variable") %>%
summarise_all(funs(mean = mean, n = length(.), sd, se = sd/sqrt(n),
ci = ci_calc(se, n, conf_lev))) %>%
ci = ci_calc(se, n, conf_lev))) %>%
rename_(.dots = setNames("variable", cname))

vars <- paste0(vars, collapse = ", ")
vars <- paste0(vars, collapse = ", ")
as.list(environment()) %>% add_class("compare_means")
}

Expand All @@ -154,20 +154,20 @@ compare_means <- function(dataset, var1, var2,
#' @export
summary.compare_means <- function(object, show = FALSE, dec = 3, ...) {

if (is.character(object)) return(object)
if (is.character(object)) return(object)

cat(paste0("Pairwise mean comparisons (", object$test, "-test)\n"))
cat("Data :", object$dataset, "\n")
if (object$data_filter %>% gsub("\\s","",.) != "")
cat("Filter :", gsub("\\n","", object$data_filter), "\n")
cat("Variables :", object$vars, "\n")
cat("Samples :", object$samples, "\n")
cat("Confidence:", object$conf_lev, "\n")
cat("Adjustment:", if (object$adjust == "bonf") "Bonferroni" else "None", "\n\n")
cat("Data :", object$dataset, "\n")
if (object$data_filter %>% gsub("\\s","",.) != "")
cat("Filter :", gsub("\\n","", object$data_filter), "\n")
cat("Variables :", object$vars, "\n")
cat("Samples :", object$samples, "\n")
cat("Confidence:", object$conf_lev, "\n")
cat("Adjustment:", if (object$adjust == "bonf") "Bonferroni" else "None", "\n\n")

object$dat_summary[,-1] %<>% round(dec)
print(object$dat_summary %>% as.data.frame, row.names = FALSE)
cat("\n")
cat("\n")

hyp_symbol <- c("two.sided" = "not equal to",
"less" = "<",
Expand All @@ -176,31 +176,31 @@ summary.compare_means <- function(object, show = FALSE, dec = 3, ...) {
means <- object$dat_summary$mean
names(means) <- object$dat_summary[[1]] %>% as.character

## determine lower and upper % for ci
ci_perc <- ci_label(object$alternative, object$conf_lev)

mod <- object$res
mod$`Alt. hyp.` <- paste(mod$group1,hyp_symbol,mod$group2," ")
mod$`Null hyp.` <- paste(mod$group1,"=",mod$group2, " ")
mod$diff <- { means[mod$group1 %>% as.character] - means[mod$group2 %>% as.character] } %>% round(dec)

if (show) {
# mod <- mod[,c("Null hyp.", "Alt. hyp.", "diff", "t.value", "df", "ci_low", "ci_high", "p.value")]
mod$se <- (mod$diff / mod$t.value) %>% round(dec)
mod <- mod[,c("Null hyp.", "Alt. hyp.", "diff", "p.value", "se", "t.value", "df", "ci_low", "ci_high")]
# mod <- mod[,c("Alt. hyp.", "Null hyp.", "diff", "t.value", "df", "ci_low", "ci_high", "cis_low", "cis_high", "p.value")]
if (!is.integer(mod[["df"]])) mod[["df"]] %<>% round(dec)
mod[,c("t.value", "ci_low","ci_high")] %<>% round(dec)
mod <- rename_(mod, .dots = setNames(c("ci_low","ci_high"), ci_perc))
} else {
mod <- mod[,c("Null hyp.", "Alt. hyp.", "diff", "p.value")]
}

mod$` ` <- sig_stars(mod$p.value)
mod$p.value <- round(mod$p.value, dec)
mod$p.value[ mod$p.value < .001 ] <- "< .001"
print(mod, row.names = FALSE, right = FALSE)
cat("\nSignif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n")
## determine lower and upper % for ci
ci_perc <- ci_label(object$alternative, object$conf_lev)

mod <- object$res
mod$`Alt. hyp.` <- paste(mod$group1,hyp_symbol,mod$group2," ")
mod$`Null hyp.` <- paste(mod$group1,"=",mod$group2, " ")
mod$diff <- { means[mod$group1 %>% as.character] - means[mod$group2 %>% as.character] } %>% round(dec)

if (show) {
# mod <- mod[,c("Null hyp.", "Alt. hyp.", "diff", "t.value", "df", "ci_low", "ci_high", "p.value")]
mod$se <- (mod$diff / mod$t.value) %>% round(dec)
mod <- mod[,c("Null hyp.", "Alt. hyp.", "diff", "p.value", "se", "t.value", "df", "ci_low", "ci_high")]
# mod <- mod[,c("Alt. hyp.", "Null hyp.", "diff", "t.value", "df", "ci_low", "ci_high", "cis_low", "cis_high", "p.value")]
if (!is.integer(mod[["df"]])) mod[["df"]] %<>% round(dec)
mod[,c("t.value", "ci_low","ci_high")] %<>% round(dec)
mod <- rename_(mod, .dots = setNames(c("ci_low","ci_high"), ci_perc))
} else {
mod <- mod[,c("Null hyp.", "Alt. hyp.", "diff", "p.value")]
}

mod$` ` <- sig_stars(mod$p.value)
mod$p.value <- round(mod$p.value, dec)
mod$p.value[ mod$p.value < .001 ] <- "< .001"
print(mod, row.names = FALSE, right = FALSE)
cat("\nSignif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n")
}

#' Plot method for the compare_means function
Expand All @@ -223,59 +223,59 @@ summary.compare_means <- function(object, show = FALSE, dec = 3, ...) {
#' @export
plot.compare_means <- function(x, plots = "scatter", shiny = FALSE, custom = FALSE, ...) {

if (is.character(x)) return(x)
object <- x; rm(x)

dat <- object$dat
v1 <- colnames(dat)[1]
v2 <- colnames(dat)[-1]

## cname is equal to " " when the xvar is numeric
if (object$cname == " ") {
var1 <- v1
var2 <- v2
} else {
var1 <- object$var1
var2 <- object$var2
}

## from http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
plot_list <- list()
if ("bar" %in% plots) {
colnames(object$dat_summary)[1] <- "variable"
## use of `which` allows the user to change the order of the plots shown
plot_list[[which("bar" == plots)]] <-
ggplot(object$dat_summary,
aes_string(x = "variable", y = "mean", fill = "variable")) +
geom_bar(stat = "identity") +
geom_errorbar(width = .1, aes(ymin = mean - ci, ymax = mean + ci)) +
geom_errorbar(width = .05, aes(ymin = mean - se, ymax = mean + se), colour = "blue") +
theme(legend.position = "none") +
xlab(var1) + ylab(paste0(var2, " (mean)"))
}

## graphs on full data
if ("box" %in% plots) {
plot_list[[which("box" == plots)]] <-
visualize(dat, xvar = v1, yvar = v2, type = "box", custom = TRUE) +
theme(legend.position = "none") + xlab(var1) + ylab(var2)
}

if ("density" %in% plots) {
plot_list[[which("density" == plots)]] <-
visualize(dat, xvar = v2, type = "density", fill = v1, custom = TRUE) +
xlab(var2) + guides(fill = guide_legend(title = var1))
}

if ("scatter" %in% plots) {
plot_list[[which("scatter" == plots)]] <-
visualize(dat, xvar = v1, yvar = v2, type = "scatter", check = "jitter", alpha = .3, custom = TRUE) +
xlab(var1) + ylab(paste0(var2, " (mean)"))
if (is.character(x)) return(x)
object <- x; rm(x)

dat <- object$dat
v1 <- colnames(dat)[1]
v2 <- colnames(dat)[-1]

## cname is equal to " " when the xvar is numeric
if (object$cname == " ") {
var1 <- v1
var2 <- v2
} else {
var1 <- object$var1
var2 <- object$var2
}

## from http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
plot_list <- list()
if ("bar" %in% plots) {
colnames(object$dat_summary)[1] <- "variable"
## use of `which` allows the user to change the order of the plots shown
plot_list[[which("bar" == plots)]] <-
ggplot(object$dat_summary,
aes_string(x = "variable", y = "mean", fill = "variable")) +
geom_bar(stat = "identity") +
geom_errorbar(width = .1, aes(ymin = mean - ci, ymax = mean + ci)) +
geom_errorbar(width = .05, aes(ymin = mean - se, ymax = mean + se), colour = "blue") +
theme(legend.position = "none") +
xlab(var1) + ylab(paste0(var2, " (mean)"))
}

## graphs on full data
if ("box" %in% plots) {
plot_list[[which("box" == plots)]] <-
visualize(dat, xvar = v1, yvar = v2, type = "box", custom = TRUE) +
theme(legend.position = "none") + xlab(var1) + ylab(var2)
}

if ("density" %in% plots) {
plot_list[[which("density" == plots)]] <-
visualize(dat, xvar = v2, type = "density", fill = v1, custom = TRUE) +
xlab(var2) + guides(fill = guide_legend(title = var1))
}

if ("scatter" %in% plots) {
plot_list[[which("scatter" == plots)]] <-
visualize(dat, xvar = v1, yvar = v2, type = "scatter", check = "jitter", alpha = .3, custom = TRUE) +
xlab(var1) + ylab(paste0(var2, " (mean)"))
}

if (custom)
if (length(plot_list) == 1) return(plot_list[[1]]) else return(plot_list)

sshhr(gridExtra::grid.arrange(grobs = plot_list, ncol = 1)) %>%
{ if (shiny) . else print(.) }
sshhr(gridExtra::grid.arrange(grobs = plot_list, ncol = 1)) %>%
{ if (shiny) . else print(.) }
}
Loading

0 comments on commit 6abea7b

Please sign in to comment.