From 4da5ded85d03a3344d1f6b13215199cb1a5195af Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 27 Apr 2020 16:10:14 +0800 Subject: [PATCH 01/10] overhaul stop/message/warning calls to be more translation-friendly/canonical --- R/pkg/R/DataFrame.R | 44 +++++++++++----------- R/pkg/R/RDD.R | 2 +- R/pkg/R/SQLContext.R | 20 +++++----- R/pkg/R/client.R | 15 ++++++-- R/pkg/R/context.R | 8 ++-- R/pkg/R/deserialize.R | 2 +- R/pkg/R/group.R | 4 +- R/pkg/R/install.R | 68 ++++++++++++++++------------------ R/pkg/R/mllib_classification.R | 4 +- R/pkg/R/mllib_stat.R | 3 +- R/pkg/R/pairRDD.R | 2 +- R/pkg/R/schema.R | 2 +- R/pkg/R/serialize.R | 4 +- R/pkg/R/sparkR.R | 11 ++++-- R/pkg/R/utils.R | 37 +++++++++--------- 15 files changed, 119 insertions(+), 107 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 14d2076e88ef..da09ff40a91a 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -431,7 +431,7 @@ setMethod("coltypes", if (is.null(type)) { specialtype <- specialtypeshandle(x) if (is.null(specialtype)) { - stop(paste("Unsupported data type: ", x)) + stop("Unsupported data type: ", x) } type <- PRIMITIVE_TYPES[[specialtype]] } @@ -829,8 +829,11 @@ setMethod("repartitionByRange", jcol <- lapply(cols, function(c) { c@jc }) sdf <- callJMethod(x@sdf, "repartitionByRange", numToInt(numPartitions), jcol) } else { - stop(paste("numPartitions and col must be numeric and Column; however, got", - class(numPartitions), "and", class(col))) + stop(gettextf( + "numPartitions and col must be numeric and Column; however, got %s and %s", + class(numPartitions), class(col), domain = "R-SparkR"), + domain = NA + ) } } else if (!is.null(col)) { # only columns are specified @@ -839,7 +842,7 @@ setMethod("repartitionByRange", jcol <- lapply(cols, function(c) { c@jc }) sdf <- callJMethod(x@sdf, "repartitionByRange", jcol) } else { - stop(paste("col must be Column; however, got", class(col))) + stop("col must be Column; however, got ", class(col)) } } else if (!is.null(numPartitions)) { # only numPartitions is specified @@ -1068,10 +1071,10 @@ setMethod("sample", signature(x = "SparkDataFrame"), function(x, withReplacement = FALSE, fraction, seed) { if (!is.numeric(fraction)) { - stop(paste("fraction must be numeric; however, got", class(fraction))) + stop("fraction must be numeric; however, got ", class(fraction)) } if (!is.logical(withReplacement)) { - stop(paste("withReplacement must be logical; however, got", class(withReplacement))) + stop("withReplacement must be logical; however, got ", class(withReplacement)) } if (!missing(seed)) { @@ -1211,11 +1214,10 @@ setMethod("collect", checkSchemaInArrow(schema(x)) TRUE }, error = function(e) { - warning(paste0("The conversion from Spark DataFrame to R DataFrame was attempted ", - "with Arrow optimization because ", - "'spark.sql.execution.arrow.sparkr.enabled' is set to true; ", - "however, failed, attempting non-optimization. Reason: ", - e)) + warning("The conversion from Spark DataFrame to R DataFrame was attempted ", + "with Arrow optimization because ", + "'spark.sql.execution.arrow.sparkr.enabled' is set to true; ", + "however, failed, attempting non-optimization. Reason: ", e) FALSE }) } @@ -1513,8 +1515,8 @@ dapplyInternal <- function(x, func, schema) { if (inherits(schema, "structType")) { checkSchemaInArrow(schema) } else if (is.null(schema)) { - stop(paste0("Arrow optimization does not support 'dapplyCollect' yet. 
Please disable ", - "Arrow optimization or use 'collect' and 'dapply' APIs instead.")) + stop("Arrow optimization does not support 'dapplyCollect' yet. Please disable ", + "Arrow optimization or use 'collect' and 'dapply' APIs instead.") } else { stop("'schema' should be DDL-formatted string or structType.") } @@ -1995,8 +1997,8 @@ setMethod("[", signature(x = "SparkDataFrame"), x } else { if (class(i) != "Column") { - stop(paste0("Expressions other than filtering predicates are not supported ", - "in the first parameter of extract operator [ or subset() method.")) + stop("Expressions other than filtering predicates are not supported ", + "in the first parameter of extract operator [ or subset() method.") } filter(x, i) } @@ -2587,18 +2589,18 @@ setMethod("join", if (is.null(joinType)) { sdf <- callJMethod(x@sdf, "join", y@sdf, joinExpr@jc) } else { - if (joinType %in% c("inner", "cross", + valid_join_types = c("inner", "cross", "outer", "full", "fullouter", "full_outer", "left", "leftouter", "left_outer", "right", "rightouter", "right_outer", - "semi", "left_semi", "leftsemi", "anti", "left_anti", "leftanti")) { + "semi", "left_semi", "leftsemi", "anti", "left_anti", "leftanti") + if (joinType %in% valid_join_types) { joinType <- gsub("_", "", joinType) sdf <- callJMethod(x@sdf, "join", y@sdf, joinExpr@jc, joinType) } else { - stop(paste("joinType must be one of the following types:", - "'inner', 'cross', 'outer', 'full', 'fullouter', 'full_outer',", - "'left', 'leftouter', 'left_outer', 'right', 'rightouter', 'right_outer',", - "'semi', 'leftsemi', 'left_semi', 'anti', 'leftanti' or 'left_anti'.")) + stop(gettextf("joinType must be one of the following types: '%s'", + paste(valid_join_types, collapse = "', '"), domain = "R-SparkR"), + domain = NA) } } } diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R index 6e89b4bb4d96..66dab4c134e3 100644 --- a/R/pkg/R/RDD.R +++ b/R/pkg/R/RDD.R @@ -947,7 +947,7 @@ setMethod("takeSample", signature(x = "RDD", withReplacement = "logical", MAXINT <- .Machine$integer.max if (num < 0) - stop(paste("Negative number of elements requested")) + stop("Negative number of elements requested") if (initialCount > MAXINT - 1) { maxSelected <- MAXINT - 1 diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index c6842912706a..dcde79783cb0 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -34,7 +34,7 @@ getInternalType <- function(x) { Date = "date", POSIXlt = "timestamp", POSIXct = "timestamp", - stop(paste("Unsupported type for SparkDataFrame:", class(x)))) + stop("Unsupported type for SparkDataFrame:", class(x))) } #' return the SparkSession @@ -111,9 +111,11 @@ sparkR.conf <- function(key, defaultValue) { tryCatch(callJMethod(conf, "get", key), error = function(e) { if (any(grep("java.util.NoSuchElementException", as.character(e)))) { - stop(paste0("Config '", key, "' is not set")) + stop(gettextf("Config '%s' is not set", + key, domain = "R-SparkR"), + domain = NA) } else { - stop(paste0("Unknown error: ", as.character(e))) + stop("Unknown error: ", as.character(e)) } }) } else { @@ -207,7 +209,8 @@ getSchema <- function(schema, firstRow = NULL, rdd = NULL) { names <- lapply(names, function(n) { nn <- gsub("[.]", "_", n) if (nn != n) { - warning(paste("Use", nn, "instead of", n, "as column name")) + warning(gettextf("Use %s instead of %s as column name", + nn, n, domain = "R-SparkR"), domain = NA) } nn }) @@ -289,10 +292,9 @@ createDataFrame <- function(data, schema = NULL, samplingRatio = 1.0, TRUE }, error = function(e) { - 
warning(paste0("createDataFrame attempted Arrow optimization because ", - "'spark.sql.execution.arrow.sparkr.enabled' is set to true; however, ", - "failed, attempting non-optimization. Reason: ", - e)) + warning("createDataFrame attempted Arrow optimization because ", + "'spark.sql.execution.arrow.sparkr.enabled' is set to true; however, ", + "failed, attempting non-optimization. Reason: ", e) FALSE }) } @@ -325,7 +327,7 @@ createDataFrame <- function(data, schema = NULL, samplingRatio = 1.0, } else if (inherits(data, "RDD")) { rdd <- data } else { - stop(paste("unexpected type:", class(data))) + stop("unexpected type: ", class(data)) } schema <- getSchema(schema, firstRow, rdd) diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R index 2ff68ab7b9d7..0b767afd4411 100644 --- a/R/pkg/R/client.R +++ b/R/pkg/R/client.R @@ -102,10 +102,17 @@ checkJavaVersion <- function() { javaVersionNum <- as.integer(versions[1]) } if (javaVersionNum < minJavaVersion || javaVersionNum >= maxJavaVersion) { - stop(paste0("Java version, greater than or equal to ", minJavaVersion, - " and less than ", maxJavaVersion, - ", is required for this package; found version: ", - javaVersionStr)) + stop( + gettextf( + "Java version, greater than or equal to %s and less than %s, ", + minJavaVersion, maxJavaVersion, domain = "R-SparkR" + ), + gettextf( + "is required for this package; found version: %s", + javaVersionStr, domain = "R-SparkR" + ), + domain = NA + ) } return(javaVersionNum) } diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index d96a287f818a..e3c9d9f8793d 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -144,13 +144,13 @@ parallelize <- function(sc, coll, numSlices = 1) { if ((!is.list(coll) && !is.vector(coll)) || is.data.frame(coll)) { # nolint end if (is.data.frame(coll)) { - message(paste("context.R: A data frame is parallelized by columns.")) + message("context.R: A data frame is parallelized by columns.") } else { if (is.matrix(coll)) { - message(paste("context.R: A matrix is parallelized by elements.")) + message("context.R: A matrix is parallelized by elements.") } else { - message(paste("context.R: parallelize() currently only supports lists and vectors.", - "Calling as.list() to coerce coll into a list.")) + message("context.R: parallelize() currently only supports lists and vectors. ", + "Calling as.list() to coerce coll into a list.") } } coll <- as.list(coll) diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R index ca4a6e342d77..4676da6e239a 100644 --- a/R/pkg/R/deserialize.R +++ b/R/pkg/R/deserialize.R @@ -57,7 +57,7 @@ readTypedObject <- function(con, type) { "s" = readStruct(con), "n" = NULL, "j" = getJobj(readString(con)), - stop(paste("Unsupported type for deserialization", type))) + stop("Unsupported type for deserialization", type)) } readStringData <- function(con, len) { diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R index 2b7995e1e37f..99d62240a3b2 100644 --- a/R/pkg/R/group.R +++ b/R/pkg/R/group.R @@ -234,8 +234,8 @@ gapplyInternal <- function(x, func, schema) { if (inherits(schema, "structType")) { checkSchemaInArrow(schema) } else if (is.null(schema)) { - stop(paste0("Arrow optimization does not support 'gapplyCollect' yet. Please disable ", - "Arrow optimization or use 'collect' and 'gapply' APIs instead.")) + stop("Arrow optimization does not support 'gapplyCollect' yet. 
Please disable ", + "Arrow optimization or use 'collect' and 'gapply' APIs instead.") } else { stop("'schema' should be DDL-formatted string or structType.") } diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 6d1edf6b6f3c..42c2ab80cd3b 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -89,8 +89,8 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, } if (overwrite) { - message(paste0("Overwrite = TRUE: download and overwrite the tar file", - "and Spark package directory if they exist.")) + message("Overwrite = TRUE: download and overwrite the tar file", + "and Spark package directory if they exist.") } releaseUrl <- Sys.getenv("SPARKR_RELEASE_DOWNLOAD_URL") @@ -103,12 +103,14 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, # can use dir.exists(packageLocalDir) under R 3.2.0 or later if (!is.na(file.info(packageLocalDir)$isdir) && !overwrite) { if (releaseUrl != "") { - message(paste(packageName, "found, setting SPARK_HOME to", packageLocalDir)) + message(gettextf("%s found, setting SPARK_HOME to %s", + packageName, packageLocalDir, domain = "R-SparkR"), + domain = NA) } else { - fmt <- "%s for Hadoop %s found, setting SPARK_HOME to %s" - msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion), - packageLocalDir) - message(msg) + message(gettextf("%s for Hadoop %s found, setting SPARK_HOME to %s", + version, if (hadoopVersion == 'without') 'Free build' else hadoopVersion, + packageLocalDir, domain = "R-SparkR"), + domain = NA) } Sys.setenv(SPARK_HOME = packageLocalDir) return(invisible(packageLocalDir)) @@ -127,26 +129,23 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, success <- downloadUrl(releaseUrl, packageLocalPath) if (!success) { unlink(packageLocalPath) - stop(paste0("Fetch failed from ", releaseUrl)) + stop("Fetch failed from ", releaseUrl) } } else { robustDownloadTar(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) } } - message(sprintf("Installing to %s", localDir)) + message("Installing to ", localDir) # There are two ways untar can fail - untar could stop() on errors like incomplete block on file # or, tar command can return failure code success <- tryCatch(untar(tarfile = packageLocalPath, exdir = localDir) == 0, error = function(e) { - message(e) - message() + message(e, '\n') FALSE }, warning = function(w) { - # Treat warning as error, add an empty line with message() - message(w) - message() + message(w, '\n') FALSE }) if (!tarExists || overwrite || !success) { @@ -160,7 +159,7 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, if (!success) stop("Extract archive failed.") message("DONE.") Sys.setenv(SPARK_HOME = packageLocalDir) - message(paste("SPARK_HOME set to", packageLocalDir)) + message("SPARK_HOME set to ", packageLocalDir) invisible(packageLocalDir) } @@ -173,7 +172,7 @@ robustDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, pa if (success) { return() } else { - message(paste0("Unable to download from mirrorUrl: ", mirrorUrl)) + message("Unable to download from mirrorUrl: ", mirrorUrl) } } else { message("MirrorUrl not provided.") @@ -201,11 +200,11 @@ robustDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, pa # remove any partially downloaded file unlink(packageLocalPath) message("Unable to download from default mirror site: ", mirrorUrl) - msg <- sprintf(paste("Unable to download Spark %s for Hadoop %s.", - "Please check network connection, Hadoop version,", - "or 
provide other mirror sites."), - version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion)) - stop(msg) + stop(gettextf("Unable to download Spark %s for Hadoop %s. ", + version, if (hadoopVersion == "without") "Free build" else hadoopVersion, + domain = "R-SparkR"), + "Please check network connection, Hadoop version, or provide other mirror sites.", + domain = NA) } } @@ -222,7 +221,7 @@ getPreferredMirror <- function(version, packageName) { endPos <- matchInfo + attr(matchInfo, "match.length") - 2 mirrorPreferred <- base::substr(linePreferred, startPos, endPos) mirrorPreferred <- paste0(mirrorPreferred, "spark") - message(sprintf("Preferred mirror site found: %s", mirrorPreferred)) + message("Preferred mirror site found: ", mirrorPreferred) } else { mirrorPreferred <- NULL } @@ -231,24 +230,21 @@ getPreferredMirror <- function(version, packageName) { directDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) { packageRemotePath <- paste0(file.path(mirrorUrl, version, packageName), ".tgz") - fmt <- "Downloading %s for Hadoop %s from:\n- %s" - msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion), - packageRemotePath) - message(msg) + message(gettextf("Downloading %s for Hadoop %s from:\n- %s", + version, if (hadoopVersion == "without") "Free build" else hadoopVersion, + packageRemotePath, domain = "R-SparkR"), + domain = NA) downloadUrl(packageRemotePath, packageLocalPath) } downloadUrl <- function(remotePath, localPath) { isFail <- tryCatch(download.file(remotePath, localPath), error = function(e) { - message(e) - message() + message(e, '\n') TRUE }, warning = function(w) { - # Treat warning as error, add an empty line with message() - message(w) - message() + message(w, '\n') TRUE }) !isFail @@ -279,9 +275,9 @@ sparkCachePath <- function() { winAppPath <- Sys.getenv("USERPROFILE", unset = NA) } if (is.na(winAppPath)) { - stop(paste("%LOCALAPPDATA% and %USERPROFILE% not found.", - "Please define the environment variable", - "or restart and enter an installation path in localDir.")) + stop("%LOCALAPPDATA% and %USERPROFILE% not found. 
", + "Please define the environment variable ", + "or restart and enter an installation path in localDir.") } else { path <- file.path(winAppPath, "Apache", "Spark", "Cache") } @@ -293,7 +289,7 @@ sparkCachePath <- function() { Sys.getenv("XDG_CACHE_HOME", file.path(Sys.getenv("HOME"), ".cache")), "spark") } } else { - stop(sprintf("Unknown OS: %s", .Platform$OS.type)) + stop(gettextf("Unknown OS: %s", .Platform$OS.type, domain = "R-SparkR"), domain = NA) } normalizePath(path, mustWork = FALSE) } @@ -322,7 +318,7 @@ installInstruction <- function(mode) { "If you need further help, ", "contact the administrators of the cluster.") } else { - stop(paste0("No instruction found for ", mode, " mode.")) + stop("No instruction found for mode ", mode) } } diff --git a/R/pkg/R/mllib_classification.R b/R/pkg/R/mllib_classification.R index fc5ac9f06d8d..ec83b6bd406a 100644 --- a/R/pkg/R/mllib_classification.R +++ b/R/pkg/R/mllib_classification.R @@ -337,8 +337,8 @@ setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula") if (!is.null(lowerBoundsOnCoefficients) && (row != nrow(upperBoundsOnCoefficients) || col != ncol(upperBoundsOnCoefficients))) { - stop(paste0("dimension of upperBoundsOnCoefficients ", - "is not the same as lowerBoundsOnCoefficients", sep = "")) + stop("dimension of upperBoundsOnCoefficients ", + "is not the same as lowerBoundsOnCoefficients") } if (is.null(lowerBoundsOnCoefficients)) { diff --git a/R/pkg/R/mllib_stat.R b/R/pkg/R/mllib_stat.R index f8c332935996..6db4d5d4831d 100644 --- a/R/pkg/R/mllib_stat.R +++ b/R/pkg/R/mllib_stat.R @@ -69,8 +69,7 @@ setMethod("spark.kstest", signature(data = "SparkDataFrame"), function(data, testCol = "test", nullHypothesis = c("norm"), distParams = c(0, 1)) { tryCatch(match.arg(nullHypothesis), error = function(e) { - msg <- paste("Distribution", nullHypothesis, "is not supported.") - stop(msg) + stop("Distribution ", nullHypothesis, " is not supported.") }) if (nullHypothesis == "norm") { distParams <- as.numeric(distParams) diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R index 9c2e57d3067d..b29381bb900f 100644 --- a/R/pkg/R/pairRDD.R +++ b/R/pkg/R/pairRDD.R @@ -906,7 +906,7 @@ setMethod("sampleByKey", for (elem in fractions) { if (elem < 0.0) { - stop(paste("Negative fraction value ", fractions[which(fractions == elem)])) + stop("Negative fraction value ", fractions[which(fractions == elem)]) } } diff --git a/R/pkg/R/schema.R b/R/pkg/R/schema.R index 9831fc3cc6d0..ff40ada4ca29 100644 --- a/R/pkg/R/schema.R +++ b/R/pkg/R/schema.R @@ -200,7 +200,7 @@ checkType <- function(type) { }) } - stop(paste("Unsupported type for SparkDataframe:", type)) + stop("Unsupported type for SparkDataframe:", type) } #' @param type The data type of the field diff --git a/R/pkg/R/serialize.R b/R/pkg/R/serialize.R index cb3c1c59d12e..7760d9be16f0 100644 --- a/R/pkg/R/serialize.R +++ b/R/pkg/R/serialize.R @@ -84,7 +84,7 @@ writeObject <- function(con, object, writeType = TRUE) { Date = writeDate(con, object), POSIXlt = writeTime(con, object), POSIXct = writeTime(con, object), - stop(paste("Unsupported type for serialization", type))) + stop("Unsupported type for serialization ", type)) } writeVoid <- function(con) { @@ -158,7 +158,7 @@ writeType <- function(con, class) { Date = "D", POSIXlt = "t", POSIXct = "t", - stop(paste("Unsupported type for serialization", class))) + stop("Unsupported type for serialization ", class)) writeBin(charToRaw(type), con) } diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index cc8c92b8ab26..db3e79b3b109 
100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -154,8 +154,8 @@ sparkR.sparkContext <- function( connectionTimeout <- as.numeric(Sys.getenv("SPARKR_BACKEND_CONNECTION_TIMEOUT", "6000")) if (existingPort != "") { if (length(packages) != 0) { - warning(paste("sparkPackages has no effect when using spark-submit or sparkR shell", - " please use the --packages commandline instead", sep = ",")) + warning("sparkPackages has no effect when using spark-submit or sparkR shell, ", + "please use the --packages commandline instead") } backendPort <- existingPort authSecret <- Sys.getenv("SPARKR_BACKEND_AUTH_SECRET") @@ -439,8 +439,11 @@ sparkR.session <- function( rPackageVersion <- paste0(packageVersion("SparkR")) if (jvmVersionStrip != rPackageVersion) { - warning(paste("Version mismatch between Spark JVM and SparkR package. JVM version was", - jvmVersion, ", while R package version was", rPackageVersion)) + warning( + "Version mismatch between Spark JVM and SparkR package. ", + gettextf("JVM version was %s, while R package version was %s", + jvmVersion, rPackageVersion, domain = "R-SparkR"), + domain = NA) } sparkSession diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index a8c1ddb3dd20..b9147e48babd 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -46,9 +46,9 @@ convertJListToRList <- function(jList, flatten, logicalUpperBound = NULL, res <- list(unserialize(keyBytes), unserialize(valBytes)) } else { - stop(paste("utils.R: convertJListToRList only supports", - "RDD[Array[Byte]] and", - "JavaPairRDD[Array[Byte], Array[Byte]] for now")) + stop("utils.R: convertJListToRList only supports ", + "RDD[Array[Byte]] and ", + "JavaPairRDD[Array[Byte], Array[Byte]] for now") } } else { if (inherits(obj, "raw")) { @@ -137,7 +137,7 @@ hashCode <- function(key) { as.integer(hashC) } } else { - warning(paste("Could not hash object, returning 0", sep = "")) + warning("Could not hash object, returning 0") as.integer(0) } } @@ -354,8 +354,10 @@ varargsToStrEnv <- function(...) { } else { value <- pairs[[name]] if (!(is.logical(value) || is.numeric(value) || is.character(value) || is.null(value))) { - stop(paste0("Unsupported type for ", name, " : ", class(value), - ". Supported types are logical, numeric, character and NULL."), call. = FALSE) + stop(gettextf("Unsupported type for %s : %s. ", + name, class(value), domain = "R-SparkR"), + "Supported types are logical, numeric, character and NULL.", + call. = FALSE, domain = NA) } if (is.logical(value)) { env[[name]] <- tolower(as.character(value)) @@ -369,8 +371,9 @@ varargsToStrEnv <- function(...) { } if (length(ignoredNames) != 0) { - warning(paste0("Unnamed arguments ignored: ", paste(ignoredNames, collapse = ", "), "."), - call. = FALSE) + warning(gettextf("Unnamed arguments ignored: %s.", + paste(ignoredNames, collapse = ", "), domain = "R-SparkR"), + call. = FALSE, domain = NA) } env } @@ -449,7 +452,7 @@ storageLevelToString <- function(levelObj) { # the user to type (for example) `5` instead of `5L` to avoid a confusing error message. numToInt <- function(num) { if (as.integer(num) != num) { - warning(paste("Coercing", as.list(sys.call())[[2]], "to integer.")) + warning("Coercing ", as.list(sys.call())[[2L]], " to integer.") } as.integer(num) } @@ -650,8 +653,8 @@ mergePartitions <- function(rdd, zip) { # For zip operation, check if corresponding partitions # of both RDDs have the same number of elements. 
if (zip && lengthOfKeys != lengthOfValues) { - stop(paste("Can only zip RDDs with same number of elements", - "in each pair of corresponding partitions.")) + stop("Can only zip RDDs with same number of elements ", + "in each pair of corresponding partitions.") } if (lengthOfKeys > 1) { @@ -825,21 +828,21 @@ captureJVMException <- function(e, method) { rmsg <- msg[1] # Extract the first message of JVM exception. first <- strsplit(msg[2], "\r?\n\tat")[[1]][1] - stop(paste0(rmsg, "streaming query error - ", first), call. = FALSE) + stop(rmsg, "streaming query error - ", first, call. = FALSE) } else if (any(grep("java.lang.IllegalArgumentException: ", stacktrace))) { msg <- strsplit(stacktrace, "java.lang.IllegalArgumentException: ", fixed = TRUE)[[1]] # Extract "Error in ..." message. rmsg <- msg[1] # Extract the first message of JVM exception. first <- strsplit(msg[2], "\r?\n\tat")[[1]][1] - stop(paste0(rmsg, "illegal argument - ", first), call. = FALSE) + stop(rmsg, "illegal argument - ", first, call. = FALSE) } else if (any(grep("org.apache.spark.sql.AnalysisException: ", stacktrace))) { msg <- strsplit(stacktrace, "org.apache.spark.sql.AnalysisException: ", fixed = TRUE)[[1]] # Extract "Error in ..." message. rmsg <- msg[1] # Extract the first message of JVM exception. first <- strsplit(msg[2], "\r?\n\tat")[[1]][1] - stop(paste0(rmsg, "analysis error - ", first), call. = FALSE) + stop(rmsg, "analysis error - ", first, call. = FALSE) } else if (any(grep("org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException: ", stacktrace))) { msg <- strsplit(stacktrace, "org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException: ", @@ -848,7 +851,7 @@ captureJVMException <- function(e, method) { rmsg <- msg[1] # Extract the first message of JVM exception. first <- strsplit(msg[2], "\r?\n\tat")[[1]][1] - stop(paste0(rmsg, "no such database - ", first), call. = FALSE) + stop(rmsg, "no such database - ", first, call. = FALSE) } else if (any(grep("org.apache.spark.sql.catalyst.analysis.NoSuchTableException: ", stacktrace))) { msg <- strsplit(stacktrace, "org.apache.spark.sql.catalyst.analysis.NoSuchTableException: ", @@ -857,7 +860,7 @@ captureJVMException <- function(e, method) { rmsg <- msg[1] # Extract the first message of JVM exception. first <- strsplit(msg[2], "\r?\n\tat")[[1]][1] - stop(paste0(rmsg, "no such table - ", first), call. = FALSE) + stop(rmsg, "no such table - ", first, call. = FALSE) } else if (any(grep("org.apache.spark.sql.catalyst.parser.ParseException: ", stacktrace))) { msg <- strsplit(stacktrace, "org.apache.spark.sql.catalyst.parser.ParseException: ", fixed = TRUE)[[1]] @@ -865,7 +868,7 @@ captureJVMException <- function(e, method) { rmsg <- msg[1] # Extract the first message of JVM exception. first <- strsplit(msg[2], "\r?\n\tat")[[1]][1] - stop(paste0(rmsg, "parse error - ", first), call. = FALSE) + stop(rmsg, "parse error - ", first, call. = FALSE) } else { stop(stacktrace, call. 
= FALSE) } From b84c6f3a0f29a572b1a342b8535224db153c5e09 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 27 Apr 2020 16:20:38 +0800 Subject: [PATCH 02/10] some missing whitespace --- R/pkg/R/SQLContext.R | 2 +- R/pkg/R/deserialize.R | 2 +- R/pkg/R/schema.R | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index dcde79783cb0..67d30712ecc4 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -34,7 +34,7 @@ getInternalType <- function(x) { Date = "date", POSIXlt = "timestamp", POSIXct = "timestamp", - stop("Unsupported type for SparkDataFrame:", class(x))) + stop("Unsupported type for SparkDataFrame: ", class(x))) } #' return the SparkSession diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R index 4676da6e239a..3e7c456bd548 100644 --- a/R/pkg/R/deserialize.R +++ b/R/pkg/R/deserialize.R @@ -57,7 +57,7 @@ readTypedObject <- function(con, type) { "s" = readStruct(con), "n" = NULL, "j" = getJobj(readString(con)), - stop("Unsupported type for deserialization", type)) + stop("Unsupported type for deserialization ", type)) } readStringData <- function(con, len) { diff --git a/R/pkg/R/schema.R b/R/pkg/R/schema.R index ff40ada4ca29..ca2c1b6f7a5f 100644 --- a/R/pkg/R/schema.R +++ b/R/pkg/R/schema.R @@ -200,7 +200,7 @@ checkType <- function(type) { }) } - stop("Unsupported type for SparkDataframe:", type) + stop("Unsupported type for SparkDataframe: ", type) } #' @param type The data type of the field From 1940eb81487b7c92b4096dd260489b9001767995 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 27 Apr 2020 16:48:59 +0800 Subject: [PATCH 03/10] linting --- R/pkg/R/DataFrame.R | 2 +- R/pkg/R/install.R | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index da09ff40a91a..eeb32336d252 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2589,7 +2589,7 @@ setMethod("join", if (is.null(joinType)) { sdf <- callJMethod(x@sdf, "join", y@sdf, joinExpr@jc) } else { - valid_join_types = c("inner", "cross", + valid_join_types <- c("inner", "cross", "outer", "full", "fullouter", "full_outer", "left", "leftouter", "left_outer", "right", "rightouter", "right_outer", diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 42c2ab80cd3b..5fad401b411a 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -108,7 +108,7 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, domain = NA) } else { message(gettextf("%s for Hadoop %s found, setting SPARK_HOME to %s", - version, if (hadoopVersion == 'without') 'Free build' else hadoopVersion, + version, if (hadoopVersion == "without") "Free build" else hadoopVersion, packageLocalDir, domain = "R-SparkR"), domain = NA) } @@ -141,11 +141,11 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, # or, tar command can return failure code success <- tryCatch(untar(tarfile = packageLocalPath, exdir = localDir) == 0, error = function(e) { - message(e, '\n') + message(e, "\n") FALSE }, warning = function(w) { - message(w, '\n') + message(w, "\n") FALSE }) if (!tarExists || overwrite || !success) { @@ -240,11 +240,11 @@ directDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, pa downloadUrl <- function(remotePath, localPath) { isFail <- tryCatch(download.file(remotePath, localPath), error = function(e) { - message(e, '\n') + message(e, "\n") TRUE }, warning = function(w) { - message(w, '\n') + message(w, "\n") TRUE }) !isFail From 
e4b8ca94c34065a962ff12295ad91b103a05afe5 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 27 Apr 2020 18:40:13 +0800 Subject: [PATCH 04/10] fix broken test --- R/pkg/tests/fulltests/test_sparkSQL.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index c892feb61da8..72682027f838 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -2558,7 +2558,7 @@ test_that("join(), crossJoin() and merge() on a DataFrame", { error_msg <- paste("joinType must be one of the following types:", "'inner', 'cross', 'outer', 'full', 'fullouter', 'full_outer',", "'left', 'leftouter', 'left_outer', 'right', 'rightouter', 'right_outer',", - "'semi', 'leftsemi', 'left_semi', 'anti', 'leftanti' or 'left_anti'.") + "'semi', 'left_semi', 'leftsemi', 'anti', 'left_anti', 'leftanti'") expect_error(join(df2, df, df2$name == df$name, "invalid"), error_msg) merged <- merge(df, df2, by.x = "name", by.y = "name", all.x = TRUE, all.y = TRUE) From 5f36b1a9366ca62f644096eeadfc1e7d499d5cd1 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 29 Apr 2020 10:38:30 +0800 Subject: [PATCH 05/10] revert usage of gettextf --- R/pkg/R/DataFrame.R | 16 ++++++---------- R/pkg/R/SQLContext.R | 5 ++--- R/pkg/R/client.R | 14 +++----------- R/pkg/R/install.R | 28 +++++++++++----------------- R/pkg/R/sparkR.R | 8 +++----- R/pkg/R/utils.R | 10 +++------- 6 files changed, 28 insertions(+), 53 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 60ef047bc7f3..72e0d6bab9d3 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -691,13 +691,13 @@ setMethod("storageLevel", #' Coalesce #' -#' Returns a new SparkDataFrame that has exactly \code{numPartitions} partitions. +#' Returns a new SparkDataFrame that has exactly \code{"rtitions} partitions. #' This operation results in a narrow dependency, e.g. if you go from 1000 partitions to 100 #' partitions, there will not be a shuffle, instead each of the 100 new partitions will claim 10 of #' the current partitions. If a larger number of partitions is requested, it will stay at the #' current number of partitions. #' -#' However, if you're doing a drastic coalesce on a SparkDataFrame, e.g. to numPartitions = 1, +#' However, if you're doing a drastic coalesce on a SparkDataFrame, e.g. to "rtitions = 1, #' this may result in your computation taking place on fewer nodes than #' you like (e.g. one node in the case of numPartitions = 1). To avoid this, #' call \code{repartition}. 
This will add a shuffle step, but means the @@ -829,11 +829,8 @@ setMethod("repartitionByRange", jcol <- lapply(cols, function(c) { c@jc }) sdf <- callJMethod(x@sdf, "repartitionByRange", numToInt(numPartitions), jcol) } else { - stop(gettextf( - "numPartitions and col must be numeric and Column; however, got %s and %s", - class(numPartitions), class(col), domain = "R-SparkR"), - domain = NA - ) + stop("numPartitions and col must be numeric and Column; however, got ", + class(numPartitions), " and ", class(col)) } } else if (!is.null(col)) { # only columns are specified @@ -2620,9 +2617,8 @@ setMethod("join", joinType <- gsub("_", "", joinType, fixed = TRUE) sdf <- callJMethod(x@sdf, "join", y@sdf, joinExpr@jc, joinType) } else { - stop(gettextf("joinType must be one of the following types: '%s'", - paste(valid_join_types, collapse = "', '"), domain = "R-SparkR"), - domain = NA) + stop("joinType must be one of the following types: ", + "'", paste(valid_join_types, collapse = "', '"), "'") } } } diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index c11645a2b8c4..c0ac68332ec4 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -112,7 +112,7 @@ sparkR.conf <- function(key, defaultValue) { error = function(e) { estr <- as.character(e) if (any(grepl("java.util.NoSuchElementException", estr, fixed = TRUE))) { - stop(gettextf("Config '%s' is not set", key, domain = "R-SparkR"), domain = NA) + stop("Config '", key, "' is not set") } else { stop("Unknown error: ", estr) } @@ -208,8 +208,7 @@ getSchema <- function(schema, firstRow = NULL, rdd = NULL) { names <- lapply(names, function(n) { nn <- gsub(".", "_", n, fixed = TRUE) if (nn != n) { - warning(gettextf("Use %s instead of %s as column name", - nn, n, domain = "R-SparkR"), domain = NA) + warning("Use ", nn, " instead of ", n, " as column name") } nn }) diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R index 17382c4c643c..797a5c7da154 100644 --- a/R/pkg/R/client.R +++ b/R/pkg/R/client.R @@ -102,17 +102,9 @@ checkJavaVersion <- function() { javaVersionNum <- as.integer(versions[1]) } if (javaVersionNum < minJavaVersion || javaVersionNum >= maxJavaVersion) { - stop( - gettextf( - "Java version, greater than or equal to %s and less than %s, ", - minJavaVersion, maxJavaVersion, domain = "R-SparkR" - ), - gettextf( - "is required for this package; found version: %s", - javaVersionStr, domain = "R-SparkR" - ), - domain = NA - ) + stop("Java version, greater than or equal to ", minJavaVersion, + " and less than ", maxJavaVersion, ", is required for this ", + "package; found version: ", javaVersionStr) } return(javaVersionNum) } diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 74c7863eb8e0..ea2c0b4c0f42 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -103,14 +103,11 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, # can use dir.exists(packageLocalDir) under R 3.2.0 or later if (!is.na(file.info(packageLocalDir)$isdir) && !overwrite) { if (releaseUrl != "") { - message(gettextf("%s found, setting SPARK_HOME to %s", - packageName, packageLocalDir, domain = "R-SparkR"), - domain = NA) + message(packageName, " found, setting SPARK_HOME to ", packageLocalDir) } else { - message(gettextf("%s for Hadoop %s found, setting SPARK_HOME to %s", - version, if (hadoopVersion == "without") "Free build" else hadoopVersion, - packageLocalDir, domain = "R-SparkR"), - domain = NA) + message(version, " for Hadoop ", + if (hadoopVersion == "without") "Free build" else hadoopVersion, + " found, setting SPARK_HOME to 
", packageLocalDir) } Sys.setenv(SPARK_HOME = packageLocalDir) return(invisible(packageLocalDir)) @@ -200,11 +197,9 @@ robustDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, pa # remove any partially downloaded file unlink(packageLocalPath) message("Unable to download from default mirror site: ", mirrorUrl) - stop(gettextf("Unable to download Spark %s for Hadoop %s. ", - version, if (hadoopVersion == "without") "Free build" else hadoopVersion, - domain = "R-SparkR"), - "Please check network connection, Hadoop version, or provide other mirror sites.", - domain = NA) + stop("Unable to download Spark ", version, + " for Hadoop ", if (hadoopVersion == "without") "Free build" else hadoopVersion, + ". Please check network connection, Hadoop version, or provide other mirror sites.") } } @@ -230,10 +225,9 @@ getPreferredMirror <- function(version, packageName) { directDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) { packageRemotePath <- paste0(file.path(mirrorUrl, version, packageName), ".tgz") - message(gettextf("Downloading %s for Hadoop %s from:\n- %s", - version, if (hadoopVersion == "without") "Free build" else hadoopVersion, - packageRemotePath, domain = "R-SparkR"), - domain = NA) + message("Downloading ", version, " for Hadoop ", + if (hadoopVersion == "without") "Free build" else hadoopVersion, + " from:\n- ", packageRemotePath) downloadUrl(packageRemotePath, packageLocalPath) } @@ -289,7 +283,7 @@ sparkCachePath <- function() { Sys.getenv("XDG_CACHE_HOME", file.path(Sys.getenv("HOME"), ".cache")), "spark") } } else { - stop(gettextf("Unknown OS: %s", .Platform$OS.type, domain = "R-SparkR"), domain = NA) + stop("Unknown OS: ", .Platform$OS.type) } normalizePath(path, mustWork = FALSE) } diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 30c4bae89d5e..e4a11a5f78a7 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -439,11 +439,9 @@ sparkR.session <- function( rPackageVersion <- paste0(packageVersion("SparkR")) if (jvmVersionStrip != rPackageVersion) { - warning( - "Version mismatch between Spark JVM and SparkR package. ", - gettextf("JVM version was %s, while R package version was %s", - jvmVersion, rPackageVersion, domain = "R-SparkR"), - domain = NA) + warning("Version mismatch between Spark JVM and SparkR package. ", + "JVM version was ", jvmVersion, + ", while R package version was ", rPackageVersion) } sparkSession diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 4acfd0272ac6..330d15ee0e44 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -354,10 +354,8 @@ varargsToStrEnv <- function(...) { } else { value <- pairs[[name]] if (!(is.logical(value) || is.numeric(value) || is.character(value) || is.null(value))) { - stop(gettextf("Unsupported type for %s : %s. ", - name, class(value), domain = "R-SparkR"), - "Supported types are logical, numeric, character and NULL.", - call. = FALSE, domain = NA) + stop("Unsupported type for ", name, " : ", toString(class(value)), ". ", + "Supported types are logical, numeric, character and NULL.") } if (is.logical(value)) { env[[name]] <- tolower(as.character(value)) @@ -371,9 +369,7 @@ varargsToStrEnv <- function(...) { } if (length(ignoredNames) != 0) { - warning(gettextf("Unnamed arguments ignored: %s.", - paste(ignoredNames, collapse = ", "), domain = "R-SparkR"), - call. 
= FALSE, domain = NA) + warning("Unnamed arguments ignored: ", toString(ignoredNames)) } env } From 4d86dc605ab8844181472d0220ebc24e835f3dff Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 29 Apr 2020 11:26:36 +0800 Subject: [PATCH 06/10] missing . --- R/pkg/R/utils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index 330d15ee0e44..f00dc74f72f0 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -369,7 +369,7 @@ varargsToStrEnv <- function(...) { } if (length(ignoredNames) != 0) { - warning("Unnamed arguments ignored: ", toString(ignoredNames)) + warning("Unnamed arguments ignored: ", toString(ignoredNames), ".") } env } From 2c92360a5e637678c37abd44b627230423893523 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 29 Apr 2020 12:34:52 +0800 Subject: [PATCH 07/10] missing some call.=FALSE --- R/pkg/R/DataFrame.R | 6 +++--- R/pkg/R/utils.R | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 72e0d6bab9d3..a8bd7198ba1a 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -691,13 +691,13 @@ setMethod("storageLevel", #' Coalesce #' -#' Returns a new SparkDataFrame that has exactly \code{"rtitions} partitions. +#' Returns a new SparkDataFrame that has exactly \code{numPartitions} partitions. #' This operation results in a narrow dependency, e.g. if you go from 1000 partitions to 100 #' partitions, there will not be a shuffle, instead each of the 100 new partitions will claim 10 of #' the current partitions. If a larger number of partitions is requested, it will stay at the #' current number of partitions. #' -#' However, if you're doing a drastic coalesce on a SparkDataFrame, e.g. to "rtitions = 1, +#' However, if you're doing a drastic coalesce on a SparkDataFrame, e.g. to numPartitions = 1, #' this may result in your computation taking place on fewer nodes than #' you like (e.g. one node in the case of numPartitions = 1). To avoid this, #' call \code{repartition}. This will add a shuffle step, but means the @@ -2618,7 +2618,7 @@ setMethod("join", sdf <- callJMethod(x@sdf, "join", y@sdf, joinExpr@jc, joinType) } else { stop("joinType must be one of the following types: ", - "'", paste(valid_join_types, collapse = "', '"), "'") + toString(sQuote(valid_join_types, FALSE))) } } } diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index f00dc74f72f0..65db9c21d9db 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -355,7 +355,7 @@ varargsToStrEnv <- function(...) { value <- pairs[[name]] if (!(is.logical(value) || is.numeric(value) || is.character(value) || is.null(value))) { stop("Unsupported type for ", name, " : ", toString(class(value)), ". ", - "Supported types are logical, numeric, character and NULL.") + "Supported types are logical, numeric, character and NULL.", call. = FALSE) } if (is.logical(value)) { env[[name]] <- tolower(as.character(value)) @@ -369,7 +369,7 @@ varargsToStrEnv <- function(...) { } if (length(ignoredNames) != 0) { - warning("Unnamed arguments ignored: ", toString(ignoredNames), ".") + warning("Unnamed arguments ignored: ", toString(ignoredNames), ".", call. 
= FALSE) } env } From f278f95284b8b925210128127a8b1ec4e070871d Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 29 Apr 2020 14:03:38 +0800 Subject: [PATCH 08/10] sQuote(., FALSE) is a relatively new addition to R --- R/pkg/R/DataFrame.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index a8bd7198ba1a..83e6031c315c 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2618,7 +2618,7 @@ setMethod("join", sdf <- callJMethod(x@sdf, "join", y@sdf, joinExpr@jc, joinType) } else { stop("joinType must be one of the following types: ", - toString(sQuote(valid_join_types, FALSE))) + "'", paste(valid_join_types, collapse = "', '"), "'") } } } From 3b3c1afacc3599ab17ab7b19d3fe2ecc4f6fd247 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 30 Apr 2020 02:11:56 +0800 Subject: [PATCH 09/10] pick a more logical ordering for valid joinTypes --- R/pkg/R/DataFrame.R | 2 +- R/pkg/tests/fulltests/test_sparkSQL.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 593e2f3de791..a0c7eee01b24 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2607,7 +2607,7 @@ setMethod("join", "outer", "full", "fullouter", "full_outer", "left", "leftouter", "left_outer", "right", "rightouter", "right_outer", - "semi", "left_semi", "leftsemi", "anti", "left_anti", "leftanti") + "semi", "leftsemi", "left_semi", "anti", "leftanti", "left_anti") if (joinType %in% valid_join_types) { joinType <- gsub("_", "", joinType, fixed = TRUE) sdf <- callJMethod(x@sdf, "join", y@sdf, joinExpr@jc, joinType) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index e05dae817f92..611d9057c0f1 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -2558,7 +2558,7 @@ test_that("join(), crossJoin() and merge() on a DataFrame", { error_msg <- paste("joinType must be one of the following types:", "'inner', 'cross', 'outer', 'full', 'fullouter', 'full_outer',", "'left', 'leftouter', 'left_outer', 'right', 'rightouter', 'right_outer',", - "'semi', 'left_semi', 'leftsemi', 'anti', 'left_anti', 'leftanti'") + "'semi', 'leftsemi', 'left_semi', 'anti', 'leftanti', 'left_anti'") expect_error(join(df2, df, df2$name == df$name, "invalid"), error_msg) merged <- merge(df, df2, by.x = "name", by.y = "name", all.x = TRUE, all.y = TRUE) From 02d17283dacabb04102d276d05942d6f2d442ece Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 1 May 2020 15:49:29 +0800 Subject: [PATCH 10/10] camelCase --- R/pkg/R/DataFrame.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index a0c7eee01b24..15b3ce293542 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2603,17 +2603,17 @@ setMethod("join", if (is.null(joinType)) { sdf <- callJMethod(x@sdf, "join", y@sdf, joinExpr@jc) } else { - valid_join_types <- c("inner", "cross", + validJoinTypes <- c("inner", "cross", "outer", "full", "fullouter", "full_outer", "left", "leftouter", "left_outer", "right", "rightouter", "right_outer", "semi", "leftsemi", "left_semi", "anti", "leftanti", "left_anti") - if (joinType %in% valid_join_types) { + if (joinType %in% validJoinTypes) { joinType <- gsub("_", "", joinType, fixed = TRUE) sdf <- callJMethod(x@sdf, "join", y@sdf, joinExpr@jc, joinType) } else { stop("joinType must be one of the following types: ", - "'", paste(valid_join_types, collapse = "', '"), "'") + "'", 
paste(validJoinTypes, collapse = "', '"), "'")
          }
        }
      }
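
Illustrative sketch (not part of the patch series) of the two condition-message styles that appear above: the plain multi-argument form the series ultimately settles on — stop(), warning() and message() concatenate their ... arguments themselves, so the removed paste()/sprintf() wrappers are redundant — and the gettextf(..., domain = "R-SparkR") plus domain = NA form used in the intermediate commits to keep a whole sentence available as one translatable template. checkNumPartitions() is a hypothetical helper written only for this note, not a SparkR function.

checkNumPartitions <- function(numPartitions) {
  if (!is.numeric(numPartitions)) {
    # Canonical form: pass the message fragments straight to stop();
    # it pastes them together with no separator, so no paste0() is needed.
    stop("numPartitions must be numeric; however, got ", class(numPartitions))
  }
  if (as.integer(numPartitions) != numPartitions) {
    # Translation-friendly form: gettextf() keeps the sentence as a single
    # template in the package's "R-SparkR" domain; domain = NA tells warning()
    # not to attempt a second translation of the already-built message.
    warning(gettextf("Coercing %s to integer", format(numPartitions),
                     domain = "R-SparkR"),
            domain = NA)
  }
  as.integer(numPartitions)
}

checkNumPartitions(5)     # returns 5L silently
checkNumPartitions(5.5)   # warns "Coercing 5.5 to integer", returns 5L
# checkNumPartitions("x") # errors: numPartitions must be numeric; however, got character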