Merged
Changes from all commits
55 commits
434ada1
[SPARK-17952][SQL] Nested Java beans support in createDataFrame
michalsenkyr Oct 5, 2018
7dcc90f
[SPARK-25644][SS] Fix java foreachBatch in DataStreamWriter
zsxwing Oct 5, 2018
a433fbc
[SPARK-25626][SQL][TEST] Improve the test execution time of HiveClien…
dilipbiswal Oct 5, 2018
1c9486c
[SPARK-25635][SQL][BUILD] Support selective direct encoding in native…
dongjoon-hyun Oct 5, 2018
bbd038d
[SPARK-25653][TEST] Add tag ExtendedHiveTest for HiveSparkSubmitSuite
gengliangwang Oct 6, 2018
2c6f4d6
[SPARK-25610][SQL][TEST] Improve execution time of DatasetCacheSuite:…
dilipbiswal Oct 6, 2018
58287a3
[SPARK-25646][K8S] Fix docker-image-tool.sh on dev build.
Oct 6, 2018
44cf800
[SPARK-25655][BUILD] Add -Pspark-ganglia-lgpl to the scala style check.
gatorsmile Oct 6, 2018
17781d7
[SPARK-25202][SQL] Implements split with limit sql function
Oct 6, 2018
f2f4e7a
[SPARK-25600][SQL][MINOR] Make use of TypeCoercion.findTightestCommon…
dilipbiswal Oct 6, 2018
1ee472e
[SPARK-25621][SPARK-25622][TEST] Reduce test time of BucketedReadWith…
gengliangwang Oct 6, 2018
edf4286
[SPARK-25488][SQL][TEST] Refactor MiscBenchmark to use main method
wangyum Oct 6, 2018
7ef65c0
[HOT-FIX] Fix compilation errors.
gatorsmile Oct 6, 2018
5a617ec
[MINOR] Clean up the joinCriteria in SQL parser
gatorsmile Oct 6, 2018
9cbf105
[SPARK-25644][SS][FOLLOWUP][BUILD] Fix Scala 2.12 build error due to …
dongjoon-hyun Oct 6, 2018
b0cee96
[SPARK-25062][SQL] Clean up BlockLocations in InMemoryFileIndex
peter-toth Oct 6, 2018
756a3ab
[SPARK-25575][WEBUI][FOLLOWUP] SQL tab in the spark UI support hide t…
shahidki31 Oct 6, 2018
8bb2429
[SPARK-25671] Build external/spark-ganglia-lgpl in Jenkins Test
gatorsmile Oct 6, 2018
fba722e
[SPARK-25539][BUILD] Upgrade lz4-java to 1.5.0 get speed improvement
wangyum Oct 7, 2018
3eb8429
[SPARK-25461][PYSPARK][SQL] Add document for mismatch between return …
viirya Oct 7, 2018
b1328cc
[SPARK-25658][SQL][TEST] Refactor HashByteArrayBenchmark to use main …
wangyum Oct 7, 2018
669ade3
[SPARK-25657][SQL][TEST] Refactor HashBenchmark to use main method
wangyum Oct 7, 2018
ebd899b
[SPARK-25321][ML] Revert SPARK-14681 to avoid API breaking change
WeichenXu123 Oct 7, 2018
2199224
[SPARK-25673][BUILD] Remove Travis CI which enables Java lint check
HyukjinKwon Oct 8, 2018
cb90617
[SPARK-25591][PYSPARK][SQL] Avoid overwriting deserialized accumulator
viirya Oct 8, 2018
1a6815c
[SPARK-25677][DOC] spark.io.compression.codec = org.apache.spark.io.Z…
shivusondur Oct 8, 2018
a853a80
[SPARK-25666][PYTHON] Internally document type conversion between Pyt…
HyukjinKwon Oct 8, 2018
1a28625
[SPARK-25408] Move to more ideomatic Java8
Oct 8, 2018
6353425
[SPARK-25641] Change the spark.shuffle.server.chunkFetchHandlerThread…
Oct 8, 2018
6a60fb0
[SPARK-25630][TEST] Reduce test time of HadoopFsRelationTest
gengliangwang Oct 8, 2018
f9935a3
[SPARK-25639][DOCS] Added docs for foreachBatch, python foreach and m…
tdas Oct 8, 2018
f3fed28
[SPARK-25659][PYTHON][TEST] Test type inference specification for cre…
HyukjinKwon Oct 8, 2018
a4b14a9
[SPARK-25623][SPARK-25624][SPARK-25625][TEST] Reduce test time of Log…
shahidki31 Oct 9, 2018
46fe408
[SPARK-25669][SQL] Check CSV header only when it exists
MaxGekk Oct 9, 2018
e3133f4
[SPARK-25497][SQL] Limit operation within whole stage codegen should …
cloud-fan Oct 9, 2018
deb9588
[SPARK-24851][UI] Map a Stage ID to it's Associated Job ID
Oct 9, 2018
3eee9e0
[SPARK-25535][CORE] Work around bad error handling in commons-crypto.
Oct 9, 2018
faf73dc
[SPARK-25559][FOLLOW-UP] Add comments for partial pushdown of conjunc…
gatorsmile Oct 9, 2018
3caab87
[SPARK-20946][SPARK-25525][SQL][FOLLOW-UP] Update the migration guide.
ueshin Oct 10, 2018
eaafcd8
[SPARK-25605][TESTS] Alternate take. Run cast string to timestamp tes…
srowen Oct 10, 2018
3528c08
[SPARK-25611][SPARK-25612][SQL][TESTS] Improve test run time of Compr…
dilipbiswal Oct 10, 2018
8a7872d
[SPARK-25636][CORE] spark-submit cuts off the failure reason when the…
Oct 10, 2018
6df2345
[SPARK-25699][SQL] Partially push down conjunctive predicated in ORC
gengliangwang Oct 10, 2018
80813e1
[SPARK-25016][BUILD][CORE] Remove support for Hadoop 2.6
srowen Oct 10, 2018
83e19d5
[SPARK-25700][SQL] Creates ReadSupport in only Append Mode in Data So…
HyukjinKwon Oct 11, 2018
8115e6b
[SPARK-25662][SQL][TEST] Refactor DataSourceReadBenchmark to use main…
peter-toth Oct 11, 2018
65f75db
[MINOR][SQL] remove Redundant semicolons
heary-cao Oct 11, 2018
1bb63ae
[SPARK-24109][CORE] Remove class SnappyOutputStreamWrapper
srowen Oct 11, 2018
adf648b
[SPARK-25615][SQL][TEST] Improve the test runtime of KafkaSinkSuite: …
dilipbiswal Oct 11, 2018
69f5e9c
[SPARK-25674][SQL] If the records are incremented by more than 1 at a…
10110346 Oct 11, 2018
a001814
[SPARK-25598][STREAMING][BUILD][TEST-MAVEN] Remove flume connector in…
srowen Oct 11, 2018
39872af
[SPARK-25684][SQL] Organize header related codes in CSV datasource
HyukjinKwon Oct 12, 2018
c9d7d83
[SPARK-25388][TEST][SQL] Detect incorrect nullable of DataType in the…
kiszk Oct 12, 2018
3685130
[SPARK-25690][SQL] Analyzer rule HandleNullInputsForUDF does not stab…
maryannxue Oct 12, 2018
78e1331
[SPARK-25708][SQL] HAVING without GROUP BY means global aggregate
cloud-fan Oct 12, 2018
50 changes: 0 additions & 50 deletions .travis.yml

This file was deleted.

15 changes: 12 additions & 3 deletions R/pkg/R/functions.R
@@ -3473,22 +3473,31 @@ setMethod("collect_set",

#' @details
#' \code{split_string}: Splits string on regular expression.
#' Equivalent to \code{split} SQL function.
#' Equivalent to \code{split} SQL function. Optionally a
#' \code{limit} can be specified
#'
#' @rdname column_string_functions
#' @param limit determines the length of the returned array.
#' \itemize{
#' \item \code{limit > 0}: length of the array will be at most \code{limit}
#' \item \code{limit <= 0}: the returned array can have any length
#' }
#'
#' @aliases split_string split_string,Column-method
#' @examples
#'
#' \dontrun{
#' head(select(df, split_string(df$Class, "\\d", 2)))
#' head(select(df, split_string(df$Sex, "a")))
#' head(select(df, split_string(df$Class, "\\d")))
#' # This is equivalent to the following SQL expression
#' head(selectExpr(df, "split(Class, '\\\\d')"))}
#' @note split_string 2.3.0
setMethod("split_string",
signature(x = "Column", pattern = "character"),
function(x, pattern) {
jc <- callJStatic("org.apache.spark.sql.functions", "split", x@jc, pattern)
function(x, pattern, limit = -1) {
jc <- callJStatic("org.apache.spark.sql.functions",
"split", x@jc, pattern, as.integer(limit))
column(jc)
})

2 changes: 1 addition & 1 deletion R/pkg/R/generics.R
@@ -1258,7 +1258,7 @@ setGeneric("sort_array", function(x, asc = TRUE) { standardGeneric("sort_array")

#' @rdname column_string_functions
#' @name NULL
setGeneric("split_string", function(x, pattern) { standardGeneric("split_string") })
setGeneric("split_string", function(x, pattern, ...) { standardGeneric("split_string") })

#' @rdname column_string_functions
#' @name NULL
8 changes: 8 additions & 0 deletions R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1819,6 +1819,14 @@ test_that("string operators", {
collect(select(df4, split_string(df4$a, "\\\\")))[1, 1],
list(list("[email protected] 1", "b"))
)
expect_equal(
collect(select(df4, split_string(df4$a, "\\.", 2)))[1, 1],
list(list("a", "[email protected] 1\\b"))
)
expect_equal(
collect(select(df4, split_string(df4$a, "b", 0)))[1, 1],
list(list("a.", "@c.d 1\\", ""))
)

l5 <- list(list(a = "abc"))
df5 <- createDataFrame(l5)
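
For reference, a minimal sketch (not part of this pull request) of the same split-with-limit behaviour driven from the Java API. It assumes the three-argument org.apache.spark.sql.functions.split(Column, String, int) overload that the SparkR wrapper above reaches via callJStatic; the class name is illustrative.

import static org.apache.spark.sql.functions.col;
import static org.apache.spark.sql.functions.split;

import java.util.Arrays;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class SplitLimitExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .master("local[*]").appName("split-limit").getOrCreate();

    Dataset<Row> df = spark
        .createDataset(Arrays.asList("2018-10-12"), Encoders.STRING())
        .toDF("d");

    // limit = 2: the returned array has at most two elements -> ["2018", "10-12"]
    df.select(split(col("d"), "-", 2)).show(false);

    // two-argument form: no limit, split as many times as possible -> ["2018", "10", "12"]
    df.select(split(col("d"), "-")).show(false);

    spark.stop();
  }
}

This mirrors the semantics documented in the roxygen block above: a positive limit caps the length of the returned array, while a non-positive limit (the R wrapper's default of -1) leaves it unbounded.
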
2 changes: 2 additions & 0 deletions bin/docker-image-tool.sh
@@ -54,6 +54,8 @@ function build {
img_path=$IMG_PATH
--build-arg
spark_jars=assembly/target/scala-$SPARK_SCALA_VERSION/jars
--build-arg
k8s_tests=resource-managers/kubernetes/integration-tests/tests
)
else
# Not passed as an argument to docker, but used to validate the Spark directory.
@@ -54,11 +54,8 @@ public final byte[] serialize(Object o) throws Exception {
return ((String) o).getBytes(UTF_8);
} else {
ByteArrayOutputStream bytes = new ByteArrayOutputStream();
GZIPOutputStream out = new GZIPOutputStream(bytes);
try {
try (GZIPOutputStream out = new GZIPOutputStream(bytes)) {
mapper.writeValue(out, o);
} finally {
out.close();
}
return bytes.toByteArray();
}
@@ -69,11 +66,8 @@ public final <T> T deserialize(byte[] data, Class<T> klass) throws Exception {
if (klass.equals(String.class)) {
return (T) new String(data, UTF_8);
} else {
GZIPInputStream in = new GZIPInputStream(new ByteArrayInputStream(data));
try {
try (GZIPInputStream in = new GZIPInputStream(new ByteArrayInputStream(data))) {
return mapper.readValue(in, klass);
} finally {
in.close();
}
}
}
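
As a side note, a self-contained sketch of the try-with-resources pattern this hunk adopts (the class and method names are illustrative, not part of the patch): the GZIP stream is closed, and therefore finished, before the underlying byte array is read, and it is closed even when writing throws, which is exactly what the removed try/finally did by hand.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

public class GzipRoundTrip {

  // The try-with-resources block closes (and thereby finishes) the GZIP stream
  // before bytes.toByteArray() runs, even if write() throws.
  static byte[] compress(byte[] raw) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    try (GZIPOutputStream out = new GZIPOutputStream(bytes)) {
      out.write(raw);
    }
    return bytes.toByteArray();
  }

  static byte[] decompress(byte[] compressed) throws IOException {
    try (GZIPInputStream in = new GZIPInputStream(new ByteArrayInputStream(compressed))) {
      ByteArrayOutputStream out = new ByteArrayOutputStream();
      byte[] buf = new byte[1024];
      int n;
      while ((n = in.read(buf)) != -1) {
        out.write(buf, 0, n);
      }
      return out.toByteArray();
    }
  }

  public static void main(String[] args) throws IOException {
    byte[] data = "hello gzip".getBytes(StandardCharsets.UTF_8);
    System.out.println(new String(decompress(compress(data)), StandardCharsets.UTF_8));
  }
}
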
@@ -217,7 +217,7 @@ public void testSkip() throws Exception {
public void testNegativeIndexValues() throws Exception {
List<Integer> expected = Arrays.asList(-100, -50, 0, 50, 100);

expected.stream().forEach(i -> {
expected.forEach(i -> {
try {
db.write(createCustomType1(i));
} catch (Exception e) {
@@ -159,15 +159,21 @@ public void close() throws IOException {
// accurately report the errors when they happen.
RuntimeException error = null;
byte[] dummy = new byte[8];
try {
doCipherOp(encryptor, dummy, true);
} catch (Exception e) {
error = new RuntimeException(e);
if (encryptor != null) {
try {
doCipherOp(Cipher.ENCRYPT_MODE, dummy, true);
} catch (Exception e) {
error = new RuntimeException(e);
}
encryptor = null;
}
try {
doCipherOp(decryptor, dummy, true);
} catch (Exception e) {
error = new RuntimeException(e);
if (decryptor != null) {
try {
doCipherOp(Cipher.DECRYPT_MODE, dummy, true);
} catch (Exception e) {
error = new RuntimeException(e);
}
decryptor = null;
}
random.close();

@@ -189,11 +195,11 @@ byte[] rawResponse(byte[] challenge) {
}

private byte[] decrypt(byte[] in) throws GeneralSecurityException {
return doCipherOp(decryptor, in, false);
return doCipherOp(Cipher.DECRYPT_MODE, in, false);
}

private byte[] encrypt(byte[] in) throws GeneralSecurityException {
return doCipherOp(encryptor, in, false);
return doCipherOp(Cipher.ENCRYPT_MODE, in, false);
}

private void initializeForAuth(String cipher, byte[] nonce, SecretKeySpec key)
@@ -205,11 +211,13 @@ private void initializeForAuth(String cipher, byte[] nonce, SecretKeySpec key)
byte[] iv = new byte[conf.ivLength()];
System.arraycopy(nonce, 0, iv, 0, Math.min(nonce.length, iv.length));

encryptor = CryptoCipherFactory.getCryptoCipher(cipher, cryptoConf);
encryptor.init(Cipher.ENCRYPT_MODE, key, new IvParameterSpec(iv));
CryptoCipher _encryptor = CryptoCipherFactory.getCryptoCipher(cipher, cryptoConf);
_encryptor.init(Cipher.ENCRYPT_MODE, key, new IvParameterSpec(iv));
this.encryptor = _encryptor;

decryptor = CryptoCipherFactory.getCryptoCipher(cipher, cryptoConf);
decryptor.init(Cipher.DECRYPT_MODE, key, new IvParameterSpec(iv));
CryptoCipher _decryptor = CryptoCipherFactory.getCryptoCipher(cipher, cryptoConf);
_decryptor.init(Cipher.DECRYPT_MODE, key, new IvParameterSpec(iv));
this.decryptor = _decryptor;
}

/**
@@ -241,29 +249,52 @@ private SecretKeySpec generateKey(String kdf, int iterations, byte[] salt, int k
return new SecretKeySpec(key.getEncoded(), conf.keyAlgorithm());
}

private byte[] doCipherOp(CryptoCipher cipher, byte[] in, boolean isFinal)
private byte[] doCipherOp(int mode, byte[] in, boolean isFinal)
throws GeneralSecurityException {

Preconditions.checkState(cipher != null);
CryptoCipher cipher;
switch (mode) {
case Cipher.ENCRYPT_MODE:
cipher = encryptor;
break;
case Cipher.DECRYPT_MODE:
cipher = decryptor;
break;
default:
throw new IllegalArgumentException(String.valueOf(mode));
}

int scale = 1;
while (true) {
int size = in.length * scale;
byte[] buffer = new byte[size];
try {
int outSize = isFinal ? cipher.doFinal(in, 0, in.length, buffer, 0)
: cipher.update(in, 0, in.length, buffer, 0);
if (outSize != buffer.length) {
byte[] output = new byte[outSize];
System.arraycopy(buffer, 0, output, 0, output.length);
return output;
} else {
return buffer;
Preconditions.checkState(cipher != null, "Cipher is invalid because of previous error.");

try {
int scale = 1;
while (true) {
int size = in.length * scale;
byte[] buffer = new byte[size];
try {
int outSize = isFinal ? cipher.doFinal(in, 0, in.length, buffer, 0)
: cipher.update(in, 0, in.length, buffer, 0);
if (outSize != buffer.length) {
byte[] output = new byte[outSize];
System.arraycopy(buffer, 0, output, 0, output.length);
return output;
} else {
return buffer;
}
} catch (ShortBufferException e) {
// Try again with a bigger buffer.
scale *= 2;
}
} catch (ShortBufferException e) {
// Try again with a bigger buffer.
scale *= 2;
}
} catch (InternalError ie) {
// SPARK-25535. The commons-cryto library will throw InternalError if something goes wrong,
// and leave bad state behind in the Java wrappers, so it's not safe to use them afterwards.
if (mode == Cipher.ENCRYPT_MODE) {
this.encryptor = null;
} else {
this.decryptor = null;
}
throw ie;
}
}

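
A trimmed, standalone sketch of the two techniques the rewritten doCipherOp combines, written against javax.crypto.Cipher instead of commons-crypto (class and field names are illustrative): retry with a doubled output buffer on ShortBufferException, and null out the cipher on InternalError so later calls fail fast instead of reusing possibly corrupted state.

import java.security.GeneralSecurityException;
import javax.crypto.Cipher;
import javax.crypto.ShortBufferException;

public final class GrowingBufferCipher {

  // Nulled out after an unrecoverable failure so later calls fail fast.
  private Cipher cipher;

  GrowingBufferCipher(Cipher cipher) {
    this.cipher = cipher;
  }

  byte[] run(byte[] in, boolean isFinal) throws GeneralSecurityException {
    if (cipher == null) {
      throw new IllegalStateException("Cipher is invalid because of previous error.");
    }
    try {
      int scale = 1;
      while (true) {
        byte[] buffer = new byte[Math.max(in.length, 1) * scale];
        try {
          int outSize = isFinal
              ? cipher.doFinal(in, 0, in.length, buffer, 0)
              : cipher.update(in, 0, in.length, buffer, 0);
          if (outSize == buffer.length) {
            return buffer;
          }
          byte[] output = new byte[outSize];
          System.arraycopy(buffer, 0, output, 0, outSize);
          return output;
        } catch (ShortBufferException e) {
          // The output did not fit: double the buffer and try again.
          scale *= 2;
        }
      }
    } catch (InternalError ie) {
      // Same idea as the SPARK-25535 workaround above: an InternalError may leave the
      // cipher wrapper in a bad state, so drop it rather than reuse it.
      cipher = null;
      throw ie;
    }
  }
}

Resolving the cipher from the mode inside the method, as the patch does with its switch statement, is what lets the invalidation (setting encryptor or decryptor to null) be observed by subsequent calls and by the null checks added to close().
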