From 97d18feeb8713da42b9f97d2343063bac1cba4b6 Mon Sep 17 00:00:00 2001 From: Dilip Biswal Date: Fri, 5 Oct 2018 00:51:35 -0700 Subject: [PATCH] [SPARK-25610][TEST] Improve execution time of DatasetCacheSuite: cache UDF result correctly --- .../test/scala/org/apache/spark/sql/DatasetCacheSuite.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala index 5c6a021d5b76..fef6ddd0b93c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala @@ -127,8 +127,8 @@ class DatasetCacheSuite extends QueryTest with SharedSQLContext with TimeLimits } test("cache UDF result correctly") { - val expensiveUDF = udf({x: Int => Thread.sleep(5000); x}) - val df = spark.range(0, 10).toDF("a").withColumn("b", expensiveUDF($"a")) + val expensiveUDF = udf({x: Int => Thread.sleep(2000); x}) + val df = spark.range(0, 2).toDF("a").repartition(1).withColumn("b", expensiveUDF($"a")) val df2 = df.agg(sum(df("b"))) df.cache() @@ -136,7 +136,7 @@ class DatasetCacheSuite extends QueryTest with SharedSQLContext with TimeLimits assertCached(df2) // udf has been evaluated during caching, and thus should not be re-evaluated here - failAfter(3 seconds) { + failAfter(2 seconds) { df2.collect() }