From 29daf668c635da94da520044f39089712e444949 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 11 Oct 2019 10:07:43 -0700 Subject: [PATCH 1/4] [SPARK-29442][SQL] Set `default` mode should override the existing mode --- .../main/scala/org/apache/spark/sql/DataFrameWriter.scala | 3 +-- .../test/scala/org/apache/spark/sql/DataFrameSuite.scala | 8 ++++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index 3f7016df2eb4..ef4b2b7aac27 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -87,8 +87,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { case "overwrite" => mode(SaveMode.Overwrite) case "append" => mode(SaveMode.Append) case "ignore" => mode(SaveMode.Ignore) - case "error" | "errorifexists" => mode(SaveMode.ErrorIfExists) - case "default" => this + case "error" | "errorifexists" | "default" => mode(SaveMode.ErrorIfExists) case _ => throw new IllegalArgumentException(s"Unknown save mode: $saveMode. 
" + "Accepted save modes are 'overwrite', 'append', 'ignore', 'error', 'errorifexists'.") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 52adf3ebdb7a..ffd26816bde8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -2203,4 +2203,12 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { |*(1) Range (0, 10, step=1, splits=2)""".stripMargin)) } } + + test("SPARK-29442 Set `default` mode should override the existing mode") { + val df = Seq(Tuple1(1)).toDF() + val writer = df.write.mode("overwrite").mode("default") + val modeField = classOf[DataFrameWriter[Tuple1[Int]]].getDeclaredField("mode") + modeField.setAccessible(true) + assert(SaveMode.ErrorIfExists === modeField.get(writer).asInstanceOf[SaveMode]) + } } From b0c8068facb65e26002c0c2e5762ca0df376ee74 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 11 Oct 2019 11:04:16 -0700 Subject: [PATCH 2/4] Address comments --- .../main/scala/org/apache/spark/sql/DataFrameWriter.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index ef4b2b7aac27..69af54cf0643 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -77,7 +77,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { *
<li>`overwrite`: overwrite the existing data.</li>
 * <li>`append`: append the data.</li>
 * <li>`ignore`: ignore the operation (i.e. no-op).</li>
- * <li>`error` or `errorifexists`: default option, throw an exception at runtime.</li>
+ * <li>`error`, `errorifexists`, or `default`: default option, throw an exception at runtime.</li>
 * </ul> * * @since 1.4.0 @@ -88,8 +88,8 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { case "append" => mode(SaveMode.Append) case "ignore" => mode(SaveMode.Ignore) case "error" | "errorifexists" | "default" => mode(SaveMode.ErrorIfExists) - case _ => throw new IllegalArgumentException(s"Unknown save mode: $saveMode. " + - "Accepted save modes are 'overwrite', 'append', 'ignore', 'error', 'errorifexists'.") + case _ => throw new IllegalArgumentException(s"Unknown save mode: $saveMode. Accepted " + + "save modes are 'overwrite', 'append', 'ignore', 'error', 'errorifexists', 'default'.") } } From a4751f6334fc9977be51e5eb4e36727c9de2a874 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 11 Oct 2019 12:24:16 -0700 Subject: [PATCH 3/4] Update python --- python/pyspark/sql/readwriter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index e51ff9bad074..b6fee438e2de 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -593,7 +593,7 @@ def mode(self, saveMode): * `append`: Append contents of this :class:`DataFrame` to existing data. * `overwrite`: Overwrite existing data. - * `error` or `errorifexists`: Throw an exception if data already exists. + * `error`, `errorifexists`, `default`: Throw an exception if data already exists. * `ignore`: Silently ignore this operation if data already exists. 
>>> df.write.mode('append').parquet(os.path.join(tempfile.mkdtemp(), 'data')) From b752271e848dc4562fdfe1df242d173a70a8dca0 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 14 Oct 2019 09:27:36 -0700 Subject: [PATCH 4/4] Remove comment updates --- python/pyspark/sql/readwriter.py | 2 +- .../src/main/scala/org/apache/spark/sql/DataFrameWriter.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index b6fee438e2de..e51ff9bad074 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -593,7 +593,7 @@ def mode(self, saveMode): * `append`: Append contents of this :class:`DataFrame` to existing data. * `overwrite`: Overwrite existing data. - * `error`, `errorifexists`, `default`: Throw an exception if data already exists. + * `error` or `errorifexists`: Throw an exception if data already exists. * `ignore`: Silently ignore this operation if data already exists. >>> df.write.mode('append').parquet(os.path.join(tempfile.mkdtemp(), 'data')) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index 69af54cf0643..a7c4eef4e19f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -77,7 +77,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { *
<li>`overwrite`: overwrite the existing data.</li>
 * <li>`append`: append the data.</li>
 * <li>`ignore`: ignore the operation (i.e. no-op).</li>
- * <li>`error`, `errorifexists`, or `default`: default option, throw an exception at runtime.</li>
+ * <li>`error` or `errorifexists`: default option, throw an exception at runtime.</li>
 * </ul> * * @since 1.4.0