From dd28b09d5a546a200717a46854a6085ed545fd8e Mon Sep 17 00:00:00 2001
From: turbofei
Date: Tue, 22 Oct 2019 12:09:42 +0800
Subject: [PATCH 1/6] [SPARK-29542] Make the description of spark.sql.files.maxPartitionBytes be clearly

---
 .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 4944099fcc0d..19f482bea68c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -980,7 +980,8 @@ object SQLConf {
     .createWithDefault(true)
 
   val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes")
-    .doc("The maximum number of bytes to pack into a single partition when reading files.")
+    .doc("The maximum number of bytes to pack into a single partition when reading files" +
+      " for data source table.")
     .bytesConf(ByteUnit.BYTE)
     .createWithDefault(128 * 1024 * 1024) // parquet.block.size
 

From 7ecb7b602de82e70ccbd1cf11f86a635092d196d Mon Sep 17 00:00:00 2001
From: turbofei
Date: Tue, 22 Oct 2019 19:59:08 +0800
Subject: [PATCH 2/6] fix style

---
 .../apache/spark/sql/internal/SQLConf.scala | 24 ++++++++++---------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 19f482bea68c..0924bc347a0f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -980,34 +980,36 @@ object SQLConf {
     .createWithDefault(true)
 
   val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes")
-    .doc("The maximum number of bytes to pack into a single partition when reading files" +
-      " for data source table.")
+    .doc("The maximum number of bytes to pack into a single partition when Spark file-based" +
+      " sources are used to read files.")
     .bytesConf(ByteUnit.BYTE)
     .createWithDefault(128 * 1024 * 1024) // parquet.block.size
 
   val FILES_OPEN_COST_IN_BYTES = buildConf("spark.sql.files.openCostInBytes")
     .internal()
-    .doc("The estimated cost to open a file, measured by the number of bytes could be scanned in" +
-      " the same time. This is used when putting multiple files into a partition. It's better to" +
-      " over estimated, then the partitions with small files will be faster than partitions with" +
-      " bigger files (which is scheduled first).")
+    .doc("The estimated cost to open a file, measured by the number of bytes could be scanned in " +
+      "the same time. This is used when putting multiple file-source files into a partition. " +
+      "It's better to over estimated, then the partitions with small files will be faster than " +
+      "partitions with bigger files (which is scheduled first).")
     .longConf
     .createWithDefault(4 * 1024 * 1024)
 
   val IGNORE_CORRUPT_FILES = buildConf("spark.sql.files.ignoreCorruptFiles")
-    .doc("Whether to ignore corrupt files. If true, the Spark jobs will continue to run when " +
-      "encountering corrupted files and the contents that have been read will still be returned.")
+    .doc("Whether to ignore corrupt file-source files. If true, the Spark jobs will continue to " +
+      "run when encountering corrupted files and the contents that have been read will still be " +
+      "returned.")
     .booleanConf
     .createWithDefault(false)
 
   val IGNORE_MISSING_FILES = buildConf("spark.sql.files.ignoreMissingFiles")
-    .doc("Whether to ignore missing files. If true, the Spark jobs will continue to run when " +
-      "encountering missing files and the contents that have been read will still be returned.")
+    .doc("Whether to ignore missing file-source files. If true, the Spark jobs will continue to " +
+      "run when encountering missing files and the contents that have been read will still be " +
+      "returned.")
     .booleanConf
     .createWithDefault(false)
 
   val MAX_RECORDS_PER_FILE = buildConf("spark.sql.files.maxRecordsPerFile")
-    .doc("Maximum number of records to write out to a single file. " +
+    .doc("Maximum number of records to write out to a single file-source file. " +
       "If this value is zero or negative, there is no limit.")
     .longConf
     .createWithDefault(0)

From 4a56b2e3b397b029598814a9f2c0f43eb3aaa8b2 Mon Sep 17 00:00:00 2001
From: turbofei
Date: Wed, 23 Oct 2019 00:11:46 +0800
Subject: [PATCH 3/6] fix blank

---
 .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 0924bc347a0f..3cc590a4c017 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -980,8 +980,8 @@ object SQLConf {
     .createWithDefault(true)
 
   val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes")
-    .doc("The maximum number of bytes to pack into a single partition when Spark file-based" +
-      " sources are used to read files.")
+    .doc("The maximum number of bytes to pack into a single partition when Spark file-based " +
+      "sources are used to read files.")
     .bytesConf(ByteUnit.BYTE)
     .createWithDefault(128 * 1024 * 1024) // parquet.block.size
 

From 849e99565e045878f46070d4930a774190282814 Mon Sep 17 00:00:00 2001
From: turbofei
Date: Wed, 23 Oct 2019 10:47:11 +0800
Subject: [PATCH 4/6] revert

---
 .../apache/spark/sql/internal/SQLConf.scala | 23 ++++++++-----------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 3cc590a4c017..4944099fcc0d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -980,36 +980,33 @@ object SQLConf {
     .createWithDefault(true)
 
   val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes")
-    .doc("The maximum number of bytes to pack into a single partition when Spark file-based " +
-      "sources are used to read files.")
+    .doc("The maximum number of bytes to pack into a single partition when reading files.")
    .bytesConf(ByteUnit.BYTE)
     .createWithDefault(128 * 1024 * 1024) // parquet.block.size
 
   val FILES_OPEN_COST_IN_BYTES = buildConf("spark.sql.files.openCostInBytes")
     .internal()
-    .doc("The estimated cost to open a file, measured by the number of bytes could be scanned in " +
-      "the same time. This is used when putting multiple file-source files into a partition. " +
-      "It's better to over estimated, then the partitions with small files will be faster than " +
-      "partitions with bigger files (which is scheduled first).")
+    .doc("The estimated cost to open a file, measured by the number of bytes could be scanned in" +
+      " the same time. This is used when putting multiple files into a partition. It's better to" +
+      " over estimated, then the partitions with small files will be faster than partitions with" +
+      " bigger files (which is scheduled first).")
     .longConf
     .createWithDefault(4 * 1024 * 1024)
 
   val IGNORE_CORRUPT_FILES = buildConf("spark.sql.files.ignoreCorruptFiles")
-    .doc("Whether to ignore corrupt file-source files. If true, the Spark jobs will continue to " +
-      "run when encountering corrupted files and the contents that have been read will still be " +
-      "returned.")
+    .doc("Whether to ignore corrupt files. If true, the Spark jobs will continue to run when " +
+      "encountering corrupted files and the contents that have been read will still be returned.")
     .booleanConf
     .createWithDefault(false)
 
   val IGNORE_MISSING_FILES = buildConf("spark.sql.files.ignoreMissingFiles")
-    .doc("Whether to ignore missing file-source files. If true, the Spark jobs will continue to " +
-      "run when encountering missing files and the contents that have been read will still be " +
-      "returned.")
+    .doc("Whether to ignore missing files. If true, the Spark jobs will continue to run when " +
+      "encountering missing files and the contents that have been read will still be returned.")
     .booleanConf
     .createWithDefault(false)
 
   val MAX_RECORDS_PER_FILE = buildConf("spark.sql.files.maxRecordsPerFile")
-    .doc("Maximum number of records to write out to a single file-source file. " +
+    .doc("Maximum number of records to write out to a single file. " +
       "If this value is zero or negative, there is no limit.")
     .longConf
     .createWithDefault(0)

From 1de48df5e9d9d3ca1b0ea648f7c1d9d05d9f4ffd Mon Sep 17 00:00:00 2001
From: turbofei
Date: Wed, 23 Oct 2019 10:51:48 +0800
Subject: [PATCH 5/6] refactor

---
 .../apache/spark/sql/internal/SQLConf.scala | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 4944099fcc0d..306d6edbcd8d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -980,7 +980,9 @@ object SQLConf {
     .createWithDefault(true)
 
   val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes")
-    .doc("The maximum number of bytes to pack into a single partition when reading files.")
+    .doc("The maximum number of bytes to pack into a single partition when reading files. " +
+      "This configuration is effective only when using file-based sources such as Parquet, JSON " +
+      "and ORC.")
     .bytesConf(ByteUnit.BYTE)
     .createWithDefault(128 * 1024 * 1024) // parquet.block.size
 
@@ -989,25 +991,31 @@ object SQLConf {
     .doc("The estimated cost to open a file, measured by the number of bytes could be scanned in" +
       " the same time. This is used when putting multiple files into a partition. It's better to" +
       " over estimated, then the partitions with small files will be faster than partitions with" +
-      " bigger files (which is scheduled first).")
+      " bigger files (which is scheduled first). This configuration is effective only when using" +
+      " file-based sources such as Parquet, JSON and ORC.")
     .longConf
     .createWithDefault(4 * 1024 * 1024)
 
   val IGNORE_CORRUPT_FILES = buildConf("spark.sql.files.ignoreCorruptFiles")
     .doc("Whether to ignore corrupt files. If true, the Spark jobs will continue to run when " +
-      "encountering corrupted files and the contents that have been read will still be returned.")
+      "encountering corrupted files and the contents that have been read will still be returned. " +
+      "This configuration is effective only when using file-based sources such as Parquet, JSON " +
+      "and ORC.")
     .booleanConf
     .createWithDefault(false)
 
   val IGNORE_MISSING_FILES = buildConf("spark.sql.files.ignoreMissingFiles")
     .doc("Whether to ignore missing files. If true, the Spark jobs will continue to run when " +
-      "encountering missing files and the contents that have been read will still be returned.")
+      "encountering missing files and the contents that have been read will still be returned. " +
+      "This configuration is effective only when using file-based sources such as Parquet, JSON " +
+      "and ORC.")
     .booleanConf
     .createWithDefault(false)
 
   val MAX_RECORDS_PER_FILE = buildConf("spark.sql.files.maxRecordsPerFile")
     .doc("Maximum number of records to write out to a single file. " +
-      "If this value is zero or negative, there is no limit.")
+      "If this value is zero or negative, there is no limit. This configuration is " +
+      "effective only when using file-based sources such as Parquet, JSON and ORC.")
     .longConf
     .createWithDefault(0)
 

From 7e115d834971961957ec73ce0aa6c817b57d3ff2 Mon Sep 17 00:00:00 2001
From: turbofei
Date: Wed, 23 Oct 2019 12:14:45 +0800
Subject: [PATCH 6/6] revert maxRecordsPerFile

---
 .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 306d6edbcd8d..a02cb832cc53 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -1014,8 +1014,7 @@ object SQLConf {
 
   val MAX_RECORDS_PER_FILE = buildConf("spark.sql.files.maxRecordsPerFile")
     .doc("Maximum number of records to write out to a single file. " +
-      "If this value is zero or negative, there is no limit. This configuration is " +
-      "effective only when using file-based sources such as Parquet, JSON and ORC.")
+      "If this value is zero or negative, there is no limit.")
     .longConf
     .createWithDefault(0)
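
For reference, the sketch below shows how the file-source options touched by this series can be set from application code. It is an illustrative example, not part of the patches: the config keys come from the diffs above, while the application name, paths, and values are made-up placeholders. Note that spark.sql.files.maxPartitionBytes is declared with bytesConf, so it accepts a size string such as "64MB", whereas spark.sql.files.openCostInBytes and spark.sql.files.maxRecordsPerFile are plain longConf values.

import org.apache.spark.sql.SparkSession

object FileSourceConfSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical application: the key names match the confs documented above,
    // every value and path is a placeholder.
    val spark = SparkSession.builder()
      .appName("file-source-conf-sketch")
      .master("local[*]")
      // bytesConf: pack at most 64 MB of file data into one read partition.
      .config("spark.sql.files.maxPartitionBytes", "64MB")
      // longConf: estimated cost (in bytes) of opening a file, used when packing files.
      .config("spark.sql.files.openCostInBytes", (8L * 1024 * 1024).toString)
      // booleanConf: skip corrupt or missing files instead of failing the job.
      .config("spark.sql.files.ignoreCorruptFiles", "true")
      .config("spark.sql.files.ignoreMissingFiles", "true")
      // longConf: cap the number of records per written file; 0 means no limit.
      .config("spark.sql.files.maxRecordsPerFile", "1000000")
      .getOrCreate()

    // As the patched descriptions state, these settings only take effect for
    // Spark's file-based sources such as Parquet, JSON and ORC.
    val df = spark.read.parquet("/tmp/events")            // placeholder input path
    println(df.count())
    df.write.mode("overwrite").parquet("/tmp/events_out") // placeholder output path

    spark.stop()
  }
}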