diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
index 2e5539a90c65..b4b25a895016 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
@@ -212,6 +212,7 @@ class CSVOptions(
   val lineSeparatorInWrite: Option[String] = lineSeparator
 
   val inputBufferSize: Option[Int] = parameters.get("inputBufferSize").map(_.toInt)
+    .orElse(SQLConf.get.getConf(SQLConf.CSV_INPUT_BUFFER_SIZE))
 
   /**
    * The handling method to be used when unescaped quotes are found in the input.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 6317ef262696..f4c236c68dfe 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -2453,6 +2453,16 @@ object SQLConf {
       .booleanConf
       .createWithDefault(true)
 
+  val CSV_INPUT_BUFFER_SIZE = buildConf("spark.sql.csv.parser.inputBufferSize")
+    .internal()
+    .doc("If it is set, it configures the buffer size of CSV input during parsing. " +
+      "It is the same as inputBufferSize option in CSV which has a higher priority. " +
+      "Note that this is a workaround for the parsing library's regression, and this " +
+      "configuration is internal and supposed to be removed in the near future.")
+    .version("3.0.3")
+    .intConf
+    .createOptional
+
   val REPL_EAGER_EVAL_ENABLED = buildConf("spark.sql.repl.eagerEval.enabled")
     .doc("Enables eager evaluation or not. When true, the top K rows of Dataset will be " +
       "displayed if and only if the REPL supports the eager evaluation. Currently, the " +