2 files changed: +19 −1

main/scala/org/apache/spark/sql/execution/datasources/csv

@@ -183,11 +183,19 @@ class UnivocityParser(
     }
   }
 
+  private lazy val doParse = if (schema.nonEmpty) {
+    (input: String) => convert(tokenizer.parseLine(input))
+  } else {
+    // If `columnPruning` is enabled and only partition attributes are
+    // scanned, `schema` is empty.
+    (_: String) => InternalRow.empty
+  }
+
   /**
    * Parses a single CSV string and turns it into either one resulting row
    * or no row (if the record is malformed).
    */
-  def parse(input: String): InternalRow = convert(tokenizer.parseLine(input))
+  def parse(input: String): InternalRow = doParse(input)
 
   private def convert(tokens: Array[String]): InternalRow = {
     if (tokens.length != schema.length) {
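The key idea in this change is to hoist the `schema.nonEmpty` check out of the per-record path: `doParse` is a `lazy val`, so the branch is taken once when the parser first runs, and every subsequent `parse` call is a direct function invocation. Below is a minimal self-contained sketch of the same pattern; the simplified `Parser` and `Row` here are hypothetical stand-ins for `UnivocityParser` and `InternalRow`, not Spark's actual classes:

```scala
// Minimal sketch of the pattern (not Spark's actual classes): choose the
// per-record function once, based on whether any columns must be parsed.
case class Row(values: Seq[String])
object Row { val empty: Row = Row(Nil) }

class Parser(schema: Seq[String]) {
  private def convert(tokens: Array[String]): Row = Row(tokens.toSeq)

  // Forced at most once; afterwards `parse` is a plain function call with
  // no schema check on the hot path.
  private lazy val doParse: String => Row =
    if (schema.nonEmpty) {
      input => convert(input.split(","))
    } else {
      // No data columns are requested (e.g. only partition values are
      // needed), so tokenizing the input would be wasted work.
      _ => Row.empty
    }

  def parse(input: String): Row = doParse(input)
}
```

When the required schema is empty, the patched code skips `tokenizer.parseLine` entirely, which is essentially the whole cost of reading a CSV record.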
test/scala/org/apache/spark/sql/execution/datasources/csv

@@ -1602,4 +1602,14 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te
     assert(testAppender2.events.asScala
       .exists(msg => msg.getRenderedMessage.contains("CSV header does not conform to the schema")))
   }
+
+  test("SPARK-24645 skip parsing when columnPruning enabled and partitions scanned only") {
+    withSQLConf(SQLConf.CSV_PARSER_COLUMN_PRUNING.key -> "true") {
+      withTempPath { path =>
+        val dir = path.getAbsolutePath
+        spark.range(10).selectExpr("id % 2 AS p", "id").write.partitionBy("p").csv(dir)
+        spark.read.csv(dir).selectExpr("sum(p)").collect()
+      }
+    }
+  }
 }
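The new test reproduces the empty-schema path end to end: it writes a dataset partitioned by `p`, then selects only `p`, so with column pruning enabled no CSV columns need parsing. Below is a standalone sketch of the same scenario, assuming a local `SparkSession`; the object name, master setting, and temp-path handling are illustrative, not part of the patch:

```scala
// Standalone sketch of the scenario the test covers, runnable as a local
// Spark app (names and paths here are illustrative).
import org.apache.spark.sql.SparkSession

object PartitionOnlyScan {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("SPARK-24645 demo")
      .getOrCreate()
    val dir = java.nio.file.Files.createTempDirectory("csv-demo").toString + "/data"

    // Rows land under p=0/ and p=1/; the CSV files contain only the `id` column.
    spark.range(10).selectExpr("id % 2 AS p", "id").write.partitionBy("p").csv(dir)

    // Only the partition column is selected, so with column pruning enabled
    // the required schema for the CSV files themselves is empty and the
    // per-record tokenizing/conversion work can be skipped entirely.
    spark.read.csv(dir).selectExpr("sum(p)").show()

    spark.stop()
  }
}
```

The query result is the same with or without the patch; what changes is that the empty-schema branch of `doParse` returns `InternalRow.empty` immediately instead of tokenizing every line.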