Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,12 @@ object CSVUtils {
// with the one below, `filterCommentAndEmpty` but execution path is different. One of them
// might have to be removed in the near future if possible.
import lines.sqlContext.implicits._
val nonEmptyLines = lines.filter(length(trim($"value")) > 0)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@MaxGekk and @cloud-fan, I came up with a better way to avoid relying on string formatting in `col`. Can you take another look? I think this approach is safer.

val aliased = lines.toDF("value")
val nonEmptyLines = aliased.filter(length(trim($"value")) > 0)
if (options.isCommentSet) {
nonEmptyLines.filter(!$"value".startsWith(options.comment.toString))
nonEmptyLines.filter(!$"value".startsWith(options.comment.toString)).as[String]
} else {
nonEmptyLines
nonEmptyLines.as[String]
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2294,6 +2294,13 @@ abstract class CSVSuite extends QueryTest with SharedSparkSession with TestCsvDa
}
}
}

test("SPARK-30810: parses and convert a CSV Dataset having different column from 'value'") {
  // Regression test: the input Dataset[String] exposes its single column under the
  // name `a.text` instead of `value`, and CSV reading must still work on it.
  val input = spark.range(2).selectExpr("concat('a,b,', id) AS `a.text`").as[String]
  val reader = spark.read
    .option("header", true)
    .option("inferSchema", true)
  val parsed = reader.csv(input)
  // With the header option enabled, the expected field names are "a", "b" and "0",
  // leaving a single data row whose last field is expected to be read as the number 1.
  assert(parsed.schema.fieldNames === Seq("a", "b", "0"))
  checkAnswer(parsed, Row("a", "b", 1))
}
}

class CSVv1Suite extends CSVSuite {
Expand Down