Merged. Changes from all commits:
CSVExprUtils.scala (renamed from CSVExpressionUtils.scala):

```diff
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.catalyst.csv
 
-object CSVExpressionUtils {
+object CSVExprUtils {
   /**
    * Filter ignorable rows for CSV iterator (lines empty and starting with `comment`).
    * This is currently being used in CSV reading path and CSV schema inference.
```
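The doc comment above describes the behavior: drop lines that are empty or that begin with the configured comment character. A minimal self-contained sketch of such a filter, with a plain `Option[Char]` standing in for the `CSVOptions` the real method receives (that parameter shape is an assumption for illustration):

```scala
// Hypothetical standalone sketch; the real CSVExprUtils method takes a
// CSVOptions and reads the comment character from it.
def filterCommentAndEmpty(
    iter: Iterator[String],
    commentPrefix: Option[Char]): Iterator[String] = {
  iter.filter { line =>
    // Keep a line only if it has non-whitespace content and is not a comment.
    line.trim.nonEmpty && !commentPrefix.exists(c => line.startsWith(c.toString))
  }
}
```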
CSVHeaderChecker.scala:

```diff
@@ -123,7 +123,7 @@ class CSVHeaderChecker(
     // Note: if there are only comments in the first block, the header would probably
     // be not extracted.
     if (options.headerFlag && isStartOfFile) {
-      CSVExpressionUtils.extractHeader(lines, options).foreach { header =>
+      CSVExprUtils.extractHeader(lines, options).foreach { header =>
         checkHeaderColumnNames(tokenizer.parseLine(header))
       }
     }
```
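The `foreach` at the call site suggests `extractHeader` yields at most one line, for example as an `Option[String]`. A hedged sketch of a helper with that shape (the body is an assumption; only the call pattern comes from the diff):

```scala
// Hypothetical sketch: consume lines until the first non-ignorable one and
// treat it as the header; comment lines and blanks are skipped on the way.
def extractHeader(
    iter: Iterator[String],
    commentPrefix: Option[Char]): Option[String] = {
  val usable = iter.filter { line =>
    line.trim.nonEmpty && !commentPrefix.exists(c => line.startsWith(c.toString))
  }
  if (usable.hasNext) Some(usable.next()) else None
}
```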
CSVOptions.scala:

```diff
@@ -83,7 +83,7 @@ class CSVOptions(
     }
   }
 
-  val delimiter = CSVExpressionUtils.toChar(
+  val delimiter = CSVExprUtils.toChar(
     parameters.getOrElse("sep", parameters.getOrElse("delimiter", ",")))
   val parseMode: ParseMode =
     parameters.get("mode").map(ParseMode.fromString).getOrElse(PermissiveMode)
```
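The nested `getOrElse` gives the `sep` option precedence over `delimiter`, with `,` as the final default. A quick illustration against plain maps (the sample maps are made up):

```scala
def resolve(parameters: Map[String, String]): String =
  parameters.getOrElse("sep", parameters.getOrElse("delimiter", ","))

resolve(Map("sep" -> "|", "delimiter" -> ";"))  // "|"  ("sep" wins when both are set)
resolve(Map("delimiter" -> ";"))                // ";"
resolve(Map.empty[String, String])              // ","  (the default)
```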
UnivocityParser.scala:

```diff
@@ -338,7 +338,7 @@ private[sql] object UnivocityParser {
 
     val options = parser.options
 
-    val filteredLines: Iterator[String] = CSVExpressionUtils.filterCommentAndEmpty(lines, options)
+    val filteredLines: Iterator[String] = CSVExprUtils.filterCommentAndEmpty(lines, options)
 
     val safeParser = new FailureSafeParser[String](
       input => Seq(parser.parse(input)),
```
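The shape of this read path is: filter out ignorable lines, then push each survivor through a parser wrapped so that a malformed record is reported instead of aborting the whole iterator. A generic sketch of that idea; the `Either`-based wrapper below is an illustration, not Spark's actual `FailureSafeParser` API:

```scala
// Turn a throwing parser into one that records failures per input line.
def failureSafe[T](parse: String => T)(input: String): Either[(String, Throwable), T] =
  try Right(parse(input)) catch { case e: Exception => Left(input -> e) }

// Toy line parser that expects exactly two comma-separated fields.
def toyParse(s: String): (Int, String) = {
  val Array(id, name) = s.split(",")
  (id.toInt, name)
}

val lines = Iterator("# comment", "", "1,a", "oops")
val parsed = lines
  .filter(l => l.trim.nonEmpty && !l.startsWith("#"))  // the filterCommentAndEmpty step
  .map(line => failureSafe(toyParse)(line))
// Yields Right((1, "a")) followed by a Left capturing the malformed "oops" line.
```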
CSVExpressionUtilsSuite.scala:

```diff
@@ -21,40 +21,40 @@ import org.apache.spark.SparkFunSuite
 
 class CSVExpressionUtilsSuite extends SparkFunSuite {
   test("Can parse escaped characters") {
-    assert(CSVExpressionUtils.toChar("""\t""") === '\t')
-    assert(CSVExpressionUtils.toChar("""\r""") === '\r')
-    assert(CSVExpressionUtils.toChar("""\b""") === '\b')
-    assert(CSVExpressionUtils.toChar("""\f""") === '\f')
-    assert(CSVExpressionUtils.toChar("""\"""") === '\"')
-    assert(CSVExpressionUtils.toChar("""\'""") === '\'')
-    assert(CSVExpressionUtils.toChar("""\u0000""") === '\u0000')
-    assert(CSVExpressionUtils.toChar("""\\""") === '\\')
+    assert(CSVExprUtils.toChar("""\t""") === '\t')
+    assert(CSVExprUtils.toChar("""\r""") === '\r')
+    assert(CSVExprUtils.toChar("""\b""") === '\b')
+    assert(CSVExprUtils.toChar("""\f""") === '\f')
+    assert(CSVExprUtils.toChar("""\"""") === '\"')
+    assert(CSVExprUtils.toChar("""\'""") === '\'')
+    assert(CSVExprUtils.toChar("""\u0000""") === '\u0000')
+    assert(CSVExprUtils.toChar("""\\""") === '\\')
   }
 
   test("Does not accept delimiter larger than one character") {
     val exception = intercept[IllegalArgumentException]{
-      CSVExpressionUtils.toChar("ab")
+      CSVExprUtils.toChar("ab")
     }
     assert(exception.getMessage.contains("cannot be more than one character"))
   }
 
   test("Throws exception for unsupported escaped characters") {
     val exception = intercept[IllegalArgumentException]{
-      CSVExpressionUtils.toChar("""\1""")
+      CSVExprUtils.toChar("""\1""")
     }
     assert(exception.getMessage.contains("Unsupported special character for delimiter"))
   }
 
   test("string with one backward slash is prohibited") {
     val exception = intercept[IllegalArgumentException]{
-      CSVExpressionUtils.toChar("""\""")
+      CSVExprUtils.toChar("""\""")
     }
     assert(exception.getMessage.contains("Single backslash is prohibited"))
   }
 
   test("output proper error message for empty string") {
     val exception = intercept[IllegalArgumentException]{
-      CSVExpressionUtils.toChar("")
+      CSVExprUtils.toChar("")
     }
     assert(exception.getMessage.contains("Delimiter cannot be empty string"))
   }
```
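Read together, these tests pin down `toChar`'s contract: translate a small set of backslash escapes, accept any other single character verbatim, and reject the empty string, a lone backslash, unknown escapes, and anything longer than one character. A sketch that satisfies exactly these assertions (the real implementation may be structured differently, and the messages only need to contain the asserted substrings):

```scala
// Hypothetical reconstruction driven by the test suite above.
def toChar(str: String): Char = (str: Seq[Char]) match {
  case Seq() =>
    throw new IllegalArgumentException("Delimiter cannot be empty string")
  case Seq('\\') =>
    throw new IllegalArgumentException("Single backslash is prohibited")
  case Seq(c) => c  // any other single character is used as-is
  case Seq('\\', 't') => '\t'
  case Seq('\\', 'r') => '\r'
  case Seq('\\', 'b') => '\b'
  case Seq('\\', 'f') => '\f'
  case Seq('\\', '"') => '"'
  case Seq('\\', '\'') => '\''
  case Seq('\\', '\\') => '\\'
  case Seq('\\', 'u', '0', '0', '0', '0') => '\u0000'  // spelled-out NUL escape
  case Seq('\\', _) =>
    throw new IllegalArgumentException(s"Unsupported special character for delimiter: $str")
  case _ =>
    throw new IllegalArgumentException(s"Delimiter cannot be more than one character: $str")
}
```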
CSVUtils.scala:

```diff
@@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources.csv
 
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.Dataset
-import org.apache.spark.sql.catalyst.csv.CSVExpressionUtils
+import org.apache.spark.sql.catalyst.csv.CSVExprUtils
 import org.apache.spark.sql.catalyst.csv.CSVOptions
 import org.apache.spark.sql.functions._
 
@@ -128,5 +128,5 @@ object CSVUtils {
   }
 
   def filterCommentAndEmpty(iter: Iterator[String], options: CSVOptions): Iterator[String] =
-    CSVExpressionUtils.filterCommentAndEmpty(iter, options)
+    CSVExprUtils.filterCommentAndEmpty(iter, options)
 }
```
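Keeping this thin delegate in `org.apache.spark.sql.execution.datasources.csv` preserves the existing entry point for execution-side callers while the logic itself now lives in catalyst. A small usage sketch; the three-argument `CSVOptions` constructor (parameters map, column-pruning flag, default time zone ID) is an assumption, so check the class for the exact signature:

```scala
// Assumed convenience constructor: (parameters, columnPruning, defaultTimeZoneId).
val options = new CSVOptions(Map("comment" -> "#"), false, "UTC")

val lines = Iterator("# generated file", "", "a,b,c", "1,2,3")
CSVUtils.filterCommentAndEmpty(lines, options).toList
// List("a,b,c", "1,2,3"): the comment line and the blank line are dropped.
```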