diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index e5288636c596..ce428b228338 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -176,7 +176,7 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
allowComments=None, allowUnquotedFieldNames=None, allowSingleQuotes=None,
allowNumericLeadingZero=None, allowBackslashEscapingAnyCharacter=None,
mode=None, columnNameOfCorruptRecord=None, dateFormat=None, timestampFormat=None,
- multiLine=None, allowUnquotedControlChars=None):
+ multiLine=None, allowUnquotedControlChars=None, lineSep=None):
"""
Loads JSON files and returns the results as a :class:`DataFrame`.
@@ -237,6 +237,8 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
:param allowUnquotedControlChars: allows JSON Strings to contain unquoted control
characters (ASCII characters with value less than 32,
including tab and line feed characters) or not.
+ :param lineSep: defines the line separator that should be used for parsing. If None is
+ set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``.
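+                        For example, ``spark.read.json(path, lineSep=',')`` splits the input
+                        into records on each ``,`` instead of on newlines.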
>>> df1 = spark.read.json('python/test_support/sql/people.json')
>>> df1.dtypes
@@ -254,7 +256,7 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
allowBackslashEscapingAnyCharacter=allowBackslashEscapingAnyCharacter,
mode=mode, columnNameOfCorruptRecord=columnNameOfCorruptRecord, dateFormat=dateFormat,
timestampFormat=timestampFormat, multiLine=multiLine,
- allowUnquotedControlChars=allowUnquotedControlChars)
+ allowUnquotedControlChars=allowUnquotedControlChars, lineSep=lineSep)
if isinstance(path, basestring):
path = [path]
if type(path) == list:
@@ -746,7 +748,8 @@ def saveAsTable(self, name, format=None, mode=None, partitionBy=None, **options)
self._jwrite.saveAsTable(name)
@since(1.4)
- def json(self, path, mode=None, compression=None, dateFormat=None, timestampFormat=None):
+ def json(self, path, mode=None, compression=None, dateFormat=None, timestampFormat=None,
+ lineSep=None):
"""Saves the content of the :class:`DataFrame` in JSON format
     (`JSON Lines text format or newline-delimited JSON <http://jsonlines.org/>`_) at the
specified path.
@@ -770,12 +773,15 @@ def json(self, path, mode=None, compression=None, dateFormat=None, timestampForm
formats follow the formats at ``java.text.SimpleDateFormat``.
This applies to timestamp type. If None is set, it uses the
default value, ``yyyy-MM-dd'T'HH:mm:ss.SSSXXX``.
+ :param lineSep: defines the line separator that should be used for writing. If None is
+ set, it uses the default value, ``\\n``.
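+                        For example, ``df.write.json(path, lineSep='!!')`` separates the
+                        written records with ``!!``.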
>>> df.write.json(os.path.join(tempfile.mkdtemp(), 'data'))
"""
self.mode(mode)
self._set_opts(
- compression=compression, dateFormat=dateFormat, timestampFormat=timestampFormat)
+ compression=compression, dateFormat=dateFormat, timestampFormat=timestampFormat,
+ lineSep=lineSep)
self._jwrite.json(path)
@since(1.4)
diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py
index 07f9ac1b5aa9..490df4accf87 100644
--- a/python/pyspark/sql/streaming.py
+++ b/python/pyspark/sql/streaming.py
@@ -407,7 +407,7 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
allowComments=None, allowUnquotedFieldNames=None, allowSingleQuotes=None,
allowNumericLeadingZero=None, allowBackslashEscapingAnyCharacter=None,
mode=None, columnNameOfCorruptRecord=None, dateFormat=None, timestampFormat=None,
- multiLine=None, allowUnquotedControlChars=None):
+ multiLine=None, allowUnquotedControlChars=None, lineSep=None):
"""
Loads a JSON file stream and returns the results as a :class:`DataFrame`.
@@ -470,6 +470,8 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
:param allowUnquotedControlChars: allows JSON Strings to contain unquoted control
characters (ASCII characters with value less than 32,
including tab and line feed characters) or not.
+ :param lineSep: defines the line separator that should be used for parsing. If None is
+ set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``.
>>> json_sdf = spark.readStream.json(tempfile.mkdtemp(), schema = sdf_schema)
>>> json_sdf.isStreaming
@@ -484,7 +486,7 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
allowBackslashEscapingAnyCharacter=allowBackslashEscapingAnyCharacter,
mode=mode, columnNameOfCorruptRecord=columnNameOfCorruptRecord, dateFormat=dateFormat,
timestampFormat=timestampFormat, multiLine=multiLine,
- allowUnquotedControlChars=allowUnquotedControlChars)
+ allowUnquotedControlChars=allowUnquotedControlChars, lineSep=lineSep)
if isinstance(path, basestring):
return self._df(self._jreader.json(path))
else:
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 967cc83166f3..505fc056369f 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -676,6 +676,23 @@ def test_multiline_json(self):
multiLine=True)
self.assertEqual(people1.collect(), people_array.collect())
+ def test_linesep_json(self):
+ df = self.spark.read.json("python/test_support/sql/people.json", lineSep=",")
+ expected = [Row(_corrupt_record=None, name=u'Michael'),
+ Row(_corrupt_record=u' "age":30}\n{"name":"Justin"', name=None),
+ Row(_corrupt_record=u' "age":19}\n', name=None)]
+ self.assertEqual(df.collect(), expected)
+
+ tpath = tempfile.mkdtemp()
+ shutil.rmtree(tpath)
+ try:
+ df = self.spark.read.json("python/test_support/sql/people.json")
+ df.write.json(tpath, lineSep="!!")
+ readback = self.spark.read.json(tpath, lineSep="!!")
+ self.assertEqual(readback.collect(), df.collect())
+ finally:
+ shutil.rmtree(tpath)
+
def test_multiline_csv(self):
ages_newlines = self.spark.read.csv(
"python/test_support/sql/ages_newlines.csv", multiLine=True)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/CreateJacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/CreateJacksonParser.scala
index 025a388aacaa..a27b73a5e827 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/CreateJacksonParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/CreateJacksonParser.scala
@@ -22,6 +22,7 @@ import java.io.{ByteArrayInputStream, InputStream, InputStreamReader}
import com.fasterxml.jackson.core.{JsonFactory, JsonParser}
import org.apache.hadoop.io.Text
+import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.unsafe.types.UTF8String
private[sql] object CreateJacksonParser extends Serializable {
@@ -46,4 +47,15 @@ private[sql] object CreateJacksonParser extends Serializable {
def inputStream(jsonFactory: JsonFactory, record: InputStream): JsonParser = {
jsonFactory.createParser(record)
}
+
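+  // Creates a Jackson parser over the UTF-8 encoded bytes stored in the first column of
+  // the given internal row. Only the UTF-8 charset is supported for now, as enforced by
+  // the require below.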
+ def internalRow(
+ jsonFactory: JsonFactory,
+ row: InternalRow,
+ charset: Option[String] = None
+ ): JsonParser = {
+ require(charset == Some("UTF-8"))
+ val is = new ByteArrayInputStream(row.getBinary(0))
+
+ inputStream(jsonFactory, is)
+ }
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
index 652412b34478..c908c7254b0f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala
@@ -85,6 +85,38 @@ private[sql] class JSONOptions(
val multiLine = parameters.get("multiLine").map(_.toBoolean).getOrElse(false)
+ val charset: Option[String] = Some("UTF-8")
+
+  /**
+   * A sequence of bytes between two consecutive JSON records. The format of the option is:
+   * selector (1 char) + delimiter body (any length) | plain sequence of chars.
+   * The following selectors are supported:
+   * - 'x' + a sequence of bytes in hexadecimal format, for example "x0a 0d".
+   *   Hex pairs can be separated by any chars other than 0-9, A-F and a-f.
+   * - '\' - reserved for a sequence of control chars like "\r\n"
+   *   and unicode escapes like "\u000D\u000A".
+   * - 'r' and '/' - reserved for future use.
+   *
+   * Note: the json writer writes this separator out as a string; if the option is not set,
+   * it falls back to '\n' (converted to a sequence of bytes according to the charset).
+   */
+ val lineSeparator: Option[Array[Byte]] = parameters.get("lineSep").collect {
+ case hexs if hexs.startsWith("x") =>
+ hexs.replaceAll("[^0-9A-Fa-f]", "").sliding(2, 2).toArray
+ .map(Integer.parseInt(_, 16).toByte)
+ case reserved if reserved.startsWith("r") || reserved.startsWith("/") =>
+      throw new NotImplementedError(s"The $reserved selector is not supported yet")
+ case delim => delim.getBytes(charset.getOrElse(
+        throw new IllegalArgumentException("Please set the charset option for the delimiter")))
+ }
+ val lineSeparatorInRead: Option[Array[Byte]] = lineSeparator
+
+ // Note that JSON uses writer with UTF-8 charset. This string will be written out as UTF-8.
+ val lineSeparatorInWrite: String = {
+ lineSeparator.map(new String(_, charset.getOrElse("UTF-8"))).getOrElse("\n")
+ }
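+  // Illustrative examples (UTF-8 charset assumed):
+  //   lineSep = "::"     -> read delimiter Array(0x3a, 0x3a), write separator "::"
+  //   lineSep = "x0a 0d" -> read delimiter Array(0x0a, 0x0d), write separator "\n\r"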
+
/** Sets config options on a Jackson [[JsonFactory]]. */
def setJacksonOptions(factory: JsonFactory): Unit = {
factory.configure(JsonParser.Feature.ALLOW_COMMENTS, allowComments)
@@ -96,4 +128,10 @@ private[sql] class JSONOptions(
allowBackslashEscapingAnyCharacter)
factory.configure(JsonParser.Feature.ALLOW_UNQUOTED_CONTROL_CHARS, allowUnquotedControlChars)
}
+
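+  /**
+   * Options passed down to the underlying text datasource that performs the line splitting.
+   * The delimiter is re-encoded in its hex form, e.g. (illustrative) a "\r\n" separator
+   * becomes Map("lineSep" -> "x0dx0a").
+   */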
+ def getTextOptions: Map[String, String] = {
+    lineSeparatorInRead.map { bytes =>
+ "lineSep" -> bytes.map("x%02x".format(_)).mkString
+ }.toMap
+ }
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
index eb06e4f304f0..a94a2fe7881d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
@@ -74,6 +74,8 @@ private[sql] class JacksonGenerator(
private val gen = new JsonFactory().createGenerator(writer).setRootValueSeparator(null)
+ private val lineSeparator: String = options.lineSeparatorInWrite
+
private def makeWriter(dataType: DataType): ValueWriter = dataType match {
case NullType =>
(row: SpecializedGetters, ordinal: Int) =>
@@ -251,5 +253,8 @@ private[sql] class JacksonGenerator(
mapType = dataType.asInstanceOf[MapType]))
}
- def writeLineEnding(): Unit = gen.writeRaw('\n')
+ def writeLineEnding(): Unit = {
+ // Note that JSON uses writer with UTF-8 charset. This string will be written out as UTF-8.
+ gen.writeRaw(lineSeparator)
+ }
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index 1a5e47508c07..ae3ba1690f69 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -366,6 +366,8 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
* `java.text.SimpleDateFormat`. This applies to timestamp type.
 * `multiLine` (default `false`): parse one record, which may span multiple lines,
* per file
+ * `lineSep` (default covers all `\r`, `\r\n` and `\n`): defines the line separator
+ * that should be used for parsing.
*
*
* @since 2.0.0
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index bb93889dc55e..bbc063148a72 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -518,6 +518,8 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
* `timestampFormat` (default `yyyy-MM-dd'T'HH:mm:ss.SSSXXX`): sets the string that
* indicates a timestamp format. Custom date formats follow the formats at
* `java.text.SimpleDateFormat`. This applies to timestamp type.
+ * `lineSep` (default `\n`): defines the line separator that should
+ * be used for writing.
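+ * For example (illustrative), setting `lineSep` to `"|"` produces output like
+ * `{"a":1}|{"b":2}|`, with the separator also written after the last record.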
*
*
* @since 1.4.0
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonDataSource.scala
index 77e7edc8e7a2..c09a1617690b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonDataSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonDataSource.scala
@@ -33,6 +33,7 @@ import org.apache.spark.input.{PortableDataStream, StreamInputFormat}
import org.apache.spark.rdd.{BinaryFileRDD, RDD}
import org.apache.spark.sql.{AnalysisException, Dataset, Encoders, SparkSession}
import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.UnsafeRow
import org.apache.spark.sql.catalyst.json.{CreateJacksonParser, JacksonParser, JSONOptions}
import org.apache.spark.sql.execution.datasources._
import org.apache.spark.sql.execution.datasources.text.TextFileFormat
@@ -92,25 +93,33 @@ object TextInputJsonDataSource extends JsonDataSource {
sparkSession: SparkSession,
inputPaths: Seq[FileStatus],
parsedOptions: JSONOptions): StructType = {
- val json: Dataset[String] = createBaseDataset(sparkSession, inputPaths)
+ val json: Dataset[String] = createBaseDataset(sparkSession, inputPaths, parsedOptions)
inferFromDataset(json, parsedOptions)
}
def inferFromDataset(json: Dataset[String], parsedOptions: JSONOptions): StructType = {
val sampled: Dataset[String] = JsonUtils.sample(json, parsedOptions)
- val rdd: RDD[UTF8String] = sampled.queryExecution.toRdd.map(_.getUTF8String(0))
- JsonInferSchema.infer(rdd, parsedOptions, CreateJacksonParser.utf8String)
+ val rdd: RDD[InternalRow] = sampled.queryExecution.toRdd
+
+ JsonInferSchema.infer[InternalRow](
+ rdd,
+ parsedOptions,
+ CreateJacksonParser.internalRow(_, _, parsedOptions.charset)
+ )
}
private def createBaseDataset(
sparkSession: SparkSession,
- inputPaths: Seq[FileStatus]): Dataset[String] = {
+ inputPaths: Seq[FileStatus],
+ parsedOptions: JSONOptions
+ ): Dataset[String] = {
val paths = inputPaths.map(_.getPath.toString)
sparkSession.baseRelationToDataFrame(
DataSource.apply(
sparkSession,
paths = paths,
- className = classOf[TextFileFormat].getName
+ className = classOf[TextFileFormat].getName,
+ options = parsedOptions.getTextOptions
).resolveRelation(checkFilesExist = false))
.select("value").as(Encoders.STRING)
}
@@ -120,7 +129,7 @@ object TextInputJsonDataSource extends JsonDataSource {
file: PartitionedFile,
parser: JacksonParser,
schema: StructType): Iterator[InternalRow] = {
- val linesReader = new HadoopFileLinesReader(file, conf)
+ val linesReader = new HadoopFileLinesReader(file, parser.options.lineSeparatorInRead, conf)
Option(TaskContext.get()).foreach(_.addTaskCompletionListener(_ => linesReader.close()))
val safeParser = new FailureSafeParser[Text](
input => parser.parse(input, CreateJacksonParser.text, textToUTF8String),
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextOptions.scala
index 18698df9fd8e..386512c612ec 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextOptions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextOptions.scala
@@ -41,13 +41,20 @@ private[text] class TextOptions(@transient private val parameters: CaseInsensiti
*/
val wholeText = parameters.getOrElse(WHOLETEXT, "false").toBoolean
- private val lineSeparator: Option[String] = parameters.get(LINE_SEPARATOR).map { sep =>
- require(sep.nonEmpty, s"'$LINE_SEPARATOR' cannot be an empty string.")
- sep
+ val charset: Option[String] = Some("UTF-8")
+
+  val lineSeparator: Option[Array[Byte]] = parameters.get(LINE_SEPARATOR).collect {
+ case hexs if hexs.startsWith("x") =>
+ hexs.replaceAll("[^0-9A-Fa-f]", "").sliding(2, 2).toArray
+ .map(Integer.parseInt(_, 16).toByte)
+ case reserved if reserved.startsWith("r") || reserved.startsWith("/") =>
+      throw new NotImplementedError(s"The $reserved selector is not supported yet")
+ case delim => delim.getBytes(charset.getOrElse(
+        throw new IllegalArgumentException("Please set the charset option for the delimiter")))
}
+
// Note that the option 'lineSep' uses a different default value in read and write.
- val lineSeparatorInRead: Option[Array[Byte]] =
- lineSeparator.map(_.getBytes(StandardCharsets.UTF_8))
+ val lineSeparatorInRead: Option[Array[Byte]] = lineSeparator
val lineSeparatorInWrite: Array[Byte] =
lineSeparatorInRead.getOrElse("\n".getBytes(StandardCharsets.UTF_8))
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
index 9b17406a816b..ae93965bc50e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
@@ -268,6 +268,8 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
* `java.text.SimpleDateFormat`. This applies to timestamp type.
* `multiLine` (default `false`): parse one record, which may span multiple lines,
* per file
+ * `lineSep` (default covers all `\r`, `\r\n` and `\n`): defines the line separator
+ * that should be used for parsing.
*
*
* @since 2.0.0
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
index 8c8d41ebf115..8ad31231d51c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
@@ -17,8 +17,9 @@
package org.apache.spark.sql.execution.datasources.json
-import java.io.{File, StringWriter}
+import java.io.{File, FileOutputStream, StringWriter}
import java.nio.charset.StandardCharsets
+import java.nio.file.Files
import java.sql.{Date, Timestamp}
import java.util.Locale
@@ -27,7 +28,7 @@ import org.apache.hadoop.fs.{Path, PathFilter}
import org.apache.hadoop.io.SequenceFile.CompressionType
import org.apache.hadoop.io.compress.GzipCodec
-import org.apache.spark.SparkException
+import org.apache.spark.{SparkException, TestUtils}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{functions => F, _}
import org.apache.spark.sql.catalyst.json.{CreateJacksonParser, JacksonParser, JSONOptions}
@@ -2063,4 +2064,93 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
)
}
}
+
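+  // Writes records separated by `lineSep` (either a literal string or the "x.." hex form)
+  // in the given charset and checks that they are read back correctly with the same option.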
+ def checkReadJson(
+ charset: String,
+ lineSep: String,
+ inferSchema: Boolean,
+ runId: Int
+ ): Unit = {
+ test(s"checks reading json in ${charset} #${runId}") {
+ val delimInBytes = {
+ if (lineSep.startsWith("x")) {
+ lineSep.replaceAll("[^0-9A-Fa-f]", "")
+ .sliding(2, 2).toArray.map(Integer.parseInt(_, 16).toByte)
+ } else {
+ lineSep.getBytes(charset)
+ }
+ }
+ case class Rec(f1: String, f2: Int) {
+ def json = s"""{"f1":"${f1}", "f2":$f2}"""
+ def bytes = json.getBytes(charset)
+ def row = Row(f1, f2)
+ }
+ val schema = new StructType().add("f1", StringType).add("f2", IntegerType)
+ withTempPath { path =>
+ val records = List(Rec("a", 1), Rec("b", 2))
+ val data = records.map(_.bytes).reduce((a1, a2) => a1 ++ delimInBytes ++ a2)
+ val os = new FileOutputStream(path)
+ os.write(data)
+ os.close()
+ val reader = if (inferSchema) {
+ spark.read
+ } else {
+ spark.read.schema(schema)
+ }
+ val savedDf = reader
+ .option("lineSep", lineSep)
+ .json(path.getCanonicalPath)
+ checkAnswer(savedDf, records.map(_.row))
+ }
+ }
+ }
+
+ // scalastyle:off nonascii
+ List(
+ ("|", "UTF-8", false),
+ ("^", "UTF-8", true),
+ ("::", "UTF-8", true),
+ ("!!!@3", "UTF-8", false),
+ (0x1E.toChar.toString, "UTF-8", true),
+ ("아", "UTF-8", false),
+ ("куку", "UTF-8", true),
+ ("sep", "UTF-8", false),
+ ("x0a 0d", "UTF-8", true),
+ ("x54.45", "UTF-8", false),
+ ("\r\n", "UTF-8", false),
+ ("\r\n", "UTF-8", true),
+ ("\u000d\u000a", "UTF-8", false),
+ ("\u000a\u000d", "UTF-8", true),
+ ("===", "UTF-8", false),
+ ("$^+", "UTF-8", true)
+  ).zipWithIndex.foreach { case ((d, c, s), i) => checkReadJson(c, d, s, i) }
+ // scalastyle:on nonascii
+
+ def testLineSeparator(lineSep: String): Unit = {
+ test(s"SPARK-21289: Support line separator - lineSep: '$lineSep'") {
+ // Write
+ withTempPath { path =>
+ Seq("a", "b", "c").toDF("value").coalesce(1)
+ .write.option("lineSep", lineSep).json(path.getAbsolutePath)
+ val partFile = TestUtils.recursiveList(path).filter(f => f.getName.startsWith("part-")).head
+ val readBack = new String(Files.readAllBytes(partFile.toPath), StandardCharsets.UTF_8)
+ assert(
+ readBack === s"""{"value":"a"}$lineSep{"value":"b"}$lineSep{"value":"c"}$lineSep""")
+ }
+
+ // Roundtrip
+ withTempPath { path =>
+ val df = Seq("a", "b", "c").toDF()
+ df.write.option("lineSep", lineSep).json(path.getAbsolutePath)
+ val readBack = spark.read.option("lineSep", lineSep).json(path.getAbsolutePath)
+ checkAnswer(df, readBack)
+ }
+ }
+ }
+
+ // scalastyle:off nonascii
+ Seq("|", "^", "::", "!!!@3", 0x1E.toChar.toString, "아").foreach { lineSep =>
+ testLineSeparator(lineSep)
+ }
+ // scalastyle:on nonascii
}