Skip to content

Commit 3ff0120

Browse files
HyukjinKwon authored and rxin committed
[SPARK-15250][SQL] Remove deprecated json API in DataFrameReader
## What changes were proposed in this pull request?

This PR removes the old `json(path: String)` API which is covered by the new `json(paths: String*)`.

## How was this patch tested?

Jenkins tests (existing tests should cover this).

Author: hyukjinkwon <[email protected]>
Author: Hyukjin Kwon <[email protected]>

Closes #13040 from HyukjinKwon/SPARK-15250.
1 parent 5a5b83c commit 3ff0120

File tree

3 files changed

+6
-34
lines changed

3 files changed

+6
-34
lines changed

project/MimaExcludes.scala

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -348,6 +348,9 @@ object MimaExcludes {
348348
) ++ Seq(
349349
// [SPARK-13686][MLLIB][STREAMING] Add a constructor parameter `reqParam` to (Streaming)LinearRegressionWithSGD
350350
ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.mllib.regression.LinearRegressionWithSGD.this")
351+
) ++ Seq(
352+
// SPARK-15250 Remove deprecated json API in DataFrameReader
353+
ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.sql.DataFrameReader.json")
351354
) ++ Seq(
352355
// SPARK-13920: MIMA checks should apply to @Experimental and @DeveloperAPI APIs
353356
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.Aggregator.combineCombinersByKey"),

python/pyspark/sql/readwriter.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -241,8 +241,8 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
241241
if columnNameOfCorruptRecord is not None:
242242
self.option("columnNameOfCorruptRecord", columnNameOfCorruptRecord)
243243
if isinstance(path, basestring):
244-
return self._df(self._jreader.json(path))
245-
elif type(path) == list:
244+
path = [path]
245+
if type(path) == list:
246246
return self._df(self._jreader.json(self._sqlContext._sc._jvm.PythonUtils.toSeq(path)))
247247
elif isinstance(path, RDD):
248248
def func(iterator):

sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala

Lines changed: 1 addition & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -277,38 +277,6 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
277277
sparkSession.baseRelationToDataFrame(relation)
278278
}
279279

280-
/**
281-
* Loads a JSON file (one object per line) and returns the result as a [[DataFrame]].
282-
*
283-
* This function goes through the input once to determine the input schema. If you know the
284-
* schema in advance, use the version that specifies the schema to avoid the extra scan.
285-
*
286-
* You can set the following JSON-specific options to deal with non-standard JSON files:
287-
* <li>`primitivesAsString` (default `false`): infers all primitive values as a string type</li>
288-
* <li>`allowComments` (default `false`): ignores Java/C++ style comment in JSON records</li>
289-
* <li>`allowUnquotedFieldNames` (default `false`): allows unquoted JSON field names</li>
290-
* <li>`allowSingleQuotes` (default `true`): allows single quotes in addition to double quotes
291-
* </li>
292-
* <li>`allowNumericLeadingZeros` (default `false`): allows leading zeros in numbers
293-
* (e.g. 00012)</li>
294-
* <li>`mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records
295-
* during parsing.</li>
296-
* <ul>
297-
* <li>`PERMISSIVE` : sets other fields to `null` when it meets a corrupted record, and puts the
298-
* malformed string into a new field configured by `columnNameOfCorruptRecord`. When
299-
* a schema is set by user, it sets `null` for extra fields.</li>
300-
* <li>`DROPMALFORMED` : ignores the whole corrupted records.</li>
301-
* <li>`FAILFAST` : throws an exception when it meets corrupted records.</li>
302-
* </ul>
303-
* <li>`columnNameOfCorruptRecord` (default `_corrupt_record`): allows renaming the new field
304-
* having malformed string created by `PERMISSIVE` mode. This overrides
305-
* `spark.sql.columnNameOfCorruptRecord`.</li>
306-
*
307-
* @since 1.4.0
308-
*/
309-
// TODO: Remove this one in Spark 2.0.
310-
def json(path: String): DataFrame = format("json").load(path)
311-
312280
/**
313281
* Loads a JSON file (one object per line) and returns the result as a [[DataFrame]].
314282
*
@@ -342,6 +310,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
342310
*
343311
* @since 1.6.0
344312
*/
313+
@scala.annotation.varargs
345314
def json(paths: String*): DataFrame = format("json").load(paths : _*)
346315

347316
/**

0 commit comments

Comments (0)