 def _monkey_patch_RDD(sqlCtx):
     def toDF(self, schema=None, sampleRatio=None):
         """
-        Convert current :class:`RDD` into a :class:`DataFrame`
+        Converts current :class:`RDD` into a :class:`DataFrame`
 
         This is a shorthand for ``sqlCtx.createDataFrame(rdd, schema, sampleRatio)``
@@ -59,13 +59,14 @@ def toDF(self, schema=None, sampleRatio=None):
 class SQLContext(object):
     """Main entry point for Spark SQL functionality.
 
-    A SQLContext can be used create L{DataFrame}, register L{DataFrame} as
+    A SQLContext can be used to create :class:`DataFrame`, register :class:`DataFrame` as
     tables, execute SQL over tables, cache tables, and read parquet files.
 
-    When created, L{SQLContext} adds a method called ``toDF`` to :class:`RDD`, which could be
-    used to convert an RDD into a DataFrame, it's a shorthand for L{SQLContext.createDataFrame}.
+    When created, :class:`SQLContext` adds a method called ``toDF`` to :class:`RDD`,
+    which can be used to convert an RDD into a DataFrame; it's a shorthand for
+    :func:`SQLContext.createDataFrame`.
 
-    :param sparkContext: The SparkContext to wrap.
+    :param sparkContext: The :class:`SparkContext` backing this SQLContext.
     :param sqlContext: An optional JVM Scala SQLContext. If set, we do not instantiate a new
         SQLContext in the JVM; instead, we make all calls to this object.
     """
@@ -209,21 +210,23 @@ def applySchema(self, rdd, schema):
 
     def createDataFrame(self, data, schema=None, samplingRatio=None):
         """
-        Create a DataFrame from an RDD of L{tuple}/L{list}, list or L{pandas.DataFrame}.
+        Creates a :class:`DataFrame` from an :class:`RDD` of :class:`tuple`/:class:`list`,
+        list or :class:`pandas.DataFrame`.
 
         When ``schema`` is a list of column names, the type of each column
-        will be inferred from ``rdd``.
+        will be inferred from ``data``.
 
         When ``schema`` is ``None``, it will try to infer the schema (column names and types)
-        from ``rdd``, which should be an RDD of :class:`Row`, or L{namedtuple}, or L{dict}.
+        from ``data``, which should be an RDD of :class:`Row`,
+        or :class:`namedtuple`, or :class:`dict`.
 
         If schema inference is needed, ``samplingRatio`` is used to determine the ratio of
         rows used for schema inference. The first row will be used if ``samplingRatio`` is ``None``.
 
-        :param data: an RDD of Row/tuple/list/dict, list, or pandas.DataFrame
-        :param schema: a StructType or list of column names. default None.
+        :param data: an RDD of :class:`Row`/:class:`tuple`/:class:`list`/:class:`dict`,
+            :class:`list`, or :class:`pandas.DataFrame`.
+        :param schema: a :class:`StructType` or list of column names. default None.
         :param samplingRatio: the sample ratio of rows used for inferring
-        :return: a L{DataFrame}
 
         >>> l = [('Alice', 1)]
         >>> sqlCtx.createDataFrame(l).collect()
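
To make the two schema cases above concrete, a hedged sketch continuing this doctest (output reprs are indicative; note that ``Row`` keyword arguments are sorted alphabetically):

    >>> sqlCtx.createDataFrame(l, ['name', 'age']).collect()  # schema as column names
    [Row(name=u'Alice', age=1)]
    >>> from pyspark.sql import Row
    >>> rdd = sc.parallelize([Row(name='Alice', age=1)])
    >>> sqlCtx.createDataFrame(rdd).collect()  # schema inferred from Rows
    [Row(age=1, name=u'Alice')]
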
@@ -309,9 +312,9 @@ def createDataFrame(self, data, schema=None, samplingRatio=None):
         return DataFrame(df, self)
 
     def registerDataFrameAsTable(self, df, tableName):
-        """Registers the given L{DataFrame} as a temporary table in the catalog.
+        """Registers the given :class:`DataFrame` as a temporary table in the catalog.
 
-        Temporary tables exist only during the lifetime of this instance of L{SQLContext}.
+        Temporary tables exist only during the lifetime of this instance of :class:`SQLContext`.
 
         >>> sqlCtx.registerDataFrameAsTable(df, "table1")
         """
@@ -321,7 +324,7 @@ def registerDataFrameAsTable(self, df, tableName):
             raise ValueError("Can only register DataFrame as table")
 
     def parquetFile(self, *paths):
-        """Loads a Parquet file, returning the result as a L{DataFrame}.
+        """Loads a Parquet file, returning the result as a :class:`DataFrame`.
 
         >>> import tempfile, shutil
         >>> parquetFile = tempfile.mkdtemp()
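
The hunk cuts this doctest short; a sketch of the round trip it sets up (``df`` is the module's doctest fixture; the output directory must not already exist, hence the ``rmtree``):

    >>> shutil.rmtree(parquetFile)
    >>> df.saveAsParquetFile(parquetFile)
    >>> df2 = sqlCtx.parquetFile(parquetFile)
    >>> sorted(df.collect()) == sorted(df2.collect())
    True
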
@@ -339,8 +342,7 @@ def parquetFile(self, *paths):
         return DataFrame(jdf, self)
 
     def jsonFile(self, path, schema=None, samplingRatio=1.0):
-        """
-        Loads a text file storing one JSON object per line as a L{DataFrame}.
+        """Loads a text file storing one JSON object per line as a :class:`DataFrame`.
 
         If the schema is provided, applies the given schema to this JSON dataset.
         Otherwise, it samples the dataset with ratio ``samplingRatio`` to determine the schema.
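
A sketch of the basic call with an inferred schema (assuming ``tempfile`` is imported; inferred JSON columns come back in alphabetical order, and the repr is indicative):

    >>> jsonPath = tempfile.mkdtemp() + '/data.json'
    >>> with open(jsonPath, 'w') as f:
    ...     f.write('{"name": "Alice", "age": 1}')
    >>> sqlCtx.jsonFile(jsonPath).collect()
    [Row(age=1, name=u'Alice')]
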
@@ -379,7 +381,7 @@ def jsonFile(self, path, schema=None, samplingRatio=1.0):
         return DataFrame(df, self)
 
     def jsonRDD(self, rdd, schema=None, samplingRatio=1.0):
-        """Loads an RDD storing one JSON object per string as a L{DataFrame}.
+        """Loads an RDD storing one JSON object per string as a :class:`DataFrame`.
 
         If the schema is provided, applies the given schema to this JSON dataset.
         Otherwise, it samples the dataset with ratio ``samplingRatio`` to determine the schema.
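
The RDD-based variant needs no file on disk; a minimal sketch (output repr indicative):

    >>> rdd = sc.parallelize(['{"name": "Alice", "age": 1}'])
    >>> sqlCtx.jsonRDD(rdd).collect()
    [Row(age=1, name=u'Alice')]
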
@@ -421,7 +423,7 @@ def func(iterator):
         return DataFrame(df, self)
 
     def load(self, path=None, source=None, schema=None, **options):
-        """Returns the dataset in a data source as a L{DataFrame}.
+        """Returns the dataset in a data source as a :class:`DataFrame`.
 
         The data source is specified by the ``source`` and a set of ``options``.
         If ``source`` is not specified, the default data source configured by
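
A sketch of an explicit-source call (the path is hypothetical; ``source='json'`` names the built-in JSON data source):

    >>> df = sqlCtx.load('/tmp/people.json', source='json')  # hypothetical path
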
@@ -455,7 +457,7 @@ def createExternalTable(self, tableName, path=None, source=None,
         If ``source`` is not specified, the default data source configured by
         ``spark.sql.sources.default`` will be used.
 
-        Optionally, a schema can be provided as the schema of the returned L{DataFrame} and
+        Optionally, a schema can be provided as the schema of the returned :class:`DataFrame` and
         created external table.
         """
         if path is not None:
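
A hedged sketch (table name and path are hypothetical; unlike ``load``, the result is also registered as a table in the catalog):

    >>> df = sqlCtx.createExternalTable('people', path='/tmp/people.json', source='json')
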
@@ -476,7 +478,7 @@ def createExternalTable(self, tableName, path=None, source=None,
         return DataFrame(df, self)
 
     def sql(self, sqlQuery):
-        """Returns a L{DataFrame} representing the result of the given query.
+        """Returns a :class:`DataFrame` representing the result of the given query.
 
         >>> sqlCtx.registerDataFrameAsTable(df, "table1")
         >>> df2 = sqlCtx.sql("SELECT field1 AS f1, field2 as f2 from table1")
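
A hedged continuation of this doctest (the values assume the module's usual three-row ``df`` fixture, so the result is marked skip):

    >>> df2.collect()  # doctest: +SKIP
    [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]
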
@@ -486,7 +488,7 @@ def sql(self, sqlQuery):
         return DataFrame(self._ssql_ctx.sql(sqlQuery), self)
 
     def table(self, tableName):
-        """Returns the specified table as a L{DataFrame}.
+        """Returns the specified table as a :class:`DataFrame`.
 
         >>> sqlCtx.registerDataFrameAsTable(df, "table1")
         >>> df2 = sqlCtx.table("table1")
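
A natural completion of this doctest, sketched: the round trip through the catalog preserves the data.

    >>> sorted(df.collect()) == sorted(df2.collect())
    True
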
@@ -496,12 +498,12 @@ def table(self, tableName):
         return DataFrame(self._ssql_ctx.table(tableName), self)
 
     def tables(self, dbName=None):
-        """Returns a L{DataFrame} containing names of tables in the given database.
+        """Returns a :class:`DataFrame` containing names of tables in the given database.
 
         If ``dbName`` is not specified, the current database will be used.
 
         The returned DataFrame has two columns: ``tableName`` and ``isTemporary``
-        (a column with L{BooleanType} indicating if a table is a temporary one or not).
+        (a column with :class:`BooleanType` indicating if a table is a temporary one or not).
 
         >>> sqlCtx.registerDataFrameAsTable(df, "table1")
         >>> df2 = sqlCtx.tables()
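
To show the two-column shape described above, a sketch continuing the doctest (repr indicative):

    >>> df2.filter("tableName = 'table1'").first()
    Row(tableName=u'table1', isTemporary=True)
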
@@ -545,12 +547,12 @@ def clearCache(self):
 class HiveContext(SQLContext):
     """A variant of Spark SQL that integrates with data stored in Hive.
 
-    Configuration for Hive is read from hive-site.xml on the classpath.
+    Configuration for Hive is read from ``hive-site.xml`` on the classpath.
     It supports running both SQL and HiveQL commands.
 
     :param sparkContext: The SparkContext to wrap.
     :param hiveContext: An optional JVM Scala HiveContext. If set, we do not instantiate a new
-        L{HiveContext} in the JVM, instead we make all calls to this object.
+        :class:`HiveContext` in the JVM; instead, we make all calls to this object.
     """
 
     def __init__(self, sparkContext, hiveContext=None):
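
Construction mirrors ``SQLContext``; a sketch (assuming ``sc`` is a live ``SparkContext`` and the Hive classes are on the classpath, hence the skip directive):

    >>> from pyspark.sql import HiveContext
    >>> hc = HiveContext(sc)
    >>> hc.sql("SHOW TABLES").collect()  # doctest: +SKIP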