Skip to content

Commit ff26767

Browse files
committed
[SPARK-8146] DataFrame Python API: Alias replace in df.na
Author: Reynold Xin <[email protected]>

Closes #6688 from rxin/df-alias-replace and squashes the following commits:

774c19c [Reynold Xin] [SPARK-8146] DataFrame Python API: Alias replace in DataFrameNaFunctions.

(cherry picked from commit 0ac4708)
Signed-off-by: Reynold Xin <[email protected]>
1 parent b4d5441 commit ff26767

File tree

2 files changed

+22
-26
lines changed

2 files changed

+22
-26
lines changed

python/pyspark/sql/dataframe.py

Lines changed: 22 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -909,8 +909,7 @@ def dropDuplicates(self, subset=None):
909909
@since("1.3.1")
910910
def dropna(self, how='any', thresh=None, subset=None):
911911
"""Returns a new :class:`DataFrame` omitting rows with null values.
912-
913-
This is an alias for ``na.drop()``.
912+
:func:`DataFrame.dropna` and :func:`DataFrameNaFunctions.drop` are aliases of each other.
914913
915914
:param how: 'any' or 'all'.
916915
If 'any', drop a row if it contains any nulls.
@@ -920,13 +919,6 @@ def dropna(self, how='any', thresh=None, subset=None):
920919
This overwrites the `how` parameter.
921920
:param subset: optional list of column names to consider.
922921
923-
>>> df4.dropna().show()
924-
+---+------+-----+
925-
|age|height| name|
926-
+---+------+-----+
927-
| 10| 80|Alice|
928-
+---+------+-----+
929-
930922
>>> df4.na.drop().show()
931923
+---+------+-----+
932924
|age|height| name|
@@ -952,6 +944,7 @@ def dropna(self, how='any', thresh=None, subset=None):
952944
@since("1.3.1")
953945
def fillna(self, value, subset=None):
954946
"""Replace null values, alias for ``na.fill()``.
947+
:func:`DataFrame.fillna` and :func:`DataFrameNaFunctions.fill` are aliases of each other.
955948
956949
:param value: int, long, float, string, or dict.
957950
Value to replace null values with.
@@ -963,7 +956,7 @@ def fillna(self, value, subset=None):
963956
For example, if `value` is a string, and subset contains a non-string column,
964957
then the non-string column is simply ignored.
965958
966-
>>> df4.fillna(50).show()
959+
>>> df4.na.fill(50).show()
967960
+---+------+-----+
968961
|age|height| name|
969962
+---+------+-----+
@@ -973,16 +966,6 @@ def fillna(self, value, subset=None):
973966
| 50| 50| null|
974967
+---+------+-----+
975968
976-
>>> df4.fillna({'age': 50, 'name': 'unknown'}).show()
977-
+---+------+-------+
978-
|age|height| name|
979-
+---+------+-------+
980-
| 10| 80| Alice|
981-
| 5| null| Bob|
982-
| 50| null| Tom|
983-
| 50| null|unknown|
984-
+---+------+-------+
985-
986969
>>> df4.na.fill({'age': 50, 'name': 'unknown'}).show()
987970
+---+------+-------+
988971
|age|height| name|
@@ -1014,6 +997,8 @@ def fillna(self, value, subset=None):
1014997
@since(1.4)
1015998
def replace(self, to_replace, value, subset=None):
1016999
"""Returns a new :class:`DataFrame` replacing a value with another value.
1000+
:func:`DataFrame.replace` and :func:`DataFrameNaFunctions.replace` are
1001+
aliases of each other.
10171002
10181003
:param to_replace: int, long, float, string, or list.
10191004
Value to be replaced.
@@ -1029,7 +1014,7 @@ def replace(self, to_replace, value, subset=None):
10291014
For example, if `value` is a string, and subset contains a non-string column,
10301015
then the non-string column is simply ignored.
10311016
1032-
>>> df4.replace(10, 20).show()
1017+
>>> df4.na.replace(10, 20).show()
10331018
+----+------+-----+
10341019
| age|height| name|
10351020
+----+------+-----+
@@ -1039,7 +1024,7 @@ def replace(self, to_replace, value, subset=None):
10391024
|null| null| null|
10401025
+----+------+-----+
10411026
1042-
>>> df4.replace(['Alice', 'Bob'], ['A', 'B'], 'name').show()
1027+
>>> df4.na.replace(['Alice', 'Bob'], ['A', 'B'], 'name').show()
10431028
+----+------+----+
10441029
| age|height|name|
10451030
+----+------+----+
@@ -1090,9 +1075,9 @@ def replace(self, to_replace, value, subset=None):
10901075
@since(1.4)
10911076
def corr(self, col1, col2, method=None):
10921077
"""
1093-
Calculates the correlation of two columns of a DataFrame as a double value. Currently only
1094-
supports the Pearson Correlation Coefficient.
1095-
:func:`DataFrame.corr` and :func:`DataFrameStatFunctions.corr` are aliases.
1078+
Calculates the correlation of two columns of a DataFrame as a double value.
1079+
Currently only supports the Pearson Correlation Coefficient.
1080+
:func:`DataFrame.corr` and :func:`DataFrameStatFunctions.corr` are aliases of each other.
10961081
10971082
:param col1: The name of the first column
10981083
:param col2: The name of the second column
@@ -1241,7 +1226,10 @@ def toPandas(self):
12411226
import pandas as pd
12421227
return pd.DataFrame.from_records(self.collect(), columns=self.columns)
12431228

1229+
##########################################################################################
12441230
# Pandas compatibility
1231+
##########################################################################################
1232+
12451233
groupby = groupBy
12461234
drop_duplicates = dropDuplicates
12471235

@@ -1261,6 +1249,8 @@ def _to_scala_map(sc, jm):
12611249

12621250
class DataFrameNaFunctions(object):
12631251
"""Functionality for working with missing data in :class:`DataFrame`.
1252+
1253+
.. versionadded:: 1.4
12641254
"""
12651255

12661256
def __init__(self, df):
@@ -1276,9 +1266,16 @@ def fill(self, value, subset=None):
12761266

12771267
fill.__doc__ = DataFrame.fillna.__doc__
12781268

1269+
def replace(self, to_replace, value, subset=None):
1270+
return self.df.replace(to_replace, value, subset)
1271+
1272+
replace.__doc__ = DataFrame.replace.__doc__
1273+
12791274

12801275
class DataFrameStatFunctions(object):
12811276
"""Functionality for statistic functions with :class:`DataFrame`.
1277+
1278+
.. versionadded:: 1.4
12821279
"""
12831280

12841281
def __init__(self, df):

python/pyspark/sql/window.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,6 @@ def _to_java_cols(cols):
3232

3333

3434
class Window(object):
35-
3635
"""
3736
Utility functions for defining window in DataFrames.
3837

0 commit comments

Comments
 (0)