Skip to content

Commit d9a4439

Browse files
author
Olivier Girardot
committed
SPARK-7118 Add the coalesce Spark SQL function available in PySpark
No changes to the scala/java part, only changes in Python.
1 parent 4f87e95 commit d9a4439

File tree

1 file changed

+37
-0
lines changed

python/pyspark/sql/functions.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
'rand',
3838
'randn',
3939
'sparkPartitionId',
40+
'coalesce',
4041
'udf']
4142

4243

@@ -167,6 +168,42 @@ def approxCountDistinct(col, rsd=None):
167168
return Column(jc)
168169

169170

171+
def coalesce(*cols):
    """Returns the first column that is not null.

    >>> cDf = sqlContext.createDataFrame([(None, None), (1, None), (None, 2)], ("a", "b"))
    >>> cDf.show()
    +----+----+
    |   a|   b|
    +----+----+
    |null|null|
    |   1|null|
    |null|   2|
    +----+----+

    >>> cDf.select(coalesce(cDf["a"], cDf["b"])).show()
    +-------------+
    |Coalesce(a,b)|
    +-------------+
    |         null|
    |            1|
    |            2|
    +-------------+

    >>> cDf.select('*', coalesce(cDf["a"], lit(0.0))).show()
    +----+----+---------------+
    |   a|   b|Coalesce(a,0.0)|
    +----+----+---------------+
    |null|null|            0.0|
    |   1|null|            1.0|
    |null|   2|            0.0|
    +----+----+---------------+
    """
    # Bridge the Python columns to the JVM and delegate to the Scala
    # functions.coalesce implementation via the active gateway.
    ctx = SparkContext._active_spark_context
    java_cols = _to_seq(ctx, cols, _to_java_column)
    return Column(ctx._jvm.functions.coalesce(java_cols))
205+
206+
170207
def countDistinct(col, *cols):
171208
"""Returns a new :class:`Column` for distinct count of ``col`` or ``cols``.
172209

0 commit comments

Comments
 (0)