Skip to content

Commit 1001ebd

Browse files
itholicueshin
authored and committed
Implement DataFrame.pop (#791)
Resolves #788.

Like pandas.DataFrame.pop (https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.pop.html?highlight=pop#pandas.DataFrame.pop), we can now pop a column from a Koalas DataFrame, as shown below:

```python
>>> import databricks.koalas as ks
>>> import numpy as np
>>>
>>> df = ks.DataFrame([('falcon', 'bird', 389.0),
...                    ('parrot', 'bird', 24.0),
...                    ('lion', 'mammal', 80.5),
...                    ('monkey','mammal', np.nan)],
...                   columns=('name', 'class', 'max_speed'))
>>>
>>> df.pop('class')
0      bird
1      bird
2    mammal
3    mammal
Name: class, dtype: object
>>>
>>> df
     name  max_speed
0  falcon      389.0
1  parrot       24.0
2    lion       80.5
3  monkey        NaN
```

(The example is taken from the pandas.DataFrame.pop documentation.)
1 parent 1bf41b8 commit 1001ebd

File tree

2 files changed

+73
-1
lines changed

2 files changed

+73
-1
lines changed

databricks/koalas/frame.py

+73
Original file line numberDiff line numberDiff line change
@@ -1820,6 +1820,77 @@ def transform(self, func):
18201820

18211821
return DataFrame(internal)
18221822

1823+
def pop(self, item):
    """
    Return item and drop from frame. Raise KeyError if not found.

    The frame is changed in place: once the call returns, the popped
    column is no longer part of it.

    Parameters
    ----------
    item : str
        Label of column to be popped.

    Returns
    -------
    Series
        The popped column. As the MultiIndex example below shows, a label
        that selects several columns gives back a DataFrame instead.

    Examples
    --------
    >>> df = ks.DataFrame([('falcon', 'bird', 389.0),
    ...                    ('parrot', 'bird', 24.0),
    ...                    ('lion', 'mammal', 80.5),
    ...                    ('monkey', 'mammal', np.nan)],
    ...                   columns=('name', 'class', 'max_speed'))

    >>> df
         name   class  max_speed
    0  falcon    bird      389.0
    1  parrot    bird       24.0
    2    lion  mammal       80.5
    3  monkey  mammal        NaN

    >>> df.pop('class')
    0      bird
    1      bird
    2    mammal
    3    mammal
    Name: class, dtype: object

    >>> df
         name  max_speed
    0  falcon      389.0
    1  parrot       24.0
    2    lion       80.5
    3  monkey        NaN

    MultiIndex columns are supported as well.

    >>> df = ks.DataFrame([('falcon', 'bird', 389.0),
    ...                    ('parrot', 'bird', 24.0),
    ...                    ('lion', 'mammal', 80.5),
    ...                    ('monkey', 'mammal', np.nan)],
    ...                   columns=('name', 'class', 'max_speed'))
    >>> columns = [('a', 'name'), ('a', 'class'), ('b', 'max_speed')]
    >>> df.columns = pd.MultiIndex.from_tuples(columns)

    >>> df  # doctest: +NORMALIZE_WHITESPACE
            a                  b
         name   class  max_speed
    0  falcon    bird      389.0
    1  parrot    bird       24.0
    2    lion  mammal       80.5
    3  monkey  mammal        NaN

    >>> df.pop('a')
         name   class
    0  falcon    bird
    1  parrot    bird
    2    lion  mammal
    3  monkey  mammal

    >>> df  # doctest: +NORMALIZE_WHITESPACE
               b
       max_speed
    0      389.0
    1       24.0
    2       80.5
    3        NaN
    """
    # Select first: the lookup has to run while the frame still holds the
    # column that is about to be removed.
    popped = self[item]

    # Adopt the internal state of whatever ``drop`` returns, so that the
    # removal becomes visible on this very object.
    remaining = self.drop(item)
    self._internal = remaining._internal

    return popped
1893+
18231894
@property
18241895
def index(self):
18251896
"""The index (row labels) Column of the DataFrame.
@@ -4469,6 +4540,8 @@ def drop(self, labels=None, axis=1,
44694540
0 1 7
44704541
1 2 8
44714542
4543+
Also supports MultiIndex columns
4544+
44724545
>>> df = ks.DataFrame({'x': [1, 2], 'y': [3, 4], 'z': [5, 6], 'w': [7, 8]},
44734546
... columns=['x', 'y', 'z', 'w'])
44744547
>>> columns = [('a', 'x'), ('a', 'y'), ('b', 'z'), ('b', 'w')]

databricks/koalas/missing/frame.py

-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@ class _MissingPandasLikeDataFrame(object):
7575
mask = unsupported_function('mask')
7676
mode = unsupported_function('mode')
7777
pct_change = unsupported_function('pct_change')
78-
pop = unsupported_function('pop')
7978
prod = unsupported_function('prod')
8079
product = unsupported_function('product')
8180
quantile = unsupported_function('quantile')

0 commit comments

Comments
 (0)