Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow multi-index column names for inferring return type schema with names. #2117

Merged
merged 1 commit into from
Mar 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions databricks/koalas/tests/test_typedef.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,16 @@ def func() -> pd.DataFrame[zip(pdf.columns, pdf.dtypes)]:
expected = StructType([StructField("a", LongType()), StructField("b", LongType())])
self.assertEqual(infer_return_type(func).tpe, expected)

pdf = pd.DataFrame({("x", "a"): [1, 2, 3], ("y", "b"): [3, 4, 5]})

def func() -> pd.DataFrame[zip(pdf.columns, pdf.dtypes)]:
pass

expected = StructType(
[StructField("(x, a)", LongType()), StructField("(y, b)", LongType())]
)
self.assertEqual(infer_return_type(func).tpe, expected)

@unittest.skipIf(
sys.version_info < (3, 7),
"Type inference from pandas instances is supported with Python 3.7+",
Expand Down
10 changes: 9 additions & 1 deletion databricks/koalas/typedef/typehints.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,16 @@ def __repr__(self):

class DataFrameType(object):
def __init__(self, tpe, names=None):
from databricks.koalas.utils import name_like_string

if names is None:
# Default names `c0, c1, ... cn`.
self.tpe = types.StructType(
[types.StructField("c%s" % i, tpe[i]) for i in range(len(tpe))]
) # type: types.StructType
else:
self.tpe = types.StructType(
- [types.StructField(n, t) for n, t in zip(names, tpe)]
+ [types.StructField(name_like_string(n), t) for n, t in zip(names, tpe)]
) # type: types.StructType

def __repr__(self):
Expand Down Expand Up @@ -338,6 +340,12 @@ def infer_return_type(f) -> typing.Union[SeriesType, DataFrameType, ScalarType,
... pass
>>> infer_return_type(func).tpe
StructType(List(StructField(a,LongType,true),StructField(b,LongType,true)))

>>> pdf = pd.DataFrame({("x", "a"): [1, 2, 3], ("y", "b"): [3, 4, 5]})
>>> def func() -> ks.DataFrame[zip(pdf.columns, pdf.dtypes)]:
... pass
>>> infer_return_type(func).tpe
StructType(List(StructField((x, a),LongType,true),StructField((y, b),LongType,true)))
"""
# We should re-import to make sure the class 'SeriesType' is not treated as a class
# within this module locally. See Series.__class_getitem__ which imports this class
Expand Down