Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions python/pyspark/sql/connect/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,10 @@ def createDataFrame(
# If no schema is supplied by the user, take only the column names from the data
if schema is None:
_cols = [str(x) if not isinstance(x, str) else x for x in data.columns]
elif isinstance(schema, (list, tuple)) and cast(int, _num_cols) < len(data.columns):
assert isinstance(_cols, list)
_cols.extend([f"_{i + 1}" for i in range(cast(int, _num_cols), len(data.columns))])
_num_cols = len(_cols)

# Determine arrow types to coerce data when creating batches
if isinstance(schema, StructType):
Expand Down Expand Up @@ -309,6 +313,9 @@ def createDataFrame(

_inferred_schema = self._inferSchemaFromList(_data, _cols)

if _cols is not None and cast(int, _num_cols) < len(_cols):
_num_cols = len(_cols)

if _has_nulltype(_inferred_schema):
# For cases like createDataFrame([("Alice", None, 80.1)], schema),
# we cannot infer the schema from the data itself.
Expand Down
5 changes: 0 additions & 5 deletions python/pyspark/sql/tests/connect/test_parity_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,6 @@ def test_infer_nested_schema(self):
# Parity test: adds no logic of its own and simply runs the inherited
# test_infer_schema by delegating to it through super().
def test_infer_schema(self):
super().test_infer_schema()

# TODO(SPARK-42022): createDataFrame should autogenerate missing column names
# Unconditionally skipped via unittest.skip with the reason string below.
# When the skip is removed, the body only delegates to the inherited test of
# the same name through super().
@unittest.skip("Fails in Spark Connect, should enable.")
def test_infer_schema_not_enough_names(self):
super().test_infer_schema_not_enough_names()

# TODO(SPARK-42020): createDataFrame with UDT
@unittest.skip("Fails in Spark Connect, should enable.")
def test_infer_schema_specification(self):
Expand Down