Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 25 additions & 14 deletions python/pyspark/sql/connect/proto/expressions_pb2.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -479,23 +479,27 @@ class Expression(google.protobuf.message.Message):
def element_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType:
"""(Deprecated) The element type of the array.

This field is deprecated since Spark 4.1+ and should only be set
if the data_type field is not set. Use data_type field instead.
This field is deprecated since Spark 4.1+. Use data_type field instead.
"""
@property
def elements(
self,
) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
global___Expression.Literal
]:
"""The literal values that make up the array elements."""
"""The literal values that make up the array elements.

For inferring the data_type.element_type, only the first element needs to
contain the type information.
"""
@property
def data_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType.Array:
"""The type of the array.
"""The type of the array. You don't need to set this field if the type information is not needed.

If the element type can be inferred from the first element of the elements field,
then you don't need to set data_type.element_type to save space. On the other hand,
redundant type information is also acceptable.
then data_type.element_type can be omitted to save space.

On the other hand, redundant type information is also acceptable.
"""
def __init__(
self,
Expand Down Expand Up @@ -534,8 +538,7 @@ class Expression(google.protobuf.message.Message):
def key_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType:
"""(Deprecated) The key type of the map.

This field is deprecated since Spark 4.1+ and should only be set
if the data_type field is not set. Use data_type field instead.
This field is deprecated since Spark 4.1+. Use data_type field instead.
"""
@property
def value_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType:
Expand All @@ -550,20 +553,29 @@ class Expression(google.protobuf.message.Message):
) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
global___Expression.Literal
]:
"""The literal keys that make up the map."""
"""The literal keys that make up the map.

For inferring the data_type.key_type, only the first key needs to
contain the type information.
"""
@property
def values(
self,
) -> google.protobuf.internal.containers.RepeatedCompositeFieldContainer[
global___Expression.Literal
]:
"""The literal values that make up the map."""
"""The literal values that make up the map.

For inferring the data_type.value_type, only the first value needs to
contain the type information.
"""
@property
def data_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType.Map:
"""The type of the map.
"""The type of the map. You don't need to set this field if the type information is not needed.

If the key/value types can be inferred from the first element of the keys/values fields,
then data_type.key_type/data_type.value_type can be omitted to save space.

On the other hand, redundant type information is also acceptable.
"""
def __init__(
Expand Down Expand Up @@ -608,8 +620,7 @@ class Expression(google.protobuf.message.Message):
"""(Deprecated) The type of the struct.

This field is deprecated since Spark 4.1+ because using DataType as the type of a struct
is ambiguous. This field should only be set if the data_type_struct field is not set.
Use data_type_struct field instead.
is ambiguous. Use data_type_struct field instead.
"""
@property
def elements(
Expand All @@ -620,7 +631,7 @@ class Expression(google.protobuf.message.Message):
"""(Required) The literal values that make up the struct elements."""
@property
def data_type_struct(self) -> pyspark.sql.connect.proto.types_pb2.DataType.Struct:
"""The type of the struct.
"""The type of the struct. You don't need to set this field if the type information is not needed.

Whether data_type_struct.fields.data_type should be set depends on
whether each field's type can be inferred from the elements field.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3419,6 +3419,11 @@ class PlanGenerationTestSuite
mutable.LinkedHashMap("a" -> 1, "b" -> 2),
mutable.LinkedHashMap("a" -> 3, "b" -> 4),
mutable.LinkedHashMap("a" -> 5, "b" -> 6))),
fn.typedLit(
Seq(
mutable.LinkedHashMap("a" -> Seq("1", "2"), "b" -> Seq("3", "4")),
mutable.LinkedHashMap("a" -> Seq("5", "6"), "b" -> Seq("7", "8")),
mutable.LinkedHashMap("a" -> Seq.empty[String], "b" -> Seq.empty[String]))),
fn.typedLit(
mutable.LinkedHashMap(
1 -> mutable.LinkedHashMap("a" -> 1, "b" -> 2),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,26 +217,28 @@ message Expression {
message Array {
// (Deprecated) The element type of the array.
//
// This field is deprecated since Spark 4.1+ and should only be set
// if the data_type field is not set. Use data_type field instead.
// This field is deprecated since Spark 4.1+. Use data_type field instead.
DataType element_type = 1 [deprecated = true];

// The literal values that make up the array elements.
//
// For inferring the data_type.element_type, only the first element needs to
// contain the type information.
repeated Literal elements = 2;

// The type of the array.
// The type of the array. You don't need to set this field if the type information is not needed.
//
// If the element type can be inferred from the first element of the elements field,
// then you don't need to set data_type.element_type to save space. On the other hand,
// redundant type information is also acceptable.
// then data_type.element_type can be omitted to save space.
//
// On the other hand, redundant type information is also acceptable.
DataType.Array data_type = 3;
}

message Map {
// (Deprecated) The key type of the map.
//
// This field is deprecated since Spark 4.1+ and should only be set
// if the data_type field is not set. Use data_type field instead.
// This field is deprecated since Spark 4.1+. Use data_type field instead.
DataType key_type = 1 [deprecated = true];

// (Deprecated) The value type of the map.
Expand All @@ -246,15 +248,22 @@ message Expression {
DataType value_type = 2 [deprecated = true];

// The literal keys that make up the map.
//
// For inferring the data_type.key_type, only the first key needs to
// contain the type information.
repeated Literal keys = 3;

// The literal values that make up the map.
//
// For inferring the data_type.value_type, only the first value needs to
// contain the type information.
repeated Literal values = 4;

// The type of the map.
// The type of the map. You don't need to set this field if the type information is not needed.
//
// If the key/value types can be inferred from the first element of the keys/values fields,
// then data_type.key_type/data_type.value_type can be omitted to save space.
//
// On the other hand, redundant type information is also acceptable.
DataType.Map data_type = 5;
}
Expand All @@ -263,14 +272,13 @@ message Expression {
// (Deprecated) The type of the struct.
//
// This field is deprecated since Spark 4.1+ because using DataType as the type of a struct
// is ambiguous. This field should only be set if the data_type_struct field is not set.
// Use data_type_struct field instead.
// is ambiguous. Use data_type_struct field instead.
DataType struct_type = 1 [deprecated = true];

// (Required) The literal values that make up the struct elements.
repeated Literal elements = 2;

// The type of the struct.
// The type of the struct. You don't need to set this field if the type information is not needed.
//
// Whether data_type_struct.fields.data_type should be set depends on
// whether each field's type can be inferred from the elements field.
Expand Down
Loading