Spark 4.0 integration #12494
Changes from all commits
MetadataColumns.java:

@@ -51,7 +51,7 @@ private MetadataColumns() {}
   public static final int SPEC_ID_COLUMN_ID = Integer.MAX_VALUE - 4;
   public static final String SPEC_ID_COLUMN_DOC = "Spec ID used to track the file containing a row";
   public static final NestedField SPEC_ID =
-      NestedField.required(
+      NestedField.optional(
           SPEC_ID_COLUMN_ID, "_spec_id", Types.IntegerType.get(), SPEC_ID_COLUMN_DOC);
   // the partition column type is not static and depends on all specs in the table
   public static final int PARTITION_COLUMN_ID = Integer.MAX_VALUE - 5;

Contributor (Author): In Spark 4.0, the metadata columns are nullable, so this field needs to change to optional.
Gradle version catalog:

@@ -24,6 +24,7 @@ activation = "1.1.1"
 aliyun-sdk-oss = "3.10.2"
 analyticsaccelerator = "1.0.0"
 antlr = "4.9.3"
+antlr413 = "4.13.1" # For Spark 4.0 support
 aircompressor = "0.27"
 apiguardian = "1.1.2"
 arrow = "15.0.2"

Contributor (Author): To be consistent with the ANTLR version in Spark 4.0.
@@ -36,6 +37,7 @@ awssdk-s3accessgrants = "2.3.0"
 bson-ver = "4.11.5"
 caffeine = "2.9.3"
 calcite = "1.39.0"
+comet = "0.8.1"
 datasketches = "6.2.0"
 delta-standalone = "3.3.1"
 delta-spark = "3.3.1"
@@ -81,6 +83,7 @@ slf4j = "2.0.17"
 snowflake-jdbc = "3.24.0"
 spark34 = "3.4.4"
 spark35 = "3.5.5"
+spark40 = "4.0.0"
 sqlite-jdbc = "3.49.1.0"
 testcontainers = "1.21.0"
 tez08 = { strictly = "0.8.4"} # see rich version usage explanation above
@@ -92,6 +95,8 @@ aliyun-sdk-oss = { module = "com.aliyun.oss:aliyun-sdk-oss", version.ref = "aliy
 analyticsaccelerator-s3 = { module = "software.amazon.s3.analyticsaccelerator:analyticsaccelerator-s3", version.ref = "analyticsaccelerator" }
 antlr-antlr4 = { module = "org.antlr:antlr4", version.ref = "antlr" }
 antlr-runtime = { module = "org.antlr:antlr4-runtime", version.ref = "antlr" }
+antlr-antlr413 = { module = "org.antlr:antlr4", version.ref = "antlr413" }
+antlr-runtime413 = { module = "org.antlr:antlr4-runtime", version.ref = "antlr413" }
 arrow-memory-netty = { module = "org.apache.arrow:arrow-memory-netty", version.ref = "arrow" }
 arrow-vector = { module = "org.apache.arrow:arrow-vector", version.ref = "arrow" }
 avro-avro = { module = "org.apache.avro:avro", version.ref = "avro" }

(RussellSpitzer marked this conversation as resolved.)
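As a side note, here is a hypothetical Gradle (Groovy) sketch, not taken from this PR, of how the new antlr413 and spark40 catalog entries might be wired into a Spark 4.0 module's build script; the configuration names and Spark artifact coordinate below are assumptions, not the PR's actual build file:

// Hypothetical dependencies block for a Spark 4.0 SQL extensions module.
// Assumes the Gradle ANTLR plugin is applied for grammar generation, as for Spark 3.x.
dependencies {
  // grammar compiler pinned to the same ANTLR version that Spark 4.0 ships with
  antlr libs.antlr.antlr413
  implementation libs.antlr.runtime413

  // Spark 4.0 is Scala 2.13 only, so the artifact suffix is fixed to _2.13
  compileOnly "org.apache.spark:spark-sql_2.13:${libs.versions.spark40.get()}"
}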
Gradle settings:

@@ -161,6 +161,18 @@ if (sparkVersions.contains("3.5")) {
   project(":iceberg-spark:spark-runtime-3.5_${scalaVersion}").name = "iceberg-spark-runtime-3.5_${scalaVersion}"
 }

+if (sparkVersions.contains("4.0")) {
+  include ":iceberg-spark:spark-4.0_2.13"
+  include ":iceberg-spark:spark-extensions-4.0_2.13"
+  include ":iceberg-spark:spark-runtime-4.0_2.13"
+  project(":iceberg-spark:spark-4.0_2.13").projectDir = file('spark/v4.0/spark')
+  project(":iceberg-spark:spark-4.0_2.13").name = "iceberg-spark-4.0_2.13"
+  project(":iceberg-spark:spark-extensions-4.0_2.13").projectDir = file('spark/v4.0/spark-extensions')
+  project(":iceberg-spark:spark-extensions-4.0_2.13").name = "iceberg-spark-extensions-4.0_2.13"
+  project(":iceberg-spark:spark-runtime-4.0_2.13").projectDir = file('spark/v4.0/spark-runtime')
+  project(":iceberg-spark:spark-runtime-4.0_2.13").name = "iceberg-spark-runtime-4.0_2.13"
+}
+
 if (kafkaVersions.contains("3")) {
   include 'kafka-connect'
   project(':kafka-connect').name = 'iceberg-kafka-connect'

Member: Does this theoretically create the Spark 4.0 projects without error if Scala 2.12 is set? It feels like we should still use scalaVersion here but have an assert > 2.12 or something.

Contributor (Author): The problem is that Flink doesn't work with Scala 2.13, so I use 2.13 to build the Spark modules only. I think we can't assert > 2.12 because we still need 2.12 to build Flink.

Member: Doesn't that mean that when Scala is set to 2.12 we just should not allow building Spark 4.0?
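To make the trade-off in the thread above concrete, here is a hypothetical Groovy sketch (not part of this PR) of the guard the reviewers suggest: keep the Spark 4.0 include block, but fail fast when the build-wide scalaVersion property is 2.12, since the Spark 4.0 modules are Scala 2.13 only while Flink still needs 2.12. The error message and property name are assumptions based on the existing settings script:

if (sparkVersions.contains("4.0")) {
  // Spark 4.0 modules are hard-coded to Scala 2.13; refuse a 2.12 build up front
  // rather than silently producing mismatched artifacts.
  if (scalaVersion == "2.12") {
    throw new GradleException(
        "Spark 4.0 requires Scala 2.13; drop 4.0 from sparkVersions or build the Spark modules with scalaVersion=2.13")
  }

  include ":iceberg-spark:spark-4.0_2.13"
  // ... remaining include/projectDir/name statements as in the change above
}

Whether to fail the build or simply skip the Spark 4.0 projects when Scala 2.12 is selected is exactly the open question in the thread.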