-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-45720] Upgrade AWS SDK to v2 for Spark Kinesis connector module #44211
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3246036
8940369
bc98524
f5306ed
acfbc87
ec5f749
2ae2a28
0bb8140
28c25ed
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -54,6 +54,17 @@ | |
| <artifactId>jackson-databind</artifactId> | ||
| <scope>provided</scope> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>com.google.guava</groupId> | ||
| <artifactId>guava</artifactId> | ||
| <version>${connect.guava.version}</version> | ||
| <scope>compile</scope> | ||
|
||
| </dependency> | ||
| <dependency> | ||
| <groupId>com.google.protobuf</groupId> | ||
| <artifactId>protobuf-java</artifactId> | ||
| <scope>compile</scope> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>org.glassfish.jersey.core</groupId> | ||
| <artifactId>jersey-client</artifactId> | ||
|
|
@@ -158,6 +169,26 @@ | |
| <include>*:*</include> | ||
| </includes> | ||
| </artifactSet> | ||
| <relocations> | ||
| <relocation> | ||
| <pattern>com.google.common</pattern> | ||
| <shadedPattern>${spark.shade.packageName}.kinesis.guava</shadedPattern> | ||
| <includes> | ||
| <include>com.google.common.**</include> | ||
| </includes> | ||
| </relocation> | ||
| <relocation> | ||
| <pattern>com.google.protobuf</pattern> | ||
| <shadedPattern>${spark.shade.packageName}.kinesis.protobuf</shadedPattern> | ||
| <includes> | ||
| <include>com.google.protobuf.**</include> | ||
| </includes> | ||
| </relocation> | ||
| <relocation> | ||
| <pattern>software.amazon.awssdk</pattern> | ||
| <shadedPattern>${spark.shade.packageName}.software.amazon.awssdk</shadedPattern> | ||
| </relocation> | ||
| </relocations> | ||
| <filters> | ||
| <filter> | ||
| <artifact>*:*</artifact> | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -54,14 +54,38 @@ | |
| <scope>test</scope> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>com.amazonaws</groupId> | ||
| <groupId>software.amazon.kinesis</groupId> | ||
| <artifactId>amazon-kinesis-client</artifactId> | ||
|
||
| <version>${aws.kinesis.client.version}</version> | ||
| <exclusions> | ||
| <exclusion> | ||
| <groupId>com.github.luben</groupId> | ||
| <artifactId>zstd-jni</artifactId> | ||
| </exclusion> | ||
| <exclusion> | ||
| <groupId>com.kjetland</groupId> | ||
| <artifactId>mbknor-jackson-jsonschema_2.12</artifactId> | ||
| </exclusion> | ||
| </exclusions> | ||
| </dependency> | ||
| <!-- The Kinesis-client-library depends on glue-schema-registry which depends on | ||
| mbknor-jackson-jsonschema_2.12. As Spark 4.0 dropped support for Scala 2.12, we have to | ||
| explicitly import mbknor-jackson-jsonschema_2.13 here until glue-schema-registry is updated | ||
| to depend on mbknor-jackson-jsonschema_2.13 --> | ||
| <dependency> | ||
| <groupId>com.amazonaws</groupId> | ||
| <artifactId>aws-java-sdk-sts</artifactId> | ||
| <version>${aws.java.sdk.version}</version> | ||
| <groupId>com.kjetland</groupId> | ||
| <artifactId>mbknor-jackson-jsonschema_2.13</artifactId> | ||
| <version>${mbknor.jsonschema.version}</version> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>software.amazon.awssdk</groupId> | ||
| <artifactId>sts</artifactId> | ||
| <version>${aws.java.sdk.v2.version}</version> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>software.amazon.awssdk</groupId> | ||
| <artifactId>apache-client</artifactId> | ||
| <version>${aws.java.sdk.v2.version}</version> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>com.amazonaws</groupId> | ||
|
|
@@ -76,6 +100,12 @@ | |
| <artifactId>jackson-dataformat-cbor</artifactId> | ||
| <version>${fasterxml.jackson.version}</version> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>javax.xml.bind</groupId> | ||
| <artifactId>jaxb-api</artifactId> | ||
| <version>${jaxb.version}</version> | ||
| <scope>test</scope> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>org.mockito</groupId> | ||
| <artifactId>mockito-core</artifactId> | ||
|
|
@@ -90,6 +120,11 @@ | |
| <groupId>org.apache.spark</groupId> | ||
| <artifactId>spark-tags_${scala.binary.version}</artifactId> | ||
| </dependency> | ||
| <dependency> | ||
| <groupId>com.google.guava</groupId> | ||
| <artifactId>guava</artifactId> | ||
| <version>${connect.guava.version}</version> | ||
| </dependency> | ||
|
|
||
| <!-- | ||
| This spark-tags test-dep is needed even though it isn't used in this module, otherwise testing-cmds that exclude | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,16 +19,16 @@ package org.apache.spark.examples.streaming | |
|
|
||
| import scala.jdk.CollectionConverters._ | ||
|
|
||
| import com.amazonaws.regions.RegionUtils | ||
| import com.amazonaws.services.kinesis.AmazonKinesis | ||
| import software.amazon.awssdk.regions.servicemetadata.KinesisServiceMetadata | ||
|
|
||
| private[streaming] object KinesisExampleUtils { | ||
| def getRegionNameByEndpoint(endpoint: String): String = { | ||
| val uri = new java.net.URI(endpoint) | ||
| RegionUtils.getRegionsForService(AmazonKinesis.ENDPOINT_PREFIX) | ||
| val kinesisServiceMetadata = new KinesisServiceMetadata() | ||
| kinesisServiceMetadata.regions | ||
| .asScala | ||
| .find(_.getAvailableEndpoints.asScala.toSeq.contains(uri.getHost)) | ||
| .map(_.getName) | ||
| .find(r => kinesisServiceMetadata.endpointFor(r).toString.equals(uri.getHost)) | ||
|
||
| .map(_.id) | ||
| .getOrElse( | ||
| throw new IllegalArgumentException(s"Could not resolve region for endpoint: $endpoint")) | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Uh, I'm not sure whether this is okay or not, @junyuc25.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you elaborate a bit more on why this is not OK? I think this pattern is also seen in other modules like https://github.com/apache/spark/blob/master/connector/connect/server/pom.xml#L159-L164
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
cc @dongjoon-hyun
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you share more about your concern here @dongjoon-hyun ? Thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry for being late.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.