Skip to content

Commit

Permalink
Switch to iceberg cloud bundle dependencies (#288)
Browse files Browse the repository at this point in the history
* Use cloud bundle dependencies

* Use cloud bundle dependencies

* Use cloud bundle dependencies
  • Loading branch information
ismailsimsek committed Mar 3, 2024
1 parent dca5a2d commit 0d5e9ce
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 93 deletions.
52 changes: 14 additions & 38 deletions debezium-server-iceberg-sink/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,21 @@
<artifactId>iceberg-gcp</artifactId>
<version>${version.iceberg}</version>
</dependency>
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-gcp-bundle</artifactId>
<version>${version.iceberg}</version>
</dependency>
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-aws</artifactId>
<version>${version.iceberg}</version>
</dependency>
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-aws-bundle</artifactId>
<version>${version.iceberg}</version>
</dependency>
<dependency>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-dell</artifactId>
Expand All @@ -113,15 +123,12 @@
<artifactId>iceberg-azure</artifactId>
<version>${version.iceberg}</version>
</dependency>
<!-- Google -->
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-storage</artifactId>
</dependency>
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>google-cloud-nio</artifactId>
<groupId>org.apache.iceberg</groupId>
<artifactId>iceberg-azure-bundle</artifactId>
<version>${version.iceberg}</version>
</dependency>
<!-- Google -->
<dependency>
<groupId>com.google.cloud.bigdataoss</groupId>
<artifactId>gcs-connector</artifactId>
Expand All @@ -132,37 +139,6 @@
<artifactId>gcsio</artifactId>
<version>${version.googlebigdataoss}</version>
</dependency>
<!-- AWS -->
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>core</artifactId>
<version>${version.awssdk}</version>
<type>pom</type>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>glue</artifactId>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>s3</artifactId>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>dynamodb</artifactId>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>kms</artifactId>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>sts</artifactId>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>url-connection-client</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-aws</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,35 +47,18 @@ static void setup() {
.set("spark.eventLog.enabled", "false")
// enable iceberg SQL Extensions and Catalog
.set("spark.sql.extensions", "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions")
.set("spark.sql.warehouse.dir", S3_BUCKET)
// hadoop catalog
.set("spark.sql.catalog.spark_catalog", "org.apache.iceberg.spark.SparkSessionCatalog")
.set("spark.sql.catalog.spark_catalog.type", "hadoop")
.set("spark.sql.catalog.spark_catalog.warehouse", S3_BUCKET)
.set("spark.sql.catalog.spark_catalog.default-namespaces", CATALOG_TABLE_NAMESPACE)
.set("spark.sql.catalog.spark_catalog.io-impl", "org.apache.iceberg.aws.s3.S3FileIO")
.set("spark.sql.warehouse.dir", S3_BUCKET)
.set("spark.sql.catalog.spark_catalog.s3.endpoint", "http://localhost:" + S3Minio.getMappedPort().toString())
.set("spark.sql.catalog.spark_catalog.s3.path-style-access", "true")
.set("spark.sql.catalog.spark_catalog.s3.access-key-id", S3Minio.MINIO_ACCESS_KEY)
.set("spark.sql.catalog.spark_catalog.s3.secret-access-key", S3Minio.MINIO_SECRET_KEY)
.set("spark.sql.catalog.spark_catalog.client.region", TestConfigSource.S3_REGION)
.set("spark.sql.catalog.spark_catalog.io-impl", "org.apache.iceberg.aws.s3.S3FileIO")
.set("spark.sql.catalog.spark_catalog.warehouse", S3_BUCKET)
// // JdbcCatalog catalog, add additional catalog
// .set("spark.sql.defaultCatalog", ICEBERG_CATALOG_NAME)
// .set("spark.sql.catalog." + ICEBERG_CATALOG_NAME, "org.apache.iceberg.spark.SparkCatalog")
// .set("spark.sql.catalog." + ICEBERG_CATALOG_NAME + ".warehouse", S3_BUCKET)
// .set("spark.sql.catalog." + ICEBERG_CATALOG_NAME + ".cache-enabled", "false")
// .set("spark.sql.catalog." + ICEBERG_CATALOG_NAME + ".catalog-impl", JdbcCatalog.class.getName())
// .set("spark.sql.catalog." + ICEBERG_CATALOG_NAME + ".default-namespaces", CATALOG_TABLE_NAMESPACE)
// .set("spark.sql.catalog." + ICEBERG_CATALOG_NAME + ".uri", JdbcCatalogDB.container.getJdbcUrl())
// .set("spark.sql.catalog." + ICEBERG_CATALOG_NAME + ".jdbc.user", JdbcCatalogDB.container.getUsername())
// .set("spark.sql.catalog." + ICEBERG_CATALOG_NAME + ".jdbc.password", JdbcCatalogDB.container.getPassword())
// .set("spark.sql.catalog." + ICEBERG_CATALOG_NAME + ".io-impl", "org.apache.iceberg.aws.s3.S3FileIO")
// .set("spark.sql.catalog." + ICEBERG_CATALOG_NAME + ".s3.endpoint", "http://localhost:" + S3Minio.getMappedPort().toString())
// .set("spark.sql.catalog." + ICEBERG_CATALOG_NAME + ".s3.path-style-access", "true")
// .set("spark.sql.catalog." + ICEBERG_CATALOG_NAME + ".s3.access-key-id", S3Minio.MINIO_ACCESS_KEY)
// .set("spark.sql.catalog." + ICEBERG_CATALOG_NAME + ".s3.secret-access-key", S3Minio.MINIO_SECRET_KEY)
;

BaseSparkTest.spark = SparkSession
Expand Down
40 changes: 3 additions & 37 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,6 @@
<version.spark>${version.spark.major}.1</version.spark>
<version.hadoop>3.3.6</version.hadoop>
<version.hive>3.1.3</version.hive>
<!-- Use same version as iceberg https://github.com/apache/iceberg/blob/main/gradle/libs.versions.toml#L31-->
<version.awssdk>2.24.5</version.awssdk>
<!-- Use same version as iceberg https://github.com/apache/iceberg/blob/main/gradle/libs.versions.toml#L44-->
<version.googlelibraries>26.28.0</version.googlelibraries>
<version.googlebigdataoss>2.2.20</version.googlebigdataoss>
<version.testcontainers>1.19.6</version.testcontainers>
<!-- Debezium -->
Expand Down Expand Up @@ -74,29 +70,13 @@
<artifactId>slf4j-jboss-logmanager</artifactId>
<version>[1.2.0.Final,)</version>
</dependency>
<!-- aws -->
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>bom</artifactId>
<version>${version.awssdk}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-bom</artifactId>
<version>${version.quarkus}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
<!-- gcp -->
<dependency>
<groupId>com.google.cloud</groupId>
<artifactId>libraries-bom</artifactId>
<version>${version.googlelibraries}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
<!-- MySQL JDBC Driver, Binlog reader, Geometry support -->
<dependency>
<groupId>mysql</groupId>
Expand All @@ -111,27 +91,13 @@
<type>pom</type>
<scope>import</scope>
</dependency>

<dependency>
<groupId>io.debezium</groupId>
<artifactId>debezium-server-batch</artifactId>
<version>${project.version}</version>
</dependency>
<!-- Debezium Scripting -->
<dependency>
<groupId>org.codehaus.groovy</groupId>
<artifactId>groovy</artifactId>
<version>${version.groovy}</version>
</dependency>
<dependency>
<groupId>org.codehaus.groovy</groupId>
<artifactId>groovy-json</artifactId>
<version>${version.groovy}</version>
</dependency>
<dependency>
<groupId>org.codehaus.groovy</groupId>
<artifactId>groovy-jsr223</artifactId>
<artifactId>groovy-bom</artifactId>
<version>${version.groovy}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
<!-- Test dependencies -->
<dependency>
Expand Down

0 comments on commit 0d5e9ce

Please sign in to comment.