diff --git a/debezium-server-iceberg-dist/pom.xml b/debezium-server-iceberg-dist/pom.xml index a02c5ee3..1f0530ad 100644 --- a/debezium-server-iceberg-dist/pom.xml +++ b/debezium-server-iceberg-dist/pom.xml @@ -89,10 +89,6 @@ io.debezium debezium-connector-oracle - - io.debezium - debezium-connector-oracle - io.debezium debezium-connector-db2 diff --git a/debezium-server-iceberg-dist/src/main/resources/assemblies/server-distribution.xml b/debezium-server-iceberg-dist/src/main/resources/assemblies/server-distribution.xml index 7bce4041..0d1338c5 100644 --- a/debezium-server-iceberg-dist/src/main/resources/assemblies/server-distribution.xml +++ b/debezium-server-iceberg-dist/src/main/resources/assemblies/server-distribution.xml @@ -30,6 +30,8 @@ org.glassfish.jersey.*:*:* org.eclipse.jetty:*:* org.apache.maven:*:* + log4j:log4j:* + ch.qos.reload4j:reload4j diff --git a/debezium-server-iceberg-dist/src/main/resources/distro/conf/application.properties.example b/debezium-server-iceberg-dist/src/main/resources/distro/conf/application.properties.example index 70c9297b..0733217e 100644 --- a/debezium-server-iceberg-dist/src/main/resources/distro/conf/application.properties.example +++ b/debezium-server-iceberg-dist/src/main/resources/distro/conf/application.properties.example @@ -1,38 +1,62 @@ +# Use iceberg sink debezium.sink.type=iceberg + +# Run without Kafka, use local file to store checkpoints +debezium.source.database.history=io.debezium.relational.history.FileDatabaseHistory +debezium.source.database.history.file.filename=data/status.dat + +# Iceberg sink config debezium.sink.iceberg.table-prefix=debeziumcdc_ debezium.sink.iceberg.upsert=true debezium.sink.iceberg.upsert-keep-deletes=true debezium.sink.iceberg.write.format.default=parquet debezium.sink.iceberg.catalog-name=mycatalog +# Hadoop catalog, you can use other catalog supported by iceberg as well debezium.sink.iceberg.type=hadoop debezium.sink.iceberg.warehouse=s3a://my-bucket/iceberg_warehouse 
debezium.sink.iceberg.table-namespace=debeziumevents -# s3 conf + +# S3 config debezium.sink.iceberg.fs.defaultFS=s3a://my-bucket debezium.sink.iceberg.com.amazonaws.services.s3.enableV4=true debezium.sink.iceberg.com.amazonaws.services.s3a.enableV4=true debezium.sink.iceberg.fs.s3a.aws.credentials.provider=com.amazonaws.auth.DefaultAWSCredentialsProviderChain -debezium.sink.iceberg.fs.s3a.access.key=my-aws-access-key -debezium.sink.iceberg.fs.s3a.secret.key=my-secret-access-key +debezium.sink.iceberg.fs.s3a.access.key=AWS_ACCESS_KEY +debezium.sink.iceberg.fs.s3a.secret.key=AWS_SECRET_ACCESS_KEY debezium.sink.iceberg.fs.s3a.path.style.access=true debezium.sink.iceberg.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem -# enable event schemas + +# enable event schemas - mandatory debezium.format.value.schemas.enable=true debezium.format.key.schemas.enable=true debezium.format.value=json debezium.format.key=json -# source db -debezium.source.connector.class=io.debezium.connector.postgresql.PostgresConnector -debezium.source.offset.storage.file.filename=data/offsets.dat -debezium.source.offset.flush.interval.ms=0 -debezium.source.database.hostname=localhost -debezium.source.database.port=5432 -debezium.source.database.user=postgres -debezium.source.database.password=postgres -debezium.source.database.dbname=postgres -debezium.source.database.server.name=tutorial -debezium.source.schema.include.list=inventory +# postgres source +#debezium.source.connector.class=io.debezium.connector.postgresql.PostgresConnector +#debezium.source.offset.storage.file.filename=data/offsets.dat +#debezium.source.offset.flush.interval.ms=0 +#debezium.source.database.hostname=localhost +#debezium.source.database.port=5432 +#debezium.source.database.user=postgres +#debezium.source.database.password=postgres +#debezium.source.database.dbname=postgres +#debezium.source.database.server.name=tutorial +#debezium.source.schema.include.list=inventory + +# sql server source 
+#debezium.source.connector.class=io.debezium.connector.sqlserver.SqlServerConnector +#debezium.source.offset.storage.file.filename=data/offsets.dat +#debezium.source.offset.flush.interval.ms=0 +#debezium.source.database.hostname=localhost +#debezium.source.database.port=5432 +#debezium.source.database.user=debezium +#debezium.source.database.password=debezium +#debezium.source.database.dbname=debezium +#debezium.source.database.server.name=tutorial +#debezium.source.schema.include.list=inventory +# mandatory for sql server source; avoids errors during snapshot and schema changes +#debezium.source.include.schema.changes=false # do event flattening. unwrap message! debezium.transforms=unwrap @@ -43,6 +67,7 @@ debezium.transforms.unwrap.drop.tombstones=true # ############ SET LOG LEVELS ############ quarkus.log.level=INFO +quarkus.log.console.json=false # hadoop, parquet quarkus.log.category."org.apache.hadoop".level=WARN quarkus.log.category."org.apache.parquet".level=WARN diff --git a/debezium-server-iceberg-sink/pom.xml b/debezium-server-iceberg-sink/pom.xml index fa6f5e22..5f3fd2c5 100644 --- a/debezium-server-iceberg-sink/pom.xml +++ b/debezium-server-iceberg-sink/pom.xml @@ -53,6 +53,27 @@ iceberg-spark-runtime-3.2_2.13 ${version.iceberg} + + + com.google.cloud + google-cloud-storage + 2.11.3 + + + com.google.cloud + google-cloud-nio + 0.124.14 + + + com.google.cloud.bigdataoss + gcs-connector + hadoop3-2.2.7 + + + com.google.cloud.bigdataoss + gcsio + 2.2.7 + software.amazon.awssdk @@ -103,12 +124,30 @@ log4j log4j + + org.slf4j + slf4j-reload4j + org.apache.hadoop hadoop-client ${version.hadoop} + + + log4j + log4j + + + org.slf4j + slf4j-log4j12 + + + org.slf4j + slf4j-reload4j + + diff --git a/debezium-server-iceberg-sink/src/main/resources/conf/application.properties.example b/debezium-server-iceberg-sink/src/main/resources/conf/application.properties.example index 9b518ddb..0733217e 100644 --- 
a/debezium-server-iceberg-sink/src/main/resources/conf/application.properties.example +++ b/debezium-server-iceberg-sink/src/main/resources/conf/application.properties.example @@ -67,6 +67,7 @@ debezium.transforms.unwrap.drop.tombstones=true # ############ SET LOG LEVELS ############ quarkus.log.level=INFO +quarkus.log.console.json=false # hadoop, parquet quarkus.log.category."org.apache.hadoop".level=WARN quarkus.log.category."org.apache.parquet".level=WARN diff --git a/pom.xml b/pom.xml index 2f9be467..0659993f 100644 --- a/pom.xml +++ b/pom.xml @@ -59,6 +59,12 @@ kafka-clients ${version.kafkaclients} + + + org.jboss.slf4j + slf4j-jboss-logmanager + [1.2.0.Final,) + software.amazon.awssdk diff --git a/python/setup.py b/python/setup.py index cedcfb3d..e9ec80fc 100644 --- a/python/setup.py +++ b/python/setup.py @@ -1,4 +1,5 @@ import os + from setuptools import setup, find_packages setup_py_dir = os.path.dirname(os.path.abspath(__file__)) @@ -18,7 +19,6 @@ url='https://debezium.io/', include_package_data=True, license="Apache License 2.0", - test_suite='tests', - install_requires=["pyjnius==1.4.0"], + install_requires=["pyjnius==1.4.2"], python_requires='>=3', )