From 5a0d4cbc1d2981dc94be4902ddf8c060f7f2ad52 Mon Sep 17 00:00:00 2001
From: ismail simsek <6005685+ismailsimsek@users.noreply.github.com>
Date: Sun, 29 Jan 2023 18:52:02 +0100
Subject: [PATCH] Improve deduplicateBatch logic (#168)

* Improve deduplicateBatch logic

* Improve deduplicateBatch logic

* Improve deduplicateBatch logic
---
 .github/workflows/build.yml                   |  3 ++
 .../tableoperator/IcebergTableOperator.java   | 32 ++++++++-----------
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index d6e5c466..c9aecadc 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -17,6 +17,9 @@ on:
       - '.idea/**'
       - '.run/**'
 
+env:
+  SPARK_LOCAL_IP: 127.0.0.1
+
 jobs:
   build:
 
diff --git a/debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/tableoperator/IcebergTableOperator.java b/debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/tableoperator/IcebergTableOperator.java
index 479ec038..48ac9211 100644
--- a/debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/tableoperator/IcebergTableOperator.java
+++ b/debezium-server-iceberg-sink/src/main/java/io/debezium/server/iceberg/tableoperator/IcebergTableOperator.java
@@ -58,24 +58,20 @@ public class IcebergTableOperator {
 
   private List deduplicateBatch(List events) {
 
-    ConcurrentHashMap icebergRecordsmap = new ConcurrentHashMap<>();
-
-    for (IcebergChangeEvent e : events) {
-
-      // deduplicate using key(PK) @TODO improve using map.merge
-      if (icebergRecordsmap.containsKey(e.key())) {
-
-        // replace it if it's new
-        if (this.compareByTsThenOp(icebergRecordsmap.get(e.key()).value(), e.value()) <= 0) {
-          icebergRecordsmap.put(e.key(), e);
-        }
-
-      } else {
-        icebergRecordsmap.put(e.key(), e);
-      }
-
-    }
-    return new ArrayList<>(icebergRecordsmap.values());
+    ConcurrentHashMap deduplicatedEvents = new ConcurrentHashMap<>();
+
+    events.forEach(e ->
+        // deduplicate using key(PK)
+        deduplicatedEvents.merge(e.key(), e, (oldValue, newValue) -> {
+          if (this.compareByTsThenOp(oldValue.value(), newValue.value()) <= 0) {
+            return newValue;
+          } else {
+            return oldValue;
+          }
+        })
+    );
+
+    return new ArrayList<>(deduplicatedEvents.values());
   }
 
   /**