-
Notifications
You must be signed in to change notification settings - Fork 728
Insert activities via Kafka Connect #2736
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5207c54
52501c3
ede2b9f
a2d019f
294c054
e9c920e
43b57e4
915346a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,7 +23,7 @@ import { IMemberIdentity, IntegrationResultType, PlatformType, SegmentData } fro | |
|
|
||
| import { IRepositoryOptions } from '@/database/repositories/IRepositoryOptions' | ||
| import OrganizationRepository from '@/database/repositories/organizationRepository' | ||
| import { getDataSinkWorkerEmitter } from '@/serverless/utils/queueService' | ||
| import { QUEUE_CLIENT, getDataSinkWorkerEmitter } from '@/serverless/utils/queueService' | ||
|
|
||
| import { GITHUB_CONFIG, IS_DEV_ENV, IS_TEST_ENV } from '../conf' | ||
| import ActivityRepository from '../database/repositories/activityRepository' | ||
|
|
@@ -174,7 +174,7 @@ export default class ActivityService extends LoggerBase { | |
| ) | ||
|
|
||
| record = await ActivityRepository.create(data, repositoryOptions) | ||
| await insertActivities([{ ...data, id: record.id }], true) | ||
| await insertActivities(QUEUE_CLIENT(), [{ ...data, id: record.id }], true) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion: Add error handling or retry logic for queue insertion. |
||
|
|
||
| // Only track activity's platform and timestamp and memberId. It is completely annonymous. | ||
| telemetryTrack( | ||
|
|
||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -161,6 +161,23 @@ services: | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| networks: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| - crowd-bridge | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| kafka-connect: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| build: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| context: scaffold/kafka-connect | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| restart: unless-stopped | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| entrypoint: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| - connect-standalone | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| - /etc/kafka-connect/worker-local.properties | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| - /etc/kafka-connect/console-local-sink.properties | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| - /etc/kafka-connect/questdb-local-sink.properties | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| volumes: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| - kafka-connect-dev:/storage | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| - ./scaffold/kafka-connect/worker-local.properties:/etc/kafka-connect/worker-local.properties | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| - ./scaffold/kafka-connect/console-local-sink.properties:/etc/kafka-connect/console-local-sink.properties | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| - ./scaffold/kafka-connect/questdb-local-sink.properties:/etc/kafka-connect/questdb-local-sink.properties | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| networks: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| - crowd-bridge | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
Comment on lines
+164
to
+180
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion: Add essential service configurations for Kafka Connect. The Kafka Connect service configuration needs improvements for production readiness:
Consider these improvements: kafka-connect:
build:
context: scaffold/kafka-connect
restart: unless-stopped
+ depends_on:
+ kafka:
+ condition: service_started
+ healthcheck:
+ test: ["CMD-SHELL", "curl -f http://localhost:8083/connectors || exit 1"]
+ interval: 30s
+ timeout: 10s
+ retries: 3
+ start_period: 30s
+ deploy:
+ resources:
+ limits:
+ memory: 1G
+ reservations:
+ memory: 512M
entrypoint:
- connect-standalone

Also, consider switching to distributed mode for better scalability: entrypoint:
- - connect-standalone
+ - connect-distributed
- /etc/kafka-connect/worker-local.properties
- - /etc/kafka-connect/console-local-sink.properties
- - /etc/kafka-connect/questdb-local-sink.properties📝 Committable suggestion
Suggested change
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| temporal: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| build: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| context: scaffold/temporal | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
@@ -182,3 +199,4 @@ volumes: | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| opensearch-dev: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| s3-dev: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| redis-dev: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| kafka-connect-dev: | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| FROM confluentinc/cp-kafka-connect:7.8.0-2-ubi8 | ||
|
|
||
| USER root | ||
|
|
||
| RUN yum install -y jq findutils unzip | ||
|
|
||
| RUN confluent-hub install snowflakeinc/snowflake-kafka-connector:2.5.0 --no-prompt | ||
| RUN confluent-hub install questdb/kafka-questdb-connector:0.12 --no-prompt | ||
|
Comment on lines
+5
to
+8
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion: Optimize Dockerfile layers and security
Consider this optimization: -RUN yum install -y jq findutils unzip
-
-RUN confluent-hub install snowflakeinc/snowflake-kafka-connector:2.5.0 --no-prompt
-RUN confluent-hub install questdb/kafka-questdb-connector:0.12 --no-prompt
+RUN yum install -y jq-1.6-* findutils-4.6.0-* unzip-6.0-* \
+ && yum clean all \
+ && rm -rf /var/cache/yum \
+ && confluent-hub install snowflakeinc/snowflake-kafka-connector:2.5.0 --no-prompt \
+ && confluent-hub install questdb/kafka-questdb-connector:0.12 --no-prompt
|
||
|
|
||
| VOLUME /storage | ||
|
|
||
| USER appuser | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,21 @@ | ||||||||||||
| #!/usr/bin/env bash | ||||||||||||
|
|
||||||||||||
| set -euo pipefail | ||||||||||||
|
|
||||||||||||
| TAG="sjc.ocir.io/axbydjxa5zuh/kafka-connect:$(date +%s)" | ||||||||||||
| readonly TAG | ||||||||||||
|
Comment on lines
+5
to
+6
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion: Parameterize the registry path for better flexibility. The registry path is hard-coded. Consider using environment variables:
+REGISTRY_PATH="${REGISTRY_PATH:-sjc.ocir.io/axbydjxa5zuh}"
+TAG="${REGISTRY_PATH}/kafka-connect:$(date +%s)"📝 Committable suggestion
Suggested change
|
||||||||||||
|
|
||||||||||||
| docker build -t "${TAG}" . | ||||||||||||
|
|
||||||||||||
| echo "----------------------------------------" | ||||||||||||
| echo "Image built with tag: ${TAG}" | ||||||||||||
| echo "----------------------------------------" | ||||||||||||
| echo -n "Type 'y' and press Enter to push the image to the registry. Ctrl+C to cancel: " | ||||||||||||
| read -r PUSH | ||||||||||||
| if [ "${PUSH}" = "y" ]; then | ||||||||||||
| echo "Pushing image to the registry..." | ||||||||||||
| echo "----------------------------------------" | ||||||||||||
| docker push "${TAG}" | ||||||||||||
| else | ||||||||||||
| echo "Skipping push" | ||||||||||||
| fi | ||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| name=console-sink | ||
| connector.class=FileStreamSinkConnector | ||
| tasks.max=1 | ||
| topics=activities | ||
| value.converter=org.apache.kafka.connect.json.JsonConverter | ||
| value.converter.schemas.enable=false |
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,12 @@ | ||||||||||||||||||||||||||||||||||
| name=questdb-sink | ||||||||||||||||||||||||||||||||||
| client.conf.string=http::addr=questdb:9000; | ||||||||||||||||||||||||||||||||||
| topics=activities | ||||||||||||||||||||||||||||||||||
| table=activities | ||||||||||||||||||||||||||||||||||
| connector.class=io.questdb.kafka.QuestDBSinkConnector | ||||||||||||||||||||||||||||||||||
|
Comment on lines
+1
to
+5
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion: Add error handling and monitoring configurations. The connector configuration lacks essential error handling and monitoring settings. Add these configurations: name=questdb-sink
client.conf.string=http::addr=questdb:9000;
topics=activities
table=activities
connector.class=io.questdb.kafka.QuestDBSinkConnector
+errors.tolerance=all
+errors.deadletterqueue.topic.name=dlq-activities
+errors.deadletterqueue.topic.replication.factor=1
+errors.deadletterqueue.context.headers.enable=true
+errors.log.enable=true
+errors.log.include.messages=true📝 Committable suggestion
Suggested change
|
||||||||||||||||||||||||||||||||||
| value.converter=org.apache.kafka.connect.json.JsonConverter | ||||||||||||||||||||||||||||||||||
| include.key=false | ||||||||||||||||||||||||||||||||||
| key.converter=org.apache.kafka.connect.storage.StringConverter | ||||||||||||||||||||||||||||||||||
| timestamp.field.name=timestamp | ||||||||||||||||||||||||||||||||||
| timestamp.string.fields=createdAt,updatedAt | ||||||||||||||||||||||||||||||||||
| timestamp.string.format=yyyy-MM-ddTHH:mm:ss.SSSZ | ||||||||||||||||||||||||||||||||||
| value.converter.schemas.enable=false | ||||||||||||||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| bootstrap.servers=kafka:9092 | ||
| group.id=kafka-connect | ||
|
Comment on lines
+1
to
+2
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add security configurations for the Kafka connection. The current configuration lacks security settings for the Kafka connection. Add these security configurations: bootstrap.servers=kafka:9092
group.id=kafka-connect
+security.protocol=SSL
+ssl.truststore.location=/etc/kafka/secrets/kafka.connect.truststore.jks
+ssl.truststore.password=${file:/etc/kafka/secrets/connect-secrets:truststore-password}
+ssl.keystore.location=/etc/kafka/secrets/kafka.connect.keystore.jks
+ssl.keystore.password=${file:/etc/kafka/secrets/connect-secrets:keystore-password}
+ssl.key.password=${file:/etc/kafka/secrets/connect-secrets:key-password}
|
||
|
|
||
| key.converter=org.apache.kafka.connect.storage.StringConverter | ||
| value.converter=org.apache.kafka.connect.json.JsonConverter | ||
| internal.key.converter=org.apache.kafka.connect.storage.StringConverter | ||
| internal.value.converter=org.apache.kafka.connect.json.JsonConverter | ||
| key.converter.schemas.enable=true | ||
| value.converter.schemas.enable=true | ||
|
|
||
| offset.storage.file.filename=/storage/connect.offsets | ||
| offset.flush.interval.ms=10000 | ||
| plugin.path=/usr/share/java,/usr/share/filestream-connectors,/usr/share/confluent-hub-components | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -57,6 +57,7 @@ setImmediate(async () => { | |
| dataSinkWorkerEmitter, | ||
| redis, | ||
| temporal, | ||
| queueClient, | ||
| log, | ||
| ) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -38,7 +38,7 @@ export async function moveActivitiesBetweenMembers( | |
| if (!memberExists) { | ||
| return | ||
| } | ||
| await moveActivitiesToNewMember(svc.questdbSQL, primaryId, secondaryId, tenantId) | ||
| await moveActivitiesToNewMember(svc.questdbSQL, svc.queue, primaryId, secondaryId, tenantId) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 💡 Codebase verification: Based on the code analysis, I can now provide the final response: Queue error handling needs to be improved in the activity-merging flow. The implementation in
🔗 Analysis chain: Ensure proper error handling if the queue is unavailable. You are now passing svc.queue to moveActivitiesToNewMember. Confirm that any internal logic gracefully handles a null or undefined queue. Consider adding logging or a fallback if queue usage fails. Let me gather information about the error handling in the function. Let me check the implementation details of the function to understand how it handles the queue parameter. Let me check the implementation of the related functions. 🏁 Scripts executed. The following scripts were executed for the analysis: Error: No shell scripts found Script: #!/bin/bash
# Find the implementation of moveActivitiesToNewMember function
ast-grep --pattern 'function moveActivitiesToNewMember($_) {
$$$
}'
# Also search for any direct references to this function
rg "moveActivitiesToNewMember" -A 5
Length of output: 1977 Script: #!/bin/bash
# Get the implementation of moveActivitiesToNewMember from the data-access-layer
cat services/libs/data-access-layer/src/old/apps/entity_merging_worker/index.ts
Length of output: 4938 Script: #!/bin/bash
# Get the implementation of updateActivities function
cat services/libs/data-access-layer/src/activities/update.ts
Length of output: 2103 Script: #!/bin/bash
# Get the implementation of insertActivities function
cat services/libs/data-access-layer/src/activities/ilp.ts
Length of output: 2521 |
||
| } | ||
|
|
||
| export async function moveActivitiesWithIdentityToAnotherMember( | ||
|
|
@@ -67,6 +67,7 @@ export async function moveActivitiesWithIdentityToAnotherMember( | |
| )) { | ||
| await moveIdentityActivitiesToNewMember( | ||
| svc.questdbSQL, | ||
| svc.queue, | ||
| tenantId, | ||
| fromId, | ||
| toId, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
🛠️ Refactor suggestion
Passing QUEUE_CLIENT() to insertActivities.
Good shift toward a queue-based approach. Consider robust error-handling for partial insertions or queue outages, potentially logging failures or implementing rollback logic.