diff --git a/cloud_gcp/src/main/scala/ai/chronon/integrations/cloud_gcp/DataprocSubmitter.scala b/cloud_gcp/src/main/scala/ai/chronon/integrations/cloud_gcp/DataprocSubmitter.scala index 16c296e509..1d967c6c23 100644 --- a/cloud_gcp/src/main/scala/ai/chronon/integrations/cloud_gcp/DataprocSubmitter.scala +++ b/cloud_gcp/src/main/scala/ai/chronon/integrations/cloud_gcp/DataprocSubmitter.scala @@ -145,7 +145,14 @@ class DataprocSubmitter(jobControllerClient: JobControllerClient, conf: Submitte // override the local dir for rocksdb as the default ends up being too large file name size wise "state.backend.rocksdb.localdir" -> "/tmp/flink-state", "state.checkpoint-storage" -> "filesystem", - "rest.flamegraph.enabled" -> "true" + "rest.flamegraph.enabled" -> "true", + // wire up prometheus reporter - prom reporter plays well with Google ops agent that can be installed in DataProc + // as we can have a couple of containers on a given node, we use a port range + "metrics.reporters" -> "prom", + "metrics.reporter.prom.factory.class" -> "org.apache.flink.metrics.prometheus.PrometheusReporterFactory", + "metrics.reporter.prom.host" -> "localhost", + "metrics.reporter.prom.port" -> "9250-9260", + "metrics.reporter.statsd.interval" -> "60 SECONDS" ) val flinkJobBuilder = FlinkJob diff --git a/flink/BUILD.bazel b/flink/BUILD.bazel index 6b7ab2540f..a2ad6903a9 100644 --- a/flink/BUILD.bazel +++ b/flink/BUILD.bazel @@ -28,6 +28,7 @@ scala_library( maven_artifact("org.apache.hadoop:hadoop-client-api"), maven_artifact("org.apache.hadoop:hadoop-yarn-api"), maven_artifact("org.apache.commons:commons-lang3"), + maven_artifact("org.apache.flink:flink-metrics-prometheus"), ], ) diff --git a/maven_install.json b/maven_install.json index a075fe3891..5a7d07ddb5 100755 --- a/maven_install.json +++ b/maven_install.json @@ -1,7 +1,7 @@ { "__AUTOGENERATED_FILE_DO_NOT_MODIFY_THIS_FILE_MANUALLY": "THERE_IS_NO_DATA_ONLY_ZUUL", - "__INPUT_ARTIFACTS_HASH": -1077230992, - "__RESOLVED_ARTIFACTS_HASH": 1195036016, + "__INPUT_ARTIFACTS_HASH": -1448107542, + "__RESOLVED_ARTIFACTS_HASH": -193250516, "artifacts": { "ant:ant": { "shasums": { @@ -2828,6 +2828,13 @@ }, "version": "1.17.0" }, + "org.apache.flink:flink-metrics-prometheus": { + "shasums": { + "jar": "0e1cbd8a012fd7d8588117e82d6bfde7bf0d138c9766e561b2d806b8dc92a9e2", + "sources": "a6dbcb2de09ae49f593ad654d92ec2bca809de96b9e79a90328f5fff9e6204a0" + }, + "version": "1.17.0" + }, "org.apache.flink:flink-optimizer": { "shasums": { "jar": "0ee4fedac51b4e811fa0f30065b1b3ff7c2eff84f208df5f946ae6c58b2f5736", @@ -7136,6 +7143,11 @@ "io.dropwizard.metrics:metrics-core", "org.slf4j:slf4j-api" ], + "org.apache.flink:flink-metrics-prometheus": [ + "com.google.code.findbugs:jsr305", + "org.apache.flink:flink-shaded-force-shading", + "org.slf4j:slf4j-api" + ], "org.apache.flink:flink-optimizer": [ "com.google.code.findbugs:jsr305", "org.apache.flink:flink-core", @@ -13699,6 +13711,12 @@ "org.apache.flink.dropwizard", "org.apache.flink.dropwizard.metrics" ], + "org.apache.flink:flink-metrics-prometheus": [ + "io.prometheus.client", + "io.prometheus.client.exporter", + "io.prometheus.client.exporter.common", + "org.apache.flink.metrics.prometheus" + ], "org.apache.flink:flink-optimizer": [ "org.apache.flink.optimizer", "org.apache.flink.optimizer.costs", @@ -24586,6 +24604,8 @@ "org.apache.flink:flink-metrics-core:jar:sources", "org.apache.flink:flink-metrics-dropwizard", "org.apache.flink:flink-metrics-dropwizard:jar:sources", + "org.apache.flink:flink-metrics-prometheus", + "org.apache.flink:flink-metrics-prometheus:jar:sources", "org.apache.flink:flink-optimizer", "org.apache.flink:flink-optimizer:jar:sources", "org.apache.flink:flink-queryable-state-client-java", @@ -26016,6 +26036,8 @@ "org.apache.flink:flink-metrics-core:jar:sources", "org.apache.flink:flink-metrics-dropwizard", "org.apache.flink:flink-metrics-dropwizard:jar:sources", + "org.apache.flink:flink-metrics-prometheus", + "org.apache.flink:flink-metrics-prometheus:jar:sources", "org.apache.flink:flink-optimizer", "org.apache.flink:flink-optimizer:jar:sources", "org.apache.flink:flink-queryable-state-client-java", @@ -27446,6 +27468,8 @@ "org.apache.flink:flink-metrics-core:jar:sources", "org.apache.flink:flink-metrics-dropwizard", "org.apache.flink:flink-metrics-dropwizard:jar:sources", + "org.apache.flink:flink-metrics-prometheus", + "org.apache.flink:flink-metrics-prometheus:jar:sources", "org.apache.flink:flink-optimizer", "org.apache.flink:flink-optimizer:jar:sources", "org.apache.flink:flink-queryable-state-client-java", @@ -29969,6 +29993,18 @@ "org.apache.flink.util.TestLoggerExtension" ] }, + "org.apache.flink:flink-metrics-prometheus": { + "org.apache.flink.metrics.reporter.MetricReporterFactory": [ + "org.apache.flink.metrics.prometheus.PrometheusPushGatewayReporterFactory", + "org.apache.flink.metrics.prometheus.PrometheusReporterFactory" + ] + }, + "org.apache.flink:flink-metrics-prometheus:jar:sources": { + "org.apache.flink.metrics.reporter.MetricReporterFactory": [ + "org.apache.flink.metrics.prometheus.PrometheusPushGatewayReporterFactory", + "org.apache.flink.metrics.prometheus.PrometheusReporterFactory" + ] + }, "org.apache.flink:flink-rpc-akka-loader": { "org.apache.flink.runtime.rpc.RpcSystemLoader": [ "org.apache.flink.runtime.rpc.akka.AkkaRpcSystemLoader" diff --git a/tools/build_rules/dependencies/maven_repository.bzl b/tools/build_rules/dependencies/maven_repository.bzl index 07e3c93810..dabd7ea084 100644 --- a/tools/build_rules/dependencies/maven_repository.bzl +++ b/tools/build_rules/dependencies/maven_repository.bzl @@ -164,6 +164,7 @@ maven_repository = repository( # Flink "org.apache.flink:flink-metrics-dropwizard:1.17.0", + "org.apache.flink:flink-metrics-prometheus:1.17.0", "org.apache.flink:flink-clients:1.17.0", "org.apache.flink:flink-yarn:1.17.0", "org.apache.flink:flink-runtime:1.17.0",