From e0bf4674bd84bdef6e8502aacf8f7525032256ef Mon Sep 17 00:00:00 2001 From: Gregor Zeitlinger Date: Wed, 23 Apr 2025 12:00:38 +0200 Subject: [PATCH] otel health check extension is faster than trying to scrape prom metrics --- docker/Dockerfile | 2 +- docker/otelcol-config.yaml | 6 ++++++ docker/run-all.sh | 30 +----------------------------- 3 files changed, 8 insertions(+), 30 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 35cea57b..a7157923 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -40,7 +40,7 @@ RUN yum install -y unzip dos2unix jq # installs for the final image # see https://github.com/thomasdarimont/keycloak/blob/main/docs/guides/server/containers.adoc#installing-additional-rpm-packages RUN mkdir -p /mnt/rootfs -RUN dnf install --installroot /mnt/rootfs curl-minimal jq procps --releasever 9 --setopt install_weak_deps=false --nodocs -y && \ +RUN dnf install --installroot /mnt/rootfs curl-minimal --releasever 9 --setopt install_weak_deps=false --nodocs -y && \ dnf --installroot /mnt/rootfs clean all && \ rpm --root /mnt/rootfs -e --nodeps setup diff --git a/docker/otelcol-config.yaml b/docker/otelcol-config.yaml index ba4fa425..beee00be 100644 --- a/docker/otelcol-config.yaml +++ b/docker/otelcol-config.yaml @@ -13,6 +13,11 @@ receivers: static_configs: - targets: ["localhost:8888"] +extensions: + health_check: + endpoint: 0.0.0.0:13133 + path: "/ready" + processors: batch: @@ -41,6 +46,7 @@ exporters: verbosity: detailed service: + extensions: [health_check] pipelines: traces: receivers: [otlp] diff --git a/docker/run-all.sh b/docker/run-all.sh index 2f589b75..b8a415de 100755 --- a/docker/run-all.sh +++ b/docker/run-all.sh @@ -43,15 +43,13 @@ services["loki"]="http://localhost:3100/ready" services["prometheus"]="http://localhost:9090/api/v1/status/runtimeinfo" services["tempo"]="http://localhost:3200/ready" services["pyroscope"]="http://localhost:4040/ready" +services["otelcol"]="http://localhost:13133/ready" # Initialize service_ready status to false for all services for service in "${!services[@]}"; do service_ready[$service]=false done -# Also check OpenTelemetry collector separately (since it uses a different check method) -service_ready["otelcol"]=false - # Function to check if a service is ready check_service_ready() { local service=$1 @@ -80,29 +78,6 @@ check_service_ready() { return 1 } -# Function to check if OpenTelemetry collector is ready -check_otelcol_ready() { - # Skip if already marked as ready - if [[ ${service_ready["otelcol"]} == true ]]; then - return 0 - fi - - # Check if collector is ready via Prometheus metric - if curl -sg 'http://localhost:9090/api/v1/query?query=otelcol_process_uptime_total{}' 2>/dev/null | jq -r .data.result[0].value[1] 2>/dev/null | grep '[0-9]' >/dev/null; then - # Calculate and display startup time - end_time=$(date +%s) - # shellcheck disable=SC2154 - otelcol_start_time=${start_time_otelcol} - elapsed=$((end_time - otelcol_start_time)) - elapsed_times["otelcol"]=$elapsed - service_ready["otelcol"]=true - echo "OpenTelemetry collector is up and running. Startup time: ${elapsed} seconds" - return 0 - fi - - return 1 -} - # Wait for all services to be ready all_ready=false while [[ $all_ready == false ]]; do @@ -111,9 +86,6 @@ while [[ $all_ready == false ]]; do check_service_ready "$service" "${services[$service]}" done - # Check OpenTelemetry collector - check_otelcol_ready - # Check if all services are ready all_ready=true for service in "${!service_ready[@]}"; do