diff --git a/README.md b/README.md index 72b7eb0..7f4f9bb 100644 --- a/README.md +++ b/README.md @@ -43,10 +43,10 @@ go run github.com/open-telemetry/opentelemetry-collector-contrib/cmd/telemetryge | Category | Components | |---|---| -| **Extensions** | zpages, healthcheck, pprof, basicauth, bearertokenauth, oauth2client, oidc | -| **Receivers** | otlp (gRPC + HTTP), nop | +| **Extensions** | zpages, pprof, basicauth, bearertokenauth, oauth2client, oidc, filestorage | +| **Receivers** | otlp (gRPC + HTTP), nop, hostmetrics, filelog | | **Exporters** | otlp (gRPC), otlphttp, file, debug, nop | -| **Processors** | batch, attributes, resource, span, probabilisticsampler, filter, transform | +| **Processors** | attributes, resource, span, probabilisticsampler, filter, transform, redaction | | **Connectors** | forward | All components track the latest stable upstream OpenTelemetry Collector release. See [`manifest.yaml`](distributions/tulip/manifest.yaml) for exact versions. diff --git a/distributions/tulip/config.yaml b/distributions/tulip/config.yaml index 1e7f3fb..bac53c2 100644 --- a/distributions/tulip/config.yaml +++ b/distributions/tulip/config.yaml @@ -1,5 +1,4 @@ extensions: - health_check: {} pprof: {} receivers: @@ -11,13 +10,35 @@ exporters: nop: otlp: endpoint: otlp.example.com:4317 + sending_queue: + enabled: true + queue_size: 1000 + batch: + flush_timeout: 200ms + min_size: 8192 + retry_on_failure: + enabled: true + initial_interval: 5s + max_interval: 30s + max_elapsed_time: 300s otlphttp: endpoint: https://otlp.example.com/otlp + sending_queue: + enabled: true + queue_size: 1000 + batch: + flush_timeout: 200ms + min_size: 8192 + retry_on_failure: + enabled: true + initial_interval: 5s + max_interval: 30s + max_elapsed_time: 300s service: - extensions: [ health_check, pprof ] + extensions: [ pprof ] pipelines: traces: receivers: [ otlp ] - processors: [ ] + processors: [] exporters: [ nop, otlp, otlphttp ] diff --git a/distributions/tulip/manifest.yaml 
b/distributions/tulip/manifest.yaml index c94050b..a9d56aa 100644 --- a/distributions/tulip/manifest.yaml +++ b/distributions/tulip/manifest.yaml @@ -18,49 +18,55 @@ dist: module: github.com/ollygarden/tulip/tulip name: tulip description: OllyGarden Tulip - Enterprise-supported OpenTelemetry Collector distribution - version: 0.144.0 + version: 0.151.0 output_path: ./_build build_tags: "grpcnotrace" -# Extensions: Authentication, health checks, profiling +# Extensions: Authentication, profiling, storage extensions: - - gomod: go.opentelemetry.io/collector/extension/zpagesextension v0.145.0 - - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/extension/healthcheckextension v0.145.0 - - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/extension/pprofextension v0.145.0 - - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/extension/basicauthextension v0.145.0 - - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/extension/bearertokenauthextension v0.145.0 - - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/extension/oauth2clientauthextension v0.145.0 - - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/extension/oidcauthextension v0.145.0 + - gomod: go.opentelemetry.io/collector/extension/zpagesextension v0.151.0 + - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/extension/pprofextension v0.151.0 + - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/extension/basicauthextension v0.151.0 + - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/extension/bearertokenauthextension v0.151.0 + - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/extension/oauth2clientauthextension v0.151.0 + - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/extension/oidcauthextension v0.151.0 + - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/extension/storage/filestorage v0.151.0 # Receivers: Ingesting 
telemetry data receivers: - - gomod: go.opentelemetry.io/collector/receiver/nopreceiver v0.145.0 - - gomod: go.opentelemetry.io/collector/receiver/otlpreceiver v0.145.0 + - gomod: go.opentelemetry.io/collector/receiver/nopreceiver v0.151.0 + - gomod: go.opentelemetry.io/collector/receiver/otlpreceiver v0.151.0 + - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver v0.151.0 + - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/filelogreceiver v0.151.0 # Exporters: Sending telemetry to backends exporters: - - gomod: go.opentelemetry.io/collector/exporter/debugexporter v0.145.0 - - gomod: go.opentelemetry.io/collector/exporter/nopexporter v0.145.0 - - gomod: go.opentelemetry.io/collector/exporter/otlpexporter v0.145.0 - - gomod: go.opentelemetry.io/collector/exporter/otlphttpexporter v0.145.0 - - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/exporter/fileexporter v0.145.0 + - gomod: go.opentelemetry.io/collector/exporter/debugexporter v0.151.0 + - gomod: go.opentelemetry.io/collector/exporter/nopexporter v0.151.0 + - gomod: go.opentelemetry.io/collector/exporter/otlpexporter v0.151.0 + - gomod: go.opentelemetry.io/collector/exporter/otlphttpexporter v0.151.0 + - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/exporter/fileexporter v0.151.0 # Processors: Data transformation and enrichment +# Note: batchprocessor was removed in the LTS May 2026 release. +# It has been deprecated upstream (PR #15046) and has a known data loss bug (#12443). +# Use exporter-level sending_queue + batch configuration instead. +# See docs/tulip-lts-may2026-upgrade-plan.md for migration details. 
processors: - - gomod: go.opentelemetry.io/collector/processor/batchprocessor v0.145.0 - - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/processor/attributesprocessor v0.145.0 - - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/processor/resourceprocessor v0.145.0 - - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanprocessor v0.145.0 - - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/processor/probabilisticsamplerprocessor v0.145.0 - - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/processor/filterprocessor v0.145.0 - - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor v0.145.0 + - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/processor/attributesprocessor v0.151.0 + - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/processor/resourceprocessor v0.151.0 + - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/processor/spanprocessor v0.151.0 + - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/processor/probabilisticsamplerprocessor v0.151.0 + - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/processor/filterprocessor v0.151.0 + - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor v0.151.0 + - gomod: github.com/open-telemetry/opentelemetry-collector-contrib/processor/redactionprocessor v0.151.0 # Connectors: Pipeline routing connectors: - - gomod: go.opentelemetry.io/collector/connector/forwardconnector v0.145.0 + - gomod: go.opentelemetry.io/collector/connector/forwardconnector v0.151.0 # Providers: Configuration sources providers: - - gomod: go.opentelemetry.io/collector/confmap/provider/envprovider v1.51.0 - - gomod: go.opentelemetry.io/collector/confmap/provider/fileprovider v1.51.0 - - gomod: go.opentelemetry.io/collector/confmap/provider/yamlprovider v1.51.0 \ No newline at end of file + - 
gomod: go.opentelemetry.io/collector/confmap/provider/envprovider v1.57.0 + - gomod: go.opentelemetry.io/collector/confmap/provider/fileprovider v1.57.0 + - gomod: go.opentelemetry.io/collector/confmap/provider/yamlprovider v1.57.0 \ No newline at end of file diff --git a/distributions/tulip/tulip-test-docker.yaml b/distributions/tulip/tulip-test-docker.yaml index 8c998cb..7b71e06 100644 --- a/distributions/tulip/tulip-test-docker.yaml +++ b/distributions/tulip/tulip-test-docker.yaml @@ -1,6 +1,4 @@ extensions: - health_check: - endpoint: 0.0.0.0:13133 pprof: endpoint: 0.0.0.0:1777 @@ -30,7 +28,7 @@ processors: action: upsert service: - extensions: [ health_check, pprof ] + extensions: [ pprof ] pipelines: traces: receivers: [ otlp ] diff --git a/distributions/tulip/tulip-test.yaml b/distributions/tulip/tulip-test.yaml index fe8b1a4..d2de863 100644 --- a/distributions/tulip/tulip-test.yaml +++ b/distributions/tulip/tulip-test.yaml @@ -1,6 +1,5 @@ # This file is synched with the README.md file, to make the quickstart easier. So, if changes were made here, please update the README.md file. extensions: - health_check: {} pprof: {} receivers: @@ -31,7 +30,7 @@ processors: action: upsert service: - extensions: [ health_check, pprof ] + extensions: [ pprof ] pipelines: traces: receivers: [ otlp ] diff --git a/docs/tulip-lts-may2026-upgrade-plan.md b/docs/tulip-lts-may2026-upgrade-plan.md new file mode 100644 index 0000000..98133f9 --- /dev/null +++ b/docs/tulip-lts-may2026-upgrade-plan.md @@ -0,0 +1,192 @@ +# Tulip LTS May 2026 — Upgrade Plan + +## What is an LTS release? + +LTS (Long-Term Support) is a release model where a specific version receives extended maintenance, security patches, and bug fixes over a longer period than regular releases. 
Unlike the upstream OpenTelemetry Collector, which ships new versions roughly every two weeks, an LTS release provides a **stable, validated baseline** that production environments can depend on without the risk of frequent breaking changes. + +### Why ship an LTS? + +The upstream OpenTelemetry Collector moves fast — new releases every ~2 weeks, components changing stability levels, deprecations, and breaking changes. This velocity is great for innovation but creates challenges for production deployments: + +- **Upgrade fatigue:** Keeping up with a new release every two weeks is unsustainable for teams running the collector at scale. +- **Stability risk:** Not every upstream release is equally battle-tested. Some introduce regressions that get fixed in the next release. +- **Component churn:** Components get deprecated (e.g., batchprocessor), replaced, or have their APIs changed. Teams need time to plan migrations. +- **Support burden:** Supporting arbitrary collector versions is impractical. An LTS gives a defined, tested target. + +The Tulip LTS provides: + +- **A curated, validated component set** — every component is reviewed for stability, active maintenance, and known issues before inclusion. +- **A predictable upgrade path** — instead of chasing every upstream release, teams upgrade LTS-to-LTS with clear migration documentation. +- **Production-ready defaults** — the default configuration includes OOM protection and retry logic. +- **Extended support window** — security patches and critical fixes are backported without requiring a full version upgrade. + +## Overview + +This document defines the component composition and configuration changes for the Tulip LTS release, targeting May 2026. The LTS follows a **stability-first** approach: only stable/beta components with active maintainers are included. 
+ +**Version bump:** v0.145.0 → v0.151.0 (all components; confmap providers v1.51.0 → v1.57.0) + +The target version v0.151.0 was selected as the latest stable upstream release at the time of this LTS cut, verified against the official otelcol-contrib distribution manifest: +- https://github.com/open-telemetry/opentelemetry-collector-releases/blob/main/distributions/otelcol-contrib/manifest.yaml + +--- + +## Component Changes + +### Removed: batchprocessor + +**Reason:** The batchprocessor has been formally deprecated in the OpenTelemetry Collector (PR [#15046](https://github.com/open-telemetry/opentelemetry-collector/pull/15046), April 2026). It also has a known data loss bug ([#12443](https://github.com/open-telemetry/opentelemetry-collector/issues/12443)) where data is silently dropped when a downstream exporter rejects the data and has no queue/retry configured. + +**Replacement:** Exporter-level batching via `sending_queue` + `batch` configuration on each exporter. This is the officially recommended path and provides stronger delivery guarantees because: + +- Data is durably enqueued in the exporter's persistent sending queue before acknowledgment (persistence requires `sending_queue.storage`; without it the queue is in-memory) +- Batching and queueing are consolidated within the exporter +- Data can be written to disk and recovered after a Collector restart when `sending_queue.storage` points at a storage extension such as `file_storage` +- No silent data loss — failed sends are retried with backoff + +#### Migration: before and after + +**Before (batchprocessor in pipeline):** + +```yaml +processors: + batch: + send_batch_size: 8192 + timeout: 200ms + +exporters: + otlp: + endpoint: otlp.example.com:4317 + +service: + pipelines: + traces: + receivers: [otlp] + processors: [batch] + exporters: [otlp] +``` + +**After (exporter-level batching):** + +```yaml +exporters: + otlp: + endpoint: otlp.example.com:4317 + sending_queue: + enabled: true + queue_size: 1000 + batch: + flush_timeout: 200ms + min_size: 8192 + retry_on_failure: + enabled: true + initial_interval: 5s + max_interval: 30s + max_elapsed_time: 300s + +service: + pipelines: + traces: + receivers: [otlp] + 
processors: [] + exporters: [otlp] +``` + +#### References + +- Deprecation PR: https://github.com/open-telemetry/opentelemetry-collector/pull/15046 +- Data loss bug: https://github.com/open-telemetry/opentelemetry-collector/issues/12443 +- Resolution discussion: https://github.com/open-telemetry/opentelemetry-collector/issues/15047 +- Docs removal: https://github.com/open-telemetry/opentelemetry-collector/issues/13766 + +--- + +### Removed: healthcheckextension + +**Reason:** The healthcheck extension v1 and v2 share code between them using a feature flag ([#42256](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/42256)). Including v1 would indirectly support v2, which is not the objective for an LTS release where component behavior must be fully predictable and stable. + +--- + +### Added: redactionprocessor + +**Reason:** Allows redacting sensitive data from telemetry attributes before export. Essential for enterprise environments with data privacy requirements (PII, HIPAA, GDPR). This is a contrib component with stable maturity. + +--- + +### Added: filestorage extension + +**Reason:** Provides persistent file-based storage for components that need durable state across collector restarts (e.g., exporter sending queues, receiver checkpoints). Critical for reliable telemetry delivery in production. + +--- + +### Added: hostmetricsreceiver + +**Reason:** Collects host-level metrics (CPU, memory, disk, network, filesystem, processes) from the machine running the collector. Essential for infrastructure monitoring use cases where the collector also serves as a host metrics agent. + +--- + +### Added: filelogreceiver + +**Reason:** Reads and parses log data from files on the host. Enables log collection pipelines where applications write to local files. Widely used in production for collecting application logs, system logs, and container logs. 
+ +--- + +## Final LTS Component Manifest (27 components) + +### Extensions (7) + +| Component | Source | Stability | Note | +|-----------|--------|-----------|------| +| zpagesextension | core | stable | | +| pprofextension | contrib | beta | | +| basicauthextension | contrib | beta | | +| bearertokenauthextension | contrib | beta | | +| oauth2clientauthextension | contrib | beta | | +| oidcauthextension | contrib | beta | | +| filestorage | contrib | beta | NEW | + +### Receivers (4) + +| Component | Source | Stability | Note | +|-----------|--------|-----------|------| +| otlpreceiver | core | stable | | +| nopreceiver | core | stable | | +| hostmetricsreceiver | contrib | beta | NEW | +| filelogreceiver | contrib | beta | NEW | + +### Exporters (5) + +| Component | Source | Stability | +|-----------|--------|-----------| +| debugexporter | core | stable | +| nopexporter | core | stable | +| otlpexporter | core | stable | +| otlphttpexporter | core | stable | +| fileexporter | contrib | beta | + +### Processors (7) + +| Component | Source | Stability | Note | +|-----------|--------|-----------|------| +| attributesprocessor | contrib | stable | | +| resourceprocessor | contrib | stable | | +| spanprocessor | contrib | stable | | +| probabilisticsamplerprocessor | contrib | stable | | +| filterprocessor | contrib | stable | | +| transformprocessor | contrib | stable | | +| redactionprocessor | contrib | stable | NEW | + +### Connectors (1) + +| Component | Source | Stability | +|-----------|--------|-----------| +| forwardconnector | core | stable | + +### Providers (3) + +| Component | Source | Stability | +|-----------|--------|-----------| +| envprovider | core | stable | +| fileprovider | core | stable | +| yamlprovider | core | stable | + diff --git a/test/generate-trace.sh b/test/generate-trace.sh index db461b8..131cb77 100755 --- a/test/generate-trace.sh +++ b/test/generate-trace.sh @@ -12,7 +12,7 @@ if [[ -z $distribution ]]; then exit 1 fi -tracegen 
-otlp-endpoint localhost:4317 -otlp-insecure -service e2e-test &>> ./test/logs/tracegen-${distribution}.log +tracegen -otlp-endpoint localhost:4317 -otlp-insecure -service e2e-test >> ./test/logs/tracegen-${distribution}.log 2>&1 if [ $? != 0 ]; then echo "Failed to generate a trace." exit 1 diff --git a/test/start-otelcol.sh b/test/start-otelcol.sh index 8deb435..25d3bb5 100755 --- a/test/start-otelcol.sh +++ b/test/start-otelcol.sh @@ -30,7 +30,7 @@ do exit 1 fi - curl -s localhost:13133 | grep "Server available" > /dev/null + grep -q "Everything is ready" "./test/logs/otelcol-${distribution}.log" if [ $? == 0 ]; then echo "✅ The '${distribution}' distribution of the OpenTelemetry Collector started." break