diff --git a/Cargo.lock b/Cargo.lock index 51a4a74000..f8c6618a21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -21,7 +21,7 @@ dependencies = [ "http 1.4.0", "serde_json", "tokio", - "tower 0.5.2", + "tower", ] [[package]] @@ -304,7 +304,7 @@ dependencies = [ "aws-smithy-http-client", "aws-smithy-runtime-api", "aws-types", - "axum 0.8.7", + "axum", "base64 0.22.1", "blake3", "bloomfilter", @@ -371,7 +371,8 @@ dependencies = [ "num-traits", "oci-client", "once_cell", - "opentelemetry", + "opentelemetry 0.30.0", + "opentelemetry-appender-tracing", "opentelemetry-aws", "opentelemetry-datadog", "opentelemetry-http", @@ -436,7 +437,7 @@ dependencies = [ "tokio-util", "tonic", "tonic-build", - "tower 0.5.2", + "tower", "tower-http", "tower-service", "tower-test", @@ -473,7 +474,7 @@ dependencies = [ "once_cell", "serde_json", "tokio", - "tower 0.5.2", + "tower", ] [[package]] @@ -574,30 +575,7 @@ dependencies = [ "serde_json", "serde_json_bytes", "tokio", - "tower 0.5.2", -] - -[[package]] -name = "async-channel" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35" -dependencies = [ - "concurrent-queue", - "event-listener 2.5.3", - "futures-core", -] - -[[package]] -name = "async-channel" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "924ed96dd52d1b75e9c1a3e6275715fd320f5f9439fb5a4a11fa51f4221158d2" -dependencies = [ - "concurrent-queue", - "event-listener-strategy", - "futures-core", - "pin-project-lite", + "tower", ] [[package]] @@ -614,35 +592,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "async-executor" -version = "1.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "497c00e0fd83a72a79a39fcbd8e3e2f055d6f6c7e025f3b3d91f4f8e76527fb8" -dependencies = [ - "async-task", - "concurrent-queue", - "fastrand", - "futures-lite", - "pin-project-lite", - "slab", -] - -[[package]] 
-name = "async-global-executor" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05b1b633a2115cd122d73b955eadd9916c18c8f510ec9cd1686404c60ad1c29c" -dependencies = [ - "async-channel 2.5.0", - "async-executor", - "async-io", - "async-lock", - "blocking", - "futures-lite", - "once_cell", -] - [[package]] name = "async-graphql" version = "7.0.17" @@ -683,7 +632,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8725874ecfbf399e071150b8619c4071d7b2b7a2f117e173dddef53c6bdb6bb1" dependencies = [ "async-graphql", - "axum 0.8.7", + "axum", "bytes", "futures-util", "serde_json", @@ -734,98 +683,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "async-io" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19634d6336019ef220f09fd31168ce5c184b295cbf80345437cc36094ef223ca" -dependencies = [ - "async-lock", - "cfg-if", - "concurrent-queue", - "futures-io", - "futures-lite", - "parking", - "polling", - "rustix", - "slab", - "windows-sys 0.60.2", -] - -[[package]] -name = "async-lock" -version = "3.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fd03604047cee9b6ce9de9f70c6cd540a0520c813cbd49bae61f33ab80ed1dc" -dependencies = [ - "event-listener 5.4.1", - "event-listener-strategy", - "pin-project-lite", -] - -[[package]] -name = "async-process" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65daa13722ad51e6ab1a1b9c01299142bc75135b337923cfa10e79bbbd669f00" -dependencies = [ - "async-channel 2.5.0", - "async-io", - "async-lock", - "async-signal", - "async-task", - "blocking", - "cfg-if", - "event-listener 5.4.1", - "futures-lite", - "rustix", -] - -[[package]] -name = "async-signal" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f567af260ef69e1d52c2b560ce0ea230763e6fbb9214a85d768760a920e3e3c1" -dependencies = [ - 
"async-io", - "async-lock", - "atomic-waker", - "cfg-if", - "futures-core", - "futures-io", - "rustix", - "signal-hook-registry", - "slab", - "windows-sys 0.60.2", -] - -[[package]] -name = "async-std" -version = "1.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c8e079a4ab67ae52b7403632e4618815d6db36d2a010cfe41b02c1b1578f93b" -dependencies = [ - "async-channel 1.9.0", - "async-global-executor", - "async-io", - "async-lock", - "async-process", - "crossbeam-utils", - "futures-channel", - "futures-core", - "futures-io", - "futures-lite", - "gloo-timers", - "kv-log-macro", - "log", - "memchr", - "once_cell", - "pin-project-lite", - "pin-utils", - "slab", - "wasm-bindgen-futures", -] - [[package]] name = "async-stream" version = "0.3.6" @@ -848,12 +705,6 @@ dependencies = [ "syn 2.0.106", ] -[[package]] -name = "async-task" -version = "4.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b75356056920673b02621b35afd0f7dda9306d03c79a30f5c56c44cf256e3de" - [[package]] name = "async-trait" version = "0.1.89" @@ -1035,7 +886,7 @@ dependencies = [ "rustls-pki-types", "tokio", "tokio-rustls", - "tower 0.5.2", + "tower", "tracing", ] @@ -1153,40 +1004,13 @@ dependencies = [ "tracing", ] -[[package]] -name = "axum" -version = "0.7.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" -dependencies = [ - "async-trait", - "axum-core 0.4.5", - "bytes", - "futures-util", - "http 1.4.0", - "http-body 1.0.1", - "http-body-util", - "itoa", - "matchit 0.7.3", - "memchr", - "mime", - "percent-encoding", - "pin-project-lite", - "rustversion", - "serde", - "sync_wrapper", - "tower 0.5.2", - "tower-layer", - "tower-service", -] - [[package]] name = "axum" version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b098575ebe77cb6d14fc7f32749631a6e44edbef6b796f89b020e99ba20d425" dependencies 
= [ - "axum-core 0.5.5", + "axum-core", "base64 0.22.1", "bytes", "form_urlencoded", @@ -1197,7 +1021,7 @@ dependencies = [ "hyper", "hyper-util", "itoa", - "matchit 0.8.4", + "matchit", "memchr", "mime", "percent-encoding", @@ -1210,32 +1034,12 @@ dependencies = [ "sync_wrapper", "tokio", "tokio-tungstenite", - "tower 0.5.2", + "tower", "tower-layer", "tower-service", "tracing", ] -[[package]] -name = "axum-core" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" -dependencies = [ - "async-trait", - "bytes", - "futures-util", - "http 1.4.0", - "http-body 1.0.1", - "http-body-util", - "mime", - "pin-project-lite", - "rustversion", - "sync_wrapper", - "tower-layer", - "tower-service", -] - [[package]] name = "axum-core" version = "0.5.5" @@ -1363,19 +1167,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "blocking" -version = "1.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e83f8d02be6967315521be875afa792a316e28d57b5a2d401897e2a7921b7f21" -dependencies = [ - "async-channel 2.5.0", - "async-task", - "futures-io", - "futures-lite", - "piper", -] - [[package]] name = "bloomfilter" version = "3.0.1" @@ -1504,7 +1295,7 @@ dependencies = [ "http 1.4.0", "serde_json", "tokio", - "tower 0.5.2", + "tower", ] [[package]] @@ -1716,15 +1507,6 @@ dependencies = [ "windows-sys 0.45.0", ] -[[package]] -name = "concurrent-queue" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" -dependencies = [ - "crossbeam-utils", -] - [[package]] name = "console" version = "0.15.11" @@ -1810,7 +1592,7 @@ dependencies = [ "apollo-router", "async-trait", "http 1.4.0", - "tower 0.5.2", + "tower", "tracing", ] @@ -1848,7 +1630,7 @@ dependencies = [ "http 1.4.0", "serde_json", "tokio", - "tower 0.5.2", + "tower", ] [[package]] 
@@ -2496,43 +2278,16 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5692dd7b5a1978a5aeb0ce83b7655c58ca8efdcb79d21036ea249da95afec2c6" -[[package]] -name = "event-listener" -version = "2.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" - -[[package]] -name = "event-listener" -version = "5.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" -dependencies = [ - "concurrent-queue", - "parking", - "pin-project-lite", -] - -[[package]] -name = "event-listener-strategy" -version = "0.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" -dependencies = [ - "event-listener 5.4.1", - "pin-project-lite", -] - [[package]] name = "everything-subgraph" version = "0.1.0" dependencies = [ "async-graphql", "async-graphql-axum", - "axum 0.8.7", + "axum", "env_logger", "tokio", - "tower 0.5.2", + "tower", ] [[package]] @@ -2551,7 +2306,7 @@ dependencies = [ "serde", "serde_json", "tokio", - "tower 0.5.2", + "tower", "tracing", ] @@ -2673,7 +2428,7 @@ dependencies = [ "http 1.4.0", "serde_json", "tokio", - "tower 0.5.2", + "tower", "tracing", ] @@ -2686,7 +2441,7 @@ dependencies = [ "http 1.4.0", "serde_json", "tokio", - "tower 0.5.2", + "tower", ] [[package]] @@ -2841,19 +2596,6 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" -[[package]] -name = "futures-lite" -version = "2.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f78e10609fe0e0b3f4157ffab1876319b5b0db102a2c60dc4626306dc46b44ad" -dependencies = [ - "fastrand", - "futures-core", - "futures-io", - "parking", - "pin-project-lite", -] - 
[[package]] name = "futures-macro" version = "0.3.31" @@ -3044,18 +2786,6 @@ dependencies = [ "regex-syntax", ] -[[package]] -name = "gloo-timers" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbb143cf96099802033e0d4f4963b19fd2e0b728bcf076cd9cf7f6634f092994" -dependencies = [ - "futures-channel", - "futures-core", - "js-sys", - "wasm-bindgen", -] - [[package]] name = "graphql-introspection-query" version = "0.2.0" @@ -3233,7 +2963,7 @@ dependencies = [ "serde", "serde_json", "tokio", - "tower 0.5.2", + "tower", "tracing", ] @@ -3767,15 +3497,6 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" -[[package]] -name = "itertools" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.13.0" @@ -3934,7 +3655,7 @@ dependencies = [ "http 1.4.0", "serde_json", "tokio", - "tower 0.5.2", + "tower", ] [[package]] @@ -3957,15 +3678,6 @@ dependencies = [ "libc", ] -[[package]] -name = "kv-log-macro" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0de8b303297635ad57c9f5059fd9cee7a47f8e8daa09df0fcd07dd39fb22977f" -dependencies = [ - "log", -] - [[package]] name = "lazy_static" version = "1.5.0" @@ -4076,9 +3788,6 @@ name = "log" version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" -dependencies = [ - "value-bag", -] [[package]] name = "loom" @@ -4123,12 +3832,6 @@ dependencies = [ "regex-automata", ] -[[package]] -name = "matchit" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" - [[package]] name = "matchit" version = "0.8.4" @@ -4612,7 +4315,7 @@ dependencies = [ "http 1.4.0", "serde_json", "tokio", - "tower 0.5.2", + "tower", ] [[package]] @@ -4623,116 +4326,137 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "opentelemetry" -version = "0.24.0" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c365a63eec4f55b7efeceb724f1336f26a9cf3427b70e59e2cd2a5b947fba96" +checksum = "ab70038c28ed37b97d8ed414b6429d343a8bbf44c9f79ec854f3a643029ba6d7" dependencies = [ "futures-core", "futures-sink", "js-sys", - "once_cell", "pin-project-lite", "thiserror 1.0.69", + "tracing", +] + +[[package]] +name = "opentelemetry" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aaf416e4cb72756655126f7dd7bb0af49c674f4c1b9903e80c009e0c37e552e6" +dependencies = [ + "futures-core", + "futures-sink", + "js-sys", + "pin-project-lite", + "thiserror 2.0.17", + "tracing", +] + +[[package]] +name = "opentelemetry-appender-tracing" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5feffc321035ad94088a7e5333abb4d84a8726e54a802e736ce9dd7237e85b" +dependencies = [ + "opentelemetry 0.27.1", + "tracing", + "tracing-core", + "tracing-subscriber", ] [[package]] name = "opentelemetry-aws" -version = "0.12.0" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f2e5bd1a2e1d14877086a2defe4ac968f42a6a15cfc5862a0f0ecd0f3530135" +checksum = "6f3632d7494b282abac9039ef96e8619d86e8a3d0650dc6b9072920d25e2310a" dependencies = [ - "once_cell", - "opentelemetry", + "opentelemetry 0.30.0", "opentelemetry_sdk", + "tracing", ] [[package]] name = "opentelemetry-datadog" -version = "0.12.0" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e55061f0b4acd624ce67434c4a6d6d1b5c341d62564bf80094bdaef884f1bf5b" +checksum = "9f313abaf8192afe5b239582626cd1dc60065e46cf05f396ff8b2116da1f2de3" dependencies = [ "ahash", - "futures-core", "http 1.4.0", "indexmap 2.12.1", - "itertools 0.11.0", "itoa", - "once_cell", - "opentelemetry", + "opentelemetry 0.30.0", "opentelemetry-http", "opentelemetry-semantic-conventions", "opentelemetry_sdk", "reqwest", "rmp", "ryu", - "thiserror 1.0.69", + "thiserror 2.0.17", "url", ] [[package]] name = "opentelemetry-http" -version = "0.13.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad31e9de44ee3538fb9d64fe3376c1362f406162434609e79aea2a41a0af78ab" +checksum = "50f6639e842a97dbea8886e3439710ae463120091e2e064518ba8e716e6ac36d" dependencies = [ "async-trait", "bytes", "http 1.4.0", - "opentelemetry", + "opentelemetry 0.30.0", "reqwest", ] [[package]] name = "opentelemetry-jaeger-propagator" -version = "0.3.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc0a68a13b92fc708d875ad659b08b35d08b8ef2403e01944b39ca21e5b08b17" +checksum = "090b8ec07bb2e304b529581aa1fe530d7861298c9ef549ebbf44a4a56472c539" dependencies = [ - "opentelemetry", + "opentelemetry 0.30.0", ] [[package]] name = "opentelemetry-otlp" -version = "0.17.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b925a602ffb916fb7421276b86756027b37ee708f9dce2dbdcc51739f07e727" +checksum = "dbee664a43e07615731afc539ca60c6d9f1a9425e25ca09c57bc36c87c55852b" dependencies = [ - "async-trait", - "futures-core", "http 1.4.0", - "opentelemetry", + "opentelemetry 0.30.0", "opentelemetry-http", "opentelemetry-proto", "opentelemetry_sdk", "prost", "reqwest", - "thiserror 1.0.69", + "thiserror 2.0.17", "tokio", "tonic", ] [[package]] name = "opentelemetry-prometheus" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"cc4191ce34aa274621861a7a9d68dbcf618d5b6c66b10081631b61fd81fbc015" +version = "0.29.1" +source = "git+https://github.com/sandhose/opentelemetry-rust.git?branch=otel-prometheus-0.30#a60122ce4d534631b70222d84a19bfcb8e9d69a2" dependencies = [ "once_cell", - "opentelemetry", + "opentelemetry 0.30.0", "opentelemetry_sdk", "prometheus", - "protobuf", + "tracing", ] [[package]] name = "opentelemetry-proto" -version = "0.7.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30ee9f20bff9c984511a02f082dc8ede839e4a9bf15cc2487c8d6fea5ad850d9" +checksum = "2e046fd7660710fe5a05e8748e70d9058dc15c94ba914e7c4faa7c728f0e8ddc" dependencies = [ + "base64 0.22.1", "hex", - "opentelemetry", + "opentelemetry 0.30.0", "opentelemetry_sdk", "prost", "serde", @@ -4741,66 +4465,53 @@ dependencies = [ [[package]] name = "opentelemetry-semantic-conventions" -version = "0.16.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cefe0543875379e47eb5f1e68ff83f45cc41366a92dfd0d073d513bf68e9a05" +checksum = "83d059a296a47436748557a353c5e6c5705b9470ef6c95cfc52c21a8814ddac2" [[package]] name = "opentelemetry-stdout" -version = "0.5.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d408d4345b8be6129a77c46c3bfc75f0d3476f3091909c7dd99c1f3d78582287" +checksum = "447191061af41c3943e082ea359ab8b64ff27d6d34d30d327df309ddef1eef6f" dependencies = [ - "async-trait", "chrono", - "futures-util", - "opentelemetry", + "opentelemetry 0.30.0", "opentelemetry_sdk", - "ordered-float", - "serde", - "serde_json", - "thiserror 1.0.69", ] [[package]] name = "opentelemetry-zipkin" -version = "0.22.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e68336254a44c5c20574989699582175910b933be85a593a13031ee58811d93d" +checksum = "8823568a9abd4003d1acc564bcfec8e3406a8c73203f8f50bdfc88b44d7ecfb6" dependencies = [ - "async-trait", - 
"futures-core", "http 1.4.0", "once_cell", - "opentelemetry", + "opentelemetry 0.30.0", "opentelemetry-http", - "opentelemetry-semantic-conventions", "opentelemetry_sdk", "reqwest", "serde", "serde_json", - "thiserror 1.0.69", + "thiserror 2.0.17", "typed-builder", ] [[package]] name = "opentelemetry_sdk" -version = "0.24.1" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "692eac490ec80f24a17828d49b40b60f5aeaccdfe6a503f939713afd22bc28df" +checksum = "11f644aa9e5e31d11896e024305d7e3c98a88884d9f8919dbf37a9991bc47a4b" dependencies = [ - "async-std", - "async-trait", "futures-channel", "futures-executor", "futures-util", - "glob", - "once_cell", - "opentelemetry", + "opentelemetry 0.30.0", "percent-encoding", - "rand 0.8.5", + "rand 0.9.2", "serde_json", - "thiserror 1.0.69", + "thiserror 2.0.17", "tokio", "tokio-stream", ] @@ -4811,15 +4522,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" -[[package]] -name = "ordered-float" -version = "4.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bb71e1b3fa6ca1c61f383464aaf2bb0e2f8e772a1f01d486832464de363b951" -dependencies = [ - "num-traits", -] - [[package]] name = "outref" version = "0.5.2" @@ -4848,12 +4550,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "parking" -version = "2.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" - [[package]] name = "parking_lot" version = "0.12.5" @@ -5007,17 +4703,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" -[[package]] -name = "piper" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"96c8c490f422ef9a4efd2cb5b42b76c8613d7e7dfc1caf667b8a3350a5acc066" -dependencies = [ - "atomic-waker", - "fastrand", - "futures-io", -] - [[package]] name = "pkcs8" version = "0.10.2" @@ -5062,20 +4747,6 @@ dependencies = [ "plotters-backend", ] -[[package]] -name = "polling" -version = "3.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5bd19146350fe804f7cb2669c851c03d69da628803dab0d98018142aaa5d829" -dependencies = [ - "cfg-if", - "concurrent-queue", - "hermit-abi", - "pin-project-lite", - "rustix", - "windows-sys 0.60.2", -] - [[package]] name = "portable-atomic" version = "1.11.1" @@ -5212,9 +4883,9 @@ dependencies = [ [[package]] name = "prometheus" -version = "0.13.4" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d33c28a30771f7f96db69893f78b857f7450d7e0237e9c8fc6427a81bae7ed1" +checksum = "3ca5326d8d0b950a9acd87e6a3f94745394f62e4dae1b1ee22b2bc0c394af43a" dependencies = [ "cfg-if", "fnv", @@ -5222,7 +4893,7 @@ dependencies = [ "memchr", "parking_lot", "protobuf", - "thiserror 1.0.69", + "thiserror 2.0.17", ] [[package]] @@ -5237,7 +4908,7 @@ dependencies = [ "serde", "serde_json", "tokio", - "tower 0.5.2", + "tower", ] [[package]] @@ -5308,9 +4979,23 @@ dependencies = [ [[package]] name = "protobuf" -version = "2.28.0" +version = "3.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d65a1d4ddae7d8b5de68153b48f6aa3bba8cb002b243dbdbc55a5afbc98f99f4" +dependencies = [ + "once_cell", + "protobuf-support", + "thiserror 1.0.69", +] + +[[package]] +name = "protobuf-support" +version = "3.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" +checksum = "3e36c2f31e0a47f9280fb347ef5e461ffcd2c52dd520d8e216b52f93b0b0d7d6" +dependencies = [ + "thiserror 1.0.69", +] [[package]] name = "quinn" @@ -5646,7 +5331,7 @@ dependencies = [ "tokio", "tokio-rustls", 
"tokio-util", - "tower 0.5.2", + "tower", "tower-http", "tower-service", "url", @@ -5701,7 +5386,7 @@ dependencies = [ "http 1.4.0", "serde_json", "tokio", - "tower 0.5.2", + "tower", ] [[package]] @@ -5713,7 +5398,7 @@ dependencies = [ "http 1.4.0", "serde_json", "tokio", - "tower 0.5.2", + "tower", ] [[package]] @@ -5725,7 +5410,7 @@ dependencies = [ "http 1.4.0", "serde_json", "tokio", - "tower 0.5.2", + "tower", ] [[package]] @@ -5737,7 +5422,7 @@ dependencies = [ "http 1.4.0", "serde_json", "tokio", - "tower 0.5.2", + "tower", ] [[package]] @@ -5749,7 +5434,7 @@ dependencies = [ "http 1.4.0", "serde_json", "tokio", - "tower 0.5.2", + "tower", ] [[package]] @@ -5823,7 +5508,7 @@ dependencies = [ "serde", "serde_json", "serde_json_bytes", - "tower 0.5.2", + "tower", ] [[package]] @@ -6548,7 +6233,7 @@ dependencies = [ "apollo-compiler", "apollo-router", "async-trait", - "tower 0.5.2", + "tower", "tracing", ] @@ -6755,7 +6440,7 @@ dependencies = [ "http-body-util", "serde_json", "tokio", - "tower 0.5.2", + "tower", ] [[package]] @@ -6979,13 +6664,12 @@ dependencies = [ [[package]] name = "tonic" -version = "0.12.3" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" +checksum = "7e581ba15a835f4d9ea06c55ab1bd4dce26fc53752c69a04aac00703bfb49ba9" dependencies = [ - "async-stream", "async-trait", - "axum 0.7.9", + "axum", "base64 0.22.1", "bytes", "flate2", @@ -7000,12 +6684,11 @@ dependencies = [ "pin-project", "prost", "rustls-native-certs", - "rustls-pemfile", "socket2 0.5.10", "tokio", "tokio-rustls", "tokio-stream", - "tower 0.4.13", + "tower", "tower-layer", "tower-service", "tracing", @@ -7025,26 +6708,6 @@ dependencies = [ "syn 2.0.106", ] -[[package]] -name = "tower" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" -dependencies = [ - 
"futures-core", - "futures-util", - "indexmap 1.9.3", - "pin-project", - "pin-project-lite", - "rand 0.8.5", - "slab", - "tokio", - "tokio-util", - "tower-layer", - "tower-service", - "tracing", -] - [[package]] name = "tower" version = "0.5.2" @@ -7089,7 +6752,7 @@ dependencies = [ "pin-project-lite", "tokio", "tokio-util", - "tower 0.5.2", + "tower", "tower-layer", "tower-service", "tracing", @@ -7190,13 +6853,13 @@ dependencies = [ [[package]] name = "tracing-opentelemetry" -version = "0.25.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9784ed4da7d921bc8df6963f8c80a0e4ce34ba6ba76668acadd3edbd985ff3b" +checksum = "ddcf5959f39507d0d04d6413119c04f33b623f4f951ebcbdddddfad2d0623a9c" dependencies = [ "js-sys", "once_cell", - "opentelemetry", + "opentelemetry 0.30.0", "opentelemetry_sdk", "smallvec", "tracing", @@ -7327,18 +6990,18 @@ checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" [[package]] name = "typed-builder" -version = "0.18.2" +version = "0.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77739c880e00693faef3d65ea3aad725f196da38b22fdc7ea6ded6e1ce4d3add" +checksum = "cd9d30e3a08026c78f246b173243cf07b3696d274debd26680773b6773c2afc7" dependencies = [ "typed-builder-macro", ] [[package]] name = "typed-builder-macro" -version = "0.18.2" +version = "0.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f718dfaf347dcb5b983bfc87608144b0bad87970aebcbea5ce44d2a30c08e63" +checksum = "3c36781cc0e46a83726d9879608e4cf6c2505237e263a8eb8c24502989cfdb28" dependencies = [ "proc-macro2", "quote", @@ -7523,12 +7186,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" -[[package]] -name = "value-bag" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7ba6f5989077681266825251a52748b8c1d8a4ad098cc37e440103d0ea717fc0" - [[package]] name = "version_check" version = "0.9.5" diff --git a/Cargo.toml b/Cargo.toml index 2f470b9af6..99265398c9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -75,3 +75,7 @@ sha1 = "0.10.6" tempfile = "3.10.1" tokio = { version = "1.36.0", features = ["full"] } tower = { version = "0.5.1", features = ["full"] } + + +[patch.crates-io] +opentelemetry-prometheus = { git = "https://github.com/sandhose/opentelemetry-rust.git", branch = "otel-prometheus-0.30"} \ No newline at end of file diff --git a/apollo-router/Cargo.toml b/apollo-router/Cargo.toml index d54343a06d..8194869a70 100644 --- a/apollo-router/Cargo.toml +++ b/apollo-router/Cargo.toml @@ -161,20 +161,23 @@ once_cell = "1.19.0" # groups `^tracing` and `^opentelemetry*` dependencies together as of # https://github.com/apollographql/router/pull/1509. A comment which exists # there (and on `tracing` packages below) should be updated should this change. -opentelemetry = { version = "0.24.0", features = ["trace", "metrics"] } -opentelemetry_sdk = { version = "0.24.1", default-features = false, features = [ +opentelemetry = { version = "0.30.0", features = ["trace", "metrics"] } +opentelemetry_sdk = { version = "0.30.0", default-features = false, features = [ "rt-tokio", "trace", + "spec_unstable_metrics_views", + "experimental_trace_batch_span_processor_with_async_runtime", + "metrics", + "experimental_metrics_custom_reader", + "experimental_metrics_periodicreader_with_async_runtime", ] } -opentelemetry-aws = "0.12.0" -# START TEMP DATADOG Temporarily remove until we upgrade otel to the latest version -# This means including the rmp library -# opentelemetry-datadog = { version = "0.12.0", features = ["reqwest-client"] } +opentelemetry-appender-tracing = "0.27.0" +opentelemetry-aws = "0.18.0" +opentelemetry-datadog = { version = "0.18.0", features = ["agent-sampling", "reqwest-client"] } rmp = "0.8" -# END TEMP DATADOG -opentelemetry-http = 
"0.13.0" -opentelemetry-jaeger-propagator = "0.3.0" -opentelemetry-otlp = { version = "0.17.0", default-features = false, features = [ +opentelemetry-http = "0.30.0" +opentelemetry-jaeger-propagator = "0.30.0" +opentelemetry-otlp = { version = "0.30.0", default-features = false, features = [ "grpc-tonic", "gzip-tonic", "tonic", @@ -183,16 +186,17 @@ opentelemetry-otlp = { version = "0.17.0", default-features = false, features = "metrics", "reqwest-client", "trace", + "tls" ] } -opentelemetry-semantic-conventions = "0.16.0" -opentelemetry-zipkin = { version = "0.22.0", default-features = false, features = [ +opentelemetry-semantic-conventions = { version = "0.30.0", features = ["semconv_experimental"]} +opentelemetry-zipkin = { version = "0.30.0", default-features = false, features = [ "reqwest-client", "reqwest-rustls", ] } -opentelemetry-prometheus = "0.17.0" +opentelemetry-prometheus = "0.29.1" paste = "1.0.15" pin-project-lite = "0.2.14" -prometheus = "0.13" +prometheus = "0.14" prost = "0.13.0" prost-types = "0.13.0" proteus = "0.5.0" @@ -233,11 +237,10 @@ thiserror = "2.0.0" tokio.workspace = true tokio-stream = { version = "0.1.15", features = ["sync", "net", "fs"] } tokio-util = { version = "0.7.11", features = ["net", "codec", "time", "compat"] } -tonic = { version = "0.12.3", features = [ +tonic = { version = "0.13.1", features = [ "transport", - "tls", - "tls-roots", "gzip", + "tls-native-roots", ] } tower.workspace = true tower-http = { version = "0.6.2", features = ["full"] } @@ -319,16 +322,16 @@ memchr = { version = "2.7.4", default-features = false } mockall = "0.14.0" num-traits = "0.2.19" once_cell.workspace = true -opentelemetry-stdout = { version = "0.5.0", features = ["trace"] } -opentelemetry = { version = "0.24.0", features = ["testing"] } -opentelemetry_sdk = { version = "0.24.1", features = ["testing"] } -opentelemetry-proto = { version = "0.7.0", features = [ +opentelemetry-stdout = { version = "0.30.0", features = ["trace"] } 
+opentelemetry = { version = "0.30.0", features = ["testing"] } +opentelemetry_sdk = { version = "0.30.0", features = ["testing"] } +opentelemetry-proto = { version = "0.30.0", features = [ "metrics", "trace", "gen-tonic-messages", "with-serde", ] } -opentelemetry-datadog = { version = "0.12.0", features = ["reqwest-client"] } +opentelemetry-datadog = { version = "0.18.0", features = ["agent-sampling", "reqwest-client"] } p256 = "0.13.2" pretty_assertions = "1.4.1" reqwest = { version = "0.12.9", default-features = false, features = [ @@ -355,7 +358,7 @@ tracing-subscriber = { version = "0.3.20", default-features = false, features = "env-filter", "fmt", ] } -tracing-opentelemetry = "0.25.0" +tracing-opentelemetry = "0.31.0" tracing-test = "=0.2.5" tracing-mock = "0.1.0-beta.1" walkdir = "2.5.0" diff --git a/apollo-router/src/axum_factory/metrics.rs b/apollo-router/src/axum_factory/metrics.rs index 9f2d3ced3e..58a522fbde 100644 --- a/apollo-router/src/axum_factory/metrics.rs +++ b/apollo-router/src/axum_factory/metrics.rs @@ -32,7 +32,7 @@ pub(crate) mod jemalloc { tracing::warn!("Failed to read jemalloc {} stats", stringify!($name)); } }) - .init() + .build() }; } diff --git a/apollo-router/src/cache/metrics.rs b/apollo-router/src/cache/metrics.rs index 3e63a7032a..ad98a46410 100644 --- a/apollo-router/src/cache/metrics.rs +++ b/apollo-router/src/cache/metrics.rs @@ -206,7 +206,7 @@ impl RedisMetricsCollector { &[KeyValue::new("kind", caller)], ); }) - .init() + .build() } /// Generic method to create a weighted average gauge @@ -237,7 +237,7 @@ impl RedisMetricsCollector { gauge.observe(average, &[KeyValue::new("kind", caller)]); }) - .init() + .build() } fn create_client_count_gauge() -> ObservableGauge { @@ -249,7 +249,7 @@ impl RedisMetricsCollector { .with_callback(move |gauge| { gauge.observe(ACTIVE_CLIENT_COUNT.load(Ordering::Relaxed), &[]); }) - .init() + .build() } /// Spawn the metrics collection task @@ -521,7 +521,7 @@ mod tests { // Verify Redis 
connection metrics are emitted. // Since this metric is based on a global AtomicU64, it's not unique across tests - so // we can only reliably check for metric existence, rather than a specific value. - crate::metrics::collect_metrics().metric_exists::( + crate::metrics::collect_metrics().metric_exists( "apollo.router.cache.redis.clients", MetricType::Gauge, &[], diff --git a/apollo-router/src/cache/storage.rs b/apollo-router/src/cache/storage.rs index bfb8eed3e5..bb13a745ba 100644 --- a/apollo-router/src/cache/storage.rs +++ b/apollo-router/src/cache/storage.rs @@ -128,7 +128,7 @@ where ], ) }) - .init() + .build() } fn create_cache_estimated_storage_size_gauge(&self) -> ObservableGauge { @@ -153,7 +153,7 @@ where ) } }) - .init() + .build() } /// `init_from_redis` is called with values newly deserialized from Redis cache @@ -346,6 +346,7 @@ mod test { cache.activate(); cache.insert("test".to_string(), Stuff {}).await; + assert_gauge!( "apollo.router.cache.storage.estimated_size", 1, diff --git a/apollo-router/src/compute_job/metrics.rs b/apollo-router/src/compute_job/metrics.rs index 5689f2432a..18bce7299d 100644 --- a/apollo-router/src/compute_job/metrics.rs +++ b/apollo-router/src/compute_job/metrics.rs @@ -97,36 +97,41 @@ mod tests { use crate::compute_job::ComputeJobType; use crate::compute_job::metrics::JobWatcher; use crate::compute_job::metrics::Outcome; + use crate::metrics::FutureMetricsExt; - #[test] - fn test_job_watcher() { - let check_histogram_count = - |count: u64, job_type: &'static str, job_outcome: &'static str| { - assert_histogram_count!( - "apollo.router.compute_jobs.duration", - count, - "job.type" = job_type, - "job.outcome" = job_outcome - ); - }; + #[tokio::test(flavor = "multi_thread")] + async fn test_job_watcher() { + async { + let check_histogram_count = + |count: u64, job_type: &'static str, job_outcome: &'static str| { + assert_histogram_count!( + "apollo.router.compute_jobs.duration", + count, + "job.type" = job_type, + 
"job.outcome" = job_outcome + ); + }; - { - let _job_watcher = JobWatcher::new(ComputeJobType::Introspection); - } - check_histogram_count(1, "introspection", "abandoned"); - - { - let mut job_watcher = JobWatcher::new(ComputeJobType::QueryParsing); - job_watcher.outcome = Outcome::ExecutedOk; - } - check_histogram_count(1, "query_parsing", "executed_ok"); + { + let _job_watcher = JobWatcher::new(ComputeJobType::Introspection); + } + check_histogram_count(1, "introspection", "abandoned"); - for count in 1..5 { { - let mut job_watcher = JobWatcher::new(ComputeJobType::QueryPlanning); - job_watcher.outcome = Outcome::RejectedQueueFull; + let mut job_watcher = JobWatcher::new(ComputeJobType::QueryParsing); + job_watcher.outcome = Outcome::ExecutedOk; + } + check_histogram_count(1, "query_parsing", "executed_ok"); + + for count in 1..5 { + { + let mut job_watcher = JobWatcher::new(ComputeJobType::QueryPlanning); + job_watcher.outcome = Outcome::RejectedQueueFull; + } + check_histogram_count(count, "query_planning", "rejected_queue_full"); } - check_histogram_count(count, "query_planning", "rejected_queue_full"); } + .with_metrics() + .await; } } diff --git a/apollo-router/src/compute_job/mod.rs b/apollo-router/src/compute_job/mod.rs index f5e3bff302..76a38bd883 100644 --- a/apollo-router/src/compute_job/mod.rs +++ b/apollo-router/src/compute_job/mod.rs @@ -314,7 +314,7 @@ pub(crate) fn create_queue_size_gauge() -> ObservableGauge { "Number of computation jobs (parsing, planning, …) waiting to be scheduled", ) .with_callback(move |m| m.observe(queue().queued_count() as u64, &[])) - .init() + .build() } #[cfg(test)] diff --git a/apollo-router/src/compute_job/snapshots/apollo_router__compute_job__tests__observability@logs.snap b/apollo-router/src/compute_job/snapshots/apollo_router__compute_job__tests__observability@logs.snap index cf3b122079..d06adf65f4 100644 --- a/apollo-router/src/compute_job/snapshots/apollo_router__compute_job__tests__observability@logs.snap +++ 
b/apollo-router/src/compute_job/snapshots/apollo_router__compute_job__tests__observability@logs.snap @@ -1,6 +1,7 @@ --- source: apollo-router/src/compute_job/mod.rs expression: yaml +snapshot_kind: text --- - fields: {} level: INFO diff --git a/apollo-router/src/configuration/metrics.rs b/apollo-router/src/configuration/metrics.rs index 8ed4a58851..a9ee6b45a7 100644 --- a/apollo-router/src/configuration/metrics.rs +++ b/apollo-router/src/configuration/metrics.rs @@ -591,7 +591,7 @@ impl From for Metrics { .with_callback(move |observer| { observer.observe(value, &attributes); }) - .init() + .build() }) .collect(), } @@ -615,17 +615,24 @@ mod test { #[test] fn test_metrics() { for file_name in Asset::iter() { - let source = Asset::get(&file_name).expect("test file must exist"); - let input = std::str::from_utf8(&source.data) - .expect("expected utf8") - .to_string(); - let yaml = &serde_yaml::from_str::(&input) - .expect("config must be valid yaml"); - - let mut data = InstrumentData::default(); - data.populate_config_instruments(yaml); - let _metrics: Metrics = data.into(); - assert_non_zero_metrics_snapshot!(file_name); + let file_name = file_name.to_string(); + // Spawn a new thread (and therefore meter provider) per file so metrics don't carry + // over to next iteration. 
+ std::thread::spawn(move || { + let source = Asset::get(&file_name).expect("test file must exist"); + let input = std::str::from_utf8(&source.data) + .expect("expected utf8") + .to_string(); + let yaml = &serde_yaml::from_str::(&input) + .expect("config must be valid yaml"); + + let mut data = InstrumentData::default(); + data.populate_config_instruments(yaml); + let _metrics: Metrics = data.into(); + assert_non_zero_metrics_snapshot!(file_name); + }) + .join() + .expect("metrics test thread panicked") } } diff --git a/apollo-router/src/configuration/snapshots/apollo_router__configuration__tests__schema_generation.snap b/apollo-router/src/configuration/snapshots/apollo_router__configuration__tests__schema_generation.snap index 5bf47248b1..043c789795 100644 --- a/apollo-router/src/configuration/snapshots/apollo_router__configuration__tests__schema_generation.snap +++ b/apollo-router/src/configuration/snapshots/apollo_router__configuration__tests__schema_generation.snap @@ -1,6 +1,7 @@ --- source: apollo-router/src/configuration/tests.rs expression: "&schema" +snapshot_kind: text --- { "$schema": "http://json-schema.org/draft-07/schema#", diff --git a/apollo-router/src/executable.rs b/apollo-router/src/executable.rs index 63dfb69083..3b6e998a67 100644 --- a/apollo-router/src/executable.rs +++ b/apollo-router/src/executable.rs @@ -441,7 +441,6 @@ impl Executable { if apollo_telemetry_initialized { // We should be good to shutdown OpenTelemetry now as the router should have finished everything. 
tokio::task::spawn_blocking(move || { - opentelemetry::global::shutdown_tracer_provider(); meter_provider_internal().shutdown(); }) .await?; diff --git a/apollo-router/src/logging/mod.rs b/apollo-router/src/logging/mod.rs index 5d0b75879d..7d2ebbf20b 100644 --- a/apollo-router/src/logging/mod.rs +++ b/apollo-router/src/logging/mod.rs @@ -68,7 +68,7 @@ pub(crate) mod test { } else { let parsed_log: Vec = log .lines() - .map(|line| { + .filter_map(|line| { let mut line: serde_json::Value = serde_json::from_str(line).unwrap(); // move the message field to the top level let fields = line @@ -78,11 +78,19 @@ pub(crate) mod test { .unwrap() .as_object_mut() .unwrap(); + // Remove noisy OTel logs when meter provider is dropped + if matches!( + fields.get("name").and_then(|v| v.as_str()), + Some("MeterProvider.Drop") + ) { + return None; + } + let message = fields.remove("message").unwrap_or_default(); line.as_object_mut() .unwrap() .insert("message".to_string(), message); - line + Some(line) }) .collect(); serde_json::json!(parsed_log) diff --git a/apollo-router/src/metrics/aggregation.rs b/apollo-router/src/metrics/aggregation.rs index 3ff20ff97b..91802ed381 100644 --- a/apollo-router/src/metrics/aggregation.rs +++ b/apollo-router/src/metrics/aggregation.rs @@ -1,32 +1,25 @@ -use std::any::Any; use std::borrow::Cow; use std::collections::HashMap; use std::mem; +use std::mem::take; use std::sync::Arc; use derive_more::From; use itertools::Itertools; +use opentelemetry::InstrumentationScope; use opentelemetry::KeyValue; -use opentelemetry::metrics::AsyncInstrument; use opentelemetry::metrics::Callback; -use opentelemetry::metrics::CallbackRegistration; use opentelemetry::metrics::Counter; use opentelemetry::metrics::Gauge; use opentelemetry::metrics::Histogram; use opentelemetry::metrics::InstrumentProvider; use opentelemetry::metrics::Meter; use opentelemetry::metrics::MeterProvider; -use opentelemetry::metrics::ObservableCounter; use 
opentelemetry::metrics::ObservableGauge; use opentelemetry::metrics::ObservableUpDownCounter; -use opentelemetry::metrics::Observer; -use opentelemetry::metrics::SyncCounter; -use opentelemetry::metrics::SyncGauge; -use opentelemetry::metrics::SyncHistogram; -use opentelemetry::metrics::SyncUpDownCounter; +use opentelemetry::metrics::SyncInstrument; use opentelemetry::metrics::UpDownCounter; -use opentelemetry::metrics::noop::NoopMeterProvider; -use opentelemetry_sdk::metrics::SdkMeterProvider; +use opentelemetry_sdk::metrics::{ManualReader, SdkMeterProvider}; use parking_lot::Mutex; use strum::Display; use strum::EnumCount; @@ -86,7 +79,13 @@ impl Default for Inner { providers: (0..MeterProviderType::COUNT) .map(|_| { ( - FilterMeterProvider::public(SdkMeterProvider::default()), + FilterMeterProvider::public( + SdkMeterProvider::builder() + // Set with a noop reader so Async Instruments' pipelines don't + // have an InstrumentCreationError that they have no inserters + // (i.e. readers) when users don't enable telemetry. 
+ .with_reader(ManualReader::default()) + .build()), HashMap::new(), ) }) @@ -220,24 +219,14 @@ impl Inner { pub(crate) fn invalidate(&mut self) { self.registered_instruments.clear() } - pub(crate) fn meter(&mut self, name: impl Into>) -> Meter { - self.versioned_meter( - name, - None::>, - None::>, - None, - ) + pub(crate) fn meter(&mut self, name: &'static str) -> Meter { + self.meter_with_scope(InstrumentationScope::builder(name).build()) } - pub(crate) fn versioned_meter( - &mut self, - name: impl Into>, - version: Option>>, - schema_url: Option>>, - attributes: Option>, - ) -> Meter { - let name = name.into(); - let version = version.map(|v| v.into()); - let schema_url = schema_url.map(|v| v.into()); + + pub(crate) fn meter_with_scope(&mut self, scope: InstrumentationScope) -> Meter { + let version = scope.version().map(|v| Cow::Owned(v.to_string())); + let schema_url = scope.schema_url().map(|v| Cow::Owned(v.to_string())); + let name: Cow<'static, str> = Cow::Owned(scope.name().to_string()); let mut meters = Vec::with_capacity(self.providers.len()); for (provider, existing_meters) in &mut self.providers { @@ -248,18 +237,10 @@ impl Inner { version: version.clone(), schema_url: schema_url.clone(), }) - .or_insert_with(|| { - provider.versioned_meter( - name.clone(), - version.clone(), - schema_url.clone(), - attributes.clone(), - ) - }) + .or_insert_with(|| provider.meter_with_scope(scope.clone())) .clone(), ); } - Meter::new(Arc::new(AggregateInstrumentProvider { meters })) } @@ -277,19 +258,24 @@ impl Inner { } impl MeterProvider for AggregateMeterProvider { - fn versioned_meter( - &self, - name: impl Into>, - version: Option>>, - schema_url: Option>>, - attributes: Option>, - ) -> Meter { + fn meter(&self, name: &'static str) -> Meter { + let mut inner = self.inner.lock(); + if let Some(inner) = inner.as_mut() { + inner.meter(name) + } else { + // The meter was used after shutdown. 
Fall back to a meter from a provider with no + // readers since the instrument cannot actually be used + SdkMeterProvider::default().meter(name) + } + } + fn meter_with_scope(&self, scope: opentelemetry::InstrumentationScope) -> Meter { let mut inner = self.inner.lock(); if let Some(inner) = inner.as_mut() { - inner.versioned_meter(name, version, schema_url, attributes) + inner.meter_with_scope(scope) } else { - // The meter was used after shutdown. Default to Noop since the instrument cannot actually be used - NoopMeterProvider::default().versioned_meter(name, version, schema_url, attributes) + // The meter was used after shutdown. Fall back to a meter from a provider with no + // readers since the instrument cannot actually be used + SdkMeterProvider::default().meter_with_scope(scope) } } } @@ -302,36 +288,20 @@ pub(crate) struct AggregateCounter { delegates: Vec>, } -impl SyncCounter for AggregateCounter { - fn add(&self, value: T, attributes: &[KeyValue]) { +impl SyncInstrument for AggregateCounter { + fn measure(&self, value: T, attributes: &[KeyValue]) { for counter in &self.delegates { counter.add(value, attributes) } } } -pub(crate) struct AggregateObservableCounter { - delegates: Vec<(ObservableCounter, Option)>, -} - -impl AsyncInstrument for AggregateObservableCounter { - fn observe(&self, value: T, attributes: &[KeyValue]) { - for (counter, _) in &self.delegates { - counter.observe(value, attributes) - } - } - - fn as_any(&self) -> Arc { - unreachable!() - } -} - pub(crate) struct AggregateHistogram { delegates: Vec>, } -impl SyncHistogram for AggregateHistogram { - fn record(&self, value: T, attributes: &[KeyValue]) { +impl SyncInstrument for AggregateHistogram { + fn measure(&self, value: T, attributes: &[KeyValue]) { for histogram in &self.delegates { histogram.record(value, attributes) } @@ -342,164 +312,122 @@ pub(crate) struct AggregateUpDownCounter { delegates: Vec>, } -impl SyncUpDownCounter for AggregateUpDownCounter { - fn add(&self, value: T, 
attributes: &[KeyValue]) { +impl SyncInstrument for AggregateUpDownCounter { + fn measure(&self, value: T, attributes: &[KeyValue]) { for counter in &self.delegates { counter.add(value, attributes) } } } -pub(crate) struct AggregateObservableUpDownCounter { - delegates: Vec<(ObservableUpDownCounter, Option)>, -} - -impl AsyncInstrument for AggregateObservableUpDownCounter { - fn observe(&self, value: T, attributes: &[KeyValue]) { - for (counter, _) in &self.delegates { - counter.observe(value, attributes) - } - } - - fn as_any(&self) -> Arc { - unreachable!() - } -} - pub(crate) struct AggregateGauge { delegates: Vec>, } -impl SyncGauge for AggregateGauge { - fn record(&self, value: T, attributes: &[KeyValue]) { +impl SyncInstrument for AggregateGauge { + fn measure(&self, value: T, attributes: &[KeyValue]) { for gauge in &self.delegates { gauge.record(value, attributes) } } } -pub(crate) struct AggregateObservableGauge { - delegates: Vec<(ObservableGauge, Option)>, -} - -impl AsyncInstrument for AggregateObservableGauge { - fn observe(&self, measurement: T, attributes: &[KeyValue]) { - for (gauge, _) in &self.delegates { - gauge.observe(measurement, attributes) - } - } - - fn as_any(&self) -> Arc { - unreachable!() - } -} // Observable instruments don't need to have a ton of optimisation because they are only read on demand. macro_rules! aggregate_observable_instrument_fn { + ($name:ident, $ty:ty, $instrument:ident) => { + fn $name( + &self, + mut builder: opentelemetry::metrics::AsyncInstrumentBuilder<'_, $instrument<$ty>, $ty>, + ) -> $instrument<$ty> { + let callbacks: Vec>> = take(&mut builder.callbacks) + .into_iter() + .map(Arc::from) + .collect_vec(); + let name = builder.name.clone(); + let description = builder.description.clone(); + let unit = builder.unit.clone(); + + // Build the originally defined instrument for each meter. 
Most importantly, this will + // register the callbacks + let mut handles = Vec::with_capacity(self.meters.len()); + for meter in &self.meters { + let mut b = meter.$name(name.clone()); + if let Some(ref d) = description { + b = b.with_description(d.clone()); + } + if let Some(ref u) = unit { + b = b.with_unit(u.clone()); + } + for cb in &callbacks { + let cb = Arc::clone(cb); + b = b.with_callback(move |inst| cb(inst)); + } + // TODO do we instead wrap in an instrument wrapper and return so it can be dropped? + // TODO is this even possible since there's no delegate? + handles.push(b.build()); + } + + $instrument::new() + } + }; +} + +macro_rules! aggregate_instrument_fn { ($name:ident, $ty:ty, $wrapper:ident, $implementation:ident) => { fn $name( &self, - name: Cow<'static, str>, - description: Option>, - unit: Option>, - callback: Vec>, - ) -> opentelemetry::metrics::Result<$wrapper<$ty>> { - let callback: Vec>> = - callback.into_iter().map(|c| Arc::new(c)).collect_vec(); + builder: opentelemetry::metrics::InstrumentBuilder<'_, $wrapper<$ty>>, + ) -> $wrapper<$ty> { let delegates = self .meters .iter() - .map(|meter| { - let mut builder = meter.$name(name.clone()); - if let Some(description) = &description { - builder = builder.with_description(description.clone()); + .map(|p| { + let mut instrument_builder = p.$name(builder.name.clone()); + if let Some(ref desc) = builder.description { + instrument_builder = instrument_builder.with_description(desc.clone()); } - if let Some(unit) = &unit { - builder = builder.with_unit(unit.clone()); + if let Some(ref u) = builder.unit { + instrument_builder = instrument_builder.with_unit(u.clone()); } - // We must not set callback in the builder as it will leak memory. - // Instead we use callback registration on the meter provider as it allows unregistration - // Also we need to filter out no-op instruments as passing these to the meter provider as these will fail with a cryptic message about different implementations. 
- // Confusingly the implementation of as_any() on an instrument will return 'other stuff'. In particular no-ops return Arc<()>. This is why we need to check for this. - let delegate: $wrapper<$ty> = builder.try_init()?; - let registration = if delegate.clone().as_any().downcast_ref::<()>().is_some() { - // This is a no-op instrument, so we don't need to register a callback. - None - } else { - let delegate = delegate.clone(); - let callback = callback.clone(); - Some( - meter.register_callback(&[delegate.clone().as_any()], move |_| { - for callback in &callback { - callback(&delegate); - } - })?, - ) - }; - let result: opentelemetry::metrics::Result<_> = - Ok((delegate, registration.map(DroppingUnregister))); - result + instrument_builder.build() }) - .try_collect()?; - Ok($wrapper::new(Arc::new($implementation { delegates }))) + .collect(); + $wrapper::new(Arc::new($implementation { delegates })) } }; } -struct DroppingUnregister(Box); - -macro_rules! aggregate_instrument_fn { +macro_rules! 
aggregate_histogram_fn { ($name:ident, $ty:ty, $wrapper:ident, $implementation:ident) => { fn $name( &self, - name: Cow<'static, str>, - description: Option>, - unit: Option>, - ) -> opentelemetry::metrics::Result<$wrapper<$ty>> { + builder: opentelemetry::metrics::HistogramBuilder<'_, $wrapper<$ty>>, + ) -> $wrapper<$ty> { let delegates = self .meters .iter() .map(|p| { - let mut b = p.$name(name.clone()); - if let Some(description) = &description { - b = b.with_description(description.clone()); + let mut instrument_builder = p.$name(builder.name.clone()); + if let Some(ref desc) = builder.description { + instrument_builder = instrument_builder.with_description(desc.clone()); } - if let Some(unit) = &unit { - b = b.with_unit(unit.clone()); + if let Some(ref u) = builder.unit { + instrument_builder = instrument_builder.with_unit(u.clone()); } - b.try_init() + instrument_builder.build() }) - .try_collect()?; - Ok($wrapper::new(Arc::new($implementation { delegates }))) + .collect(); + $wrapper::new(Arc::new($implementation { delegates })) } }; } -impl Drop for DroppingUnregister { - fn drop(&mut self) { - if let Err(e) = self.0.unregister() { - ::tracing::error!(error = %e, "failed to unregister callback") - } - } -} impl InstrumentProvider for AggregateInstrumentProvider { aggregate_instrument_fn!(u64_counter, u64, Counter, AggregateCounter); aggregate_instrument_fn!(f64_counter, f64, Counter, AggregateCounter); - aggregate_observable_instrument_fn!( - f64_observable_counter, - f64, - ObservableCounter, - AggregateObservableCounter - ); - aggregate_observable_instrument_fn!( - u64_observable_counter, - u64, - ObservableCounter, - AggregateObservableCounter - ); - - aggregate_instrument_fn!(u64_histogram, u64, Histogram, AggregateHistogram); - aggregate_instrument_fn!(f64_histogram, f64, Histogram, AggregateHistogram); + aggregate_histogram_fn!(u64_histogram, u64, Histogram, AggregateHistogram); + aggregate_histogram_fn!(f64_histogram, f64, Histogram, 
AggregateHistogram); aggregate_instrument_fn!( i64_up_down_counter, @@ -520,43 +448,17 @@ impl InstrumentProvider for AggregateInstrumentProvider { aggregate_observable_instrument_fn!( i64_observable_up_down_counter, i64, - ObservableUpDownCounter, - AggregateObservableUpDownCounter + ObservableUpDownCounter ); aggregate_observable_instrument_fn!( f64_observable_up_down_counter, f64, - ObservableUpDownCounter, - AggregateObservableUpDownCounter + ObservableUpDownCounter ); - aggregate_observable_instrument_fn!( - f64_observable_gauge, - f64, - ObservableGauge, - AggregateObservableGauge - ); - aggregate_observable_instrument_fn!( - i64_observable_gauge, - i64, - ObservableGauge, - AggregateObservableGauge - ); - aggregate_observable_instrument_fn!( - u64_observable_gauge, - u64, - ObservableGauge, - AggregateObservableGauge - ); - - fn register_callback( - &self, - _instruments: &[Arc], - _callbacks: Box, - ) -> opentelemetry::metrics::Result> { - // We may implement this in future, but for now we don't need it and it's a pain to implement because we need to unwrap the aggregate instruments and pass them to the meter provider that owns them. 
- unimplemented!("register_callback is not supported on AggregateInstrumentProvider"); - } + aggregate_observable_instrument_fn!(f64_observable_gauge, f64, ObservableGauge); + aggregate_observable_instrument_fn!(i64_observable_gauge, i64, ObservableGauge); + aggregate_observable_instrument_fn!(u64_observable_gauge, u64, ObservableGauge); } #[cfg(test)] @@ -564,27 +466,20 @@ mod test { use std::sync::Arc; use std::sync::Weak; use std::sync::atomic::AtomicBool; - use std::sync::atomic::AtomicI64; use std::time::Duration; - use async_trait::async_trait; - use opentelemetry::global::GlobalMeterProvider; + use opentelemetry::InstrumentationScope; use opentelemetry::metrics::MeterProvider; - use opentelemetry::metrics::Result; - use opentelemetry_sdk::metrics::Aggregation; - use opentelemetry_sdk::metrics::InstrumentKind; + use opentelemetry_sdk::error::OTelSdkResult; use opentelemetry_sdk::metrics::ManualReader; use opentelemetry_sdk::metrics::MeterProviderBuilder; - use opentelemetry_sdk::metrics::PeriodicReader; + use opentelemetry_sdk::metrics::periodic_reader_with_async_runtime::PeriodicReader; + use opentelemetry_sdk::runtime; use opentelemetry_sdk::metrics::Pipeline; - use opentelemetry_sdk::metrics::data::Gauge; + use opentelemetry_sdk::metrics::Temporality; use opentelemetry_sdk::metrics::data::ResourceMetrics; - use opentelemetry_sdk::metrics::data::Temporality; - use opentelemetry_sdk::metrics::exporter::PushMetricsExporter; - use opentelemetry_sdk::metrics::reader::AggregationSelector; + use opentelemetry_sdk::metrics::exporter::PushMetricExporter; use opentelemetry_sdk::metrics::reader::MetricReader; - use opentelemetry_sdk::metrics::reader::TemporalitySelector; - use opentelemetry_sdk::runtime; use crate::metrics::aggregation::AggregateMeterProvider; use crate::metrics::aggregation::MeterProviderType; @@ -593,179 +488,30 @@ mod test { #[derive(Clone, Debug)] struct SharedReader(Arc); - impl TemporalitySelector for SharedReader { - fn temporality(&self, 
kind: InstrumentKind) -> Temporality { - self.0.temporality(kind) - } - } - - impl AggregationSelector for SharedReader { - fn aggregation(&self, kind: InstrumentKind) -> Aggregation { - self.0.aggregation(kind) - } - } - impl MetricReader for SharedReader { fn register_pipeline(&self, pipeline: Weak) { self.0.register_pipeline(pipeline) } - fn collect(&self, rm: &mut ResourceMetrics) -> Result<()> { + fn collect(&self, rm: &mut ResourceMetrics) -> OTelSdkResult { self.0.collect(rm) } - fn force_flush(&self) -> Result<()> { + fn force_flush(&self) -> OTelSdkResult { self.0.force_flush() } - fn shutdown(&self) -> Result<()> { + fn shutdown(&self) -> OTelSdkResult { self.0.shutdown() } - } - - #[test] - fn test_i64_gauge_drop() { - let reader = SharedReader(Arc::new(ManualReader::builder().build())); - let delegate = MeterProviderBuilder::default() - .with_reader(reader.clone()) - .build(); - let meter_provider = AggregateMeterProvider::default(); - meter_provider.set( - MeterProviderType::Public, - FilterMeterProvider::public(delegate), - ); - let meter = meter_provider.meter("test"); - - let observe_counter = Arc::new(AtomicI64::new(0)); - let callback_observe_counter = observe_counter.clone(); - let gauge = meter - .i64_observable_gauge("test") - .with_callback(move |i| { - let count = - callback_observe_counter.fetch_add(1, std::sync::atomic::Ordering::SeqCst); - i.observe(count + 1, &[]) - }) - .init(); - - let mut result = ResourceMetrics { - resource: Default::default(), - scope_metrics: Default::default(), - }; - - // Fetching twice will call the observer twice - reader - .collect(&mut result) - .expect("metrics must be collected"); - reader - .collect(&mut result) - .expect("metrics must be collected"); - - assert_eq!(get_gauge_value(&mut result), 2); - - // Dropping the gauge should remove the observer registration - drop(gauge); - - // No further increment will happen - reader - .collect(&mut result) - .expect("metrics must be collected"); - - 
assert_eq!(observe_counter.load(std::sync::atomic::Ordering::SeqCst), 2); - } - - #[test] - fn test_i64_gauge_lifecycle() { - let reader = SharedReader(Arc::new(ManualReader::builder().build())); - - let delegate = MeterProviderBuilder::default() - .with_reader(reader.clone()) - .build(); - let meter_provider = AggregateMeterProvider::default(); - meter_provider.set( - MeterProviderType::Public, - FilterMeterProvider::public(delegate), - ); - let meter = meter_provider.meter("test"); - - let observe_counter = Arc::new(AtomicI64::new(0)); - let callback_observe_counter1 = observe_counter.clone(); - let callback_observe_counter2 = observe_counter.clone(); - let gauge1 = meter - .i64_observable_gauge("test") - .with_callback(move |i| { - let count = - callback_observe_counter1.fetch_add(1, std::sync::atomic::Ordering::SeqCst); - i.observe(count + 1, &[]) - }) - .init(); - - let mut result = ResourceMetrics { - resource: Default::default(), - scope_metrics: Default::default(), - }; - - // Fetching metrics will call the observer - reader - .collect(&mut result) - .expect("metrics must be collected"); - - assert_eq!(get_gauge_value(&mut result), 1); - drop(gauge1); - - // The first gauge is dropped, let's create a new one - let gauge2 = meter - .i64_observable_gauge("test") - .with_callback(move |i| { - let count = - callback_observe_counter2.fetch_add(1, std::sync::atomic::Ordering::SeqCst); - i.observe(count + 1, &[]) - }) - .init(); - - // Fetching metrics will call the observer ONLY on the remaining gauge - reader - .collect(&mut result) - .expect("metrics must be collected"); - - assert_eq!(get_gauge_value(&mut result), 2); - drop(gauge2); - } + fn temporality(&self, _kind: opentelemetry_sdk::metrics::InstrumentKind) -> Temporality { + Temporality::Cumulative + } - fn get_gauge_value(result: &mut ResourceMetrics) -> i64 { - assert_eq!(result.scope_metrics.len(), 1); - assert_eq!(result.scope_metrics.first().unwrap().metrics.len(), 1); - assert_eq!( - result - 
.scope_metrics - .first() - .unwrap() - .metrics - .first() - .unwrap() - .data - .as_any() - .downcast_ref::>() - .unwrap() - .data_points - .len(), - 1 - ); - result - .scope_metrics - .first() - .unwrap() - .metrics - .first() - .unwrap() - .data - .as_any() - .downcast_ref::>() - .unwrap() - .data_points - .first() - .unwrap() - .value + fn shutdown_with_timeout(&self, _timeout: Duration) -> OTelSdkResult { + self.shutdown() + } } #[test] @@ -775,27 +521,24 @@ mod test { let reader = SharedReader(Arc::new(ManualReader::builder().build())); - let delegate = MeterProviderBuilder::default() - .with_reader(reader.clone()) - .build(); - let meter_provider = AggregateMeterProvider::default(); meter_provider.set( MeterProviderType::OtelDefault, - FilterMeterProvider::public(GlobalMeterProvider::new(delegate)), + FilterMeterProvider::public( + MeterProviderBuilder::default() + .with_reader(reader.clone()) + .build(), + ), ); let counter = meter_provider - .versioned_meter("test", None::, None::, None) + .meter_with_scope(InstrumentationScope::builder("test").build()) .u64_counter("test.counter") - .init(); + .build(); counter.add(1, &[]); - let mut resource_metrics = ResourceMetrics { - resource: Default::default(), - scope_metrics: vec![], - }; + let mut resource_metrics = ResourceMetrics::default(); reader.collect(&mut resource_metrics).unwrap(); - assert_eq!(1, resource_metrics.scope_metrics.len()); + assert_eq!(1, resource_metrics.scope_metrics().count()); } struct TestExporter { @@ -803,45 +546,43 @@ mod test { shutdown: Arc, } - impl AggregationSelector for TestExporter { - fn aggregation(&self, _kind: InstrumentKind) -> Aggregation { - Aggregation::Default - } - } - - impl TemporalitySelector for TestExporter { - fn temporality(&self, _kind: InstrumentKind) -> Temporality { - Temporality::Cumulative + impl PushMetricExporter for TestExporter { + async fn export(&self, _metrics: &ResourceMetrics) -> OTelSdkResult { + self.count(); + Ok(()) } - } - #[async_trait] 
- impl PushMetricsExporter for TestExporter { - async fn export(&self, _metrics: &mut ResourceMetrics) -> Result<()> { + fn force_flush(&self) -> OTelSdkResult { self.count(); Ok(()) } - async fn force_flush(&self) -> Result<()> { + fn shutdown_with_timeout(&self, _timeout: Duration) -> OTelSdkResult { self.count(); + self.shutdown + .store(true, std::sync::atomic::Ordering::SeqCst); Ok(()) } - fn shutdown(&self) -> Result<()> { + fn shutdown(&self) -> OTelSdkResult { self.count(); self.shutdown .store(true, std::sync::atomic::Ordering::SeqCst); Ok(()) } + + fn temporality(&self) -> Temporality { + Temporality::Cumulative + } } impl TestExporter { fn count(&self) { let counter = self .meter_provider - .versioned_meter("test", None::, None::, None) + .meter_with_scope(InstrumentationScope::builder("test").build()) .u64_counter("test.counter") - .init(); + .build(); counter.add(1, &[]); } } @@ -854,13 +595,13 @@ mod test { let shutdown = Arc::new(AtomicBool::new(false)); let periodic_reader = reader(&meter_provider, &shutdown); - let delegate = MeterProviderBuilder::default() - .with_reader(periodic_reader) - .build(); - meter_provider.set( MeterProviderType::OtelDefault, - FilterMeterProvider::public(GlobalMeterProvider::new(delegate)), + FilterMeterProvider::public( + MeterProviderBuilder::default() + .with_reader(periodic_reader) + .build(), + ), ); tokio::time::sleep(Duration::from_millis(20)).await; @@ -878,27 +619,26 @@ mod test { let shutdown1 = Arc::new(AtomicBool::new(false)); let periodic_reader = reader(&meter_provider, &shutdown1); - let delegate = MeterProviderBuilder::default() - .with_reader(periodic_reader) - .build(); - meter_provider.set( MeterProviderType::OtelDefault, - FilterMeterProvider::public(GlobalMeterProvider::new(delegate)), + FilterMeterProvider::public( + MeterProviderBuilder::default() + .with_reader(periodic_reader) + .build(), + ), ); tokio::time::sleep(Duration::from_millis(20)).await; let shutdown2 = 
Arc::new(AtomicBool::new(false)); let periodic_reader = reader(&meter_provider, &shutdown2); - let delegate = MeterProviderBuilder::default() - .with_reader(periodic_reader) - .build(); - - // Setting the meter provider should not deadlock. meter_provider.set( MeterProviderType::OtelDefault, - FilterMeterProvider::public(GlobalMeterProvider::new(delegate)), + FilterMeterProvider::public( + MeterProviderBuilder::default() + .with_reader(periodic_reader) + .build(), + ), ); tokio::time::sleep(Duration::from_millis(20)).await; @@ -911,16 +651,12 @@ mod test { fn reader( meter_provider: &AggregateMeterProvider, shutdown: &Arc, - ) -> PeriodicReader { - PeriodicReader::builder( - TestExporter { - meter_provider: meter_provider.clone(), - shutdown: shutdown.clone(), - }, - runtime::Tokio, - ) + ) -> PeriodicReader { + PeriodicReader::builder(TestExporter { + meter_provider: meter_provider.clone(), + shutdown: shutdown.clone(), + }, runtime::Tokio) .with_interval(Duration::from_millis(10)) - .with_timeout(Duration::from_millis(10)) .build() } } diff --git a/apollo-router/src/metrics/filter.rs b/apollo-router/src/metrics/filter.rs index c2a1031081..65732a8857 100644 --- a/apollo-router/src/metrics/filter.rs +++ b/apollo-router/src/metrics/filter.rs @@ -1,11 +1,9 @@ -use std::any::Any; use std::borrow::Cow; use std::sync::Arc; use buildstructor::buildstructor; +use opentelemetry::InstrumentationScope; use opentelemetry::KeyValue; -use opentelemetry::metrics::Callback; -use opentelemetry::metrics::CallbackRegistration; use opentelemetry::metrics::Counter; use opentelemetry::metrics::Gauge; use opentelemetry::metrics::Histogram; @@ -15,37 +13,65 @@ use opentelemetry::metrics::MeterProvider as OtelMeterProvider; use opentelemetry::metrics::ObservableCounter; use opentelemetry::metrics::ObservableGauge; use opentelemetry::metrics::ObservableUpDownCounter; -use opentelemetry::metrics::Observer; use opentelemetry::metrics::UpDownCounter; -use 
opentelemetry::metrics::noop::NoopMeterProvider; use regex::Regex; +//`opentelemetry::global::GlobalMeterProvider` type alias was made private so we recreate it here +type GlobalMeterProvider = Arc; + #[derive(Clone)] pub(crate) enum MeterProvider { Regular(opentelemetry_sdk::metrics::SdkMeterProvider), - Global(opentelemetry::global::GlobalMeterProvider), + Global(GlobalMeterProvider), } impl MeterProvider { fn versioned_meter( &self, - name: impl Into>, + name: &'static str, version: Option>>, schema_url: Option>>, attributes: Option>, ) -> Meter { match &self { MeterProvider::Regular(provider) => { - provider.versioned_meter(name, version, schema_url, attributes) + let mut builder = InstrumentationScope::builder(name); + if let Some(v) = version { + builder = builder.with_version(v.into()); + } + if let Some(s) = schema_url { + builder = builder.with_schema_url(s.into()); + } + if let Some(ref attrs) = attributes { + builder = builder.with_attributes(attrs.clone()); + } + provider.meter_with_scope(builder.build()) } MeterProvider::Global(provider) => { - provider.versioned_meter(name, version, schema_url, attributes) + let mut builder = InstrumentationScope::builder(name); + if let Some(v) = version { + builder = builder.with_version(v.into()); + } + if let Some(s) = schema_url { + builder = builder.with_schema_url(s.into()); + } + if let Some(ref attrs) = attributes { + builder = builder.with_attributes(attrs.clone()); + } + provider.meter_with_scope(builder.build()) } } } + fn meter_with_scope(&self, scope: &InstrumentationScope) -> Meter { + match &self { + MeterProvider::Regular(provider) => provider.meter_with_scope(scope.clone()), + MeterProvider::Global(provider) => provider.meter_with_scope(scope.clone()), + } + } + #[cfg(test)] - fn force_flush(&self) -> opentelemetry::metrics::Result<()> { + fn force_flush(&self) -> opentelemetry_sdk::error::OTelSdkResult { match self { MeterProvider::Regular(provider) => provider.force_flush(), 
MeterProvider::Global(_provider) => Ok(()), @@ -59,8 +85,8 @@ impl From for MeterProvider { } } -impl From for MeterProvider { - fn from(provider: opentelemetry::global::GlobalMeterProvider) -> Self { +impl From for MeterProvider { + fn from(provider: GlobalMeterProvider) -> Self { MeterProvider::Global(provider) } } @@ -124,7 +150,7 @@ impl FilterMeterProvider { } #[cfg(test)] - pub(crate) fn force_flush(&self) -> opentelemetry::metrics::Result<()> { + pub(crate) fn force_flush(&self) -> opentelemetry_sdk::error::OTelSdkResult { self.delegate.force_flush() } } @@ -140,54 +166,82 @@ macro_rules! filter_instrument_fn { ($name:ident, $ty:ty, $wrapper:ident) => { fn $name( &self, - name: Cow<'static, str>, - description: Option>, - unit: Option>, - ) -> opentelemetry::metrics::Result<$wrapper<$ty>> { - let mut builder = match (&self.deny, &self.allow) { + builder: opentelemetry::metrics::InstrumentBuilder<'_, $wrapper<$ty>>, + ) -> $wrapper<$ty> { + let name = builder.name.to_string(); + match (&self.deny, &self.allow) { // Deny match takes precedence over allow match - (Some(deny), _) if deny.is_match(&name) => self.noop.$name(name), - (_, Some(allow)) if !allow.is_match(&name) => self.noop.$name(name), - (_, _) => self.delegate.$name(name), - }; - if let Some(description) = &description { - builder = builder.with_description(description.clone()) + (Some(deny), _) if deny.is_match(&name) => self.noop.$name(builder.name).build(), + (_, Some(allow)) if !allow.is_match(&name) => self.noop.$name(builder.name).build(), + (_, _) => { + let mut instrument_builder = self.delegate.$name(builder.name); + if let Some(ref description) = builder.description { + instrument_builder = + instrument_builder.with_description(description.clone()); + } + if let Some(ref unit) = builder.unit { + instrument_builder = instrument_builder.with_unit(unit.clone()); + } + instrument_builder.build() + } } - if let Some(unit) = &unit { - builder = builder.with_unit(unit.clone()); - } - 
builder.try_init() } }; } -macro_rules! filter_observable_instrument_fn { +macro_rules! filter_histogram_fn { ($name:ident, $ty:ty, $wrapper:ident) => { fn $name( &self, - name: Cow<'static, str>, - description: Option>, - unit: Option>, - callback: Vec>, - ) -> opentelemetry::metrics::Result<$wrapper<$ty>> { - let mut builder = match (&self.deny, &self.allow) { + builder: opentelemetry::metrics::HistogramBuilder<'_, $wrapper<$ty>>, + ) -> $wrapper<$ty> { + let name = builder.name.to_string(); + match (&self.deny, &self.allow) { // Deny match takes precedence over allow match - (Some(deny), _) if deny.is_match(&name) => self.noop.$name(name), - (_, Some(allow)) if !allow.is_match(&name) => self.noop.$name(name), - (_, _) => self.delegate.$name(name), - }; - if let Some(description) = &description { - builder = builder.with_description(description.clone()); - } - if let Some(unit) = &unit { - builder = builder.with_unit(unit.clone()); + (Some(deny), _) if deny.is_match(&name) => self.noop.$name(builder.name).build(), + (_, Some(allow)) if !allow.is_match(&name) => self.noop.$name(builder.name).build(), + (_, _) => { + let mut instrument_builder = self.delegate.$name(builder.name); + if let Some(ref description) = builder.description { + instrument_builder = + instrument_builder.with_description(description.clone()); + } + if let Some(ref unit) = builder.unit { + instrument_builder = instrument_builder.with_unit(unit.clone()); + } + instrument_builder.build() + } } + } + }; +} - for callback in callback { - builder = builder.with_callback(callback); +macro_rules! 
filter_observable_instrument_fn { + ($name:ident, $ty:ty, $wrapper:ident) => { + fn $name( + &self, + builder: opentelemetry::metrics::AsyncInstrumentBuilder<'_, $wrapper<$ty>, $ty>, + ) -> $wrapper<$ty> { + let name = builder.name.to_string(); + match (&self.deny, &self.allow) { + // Deny match takes precedence over allow match + (Some(deny), _) if deny.is_match(&name) => self.noop.$name(builder.name).build(), + (_, Some(allow)) if !allow.is_match(&name) => self.noop.$name(builder.name).build(), + (_, _) => { + let mut instrument_builder = self.delegate.$name(builder.name); + for callback in builder.callbacks { + instrument_builder = instrument_builder.with_callback(callback); + } + if let Some(ref description) = builder.description { + instrument_builder = + instrument_builder.with_description(description.clone()); + } + if let Some(ref unit) = builder.unit { + instrument_builder = instrument_builder.with_unit(unit.clone()); + } + instrument_builder.build() + } } - - builder.try_init() } }; } @@ -203,8 +257,8 @@ impl InstrumentProvider for FilteredInstrumentProvider { filter_observable_instrument_fn!(f64_observable_counter, f64, ObservableCounter); filter_observable_instrument_fn!(u64_observable_counter, u64, ObservableCounter); - filter_instrument_fn!(u64_histogram, u64, Histogram); - filter_instrument_fn!(f64_histogram, f64, Histogram); + filter_histogram_fn!(u64_histogram, u64, Histogram); + filter_histogram_fn!(f64_histogram, f64, Histogram); filter_instrument_fn!(i64_up_down_counter, i64, UpDownCounter); filter_instrument_fn!(f64_up_down_counter, f64, UpDownCounter); @@ -215,29 +269,23 @@ impl InstrumentProvider for FilteredInstrumentProvider { filter_observable_instrument_fn!(f64_observable_gauge, f64, ObservableGauge); filter_observable_instrument_fn!(i64_observable_gauge, i64, ObservableGauge); filter_observable_instrument_fn!(u64_observable_gauge, u64, ObservableGauge); - - fn register_callback( - &self, - instruments: &[Arc], - callbacks: Box, - ) -> 
opentelemetry::metrics::Result> { - self.delegate.register_callback(instruments, callbacks) - } } impl opentelemetry::metrics::MeterProvider for FilterMeterProvider { - fn versioned_meter( - &self, - name: impl Into>, - version: Option>>, - schema_url: Option>>, - attributes: Option>, - ) -> Meter { + fn meter(&self, name: &'static str) -> Meter { Meter::new(Arc::new(FilteredInstrumentProvider { - noop: NoopMeterProvider::default().meter(""), + noop: opentelemetry::global::meter_provider().meter("noop"), delegate: self .delegate - .versioned_meter(name, version, schema_url, attributes), + .versioned_meter(name, None::<&str>, None::<&str>, None), + deny: self.deny.clone(), + allow: self.allow.clone(), + })) + } + fn meter_with_scope(&self, scope: opentelemetry::InstrumentationScope) -> Meter { + Meter::new(Arc::new(FilteredInstrumentProvider { + noop: opentelemetry::global::meter_provider().meter("noop"), + delegate: self.delegate.meter_with_scope(&scope), deny: self.deny.clone(), allow: self.allow.clone(), })) @@ -246,155 +294,163 @@ impl opentelemetry::metrics::MeterProvider for FilterMeterProvider { #[cfg(test)] mod test { - use opentelemetry::global::GlobalMeterProvider; + use opentelemetry::InstrumentationScope; + use opentelemetry::global; use opentelemetry::metrics::MeterProvider; + use opentelemetry_sdk::metrics::InMemoryMetricExporter; use opentelemetry_sdk::metrics::MeterProviderBuilder; - use opentelemetry_sdk::metrics::PeriodicReader; + use opentelemetry_sdk::metrics::periodic_reader_with_async_runtime::PeriodicReader; use opentelemetry_sdk::runtime; - use opentelemetry_sdk::testing::metrics::InMemoryMetricsExporter; - use crate::metrics::filter::FilterMeterProvider; #[tokio::test(flavor = "multi_thread")] async fn test_private_metrics() { - let exporter = InMemoryMetricsExporter::default(); + let exporter = InMemoryMetricExporter::default(); let meter_provider = FilterMeterProvider::apollo( MeterProviderBuilder::default() 
.with_reader(PeriodicReader::builder(exporter.clone(), runtime::Tokio).build()) .build(), ); - let filtered = meter_provider.versioned_meter("filtered", "".into(), "".into(), None); + let filtered = + meter_provider.meter_with_scope(InstrumentationScope::builder("filtered").build()); // Matches allow filtered .u64_counter("apollo.router.operations") - .init() + .build() .add(1, &[]); filtered .u64_counter("apollo.router.operations.test") - .init() + .build() .add(1, &[]); filtered .u64_counter("apollo.graphos.cloud.test") - .init() + .build() .add(1, &[]); filtered .u64_counter("apollo.router.query_planning.test") - .init() + .build() .add(1, &[]); filtered .u64_counter("apollo.router.lifecycle.api_schema") - .init() + .build() .add(1, &[]); filtered .u64_counter("apollo.router.operations.connectors") - .init() + .build() .add(1, &[]); filtered .u64_observable_gauge("apollo.router.schema.connectors") .with_callback(move |observer| observer.observe(1, &[])) - .init(); + .build(); // Mismatches allow filtered .u64_counter("apollo.router.unknown.test") - .init() + .build() .add(1, &[]); // Matches deny filtered .u64_counter("apollo.router.operations.error") - .init() + .build() .add(1, &[]); meter_provider.force_flush().unwrap(); - let metrics: Vec<_> = exporter - .get_finished_metrics() - .unwrap() - .into_iter() - .flat_map(|m| m.scope_metrics.into_iter()) - .flat_map(|m| m.metrics) + let resource_metrics = exporter.get_finished_metrics().unwrap(); + let metrics: Vec<_> = resource_metrics + .iter() + .flat_map(|rm| rm.scope_metrics()) + .flat_map(|sm| sm.metrics()) .collect(); // Matches allow assert!( metrics .iter() - .any(|m| m.name == "apollo.router.operations.test") + .any(|m| m.name() == "apollo.router.operations.test") ); - assert!(metrics.iter().any(|m| m.name == "apollo.router.operations")); + assert!( + metrics + .iter() + .any(|m| m.name() == "apollo.router.operations") + ); assert!( metrics .iter() - .any(|m| m.name == "apollo.graphos.cloud.test") + 
.any(|m| m.name() == "apollo.graphos.cloud.test") ); assert!( metrics .iter() - .any(|m| m.name == "apollo.router.lifecycle.api_schema") + .any(|m| m.name() == "apollo.router.lifecycle.api_schema") ); assert!( metrics .iter() - .any(|m| m.name == "apollo.router.operations.connectors") + .any(|m| m.name() == "apollo.router.operations.connectors") ); assert!( metrics .iter() - .any(|m| m.name == "apollo.router.schema.connectors") + .any(|m| m.name() == "apollo.router.schema.connectors") ); // Mismatches allow assert!( !metrics .iter() - .any(|m| m.name == "apollo.router.unknown.test") + .any(|m| m.name() == "apollo.router.unknown.test") ); // Matches deny assert!( !metrics .iter() - .any(|m| m.name == "apollo.router.operations.error") + .any(|m| m.name() == "apollo.router.operations.error") ); } #[tokio::test(flavor = "multi_thread")] async fn test_description_and_unit() { - let exporter = InMemoryMetricsExporter::default(); + let exporter = InMemoryMetricExporter::default(); let meter_provider = FilterMeterProvider::apollo( MeterProviderBuilder::default() .with_reader(PeriodicReader::builder(exporter.clone(), runtime::Tokio).build()) .build(), ); - let filtered = meter_provider.versioned_meter("filtered", "".into(), "".into(), None); + let filtered = + meter_provider.meter_with_scope(InstrumentationScope::builder("filtered").build()); filtered .u64_counter("apollo.router.operations") .with_description("desc") .with_unit("ms") - .init() + .build() .add(1, &[]); meter_provider.force_flush().unwrap(); - let metrics: Vec<_> = exporter - .get_finished_metrics() - .unwrap() - .into_iter() - .flat_map(|m| m.scope_metrics.into_iter()) - .flat_map(|m| m.metrics) + let resource_metrics = exporter.get_finished_metrics().unwrap(); + let metrics: Vec<_> = resource_metrics + .iter() + .flat_map(|rm| rm.scope_metrics()) + .flat_map(|sm| sm.metrics()) .collect(); - assert!(metrics.iter().any(|m| m.name == "apollo.router.operations" - && m.description == "desc" - && m.unit == 
"ms")); + assert!( + metrics + .iter() + .any(|m| m.name() == "apollo.router.operations" + && m.description() == "desc" + && m.unit() == "ms") + ); } #[tokio::test(flavor = "multi_thread")] async fn test_public_metrics_using_meter_provider() { - let exporter = InMemoryMetricsExporter::default(); + let exporter = InMemoryMetricExporter::default(); test_public_metrics( exporter.clone(), MeterProviderBuilder::default() @@ -406,120 +462,116 @@ mod test { #[tokio::test(flavor = "multi_thread")] async fn test_public_metrics_using_global_meter_provider() { - let exporter = InMemoryMetricsExporter::default(); - - test_public_metrics( - exporter.clone(), - GlobalMeterProvider::new( - MeterProviderBuilder::default() - .with_reader(PeriodicReader::builder(exporter.clone(), runtime::Tokio).build()) - .build(), - ), - ) - .await; + let exporter = InMemoryMetricExporter::default(); + global::set_meter_provider( + MeterProviderBuilder::default() + .with_reader(PeriodicReader::builder(exporter.clone(), runtime::Tokio).build()) + .build(), + ); + test_public_metrics(exporter.clone(), global::meter_provider()).await; } async fn test_public_metrics>( - exporter: InMemoryMetricsExporter, + exporter: InMemoryMetricExporter, meter_provider: T, ) { let meter_provider = FilterMeterProvider::public(meter_provider); - let filtered = meter_provider.versioned_meter("filtered", "".into(), "".into(), None); + let filtered = + meter_provider.meter_with_scope(InstrumentationScope::builder("filtered").build()); filtered .u64_counter("apollo.router.config") - .init() + .build() .add(1, &[]); filtered .u64_counter("apollo.router.config.test") - .init() + .build() .add(1, &[]); filtered .u64_counter("apollo.router.entities") - .init() + .build() .add(1, &[]); filtered .u64_counter("apollo.router.entities.test") - .init() + .build() .add(1, &[]); filtered .u64_counter("apollo.router.operations.connectors") - .init() + .build() .add(1, &[]); filtered 
.u64_observable_gauge("apollo.router.schema.connectors") .with_callback(move |observer| observer.observe(1, &[])) - .init(); + .build(); meter_provider.force_flush().unwrap(); - let metrics: Vec<_> = exporter - .get_finished_metrics() - .unwrap() - .into_iter() - .flat_map(|m| m.scope_metrics.into_iter()) - .flat_map(|m| m.metrics) + let resource_metrics = exporter.get_finished_metrics().unwrap(); + let metrics: Vec<_> = resource_metrics + .iter() + .flat_map(|rm| rm.scope_metrics()) + .flat_map(|sm| sm.metrics()) .collect(); - assert!(!metrics.iter().any(|m| m.name == "apollo.router.config")); + assert!(!metrics.iter().any(|m| m.name() == "apollo.router.config")); assert!( !metrics .iter() - .any(|m| m.name == "apollo.router.config.test") + .any(|m| m.name() == "apollo.router.config.test") ); - assert!(!metrics.iter().any(|m| m.name == "apollo.router.entities")); + assert!(!metrics.iter().any(|m| m.name() == "apollo.router.entities")); assert!( !metrics .iter() - .any(|m| m.name == "apollo.router.entities.test") + .any(|m| m.name() == "apollo.router.entities.test") ); assert!( !metrics .iter() - .any(|m| m.name == "apollo.router.operations.connectors") + .any(|m| m.name() == "apollo.router.operations.connectors") ); assert!( !metrics .iter() - .any(|m| m.name == "apollo.router.schema.connectors") + .any(|m| m.name() == "apollo.router.schema.connectors") ); } #[tokio::test(flavor = "multi_thread")] async fn test_private_realtime_metrics() { - let exporter = InMemoryMetricsExporter::default(); + let exporter = InMemoryMetricExporter::default(); let meter_provider = FilterMeterProvider::apollo_realtime( MeterProviderBuilder::default() .with_reader(PeriodicReader::builder(exporter.clone(), runtime::Tokio).build()) .build(), ); - let filtered = meter_provider.versioned_meter("filtered", "".into(), "".into(), None); + let filtered = + meter_provider.meter_with_scope(InstrumentationScope::builder("filtered").build()); filtered 
.u64_counter("apollo.router.operations.error") - .init() + .build() .add(1, &[]); filtered .u64_counter("apollo.router.operations.mismatch") - .init() + .build() .add(1, &[]); meter_provider.force_flush().unwrap(); - let metrics: Vec<_> = exporter - .get_finished_metrics() - .unwrap() - .into_iter() - .flat_map(|m| m.scope_metrics.into_iter()) - .flat_map(|m| m.metrics) + let resource_metrics = exporter.get_finished_metrics().unwrap(); + let metrics: Vec<_> = resource_metrics + .iter() + .flat_map(|rm| rm.scope_metrics()) + .flat_map(|sm| sm.metrics()) .collect(); // Matches assert!( metrics .iter() - .any(|m| m.name == "apollo.router.operations.error") + .any(|m| m.name() == "apollo.router.operations.error") ); // Mismatches assert!( !metrics .iter() - .any(|m| m.name == "apollo.router.operations.mismatch") + .any(|m| m.name() == "apollo.router.operations.mismatch") ); } } diff --git a/apollo-router/src/metrics/mod.rs b/apollo-router/src/metrics/mod.rs index 1f8f0aefe7..2877e49865 100644 --- a/apollo-router/src/metrics/mod.rs +++ b/apollo-router/src/metrics/mod.rs @@ -158,11 +158,13 @@ impl NoopGuard { pub(crate) mod test_utils { use std::cmp::Ordering; use std::collections::BTreeMap; + use std::collections::HashMap; use std::fmt::Debug; use std::fmt::Display; use std::sync::Arc; use std::sync::OnceLock; use std::sync::Weak; + use std::time::Duration; use itertools::Itertools; use num_traits::NumCast; @@ -171,68 +173,66 @@ pub(crate) mod test_utils { use opentelemetry::KeyValue; use opentelemetry::StringValue; use opentelemetry::Value; - use opentelemetry_sdk::metrics::Aggregation; - use opentelemetry_sdk::metrics::AttributeSet; use opentelemetry_sdk::metrics::InstrumentKind; use opentelemetry_sdk::metrics::ManualReader; use opentelemetry_sdk::metrics::MeterProviderBuilder; use opentelemetry_sdk::metrics::Pipeline; - use opentelemetry_sdk::metrics::data::DataPoint; - use opentelemetry_sdk::metrics::data::Gauge; - use opentelemetry_sdk::metrics::data::Histogram; + 
use opentelemetry_sdk::metrics::Temporality; + use opentelemetry_sdk::metrics::data::ExponentialHistogramDataPoint; + use opentelemetry_sdk::metrics::data::GaugeDataPoint; use opentelemetry_sdk::metrics::data::HistogramDataPoint; use opentelemetry_sdk::metrics::data::Metric; + use opentelemetry_sdk::metrics::data::MetricData; use opentelemetry_sdk::metrics::data::ResourceMetrics; - use opentelemetry_sdk::metrics::data::Sum; - use opentelemetry_sdk::metrics::data::Temporality; - use opentelemetry_sdk::metrics::reader::AggregationSelector; + use opentelemetry_sdk::metrics::data::SumDataPoint; use opentelemetry_sdk::metrics::reader::MetricReader; - use opentelemetry_sdk::metrics::reader::TemporalitySelector; use serde::Serialize; use tokio::task_local; use crate::metrics::aggregation::AggregateMeterProvider; use crate::metrics::aggregation::MeterProviderType; use crate::metrics::filter::FilterMeterProvider; - task_local! { - pub(crate) static AGGREGATE_METER_PROVIDER_ASYNC: OnceLock<(AggregateMeterProvider, ClonableManualReader)>; - } - thread_local! 
{ - pub(crate) static AGGREGATE_METER_PROVIDER: OnceLock<(AggregateMeterProvider, ClonableManualReader)> = const { OnceLock::new() }; - } #[derive(Debug, Clone, Default)] pub(crate) struct ClonableManualReader { reader: Arc, } - impl TemporalitySelector for ClonableManualReader { - fn temporality(&self, kind: InstrumentKind) -> Temporality { - self.reader.temporality(kind) - } - } - - impl AggregationSelector for ClonableManualReader { - fn aggregation(&self, kind: InstrumentKind) -> Aggregation { - self.reader.aggregation(kind) - } - } impl MetricReader for ClonableManualReader { fn register_pipeline(&self, pipeline: Weak) { self.reader.register_pipeline(pipeline) } - fn collect(&self, rm: &mut ResourceMetrics) -> opentelemetry::metrics::Result<()> { + fn collect(&self, rm: &mut ResourceMetrics) -> opentelemetry_sdk::error::OTelSdkResult { self.reader.collect(rm) } - fn force_flush(&self) -> opentelemetry::metrics::Result<()> { + fn force_flush(&self) -> opentelemetry_sdk::error::OTelSdkResult { self.reader.force_flush() } - fn shutdown(&self) -> opentelemetry::metrics::Result<()> { + fn shutdown(&self) -> opentelemetry_sdk::error::OTelSdkResult { self.reader.shutdown() } + + fn shutdown_with_timeout( + &self, + timeout: Duration, + ) -> opentelemetry_sdk::error::OTelSdkResult { + self.reader.shutdown_with_timeout(timeout) + } + + fn temporality(&self, _kind: InstrumentKind) -> Temporality { + Temporality::Cumulative + } + } + + task_local! { + pub(crate) static AGGREGATE_METER_PROVIDER_ASYNC: OnceLock<(AggregateMeterProvider, ClonableManualReader)>; + } + + thread_local! 
{ + pub(crate) static AGGREGATE_METER_PROVIDER: OnceLock<&'static (AggregateMeterProvider, ClonableManualReader)> = const { OnceLock::new() }; } fn create_test_meter_provider() -> (AggregateMeterProvider, ClonableManualReader) { @@ -252,37 +252,45 @@ pub(crate) mod test_utils { (meter_provider, reader) } } + fn create_test_meter_provider_leaked() -> &'static (AggregateMeterProvider, ClonableManualReader) + { + Box::leak(Box::new(create_test_meter_provider())) + } pub(crate) fn meter_provider_and_readers() -> (AggregateMeterProvider, ClonableManualReader) { if tokio::runtime::Handle::try_current().is_ok() { + // Multi-threaded tests should use a new meter provider on each thread so they don't + // interfere with eachother's metrics AGGREGATE_METER_PROVIDER_ASYNC .try_with(|cell| cell.get_or_init(create_test_meter_provider).clone()) // We need to silently fail here. // Otherwise we fail every multi-threaded test that touches metrics .unwrap_or_default() } else { + // We deliberately leak the test MeterProvider instead of letting it Drop. + // Dropping `opentelemetry_sdk::metrics::SdkMeterProviderInner` calls `shutdown()`, + // which logs via OpenTelemetry’s internal logging macros (otel_* -> tracing::*). Those + // logs go through tracing-subscriber, which uses thread-local state. If this Drop runs + // from a thread-local destructor (e.g. in tests using TLS), tracing_subscriber’s TLS + // access happens during TLS teardown and panics with: "cannot access a Thread Local + // Storage value during or after destruction: AccessError" To avoid that panic in tests, + // we never Drop the provider here. 
AGGREGATE_METER_PROVIDER - .with(|cell| cell.get_or_init(create_test_meter_provider).clone()) + .with(|cell| { + let pair = cell.get_or_init(create_test_meter_provider_leaked); + (*pair).clone() + }) + .clone() } } + #[derive(Default)] pub(crate) struct Metrics { resource_metrics: ResourceMetrics, } - impl Default for Metrics { - fn default() -> Self { - Metrics { - resource_metrics: ResourceMetrics { - resource: Default::default(), - scope_metrics: vec![], - }, - } - } - } - pub(crate) fn collect_metrics() -> Metrics { let mut metrics = Metrics::default(); - let (_, reader) = meter_provider_and_readers(); + let (_mp, reader) = meter_provider_and_readers(); reader .collect(&mut metrics.resource_metrics) .expect("Failed to collect metrics. Did you forget to use `async{}.with_metrics()`? See dev-docs/metrics.md"); @@ -292,13 +300,11 @@ pub(crate) mod test_utils { impl Metrics { pub(crate) fn find(&self, name: &str) -> Option<&opentelemetry_sdk::metrics::data::Metric> { self.resource_metrics - .scope_metrics - .iter() + .scope_metrics() .flat_map(|scope_metrics| { scope_metrics - .metrics - .iter() - .filter(|metric| metric.name == name) + .metrics() + .filter(|metric| metric.name() == name) }) .next() } @@ -312,21 +318,20 @@ pub(crate) mod test_utils { count: bool, attributes: &[KeyValue], ) -> bool { - let attributes = AttributeSet::from(attributes); if let Some(value) = value.to_u64() - && self.metric_matches(name, &ty, value, count, &attributes) + && self.metric_matches(name, &ty, value, count, attributes) { return true; } if let Some(value) = value.to_i64() - && self.metric_matches(name, &ty, value, count, &attributes) + && self.metric_matches(name, &ty, value, count, attributes) { return true; } if let Some(value) = value.to_f64() - && self.metric_matches(name, &ty, value, count, &attributes) + && self.metric_matches(name, &ty, value, count, attributes) { return true; } @@ -340,74 +345,300 @@ pub(crate) mod test_utils { ty: &MetricType, value: T, count: bool, - 
attributes: &AttributeSet, + attributes: &[KeyValue], ) -> bool { if let Some(metric) = self.find(name) { - // Try to downcast the metric to each type of aggregation and assert that the value is correct. - if let Some(gauge) = metric.data.as_any().downcast_ref::>() { - // Find the datapoint with the correct attributes. - if matches!(ty, MetricType::Gauge) { - return gauge.data_points.iter().any(|datapoint| { - datapoint.value == value - && Self::equal_attributes(attributes, &datapoint.attributes) - }); + match metric.data() { + opentelemetry_sdk::metrics::data::AggregatedMetrics::F64(metric_data) + if value.to_f64().is_some() => + { + let value = value.to_f64().unwrap(); + use opentelemetry_sdk::metrics::data::MetricData::*; + match metric_data { + Gauge(gauge) if matches!(ty, MetricType::Gauge) => { + return gauge.data_points().any(|datapoint| { + datapoint.value() == value + && Self::equal_attributes( + attributes, + &datapoint.attributes().cloned().collect::>(), + ) + }); + } + Sum(sum) + if matches!( + ty, + MetricType::Counter | MetricType::UpDownCounter + ) => + { + return sum.data_points().any(|datapoint| { + datapoint.value() == value + && Self::equal_attributes( + attributes, + &datapoint.attributes().cloned().collect::>(), + ) + }); + } + Histogram(histogram) if matches!(ty, MetricType::Histogram) => { + if count { + return histogram.data_points().any(|datapoint| { + datapoint.count() == value as u64 + && Self::equal_attributes( + attributes, + &datapoint + .attributes() + .cloned() + .collect::>(), + ) + }); + } else { + return histogram.data_points().any(|datapoint| { + datapoint.sum() == value + && Self::equal_attributes( + attributes, + &datapoint + .attributes() + .cloned() + .collect::>(), + ) + }); + } + } + _ => {} + } } - } else if let Some(sum) = metric.data.as_any().downcast_ref::>() { - // Note that we can't actually tell if the sum is monotonic or not, so we just check if it's a sum. 
- if matches!(ty, MetricType::Counter | MetricType::UpDownCounter) { - return sum.data_points.iter().any(|datapoint| { - datapoint.value == value - && Self::equal_attributes(attributes, &datapoint.attributes) - }); + opentelemetry_sdk::metrics::data::AggregatedMetrics::U64(metric_data) + if value.to_u64().is_some() => + { + let value = value.to_u64().unwrap(); + use opentelemetry_sdk::metrics::data::MetricData::*; + match metric_data { + Gauge(gauge) if matches!(ty, MetricType::Gauge) => { + return gauge.data_points().any(|datapoint| { + datapoint.value() == value + && Self::equal_attributes( + attributes, + &datapoint.attributes().cloned().collect::>(), + ) + }); + } + Sum(sum) + if matches!( + ty, + MetricType::Counter | MetricType::UpDownCounter + ) => + { + return sum.data_points().any(|datapoint| { + datapoint.value() == value + && Self::equal_attributes( + attributes, + &datapoint.attributes().cloned().collect::>(), + ) + }); + } + Histogram(histogram) if matches!(ty, MetricType::Histogram) => { + if count { + return histogram.data_points().any(|datapoint| { + datapoint.count() == value + && Self::equal_attributes( + attributes, + &datapoint + .attributes() + .cloned() + .collect::>(), + ) + }); + } else { + return histogram.data_points().any(|datapoint| { + datapoint.sum() == value + && Self::equal_attributes( + attributes, + &datapoint + .attributes() + .cloned() + .collect::>(), + ) + }); + } + } + _ => {} + } } - } else if let Some(histogram) = metric.data.as_any().downcast_ref::>() - && matches!(ty, MetricType::Histogram) - { - if count { - return histogram.data_points.iter().any(|datapoint| { - datapoint.count == value.to_u64().unwrap() - && Self::equal_attributes(attributes, &datapoint.attributes) - }); - } else { - return histogram.data_points.iter().any(|datapoint| { - datapoint.sum == value - && Self::equal_attributes(attributes, &datapoint.attributes) - }); + opentelemetry_sdk::metrics::data::AggregatedMetrics::I64(metric_data) + if 
value.to_i64().is_some() => + { + let value = value.to_i64().unwrap(); + use opentelemetry_sdk::metrics::data::MetricData::*; + match metric_data { + Gauge(gauge) if matches!(ty, MetricType::Gauge) => { + return gauge.data_points().any(|datapoint| { + datapoint.value() == value + && Self::equal_attributes( + attributes, + &datapoint.attributes().cloned().collect::>(), + ) + }); + } + Sum(sum) + if matches!( + ty, + MetricType::Counter | MetricType::UpDownCounter + ) => + { + return sum.data_points().any(|datapoint| { + datapoint.value() == value + && Self::equal_attributes( + attributes, + &datapoint.attributes().cloned().collect::>(), + ) + }); + } + Histogram(histogram) if matches!(ty, MetricType::Histogram) => { + if count { + return histogram.data_points().any(|datapoint| { + datapoint.count() == value as u64 + && Self::equal_attributes( + attributes, + &datapoint + .attributes() + .cloned() + .collect::>(), + ) + }); + } else { + return histogram.data_points().any(|datapoint| { + datapoint.sum() == value + && Self::equal_attributes( + attributes, + &datapoint + .attributes() + .cloned() + .collect::>(), + ) + }); + } + } + _ => {} + } } + _ => {} } } false } - pub(crate) fn metric_exists( + pub(crate) fn metric_exists( &self, name: &str, ty: MetricType, attributes: &[KeyValue], ) -> bool { - let attributes = AttributeSet::from(attributes); if let Some(metric) = self.find(name) { - // Try to downcast the metric to each type of aggregation and assert that the value is correct. - if let Some(gauge) = metric.data.as_any().downcast_ref::>() { - // Find the datapoint with the correct attributes. 
- if matches!(ty, MetricType::Gauge) { - return gauge.data_points.iter().any(|datapoint| { - Self::equal_attributes(&attributes, &datapoint.attributes) - }); + match metric.data() { + opentelemetry_sdk::metrics::data::AggregatedMetrics::F64(metric_data) => { + use opentelemetry_sdk::metrics::data::MetricData::*; + match metric_data { + Gauge(gauge) if matches!(ty, MetricType::Gauge) => { + return gauge.data_points().any(|datapoint| { + Self::equal_attributes( + attributes, + &datapoint.attributes().cloned().collect::>(), + ) + }); + } + Sum(sum) + if matches!( + ty, + MetricType::Counter | MetricType::UpDownCounter + ) => + { + return sum.data_points().any(|datapoint| { + Self::equal_attributes( + attributes, + &datapoint.attributes().cloned().collect::>(), + ) + }); + } + Histogram(histogram) if matches!(ty, MetricType::Histogram) => { + return histogram.data_points().any(|datapoint| { + Self::equal_attributes( + attributes, + &datapoint.attributes().cloned().collect::>(), + ) + }); + } + _ => {} + } } - } else if let Some(sum) = metric.data.as_any().downcast_ref::>() { - // Note that we can't actually tell if the sum is monotonic or not, so we just check if it's a sum. 
- if matches!(ty, MetricType::Counter | MetricType::UpDownCounter) { - return sum.data_points.iter().any(|datapoint| { - Self::equal_attributes(&attributes, &datapoint.attributes) - }); + opentelemetry_sdk::metrics::data::AggregatedMetrics::U64(metric_data) => { + use opentelemetry_sdk::metrics::data::MetricData::*; + match metric_data { + Gauge(gauge) if matches!(ty, MetricType::Gauge) => { + return gauge.data_points().any(|datapoint| { + Self::equal_attributes( + attributes, + &datapoint.attributes().cloned().collect::>(), + ) + }); + } + Sum(sum) + if matches!( + ty, + MetricType::Counter | MetricType::UpDownCounter + ) => + { + return sum.data_points().any(|datapoint| { + Self::equal_attributes( + attributes, + &datapoint.attributes().cloned().collect::>(), + ) + }); + } + Histogram(histogram) if matches!(ty, MetricType::Histogram) => { + return histogram.data_points().any(|datapoint| { + Self::equal_attributes( + attributes, + &datapoint.attributes().cloned().collect::>(), + ) + }); + } + _ => {} + } + } + opentelemetry_sdk::metrics::data::AggregatedMetrics::I64(metric_data) => { + use opentelemetry_sdk::metrics::data::MetricData::*; + match metric_data { + Gauge(gauge) if matches!(ty, MetricType::Gauge) => { + return gauge.data_points().any(|datapoint| { + Self::equal_attributes( + attributes, + &datapoint.attributes().cloned().collect::>(), + ) + }); + } + Sum(sum) + if matches!( + ty, + MetricType::Counter | MetricType::UpDownCounter + ) => + { + return sum.data_points().any(|datapoint| { + Self::equal_attributes( + attributes, + &datapoint.attributes().cloned().collect::>(), + ) + }); + } + Histogram(histogram) if matches!(ty, MetricType::Histogram) => { + return histogram.data_points().any(|datapoint| { + Self::equal_attributes( + attributes, + &datapoint.attributes().cloned().collect::>(), + ) + }); + } + _ => {} + } } - } else if let Some(histogram) = metric.data.as_any().downcast_ref::>() - && matches!(ty, MetricType::Histogram) - { - return 
histogram.data_points.iter().any(|datapoint| { - Self::equal_attributes(&attributes, &datapoint.attributes) - }); } } false @@ -416,10 +647,9 @@ pub(crate) mod test_utils { #[allow(dead_code)] pub(crate) fn all(self) -> Vec { self.resource_metrics - .scope_metrics - .into_iter() + .scope_metrics() .flat_map(|scope_metrics| { - scope_metrics.metrics.into_iter().map(|metric| { + scope_metrics.metrics().map(|metric| { let serde_metric: SerdeMetric = metric.into(); serde_metric }) @@ -447,18 +677,25 @@ pub(crate) mod test_utils { .collect() } - fn equal_attributes(expected: &AttributeSet, actual: &[KeyValue]) -> bool { - // If lengths are different, we can short circuit. This also accounts for a bug where - // an empty attributes list would always be considered "equal" due to zip capping at - // the shortest iter's length - if expected.iter().count() != actual.len() { + fn equal_attributes(expected: &[KeyValue], actual: &[KeyValue]) -> bool { + // If we have fewer actual than expected, short circuit. Sometimes actual can have + // harmless duplicates so we use > instead of == (e.g. 
custom instruments with response + // event attrs) + if expected.len() > actual.len() { return false; } - // This works because the attributes are always sorted - expected.iter().zip(actual.iter()).all(|((k, v), kv)| { - kv.key == *k - && (kv.value == *v || kv.value == Value::String(StringValue::from(""))) - }) + + let actual_map: HashMap<_, _> = actual.iter().map(|kv| (&kv.key, &kv.value)).collect(); + + expected + .iter() + .all(|expected_kv| match actual_map.get(&expected_kv.key) { + None => false, + Some(actual_value) => { + *actual_value == &expected_kv.value + || *actual_value == &Value::String(StringValue::from("")) + } + }) } } @@ -516,35 +753,47 @@ pub(crate) mod test_utils { } impl SerdeMetricData { - fn extract_datapoints + Clone + 'static>( + fn extract_datapoints + Copy + 'static>( metric_data: &mut SerdeMetricData, - value: &dyn opentelemetry_sdk::metrics::data::Aggregation, + value: &MetricData, ) { - if let Some(gauge) = value.as_any().downcast_ref::>() { - gauge.data_points.iter().for_each(|datapoint| { - metric_data.datapoints.push(datapoint.into()); - }); - } - if let Some(sum) = value.as_any().downcast_ref::>() { - sum.data_points.iter().for_each(|datapoint| { - metric_data.datapoints.push(datapoint.into()); - }); - } - if let Some(histogram) = value.as_any().downcast_ref::>() { - histogram.data_points.iter().for_each(|datapoint| { - metric_data.datapoints.push(datapoint.into()); - }); + use MetricData::*; + match value { + Gauge(gauge) => metric_data + .datapoints + .extend(gauge.data_points().map(Into::into)), + Sum(sum) => metric_data + .datapoints + .extend(sum.data_points().map(Into::into)), + + Histogram(histogram) => metric_data + .datapoints + .extend(histogram.data_points().map(Into::into)), + + ExponentialHistogram(exponential_histogram) => metric_data + .datapoints + .extend(exponential_histogram.data_points().map(Into::into)), } } } - impl From for SerdeMetric { - fn from(value: Metric) -> Self { + impl From<&Metric> for SerdeMetric 
{ + fn from(value: &Metric) -> Self { let mut serde_metric = SerdeMetric { - name: value.name.into_owned(), - description: value.description.into_owned(), - unit: value.unit.to_string(), - data: value.data.into(), + name: value.name().to_string(), + description: value.description().to_string(), + unit: value.unit().to_string(), + data: match value.data() { + opentelemetry_sdk::metrics::data::AggregatedMetrics::F64(metric_data) => { + metric_data.into() + } + opentelemetry_sdk::metrics::data::AggregatedMetrics::U64(metric_data) => { + metric_data.into() + } + opentelemetry_sdk::metrics::data::AggregatedMetrics::I64(metric_data) => { + metric_data.into() + } + }, }; // Sort the datapoints so that we can compare them serde_metric.data.datapoints.sort(); @@ -567,18 +816,34 @@ pub(crate) mod test_utils { } } - impl From<&DataPoint> for SerdeMetricDataPoint + impl From<&SumDataPoint> for SerdeMetricDataPoint + where + T: Into + Copy, + { + fn from(value: &SumDataPoint) -> Self { + SerdeMetricDataPoint { + value: Some(value.value().into()), + sum: None, + count: None, + attributes: value + .attributes() + .map(|kv| (kv.key.to_string(), Self::convert(&kv.value))) + .collect(), + } + } + } + + impl From<&GaugeDataPoint> for SerdeMetricDataPoint where - T: Into + Clone, + T: Into + Copy, { - fn from(value: &DataPoint) -> Self { + fn from(value: &GaugeDataPoint) -> Self { SerdeMetricDataPoint { - value: Some(value.value.clone().into()), + value: Some(value.value().into()), sum: None, count: None, attributes: value - .attributes - .iter() + .attributes() .map(|kv| (kv.key.to_string(), Self::convert(&kv.value))) .collect(), } @@ -597,35 +862,54 @@ pub(crate) mod test_utils { Array::I64(v) => v.into(), Array::F64(v) => v.into(), Array::String(v) => v.iter().map(|v| v.to_string()).collect::>().into(), + _ => unreachable!(), }, + _ => unreachable!(), } } } impl From<&HistogramDataPoint> for SerdeMetricDataPoint where - T: Into + Clone, + T: Into + Copy, { fn from(value: 
&HistogramDataPoint) -> Self { SerdeMetricDataPoint { - sum: Some(value.sum.clone().into()), + sum: Some(value.sum().into()), value: None, - count: Some(value.count), + count: Some(value.count()), attributes: value - .attributes - .iter() + .attributes() .map(|kv| (kv.key.to_string(), Self::convert(&kv.value))) .collect(), } } } - impl From> for SerdeMetricData { - fn from(value: Box) -> Self { + impl From<&ExponentialHistogramDataPoint> for SerdeMetricDataPoint + where + T: Into + Copy, + { + fn from(value: &ExponentialHistogramDataPoint) -> Self { + SerdeMetricDataPoint { + sum: Some(value.sum().into()), + value: None, + count: Some(value.count() as u64), + attributes: value + .attributes() + .map(|kv| (kv.key.to_string(), Self::convert(&kv.value))) + .collect(), + } + } + } + + impl From<&MetricData> for SerdeMetricData + where + T: Into + Copy + 'static, + { + fn from(value: &MetricData) -> Self { let mut metric_data = SerdeMetricData::default(); - Self::extract_datapoints::(&mut metric_data, value.as_ref()); - Self::extract_datapoints::(&mut metric_data, value.as_ref()); - Self::extract_datapoints::(&mut metric_data, value.as_ref()); + Self::extract_datapoints(&mut metric_data, value); metric_data } } @@ -1081,7 +1365,7 @@ macro_rules! metric { builder = builder.with_unit($unit); } - builder.init() + builder.build() }; if cache_callsite { @@ -1257,36 +1541,36 @@ macro_rules! 
assert_counter_not_exists { ($($name:ident).+, $value: ty, $($attr_key:literal = $attr_value:expr),+) => { let attributes = &[$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+]; - let result = crate::metrics::collect_metrics().metric_exists::<$value>(stringify!($($name).+), crate::metrics::test_utils::MetricType::Counter, attributes); + let result = crate::metrics::collect_metrics().metric_exists(stringify!($($name).+), crate::metrics::test_utils::MetricType::Counter, attributes); assert_no_metric!(result, $name, None, None, None, attributes); }; ($($name:ident).+, $value: ty, $($($attr_key:ident).+ = $attr_value:expr),+) => { let attributes = &[$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+]; - let result = crate::metrics::collect_metrics().metric_exists::<$value>(stringify!($($name).+), crate::metrics::test_utils::MetricType::Counter, attributes); + let result = crate::metrics::collect_metrics().metric_exists(stringify!($($name).+), crate::metrics::test_utils::MetricType::Counter, attributes); assert_no_metric!(result, $name, None, None, None, attributes); }; ($name:literal, $value: ty, $($attr_key:literal = $attr_value:expr),+) => { let attributes = &[$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+]; - let result = crate::metrics::collect_metrics().metric_exists::<$value>($name, crate::metrics::test_utils::MetricType::Counter, attributes); + let result = crate::metrics::collect_metrics().metric_exists($name, crate::metrics::test_utils::MetricType::Counter, attributes); assert_no_metric!(result, $name, None, None, None, attributes); }; ($name:literal, $value: ty, $($($attr_key:ident).+ = $attr_value:expr),+) => { let attributes = &[$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+]; - let result = crate::metrics::collect_metrics().metric_exists::<$value>($name, crate::metrics::test_utils::MetricType::Counter, attributes); + let result = crate::metrics::collect_metrics().metric_exists($name, 
crate::metrics::test_utils::MetricType::Counter, attributes); assert_no_metric!(result, $name, None, None, None, attributes); }; ($name:literal, $value: ty, $attributes: expr) => { - let result = crate::metrics::collect_metrics().metric_exists::<$value>($name, crate::metrics::test_utils::MetricType::Counter, $attributes); + let result = crate::metrics::collect_metrics().metric_exists($name, crate::metrics::test_utils::MetricType::Counter, $attributes); assert_no_metric!(result, $name, None, None, None, &$attributes); }; ($name:literal, $value: ty) => { - let result = crate::metrics::collect_metrics().metric_exists::<$value>($name, crate::metrics::test_utils::MetricType::Counter, &[]); + let result = crate::metrics::collect_metrics().metric_exists($name, crate::metrics::test_utils::MetricType::Counter, &[]); assert_no_metric!(result, $name, None, None, None, &[]); }; } @@ -1444,30 +1728,30 @@ macro_rules! assert_histogram_exists { ($($name:ident).+, $value: ty, $($attr_key:literal = $attr_value:expr),+) => { let attributes = &[$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+]; - let result = crate::metrics::collect_metrics().metric_exists::<$value>(stringify!($($name).+), crate::metrics::test_utils::MetricType::Histogram, attributes); + let result = crate::metrics::collect_metrics().metric_exists(stringify!($($name).+), crate::metrics::test_utils::MetricType::Histogram, attributes); assert_metric!(result, $name, None, None, None, attributes); }; ($($name:ident).+, $value: ty, $($($attr_key:ident).+ = $attr_value:expr),+) => { let attributes = &[$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+]; - let result = crate::metrics::collect_metrics().metric_exists::<$value>(stringify!($($name).+), crate::metrics::test_utils::MetricType::Histogram, attributes); + let result = crate::metrics::collect_metrics().metric_exists(stringify!($($name).+), crate::metrics::test_utils::MetricType::Histogram, attributes); assert_metric!(result, $name, 
None, None, None, attributes); }; ($name:literal, $value: ty, $($attr_key:literal = $attr_value:expr),+) => { let attributes = &[$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+]; - let result = crate::metrics::collect_metrics().metric_exists::<$value>($name, crate::metrics::test_utils::MetricType::Histogram, attributes); + let result = crate::metrics::collect_metrics().metric_exists($name, crate::metrics::test_utils::MetricType::Histogram, attributes); assert_metric!(result, $name, None, None, None, attributes); }; ($name:literal, $value: ty, $($($attr_key:ident).+ = $attr_value:expr),+) => { let attributes = &[$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+]; - let result = crate::metrics::collect_metrics().metric_exists::<$value>($name, crate::metrics::test_utils::MetricType::Histogram, attributes); + let result = crate::metrics::collect_metrics().metric_exists($name, crate::metrics::test_utils::MetricType::Histogram, attributes); assert_metric!(result, $name, None, None, None, attributes); }; ($name:literal, $value: ty) => { - let result = crate::metrics::collect_metrics().metric_exists::<$value>($name, crate::metrics::test_utils::MetricType::Histogram, &[]); + let result = crate::metrics::collect_metrics().metric_exists($name, crate::metrics::test_utils::MetricType::Histogram, &[]); assert_metric!(result, $name, None, None, None, &[]); }; } @@ -1481,30 +1765,30 @@ macro_rules! 
assert_histogram_not_exists { ($($name:ident).+, $value: ty, $($attr_key:literal = $attr_value:expr),+) => { let attributes = &[$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+]; - let result = crate::metrics::collect_metrics().metric_exists::<$value>(stringify!($($name).+), crate::metrics::test_utils::MetricType::Histogram, attributes); + let result = crate::metrics::collect_metrics().metric_exists(stringify!($($name).+), crate::metrics::test_utils::MetricType::Histogram, attributes); assert_no_metric!(result, $name, None, None, None, attributes); }; ($($name:ident).+, $value: ty, $($($attr_key:ident).+ = $attr_value:expr),+) => { let attributes = &[$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+]; - let result = crate::metrics::collect_metrics().metric_exists::<$value>(stringify!($($name).+), crate::metrics::test_utils::MetricType::Histogram, attributes); + let result = crate::metrics::collect_metrics().metric_exists(stringify!($($name).+), crate::metrics::test_utils::MetricType::Histogram, attributes); assert_no_metric!(result, $name, None, None, None, attributes); }; ($name:literal, $value: ty, $($attr_key:literal = $attr_value:expr),+) => { let attributes = &[$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+]; - let result = crate::metrics::collect_metrics().metric_exists::<$value>($name, crate::metrics::test_utils::MetricType::Histogram, attributes); + let result = crate::metrics::collect_metrics().metric_exists($name, crate::metrics::test_utils::MetricType::Histogram, attributes); assert_no_metric!(result, $name, None, None, None, attributes); }; ($name:literal, $value: ty, $($($attr_key:ident).+ = $attr_value:expr),+) => { let attributes = &[$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+]; - let result = crate::metrics::collect_metrics().metric_exists::<$value>($name, crate::metrics::test_utils::MetricType::Histogram, attributes); + let result = 
crate::metrics::collect_metrics().metric_exists($name, crate::metrics::test_utils::MetricType::Histogram, attributes); assert_no_metric!(result, $name, None, None, None, attributes); }; ($name:literal, $value: ty) => { - let result = crate::metrics::collect_metrics().metric_exists::<$value>($name, crate::metrics::test_utils::MetricType::Histogram, &[]); + let result = crate::metrics::collect_metrics().metric_exists($name, crate::metrics::test_utils::MetricType::Histogram, &[]); assert_no_metric!(result, $name, None, None, None, &[]); }; } @@ -1665,7 +1949,7 @@ mod test { fn assert_unit(name: &str, unit: &str) { let collected_metrics = crate::metrics::collect_metrics(); let metric = collected_metrics.find(name).unwrap(); - assert_eq!(metric.unit, unit); + assert_eq!(metric.unit(), unit); } #[test] @@ -1675,13 +1959,13 @@ mod test { .meter("test") .u64_observable_gauge("test") .with_callback(|m| m.observe(5, &[])) - .init(); + .build(); assert_gauge!("test", 5); } #[test] fn test_gauge_record() { - let gauge = meter_provider().meter("test").u64_gauge("test").init(); + let gauge = meter_provider().meter("test").u64_gauge("test").build(); gauge.record(5, &[]); assert_gauge!("test", 5); } @@ -1942,7 +2226,7 @@ mod test { .meter("test") .u64_observable_gauge("test") .with_callback(|m| m.observe(5, &[])) - .init(); + .build(); assert_histogram_sum!("test", 1, "attr" = "val"); } .with_metrics() diff --git a/apollo-router/src/plugins/cache/entity.rs b/apollo-router/src/plugins/cache/entity.rs index ab3935a112..c973aa40bb 100644 --- a/apollo-router/src/plugins/cache/entity.rs +++ b/apollo-router/src/plugins/cache/entity.rs @@ -885,7 +885,7 @@ impl CacheService { value.as_str().map(|s| { let mut digest = Sha256::new(); digest.update(s); - hex::encode(digest.finalize().as_slice()) + hex::encode(digest.finalize()) }) }) }) @@ -1288,7 +1288,7 @@ pub(crate) fn hash_vary_headers(headers: &http::HeaderMap) -> String { } } - hex::encode(digest.finalize().as_slice()) + 
hex::encode(digest.finalize()) } // XXX(@goto-bus-stop): this doesn't make much sense: QueryHash already includes the operation name. @@ -1300,7 +1300,7 @@ pub(crate) fn hash_query(query_hash: &QueryHash, body: &graphql::Request) -> Str digest.update(body.operation_name.as_deref().unwrap_or("-").as_bytes()); digest.update(&[0u8; 1][..]); - hex::encode(digest.finalize().as_slice()) + hex::encode(digest.finalize()) } pub(crate) fn hash_additional_data( @@ -1334,7 +1334,7 @@ pub(crate) fn hash_additional_data( } } - hex::encode(digest.finalize().as_slice()) + hex::encode(digest.finalize()) } // build a cache key for the root operation @@ -1533,7 +1533,7 @@ fn hash_representation_inner( let mut digest = Sha256::new(); hash_object(&mut digest, representation, selection_set); - hex::encode(digest.finalize().as_slice()) + hex::encode(digest.finalize()) } // Hash the whole representation diff --git a/apollo-router/src/plugins/connectors/tracing.rs b/apollo-router/src/plugins/connectors/tracing.rs index 99f5039098..0213a809a7 100644 --- a/apollo-router/src/plugins/connectors/tracing.rs +++ b/apollo-router/src/plugins/connectors/tracing.rs @@ -27,7 +27,7 @@ pub(crate) fn connect_spec_version_instrument( ) }) }) - .init() + .build() }) } diff --git a/apollo-router/src/plugins/demand_control/cost_calculator/snapshots/apollo_router__plugins__demand_control__cost_calculator__static_cost__tests__federated_query_with_typenames@logs.snap b/apollo-router/src/plugins/demand_control/cost_calculator/snapshots/apollo_router__plugins__demand_control__cost_calculator__static_cost__tests__federated_query_with_typenames@logs.snap index d39a85b259..40943489bc 100644 --- a/apollo-router/src/plugins/demand_control/cost_calculator/snapshots/apollo_router__plugins__demand_control__cost_calculator__static_cost__tests__federated_query_with_typenames@logs.snap +++ 
b/apollo-router/src/plugins/demand_control/cost_calculator/snapshots/apollo_router__plugins__demand_control__cost_calculator__static_cost__tests__federated_query_with_typenames@logs.snap @@ -1,5 +1,6 @@ --- source: apollo-router/src/plugins/demand_control/cost_calculator/static_cost.rs expression: yaml +snapshot_kind: text --- [] diff --git a/apollo-router/src/plugins/demand_control/cost_calculator/static_cost.rs b/apollo-router/src/plugins/demand_control/cost_calculator/static_cost.rs index 70595ab172..cecbcf0ed0 100644 --- a/apollo-router/src/plugins/demand_control/cost_calculator/static_cost.rs +++ b/apollo-router/src/plugins/demand_control/cost_calculator/static_cost.rs @@ -1086,7 +1086,7 @@ mod tests { } // This was previously logging a warning for every __typename in the response. At the time of writing, // this should not produce logs. Generally, it should not produce undue noise for valid requests. - .with_subscriber(assert_snapshot_subscriber!()) + .with_subscriber(assert_snapshot_subscriber!(tracing_core::LevelFilter::WARN)) .await } diff --git a/apollo-router/src/plugins/fleet_detector.rs b/apollo-router/src/plugins/fleet_detector.rs index ecacff6f26..1611a5d90b 100644 --- a/apollo-router/src/plugins/fleet_detector.rs +++ b/apollo-router/src/plugins/fleet_detector.rs @@ -114,7 +114,7 @@ impl GaugeStore { .with_callback(move |i| { i.observe(1, &attributes); }) - .init(), + .build(), ); } // apollo.router.instance.cpu_freq @@ -136,7 +136,7 @@ impl GaugeStore { cpus.iter().map(|cpu| cpu.frequency()).sum::() / cpus.len() as u64; gauge.observe(cpu_freq, &[]) }) - .init(), + .build(), ); } // apollo.router.instance.cpu_count @@ -161,7 +161,7 @@ impl GaugeStore { ], ) }) - .init(), + .build(), ); } // apollo.router.instance.total_memory @@ -183,7 +183,7 @@ impl GaugeStore { ) }) .with_unit("bytes") - .init(), + .build(), ); } { @@ -204,7 +204,7 @@ impl GaugeStore { } gauge.observe(1, attributes.as_slice()) }) - .init(), + .build(), ) } 
GaugeStore::Active(gauges) diff --git a/apollo-router/src/plugins/progressive_override/mod.rs b/apollo-router/src/plugins/progressive_override/mod.rs index 4c8d26dd77..7c6f12d61c 100644 --- a/apollo-router/src/plugins/progressive_override/mod.rs +++ b/apollo-router/src/plugins/progressive_override/mod.rs @@ -269,7 +269,7 @@ fn hash_operation(operation: &Option, operation_name: &Option) - if let Some(operation_name) = operation_name { digest.update(operation_name.as_bytes()); } - hex::encode(digest.finalize().as_slice()) + hex::encode(digest.finalize()) } register_plugin!("apollo", "progressive_override", ProgressiveOverridePlugin); diff --git a/apollo-router/src/plugins/record_replay/recording.rs b/apollo-router/src/plugins/record_replay/recording.rs index 9c589c9478..7b2580c575 100644 --- a/apollo-router/src/plugins/record_replay/recording.rs +++ b/apollo-router/src/plugins/record_replay/recording.rs @@ -32,7 +32,7 @@ impl Recording { let mut digest = Sha256::new(); let req = serde_json::to_string(&self.client_request).expect("can serialize"); digest.update(req); - let hash = hex::encode(digest.finalize().as_slice()); + let hash = hex::encode(digest.finalize()); PathBuf::from(format!("{operation_name}-{hash}.json")) } diff --git a/apollo-router/src/plugins/telemetry/apollo_exporter.rs b/apollo-router/src/plugins/telemetry/apollo_exporter.rs index e6b539e279..f2f6a62481 100644 --- a/apollo-router/src/plugins/telemetry/apollo_exporter.rs +++ b/apollo-router/src/plugins/telemetry/apollo_exporter.rs @@ -16,7 +16,7 @@ use http::header::CONTENT_ENCODING; use http::header::CONTENT_TYPE; use http::header::RETRY_AFTER; use http::header::USER_AGENT; -use opentelemetry::ExportError; +use opentelemetry_sdk::ExportError; use parking_lot::Mutex; pub(crate) use prost::*; use reqwest::Client; diff --git a/apollo-router/src/plugins/telemetry/apollo_otlp_exporter.rs b/apollo-router/src/plugins/telemetry/apollo_otlp_exporter.rs index 6926d5522e..3bbd22ac96 100644 --- 
a/apollo-router/src/plugins/telemetry/apollo_otlp_exporter.rs +++ b/apollo-router/src/plugins/telemetry/apollo_otlp_exporter.rs @@ -1,21 +1,19 @@ use derivative::Derivative; -use futures::TryFutureExt; -use futures::future; -use futures::future::BoxFuture; -use opentelemetry::InstrumentationLibrary; +use opentelemetry::InstrumentationScope; use opentelemetry::KeyValue; use opentelemetry::trace::Event; use opentelemetry::trace::SpanContext; use opentelemetry::trace::Status; use opentelemetry::trace::TraceFlags; use opentelemetry::trace::TraceState; -use opentelemetry_otlp::SpanExporterBuilder; +use opentelemetry_otlp::Compression::Gzip; use opentelemetry_otlp::WithExportConfig; +use opentelemetry_otlp::WithTonicConfig; use opentelemetry_sdk::Resource; -use opentelemetry_sdk::export::trace::ExportResult; -use opentelemetry_sdk::export::trace::SpanData; -use opentelemetry_sdk::export::trace::SpanExporter; +use opentelemetry_sdk::error::OTelSdkResult; +use opentelemetry_sdk::trace::SpanData; use opentelemetry_sdk::trace::SpanEvents; +use opentelemetry_sdk::trace::SpanExporter; use opentelemetry_sdk::trace::SpanLinks; use sys_info::hostname; use tonic::metadata::MetadataMap; @@ -49,7 +47,7 @@ pub(crate) struct ApolloOtlpExporter { batch_config: BatchProcessorConfig, endpoint: Url, apollo_key: String, - intrumentation_library: InstrumentationLibrary, + intrumentation_scope: InstrumentationScope, #[derivative(Debug = "ignore")] otlp_exporter: opentelemetry_otlp::SpanExporter, errors_configuration: ErrorsConfiguration, @@ -71,47 +69,47 @@ impl ApolloOtlpExporter { let mut metadata = MetadataMap::new(); metadata.insert("apollo.api.key", MetadataValue::try_from(apollo_key)?); let mut otlp_exporter = match protocol { - Protocol::Grpc => SpanExporterBuilder::from( - opentelemetry_otlp::new_exporter() - .tonic() - .with_tls_config(ClientTlsConfig::new().with_native_roots()) - .with_timeout(batch_config.max_export_timeout) - .with_endpoint(endpoint.to_string()) - 
.with_metadata(metadata) - .with_compression(opentelemetry_otlp::Compression::Gzip), - ) - .build_span_exporter()?, + Protocol::Grpc => opentelemetry_otlp::SpanExporter::builder() + .with_tonic() + .with_tls_config(ClientTlsConfig::new().with_native_roots()) + .with_timeout(batch_config.max_export_timeout) + .with_endpoint(endpoint.to_string()) + .with_metadata(metadata) + .with_compression(Gzip) + .build()?, // So far only using HTTP path for testing - the Studio backend only accepts GRPC today. - Protocol::Http => SpanExporterBuilder::from( - opentelemetry_otlp::new_exporter() - .http() - .with_timeout(batch_config.max_export_timeout) - .with_endpoint(endpoint.to_string()), - ) - .build_span_exporter()?, + Protocol::Http => opentelemetry_otlp::SpanExporter::builder() + .with_http() + .with_timeout(batch_config.max_export_timeout) + .with_endpoint(endpoint.to_string()) + .build()?, }; - otlp_exporter.set_resource(&Resource::new([ - KeyValue::new("apollo.router.id", router_id()), - KeyValue::new("apollo.graph.ref", apollo_graph_ref.to_string()), - KeyValue::new("apollo.schema.id", schema_id.to_string()), - KeyValue::new( - "apollo.user.agent", - format!( - "{}@{}", - std::env!("CARGO_PKG_NAME"), - std::env!("CARGO_PKG_VERSION") - ), - ), - KeyValue::new("apollo.client.host", hostname()?), - KeyValue::new("apollo.client.uname", get_uname()?), - ])); + otlp_exporter.set_resource( + &Resource::builder() + .with_attributes([ + KeyValue::new("apollo.router.id", router_id()), + KeyValue::new("apollo.graph.ref", apollo_graph_ref.to_string()), + KeyValue::new("apollo.schema.id", schema_id.to_string()), + KeyValue::new( + "apollo.user.agent", + format!( + "{}@{}", + std::env!("CARGO_PKG_NAME"), + std::env!("CARGO_PKG_VERSION") + ), + ), + KeyValue::new("apollo.client.host", hostname()?), + KeyValue::new("apollo.client.uname", get_uname()?), + ]) + .build(), + ); Ok(Self { endpoint: endpoint.clone(), batch_config: batch_config.clone(), apollo_key: apollo_key.to_string(), - 
intrumentation_library: InstrumentationLibrary::builder(GLOBAL_TRACER_NAME) + intrumentation_scope: InstrumentationScope::builder(GLOBAL_TRACER_NAME) .with_version(format!( "{}@{}", std::env!("CARGO_PKG_NAME"), @@ -197,7 +195,7 @@ impl ApolloOtlpExporter { events: Self::extract_span_events(&span), links: SpanLinks::default(), status: span.status, - instrumentation_lib: self.intrumentation_library.clone(), + instrumentation_scope: self.intrumentation_scope.clone(), dropped_attributes_count: span.droppped_attribute_count, } } @@ -250,27 +248,25 @@ impl ApolloOtlpExporter { events: Self::extract_span_events(&span), links: SpanLinks::default(), status, - instrumentation_lib: self.intrumentation_library.clone(), + instrumentation_scope: self.intrumentation_scope.clone(), dropped_attributes_count: span.droppped_attribute_count, } } - pub(crate) fn export(&mut self, spans: Vec) -> BoxFuture<'static, ExportResult> { - let fut = self.otlp_exporter.export(spans); - Box::pin(fut.and_then(|_| { - // re-use the metric we already have in apollo_exporter but attach the protocol - u64_counter!( - "apollo.router.telemetry.studio.reports", - "The number of reports submitted to Studio by the Router", - 1, - report.type = ROUTER_REPORT_TYPE_TRACES, - report.protocol = ROUTER_TRACING_PROTOCOL_OTLP - ); - future::ready(Ok(())) - })) + pub(crate) async fn export(&mut self, spans: Vec) -> OTelSdkResult { + let result = self.otlp_exporter.export(spans).await; + // re-use the metric we already have in apollo_exporter but attach the protocol + u64_counter!( + "apollo.router.telemetry.studio.reports", + "The number of reports submitted to Studio by the Router", + 1, + report.type = ROUTER_REPORT_TYPE_TRACES, + report.protocol = ROUTER_TRACING_PROTOCOL_OTLP + ); + result } - pub(crate) fn shutdown(&mut self) { + pub(crate) fn shutdown(&mut self) -> OTelSdkResult { self.otlp_exporter.shutdown() } } diff --git a/apollo-router/src/plugins/telemetry/config.rs 
b/apollo-router/src/plugins/telemetry/config.rs index 51d48b8d51..79e30a710f 100644 --- a/apollo-router/src/plugins/telemetry/config.rs +++ b/apollo-router/src/plugins/telemetry/config.rs @@ -1,4 +1,6 @@ //! Configuration for the telemetry plugin. + +use std::borrow::Cow; use std::collections::BTreeMap; use std::collections::HashSet; @@ -7,12 +9,8 @@ use http::HeaderName; use num_traits::ToPrimitive; use opentelemetry::Array; use opentelemetry::Value; -use opentelemetry::metrics::MetricsError; -use opentelemetry_sdk::metrics::Aggregation; -use opentelemetry_sdk::metrics::Instrument; +use opentelemetry_sdk::metrics::{Instrument, InstrumentKind, StreamBuilder}; use opentelemetry_sdk::metrics::Stream; -use opentelemetry_sdk::metrics::View; -use opentelemetry_sdk::metrics::new_view; use opentelemetry_sdk::trace::SpanLimits; use schemars::JsonSchema; use serde::Deserialize; @@ -161,36 +159,63 @@ pub(crate) struct MetricView { pub(crate) allowed_attribute_keys: Option>, } -impl TryInto> for MetricView { - type Error = MetricsError; +pub(crate) type OTelMetricView = Box Option + Send + Sync>; - fn try_into(self) -> Result, Self::Error> { - let aggregation = self.aggregation.map(|aggregation| match aggregation { - MetricAggregation::Histogram { buckets } => Aggregation::ExplicitBucketHistogram { - boundaries: buckets, - record_min_max: true, - }, - MetricAggregation::Drop => Aggregation::Drop, +impl MetricView { + pub (crate) fn try_into_otel_metric_view_with(self, configure_builder_hook: F) -> Result + where + F: Fn(&Instrument, StreamBuilder) -> StreamBuilder + Send + Sync + 'static, + { + let target_name = self.rename.clone().unwrap_or_else(|| self.name.clone()); + + let otel_aggregation = self.aggregation.map(|aggregation| match aggregation { + MetricAggregation::Histogram { buckets } => { + opentelemetry_sdk::metrics::Aggregation::ExplicitBucketHistogram { + boundaries: buckets, + record_min_max: true, + } + } + MetricAggregation::Drop => 
opentelemetry_sdk::metrics::Aggregation::Drop, }); - let instrument = Instrument::new().name(self.name); - let mut mask = Stream::new(); - if let Some(new_name) = self.rename { - mask = mask.name(new_name); - } - if let Some(desc) = self.description { - mask = mask.description(desc); - } - if let Some(unit) = self.unit { - mask = mask.unit(unit); - } - if let Some(aggregation) = aggregation { - mask = mask.aggregation(aggregation); - } - if let Some(allowed_attribute_keys) = self.allowed_attribute_keys { - mask = mask.allowed_attribute_keys(allowed_attribute_keys.into_iter().map(Key::new)); - } - new_view(instrument, mask) + let allowed_keys: Option> = self + .allowed_attribute_keys + .map(|set| set.into_iter().map(Key::new).collect()); + + Ok(Box::new(move |i: &Instrument| { + if i.name() != self.name { + return None; + } + + let mut builder = Stream::builder().with_name(target_name.clone()); + + builder = configure_builder_hook(i, builder); + + if let Some(desc) = &self.description { + builder = builder.with_description(desc.clone()); + } + if let Some(unit) = &self.unit { + builder = builder.with_unit(unit.clone()); + } + if matches!(i.kind(), InstrumentKind::Histogram) { + if let Some(ref agg) = otel_aggregation { + builder = builder.with_aggregation(agg.clone()); + } + } + if let Some(ref keys) = allowed_keys { + builder = builder.with_allowed_attribute_keys(keys.clone()); + } + + builder.build().ok() + })) + } +} + +impl TryInto for MetricView { + type Error = String; + + fn try_into(self) -> Result { + self.try_into_otel_metric_view_with(|_, b| b) } } @@ -648,6 +673,7 @@ impl From for AttributeArray { opentelemetry::Array::String(v) => { AttributeArray::String(v.into_iter().map(|v| v.into()).collect()) } + _ => AttributeArray::String(vec![]), } } } @@ -697,23 +723,26 @@ impl From<&TracingCommon> for opentelemetry_sdk::trace::Config { if config.parent_based_sampler { sampler = parent_based(sampler); } + if 
config.preview_datadog_agent_sampling.unwrap_or_default() { - common = common.with_sampler(DatadogAgentSampling::new( - sampler, - config.parent_based_sampler, - )); + common.sampler = Box::new( + DatadogAgentSampling::new( + sampler, + config.parent_based_sampler, + ) + ); } else { - common = common.with_sampler(sampler); + common.sampler = Box::new(sampler); } - common = common.with_max_events_per_span(config.max_events_per_span); - common = common.with_max_attributes_per_span(config.max_attributes_per_span); - common = common.with_max_links_per_span(config.max_links_per_span); - common = common.with_max_attributes_per_event(config.max_attributes_per_event); - common = common.with_max_attributes_per_link(config.max_attributes_per_link); + common.span_limits.max_events_per_span = config.max_events_per_span; + common.span_limits.max_attributes_per_span = config.max_attributes_per_span; + common.span_limits.max_links_per_span = config.max_links_per_span; + common.span_limits.max_attributes_per_event = config.max_attributes_per_event; + common.span_limits.max_attributes_per_link = config.max_attributes_per_link; // Take the default first, then config, then env resources, then env variable. 
Last entry wins - common = common.with_resource(config.to_resource()); + common.resource = Cow::Owned(config.to_resource()); common } } diff --git a/apollo-router/src/plugins/telemetry/config_new/apollo/instruments.rs b/apollo-router/src/plugins/telemetry/config_new/apollo/instruments.rs index 4196811d41..67b7d15659 100644 --- a/apollo-router/src/plugins/telemetry/config_new/apollo/instruments.rs +++ b/apollo-router/src/plugins/telemetry/config_new/apollo/instruments.rs @@ -313,7 +313,7 @@ fn create_subgraph_and_connector_shared_static_instruments() -> HashMap Option { opentelemetry::Value::String(s) => s.as_str().parse::().ok(), opentelemetry::Value::Bool(_) => None, opentelemetry::Value::Array(_) => None, + _ => unreachable!(), } } diff --git a/apollo-router/src/plugins/telemetry/config_new/mod.rs b/apollo-router/src/plugins/telemetry/config_new/mod.rs index 5b616bbf74..7a56bd2ae0 100644 --- a/apollo-router/src/plugins/telemetry/config_new/mod.rs +++ b/apollo-router/src/plugins/telemetry/config_new/mod.rs @@ -173,7 +173,7 @@ pub(crate) fn trace_id() -> Option { pub(crate) fn get_baggage(key: &str) -> Option { let context = Span::current().context(); let baggage = context.baggage(); - baggage.get(key).cloned() + baggage.get(key).map(|v| v.as_str().to_string().into()) } pub(crate) trait ToOtelValue { @@ -248,6 +248,7 @@ impl From for AttributeValue { opentelemetry::Value::F64(v) => AttributeValue::F64(v), opentelemetry::Value::String(v) => AttributeValue::String(v.into()), opentelemetry::Value::Array(v) => AttributeValue::Array(v.into()), + _ => unreachable!(), } } } diff --git a/apollo-router/src/plugins/telemetry/config_new/router_overhead/instruments.rs b/apollo-router/src/plugins/telemetry/config_new/router_overhead/instruments.rs index 98c1095fe6..334e69829c 100644 --- a/apollo-router/src/plugins/telemetry/config_new/router_overhead/instruments.rs +++ b/apollo-router/src/plugins/telemetry/config_new/router_overhead/instruments.rs @@ -36,7 +36,7 @@ pub(crate) 
fn create_static_instrument(enabled: bool) -> Option<(String, StaticI .with_description( "Router processing overhead (time not spent waiting for subgraphs or connectors to respond).", ) - .init(), + .build(), ), )) } diff --git a/apollo-router/src/plugins/telemetry/dynamic_attribute.rs b/apollo-router/src/plugins/telemetry/dynamic_attribute.rs index 04688ebc64..6e508f8692 100644 --- a/apollo-router/src/plugins/telemetry/dynamic_attribute.rs +++ b/apollo-router/src/plugins/telemetry/dynamic_attribute.rs @@ -240,13 +240,14 @@ impl EventDynAttribute for ::tracing::Span { Some(otel_data) => match &mut otel_data.event_attributes { Some(event_attributes) => { event_attributes.extend( - attributes.map(|KeyValue { key, value }| (key, value)), + attributes + .map(|KeyValue { key, value, .. }| (key, value)), ); } None => { otel_data.event_attributes = Some( attributes - .map(|KeyValue { key, value }| (key, value)) + .map(|KeyValue { key, value, .. }| (key, value)) .collect(), ); } diff --git a/apollo-router/src/plugins/telemetry/error_handler.rs b/apollo-router/src/plugins/telemetry/error_handler.rs index 435d0ee70c..bb09cd3a5d 100644 --- a/apollo-router/src/plugins/telemetry/error_handler.rs +++ b/apollo-router/src/plugins/telemetry/error_handler.rs @@ -1,119 +1,15 @@ use std::fmt::Debug; use std::time::Duration; -use std::time::Instant; use async_trait::async_trait; -use dashmap::DashMap; -use futures::future::BoxFuture; -use once_cell::sync::OnceCell; -use opentelemetry::metrics::MetricsError; -use opentelemetry_sdk::export::trace::ExportResult; -use opentelemetry_sdk::export::trace::SpanData; -use opentelemetry_sdk::export::trace::SpanExporter; -use opentelemetry_sdk::metrics::Aggregation; -use opentelemetry_sdk::metrics::InstrumentKind; +use futures::TryFutureExt; +use opentelemetry_sdk::error::OTelSdkError; +use opentelemetry_sdk::error::OTelSdkResult; use opentelemetry_sdk::metrics::data::ResourceMetrics; -use opentelemetry_sdk::metrics::data::Temporality; -use 
opentelemetry_sdk::metrics::exporter::PushMetricsExporter; -use opentelemetry_sdk::metrics::reader::AggregationSelector; -use opentelemetry_sdk::metrics::reader::TemporalitySelector; - -#[derive(Eq, PartialEq, Hash)] -enum ErrorType { - Trace, - Metric, - Other, -} -static OTEL_ERROR_LAST_LOGGED: OnceCell> = OnceCell::new(); - -pub(crate) fn handle_error>(err: T) { - // We have to rate limit these errors because when they happen they are very frequent. - // Use a dashmap to store the message type with the last time it was logged. - handle_error_with_map(err, OTEL_ERROR_LAST_LOGGED.get_or_init(DashMap::new)); -} - -// Allow for map injection to avoid using global map in tests -fn handle_error_with_map>( - err: T, - last_logged_map: &DashMap, -) { - let err = err.into(); - - // We don't want the dashmap to get big, so we key the error messages by type. - let error_type = match err { - opentelemetry::global::Error::Trace(_) => ErrorType::Trace, - opentelemetry::global::Error::Metric(_) => ErrorType::Metric, - _ => ErrorType::Other, - }; - #[cfg(not(test))] - let threshold = Duration::from_secs(10); - #[cfg(test)] - let threshold = Duration::from_millis(100); - - if let opentelemetry::global::Error::Metric(err) = &err { - // For now we have to suppress Metrics error: reader is shut down or not registered - // https://github.com/open-telemetry/opentelemetry-rust/issues/1244 - - if err.to_string() == "Metrics error: reader is shut down or not registered" { - return; - } - - // Keep track of the number of cardinality overflow errors otel emits. This can be removed after upgrading to 0.28.0 when the cardinality limit is removed. - // The version upgrade will also cause this log to be removed from our visibility even if we were set up custom a cardinality limit. - // https://github.com/open-telemetry/opentelemetry-rust/pull/2528 - if err - .to_string() - .contains("Maximum data points for metric stream exceeded. 
Entry added to overflow.") - { - u64_counter!( - "apollo.router.telemetry.metrics.cardinality_overflow", - "A count of how often a telemetry metric hit the hard cardinality limit", - 1 - ); - } - } - - // Copy here so that we don't retain a mutable reference into the dashmap and lock the shard - let now = Instant::now(); - let last_logged = *last_logged_map - .entry(error_type) - .and_modify(|last_logged| { - if last_logged.elapsed() > threshold { - *last_logged = now; - } - }) - .or_insert_with(|| now); - - if last_logged == now { - // These events are logged with explicitly no parent. This allows them to be detached from traces. - match err { - opentelemetry::global::Error::Trace(err) => { - ::tracing::error!("OpenTelemetry trace error occurred: {}", err) - } - opentelemetry::global::Error::Metric(err) => { - if let MetricsError::Other(msg) = &err { - if msg.contains("Warning") { - ::tracing::warn!(parent: None, "OpenTelemetry metric warning occurred: {}", msg); - return; - } - - // TODO: We should be able to remove this after upgrading to 0.26.0, which addresses the double-shutdown - // called out in https://github.com/open-telemetry/opentelemetry-rust/issues/1661 - if msg == "metrics provider already shut down" { - return; - } - } - ::tracing::error!(parent: None, "OpenTelemetry metric error occurred: {}", err); - } - opentelemetry::global::Error::Other(err) => { - ::tracing::error!(parent: None, "OpenTelemetry error occurred: {}", err) - } - other => { - ::tracing::error!(parent: None, "OpenTelemetry error occurred: {:?}", other) - } - } - } -} +use opentelemetry_sdk::metrics::exporter::PushMetricExporter; +use opentelemetry_sdk::metrics::Temporality; +use opentelemetry_sdk::trace::SpanData; +use opentelemetry_sdk::trace::SpanExporter; /// Wrapper that modifies trace export errors to include exporter name pub(crate) struct NamedSpanExporter { @@ -136,18 +32,20 @@ impl Debug for NamedSpanExporter { } impl SpanExporter for NamedSpanExporter { - fn export(&mut 
self, batch: Vec) -> BoxFuture<'static, ExportResult> { + fn export(&self, batch: Vec) -> impl Future + Send { let name = self.name; let fut = self.inner.export(batch); Box::pin(async move { fut.await.map_err(|err| { let modified = format!("[{} traces] {}", name, err); - opentelemetry::trace::TraceError::from(modified) + // Recreate as an internal failure to allow us to write a tagged message. This has + // the unfortunate side effect of removing the original type + OTelSdkError::InternalFailure(modified) }) }) } - fn shutdown(&mut self) { + fn shutdown(&mut self) -> OTelSdkResult { self.inner.shutdown() } @@ -168,7 +66,7 @@ impl NamedMetricsExporter { } } -impl Debug for NamedMetricsExporter { +impl Debug for NamedMetricsExporter { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("NamedMetricsExporter") .field("name", &self.name) @@ -176,193 +74,54 @@ impl Debug for NamedMetricsExporter { } } -impl AggregationSelector for NamedMetricsExporter { - fn aggregation(&self, kind: InstrumentKind) -> Aggregation { - self.inner.aggregation(kind) - } -} - -impl TemporalitySelector for NamedMetricsExporter { - fn temporality(&self, kind: InstrumentKind) -> Temporality { - self.inner.temporality(kind) - } -} - -fn prefix_metrics_error(name: &'static str, err: MetricsError) -> MetricsError { - match err { - MetricsError::Other(msg) => MetricsError::Other(format!("[{} metrics] {}", name, msg)), - MetricsError::Config(msg) => MetricsError::Config(format!("[{} metrics] {}", name, msg)), - MetricsError::ExportErr(inner) => { - MetricsError::Other(format!("[{} metrics] {}", name, inner)) - } - // Don't modify instrument configuration errors - not related to export - MetricsError::InvalidInstrumentConfiguration(msg) => { - MetricsError::InvalidInstrumentConfiguration(msg) - } - _ => MetricsError::Other(format!("[{} metrics] {}", name, err)), - } +fn prefix_metrics_error(name: &'static str, err: OTelSdkError) -> OTelSdkError { + let modified = 
format!("[{} metrics] {}", name, err); + // Recreate as an internal failure to allow us to write a tagged message. This has + // the unfortunate side effect of removing the original type + OTelSdkError::InternalFailure(modified) } #[async_trait] -impl PushMetricsExporter for NamedMetricsExporter { - async fn export(&self, metrics: &mut ResourceMetrics) -> opentelemetry::metrics::Result<()> { +impl PushMetricExporter for NamedMetricsExporter { + fn export(&self, metrics: &ResourceMetrics) -> impl Future + Send { self.inner .export(metrics) - .await .map_err(|err| prefix_metrics_error(self.name, err)) } - async fn force_flush(&self) -> opentelemetry::metrics::Result<()> { + fn force_flush(&self) -> OTelSdkResult { self.inner .force_flush() - .await .map_err(|err| prefix_metrics_error(self.name, err)) } - fn shutdown(&self) -> opentelemetry::metrics::Result<()> { + fn shutdown_with_timeout(&self, timeout: Duration) -> OTelSdkResult { + self.inner.shutdown_with_timeout(timeout) + } + + fn shutdown(&self) -> OTelSdkResult { self.inner .shutdown() .map_err(|err| prefix_metrics_error(self.name, err)) } + + fn temporality(&self) -> Temporality { + self.inner.temporality() + } } #[cfg(test)] mod tests { use std::fmt::Debug; - use std::ops::DerefMut; - use std::sync::Arc; use std::time::Duration; - use dashmap::DashMap; - use futures::future::BoxFuture; - use opentelemetry::metrics::MetricsError; - use opentelemetry_sdk::export::trace::SpanData; - use opentelemetry_sdk::export::trace::SpanExporter; + use opentelemetry_sdk::error::OTelSdkError; + use opentelemetry_sdk::error::OTelSdkResult; use opentelemetry_sdk::metrics::data::ResourceMetrics; - use opentelemetry_sdk::metrics::exporter::PushMetricsExporter; - use parking_lot::Mutex; - use tracing_core::Event; - use tracing_core::Field; - use tracing_core::Subscriber; - use tracing_core::field::Visit; - use tracing_futures::WithSubscriber; - use tracing_subscriber::Layer; - use tracing_subscriber::layer::Context; - use 
tracing_subscriber::layer::SubscriberExt; - - use crate::metrics::FutureMetricsExt; - use crate::plugins::telemetry::error_handler::handle_error_with_map; - - #[tokio::test] - async fn test_handle_error_throttling() { - let error_map = DashMap::new(); - // Set up a fake subscriber so we can check log events. If this is useful then maybe it can be factored out into something reusable - #[derive(Default)] - struct TestVisitor { - log_entries: Vec, - } - - #[derive(Default, Clone)] - struct TestLayer { - visitor: Arc>, - } - impl TestLayer { - fn assert_log_entry_count(&self, message: &str, expected: usize) { - let log_entries = self.visitor.lock().log_entries.clone(); - let actual = log_entries.iter().filter(|e| e.contains(message)).count(); - assert_eq!(actual, expected); - } - } - impl Visit for TestVisitor { - fn record_debug(&mut self, field: &Field, value: &dyn Debug) { - self.log_entries - .push(format!("{}={:?}", field.name(), value)); - } - } - - impl Layer for TestLayer - where - S: Subscriber, - Self: 'static, - { - fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) { - event.record(self.visitor.lock().deref_mut()) - } - } - - let test_layer = TestLayer::default(); - - async { - // Log twice rapidly, they should get deduped - handle_error_with_map( - opentelemetry::global::Error::Other("other error".to_string()), - &error_map, - ); - handle_error_with_map( - opentelemetry::global::Error::Other("other error".to_string()), - &error_map, - ); - handle_error_with_map( - opentelemetry::global::Error::Trace("trace error".to_string().into()), - &error_map, - ); - } - .with_subscriber(tracing_subscriber::registry().with(test_layer.clone())) - .await; - - test_layer.assert_log_entry_count("other error", 1); - test_layer.assert_log_entry_count("trace error", 1); - - // Sleep a bit and then log again, it should get logged - tokio::time::sleep(Duration::from_millis(200)).await; - async { - handle_error_with_map( - opentelemetry::global::Error::Other("other 
error".to_string()), - &error_map, - ); - } - .with_subscriber(tracing_subscriber::registry().with(test_layer.clone())) - .await; - test_layer.assert_log_entry_count("other error", 2); - } - - #[tokio::test] - async fn test_cardinality_overflow_1() { - async { - let error_map = DashMap::new(); - let msg = "Warning: Maximum data points for metric stream exceeded. Entry added to overflow. Subsequent overflows to same metric until next collect will not be logged."; - handle_error_with_map( - opentelemetry::global::Error::Metric(opentelemetry::metrics::MetricsError::Other(msg.to_string())), - &error_map, - ); - - assert_counter!( - "apollo.router.telemetry.metrics.cardinality_overflow", - 1 - ); - } - .with_metrics() - .await; - } - - #[tokio::test] - async fn test_cardinality_overflow_2() { - async { - let error_map = DashMap::new(); - let msg = - "Warning: Maximum data points for metric stream exceeded. Entry added to overflow."; - handle_error_with_map( - opentelemetry::global::Error::Metric(opentelemetry::metrics::MetricsError::Other( - msg.to_string(), - )), - &error_map, - ); - - assert_counter!("apollo.router.telemetry.metrics.cardinality_overflow", 1); - } - .with_metrics() - .await; - } + use opentelemetry_sdk::metrics::exporter::PushMetricExporter; + use opentelemetry_sdk::metrics::Temporality; + use opentelemetry_sdk::trace::SpanData; + use opentelemetry_sdk::trace::SpanExporter; // Mock span exporter to test failures #[derive(Debug)] @@ -370,13 +129,15 @@ mod tests { impl SpanExporter for FailingSpanExporter { fn export( - &mut self, + &self, _batch: Vec, - ) -> BoxFuture<'static, opentelemetry_sdk::export::trace::ExportResult> { - Box::pin(async { Err(opentelemetry::trace::TraceError::from("connection failed")) }) + ) -> impl std::future::Future + Send { + Box::pin(async { Err(OTelSdkError::InternalFailure("connection failed".into())) }) } - fn shutdown(&mut self) {} + fn shutdown(&mut self) -> OTelSdkResult { + Ok(()) + } fn set_resource(&mut self, 
_resource: &opentelemetry_sdk::Resource) {} } @@ -384,7 +145,7 @@ mod tests { #[tokio::test] async fn test_named_span_exporter_adds_prefix() { let inner = FailingSpanExporter; - let mut named = super::NamedSpanExporter::new(inner, "test-exporter"); + let named = super::NamedSpanExporter::new(inner, "test-exporter"); let result = named.export(vec![]).await; @@ -401,51 +162,29 @@ mod tests { error_type: &'static str, } - #[async_trait::async_trait] - impl PushMetricsExporter for FailingMetricsExporter { - async fn export( - &self, - _metrics: &mut ResourceMetrics, - ) -> opentelemetry::metrics::Result<()> { + impl PushMetricExporter for FailingMetricsExporter { + async fn export(&self, _metrics: &ResourceMetrics) -> OTelSdkResult { match self.error_type { - "other" => Err(MetricsError::Other("export failed".to_string())), - "config" => Err(MetricsError::Config("invalid config".to_string())), + "other" => Err(OTelSdkError::InternalFailure("export failed".to_string())), + "config" => Err(OTelSdkError::InternalFailure("invalid config".to_string())), _ => Ok(()), } } - async fn force_flush(&self) -> opentelemetry::metrics::Result<()> { + fn force_flush(&self) -> OTelSdkResult { Ok(()) } - fn shutdown(&self) -> opentelemetry::metrics::Result<()> { + fn shutdown_with_timeout(&self, _timeout: Duration) -> OTelSdkResult { Ok(()) } - } - - impl opentelemetry_sdk::metrics::reader::AggregationSelector for FailingMetricsExporter { - fn aggregation( - &self, - _kind: opentelemetry_sdk::metrics::InstrumentKind, - ) -> opentelemetry_sdk::metrics::Aggregation { - opentelemetry_sdk::metrics::Aggregation::Default - } - } - impl opentelemetry_sdk::metrics::reader::TemporalitySelector for FailingMetricsExporter { - fn temporality( - &self, - _kind: opentelemetry_sdk::metrics::InstrumentKind, - ) -> opentelemetry_sdk::metrics::data::Temporality { - opentelemetry_sdk::metrics::data::Temporality::Cumulative + fn shutdown(&self) -> OTelSdkResult { + Ok(()) } - } - fn 
empty_resource_metrics() -> ResourceMetrics { - use opentelemetry_sdk::Resource; - ResourceMetrics { - resource: Resource::empty(), - scope_metrics: vec![], + fn temporality(&self) -> Temporality { + Temporality::Cumulative } } @@ -456,12 +195,12 @@ mod tests { }; let named = super::NamedMetricsExporter::new(inner, "test-exporter"); - let result = named.export(&mut empty_resource_metrics()).await; + let result = named.export(&ResourceMetrics::default()).await; assert!(result.is_err()); let err = result.unwrap_err(); match err { - MetricsError::Other(msg) => { + OTelSdkError::InternalFailure(msg) => { assert!(msg.contains("[test-exporter metrics]")); assert!(msg.contains("export failed")); } @@ -471,12 +210,13 @@ mod tests { #[test] fn test_prefix_metrics_error() { - let err = MetricsError::Config("bad config".to_string()); + let err = OTelSdkError::InternalFailure("bad config".to_string()); let prefixed = super::prefix_metrics_error("test-exporter", err); + // OTelSdkError::InternalFailure to_string() automatically prepends "Operation failed:". match prefixed { - MetricsError::Config(msg) => { - assert_eq!(msg, "[test-exporter metrics] bad config"); + OTelSdkError::InternalFailure(msg) => { + assert_eq!(msg, "[test-exporter metrics] Operation failed: bad config"); } _ => panic!("Expected Config variant"), } diff --git a/apollo-router/src/plugins/telemetry/fmt_layer.rs b/apollo-router/src/plugins/telemetry/fmt_layer.rs index c791568df7..db26949163 100644 --- a/apollo-router/src/plugins/telemetry/fmt_layer.rs +++ b/apollo-router/src/plugins/telemetry/fmt_layer.rs @@ -72,6 +72,13 @@ struct NoOpLayer; impl Layer for NoOpLayer {} +fn is_otel_internal_target(target: &str) -> bool { + // These are handled by `otel_layers.rs`. 
+ // We can't follow the preferred path of using with_filter() due to a reload bug in `tracing` + // https://github.com/tokio-rs/tracing/issues/1629 + target.starts_with("opentelemetry") +} + pub(crate) struct FmtLayer { fmt_event: T, excluded_attributes: HashSet<&'static str>, @@ -107,6 +114,9 @@ where id: &tracing_core::span::Id, ctx: Context<'_, S>, ) { + if is_otel_internal_target(attrs.metadata().target()) { + return; + } if let Some(span) = ctx.span(id) { let mut visitor = FieldsVisitor::new(&self.excluded_attributes); // We're checking if it's sampled to not add both attributes in OtelData and our LogAttributes @@ -132,6 +142,9 @@ where fn on_record(&self, id: &Id, values: &Record<'_>, ctx: Context<'_, S>) { if let Some(span) = ctx.span(id) { + if is_otel_internal_target(span.metadata().target()) { + return; + } let mut extensions = span.extensions_mut(); if let Some(fields) = extensions.get_mut::() { let mut visitor = FieldsVisitor::new(&self.excluded_attributes); @@ -148,6 +161,9 @@ where } fn on_event(&self, event: &Event<'_>, ctx: Context<'_, S>) { + if is_otel_internal_target(event.metadata().target()) { + return; + } let mut visitor = FieldsVisitor::new(&self.excluded_attributes); event.record(&mut visitor); if visitor.omit_from_logs { @@ -280,6 +296,7 @@ mod tests { use apollo_federation::connectors::runtime::responses::MappedResponse; use http::HeaderValue; use http::header::CONTENT_LENGTH; + use opentelemetry_sdk::resource::Resource; use parking_lot::Mutex; use parking_lot::MutexGuard; use tests::events::EventLevel; @@ -553,7 +570,7 @@ connector: display_resource: false, ..Default::default() }; - let format = Json::new(Default::default(), json_format); + let format = Json::new(Resource::builder().build(), json_format); let fmt_layer = FmtLayer::new(format, buff.clone()).boxed(); ::tracing::subscriber::with_default( @@ -574,7 +591,7 @@ connector: ansi_escape_codes: false, ..Default::default() }; - let format = Text::new(Default::default(), 
text_format); + let format = Text::new(Resource::builder().build(), text_format); let fmt_layer = FmtLayer::new(format, buff.clone()).boxed(); ::tracing::subscriber::with_default( @@ -592,7 +609,7 @@ connector: ansi_escape_codes: false, ..Default::default() }; - let format = Text::new(Default::default(), text_format); + let format = Text::new(Resource::builder().build(), text_format); let fmt_layer = FmtLayer::new(format, buff.clone()).boxed(); ::tracing::subscriber::with_default( @@ -641,7 +658,7 @@ connector: display_resource: false, ..Default::default() }; - let format = Json::new(Default::default(), text_format); + let format = Json::new(Resource::builder().build(), text_format); let fmt_layer = FmtLayer::new(format, buff.clone()).boxed(); ::tracing::subscriber::with_default( @@ -690,7 +707,7 @@ connector: display_resource: false, ..Default::default() }; - let format = Json::new(Default::default(), text_format); + let format = Json::new(Resource::builder().build(), text_format); let fmt_layer = FmtLayer::new(format, buff.clone()).boxed(); let event_config: events::Events = serde_yaml::from_str(EVENT_CONFIGURATION).unwrap(); @@ -937,7 +954,7 @@ connector: display_resource: false, ..Default::default() }; - let format = Json::new(Default::default(), text_format); + let format = Json::new(Resource::builder().build(), text_format); let fmt_layer = FmtLayer::new( RateLimitFormatter::new(format, &RateLimit::default()), buff.clone(), @@ -1043,7 +1060,7 @@ subgraph: ansi_escape_codes: false, ..Default::default() }; - let format = Text::new(Default::default(), text_format); + let format = Text::new(Resource::builder().build(), text_format); let fmt_layer = FmtLayer::new(format, buff.clone()).boxed(); let event_config: events::Events = serde_yaml::from_str(EVENT_CONFIGURATION).unwrap(); diff --git a/apollo-router/src/plugins/telemetry/formatters/json.rs b/apollo-router/src/plugins/telemetry/formatters/json.rs index 961d549a9b..abdbf5b33b 100644 --- 
a/apollo-router/src/plugins/telemetry/formatters/json.rs +++ b/apollo-router/src/plugins/telemetry/formatters/json.rs @@ -180,6 +180,7 @@ where let array = array.iter().map(|a| a.as_str()).collect::>(); serializer.serialize_entry(kv.key.as_str(), &array)?; } + _ => unreachable!(), } } } @@ -276,7 +277,7 @@ where event_attributes .take() .into_iter() - .map(|KeyValue { key, value }| (key, value)) + .map(|KeyValue { key, value, .. }| (key, value)) .collect() }) } diff --git a/apollo-router/src/plugins/telemetry/formatters/mod.rs b/apollo-router/src/plugins/telemetry/formatters/mod.rs index 7d4dd9671a..0b73dea879 100644 --- a/apollo-router/src/plugins/telemetry/formatters/mod.rs +++ b/apollo-router/src/plugins/telemetry/formatters/mod.rs @@ -250,7 +250,9 @@ pub(crate) fn to_list(resource: Resource) -> Vec<(String, serde_json::Value)> { .map(|s| serde_json::Value::String(s.to_string())) .collect(), ), + _ => unreachable!(), }, + _ => unreachable!(), }, ) }) diff --git a/apollo-router/src/plugins/telemetry/formatters/text.rs b/apollo-router/src/plugins/telemetry/formatters/text.rs index 2766b78866..70c471c76d 100644 --- a/apollo-router/src/plugins/telemetry/formatters/text.rs +++ b/apollo-router/src/plugins/telemetry/formatters/text.rs @@ -394,7 +394,7 @@ where event_attributes .take() .into_iter() - .map(|KeyValue { key, value }| (key, value)) + .map(|KeyValue { key, value, .. 
}| (key, value)) .collect() }) } diff --git a/apollo-router/src/plugins/telemetry/metrics/apollo/mod.rs b/apollo-router/src/plugins/telemetry/metrics/apollo/mod.rs index ce139d9552..3b1ee5c6ad 100644 --- a/apollo-router/src/plugins/telemetry/metrics/apollo/mod.rs +++ b/apollo-router/src/plugins/telemetry/metrics/apollo/mod.rs @@ -4,10 +4,13 @@ use std::sync::atomic::Ordering; use std::time::Duration; use opentelemetry::KeyValue; -use opentelemetry_otlp::MetricsExporterBuilder; +use opentelemetry_otlp::MetricExporterBuilder; use opentelemetry_otlp::WithExportConfig; +use opentelemetry_otlp::WithTonicConfig; use opentelemetry_sdk::Resource; -use opentelemetry_sdk::metrics::PeriodicReader; +use opentelemetry_sdk::metrics::{Aggregation, InstrumentKind}; +use opentelemetry_sdk::metrics::Instrument; +use opentelemetry_sdk::metrics::Stream; use opentelemetry_sdk::runtime; use prometheus::exponential_buckets; use sys_info::hostname; @@ -26,8 +29,6 @@ use crate::plugins::telemetry::apollo_exporter::get_uname; use crate::plugins::telemetry::config::ApolloMetricsReferenceMode; use crate::plugins::telemetry::config::Conf; use crate::plugins::telemetry::error_handler::NamedMetricsExporter; -use crate::plugins::telemetry::metrics::CustomAggregationSelector; -use crate::plugins::telemetry::otlp::CustomTemporalitySelector; use crate::plugins::telemetry::otlp::Protocol; use crate::plugins::telemetry::otlp::TelemetryDataKind; use crate::plugins::telemetry::otlp::process_endpoint; @@ -115,16 +116,17 @@ impl Config { tracing::info!("configuring Apollo OTLP metrics: {}", batch_config); let mut metadata = MetadataMap::new(); metadata.insert("apollo.api.key", key.parse()?); - let exporter = match otlp_protocol { - Protocol::Grpc => MetricsExporterBuilder::Tonic( - opentelemetry_otlp::new_exporter() - .tonic() - .with_tls_config(ClientTlsConfig::new().with_native_roots()) - .with_endpoint(endpoint.as_str()) - .with_timeout(batch_config.max_export_timeout) - 
.with_metadata(metadata.clone()) - .with_compression(opentelemetry_otlp::Compression::Gzip), - ), + let exporter: opentelemetry_otlp::MetricExporter = match otlp_protocol { + Protocol::Grpc => MetricExporterBuilder::new() + .with_tonic() + .with_tls_config(ClientTlsConfig::new().with_native_roots()) + .with_endpoint(endpoint.as_str()) + .with_timeout(batch_config.max_export_timeout) + .with_metadata(metadata.clone()) + .with_temporality(opentelemetry_sdk::metrics::Temporality::Delta) + .with_compression(opentelemetry_otlp::Compression::Gzip) + .build()?, + // While Apollo doesn't use the HTTP protocol, we support it here for // use in tests to enable WireMock. Protocol::Http => { @@ -133,104 +135,119 @@ impl Config { &TelemetryDataKind::Metrics, &Protocol::Http, )?; - let mut otlp_exporter = opentelemetry_otlp::new_exporter() - .http() + let mut otlp_exporter = MetricExporterBuilder::new() + .with_http() .with_protocol(opentelemetry_otlp::Protocol::Grpc) + .with_temporality(opentelemetry_sdk::metrics::Temporality::Delta) .with_timeout(batch_config.max_export_timeout); if let Some(endpoint) = maybe_endpoint { otlp_exporter = otlp_exporter.with_endpoint(endpoint); } - MetricsExporterBuilder::Http(otlp_exporter) + otlp_exporter.build()? 
} - } - .build_metrics_exporter( - Box::new(CustomTemporalitySelector( - opentelemetry_sdk::metrics::data::Temporality::Delta, - )), - Box::new( - CustomAggregationSelector::builder() - .boundaries(default_buckets()) - .build(), - ), - )?; - // MetricsExporterBuilder does not implement Clone, so we need to create a new builder for the realtime exporter - let realtime_exporter = match otlp_protocol { - Protocol::Grpc => MetricsExporterBuilder::Tonic( - opentelemetry_otlp::new_exporter() - .tonic() - .with_tls_config(ClientTlsConfig::new().with_native_roots()) - .with_endpoint(endpoint.as_str()) - .with_timeout(batch_config.max_export_timeout) - .with_metadata(metadata.clone()) - .with_compression(opentelemetry_otlp::Compression::Gzip), - ), + }; + // MetricExporterBuilder does not implement Clone, so we need to create a new builder for the realtime exporter + let realtime_exporter: opentelemetry_otlp::MetricExporter = match otlp_protocol { + Protocol::Grpc => opentelemetry_otlp::MetricExporterBuilder::new() + .with_tonic() + .with_tls_config(ClientTlsConfig::new().with_native_roots()) + .with_endpoint(endpoint.as_str()) + .with_timeout(batch_config.max_export_timeout) + .with_metadata(metadata.clone()) + .with_temporality(opentelemetry_sdk::metrics::Temporality::Delta) + .with_compression(opentelemetry_otlp::Compression::Gzip) + .build()?, Protocol::Http => { let maybe_endpoint = process_endpoint( &Some(endpoint.to_string()), &TelemetryDataKind::Metrics, &Protocol::Http, )?; - let mut otlp_exporter = opentelemetry_otlp::new_exporter() - .http() + let mut otlp_exporter = opentelemetry_otlp::MetricExporterBuilder::new() + .with_http() .with_protocol(opentelemetry_otlp::Protocol::Grpc) + .with_temporality(opentelemetry_sdk::metrics::Temporality::Delta) .with_timeout(batch_config.max_export_timeout); if let Some(endpoint) = maybe_endpoint { otlp_exporter = otlp_exporter.with_endpoint(endpoint); } - MetricsExporterBuilder::Http(otlp_exporter) + otlp_exporter.build()? 
} - } - .build_metrics_exporter( - Box::new(CustomTemporalitySelector( - opentelemetry_sdk::metrics::data::Temporality::Delta, - )), - // This aggregation uses the Apollo histogram format where a duration, x, in μs is - // counted in the bucket of index max(0, min(ceil(ln(x)/ln(1.1)), 383)). - Box::new( - CustomAggregationSelector::builder() - .boundaries( - // Returns [~1.4ms ... ~5min] - exponential_buckets(0.001399084909, 1.1, 129).unwrap(), - ) - .build(), - ), - )?; + }; + let named_exporter = NamedMetricsExporter::new(exporter, "apollo"); let named_realtime_exporter = NamedMetricsExporter::new(realtime_exporter, "apollo"); - let default_reader = PeriodicReader::builder(named_exporter, runtime::Tokio) + let default_reader = opentelemetry_sdk::metrics::periodic_reader_with_async_runtime::PeriodicReader::builder(named_exporter, runtime::Tokio) .with_interval(Duration::from_secs(60)) .with_timeout(batch_config.max_export_timeout) .build(); - let realtime_reader = PeriodicReader::builder(named_realtime_exporter, runtime::Tokio) + let realtime_reader = opentelemetry_sdk::metrics::periodic_reader_with_async_runtime::PeriodicReader::builder(named_realtime_exporter, runtime::Tokio) .with_interval(batch_config.scheduled_delay) .with_timeout(batch_config.max_export_timeout) .build(); - let resource = Resource::new([ - KeyValue::new("apollo.router.id", router_id()), - KeyValue::new("apollo.graph.ref", reference.to_string()), - KeyValue::new("apollo.schema.id", schema_id.to_string()), - KeyValue::new( - "apollo.user.agent", - format!( - "{}@{}", - std::env!("CARGO_PKG_NAME"), - std::env!("CARGO_PKG_VERSION") + let resource = Resource::builder() + .with_attributes([ + KeyValue::new("apollo.router.id", router_id()), + KeyValue::new("apollo.graph.ref", reference.to_string()), + KeyValue::new("apollo.schema.id", schema_id.to_string()), + KeyValue::new( + "apollo.user.agent", + format!( + "{}@{}", + std::env!("CARGO_PKG_NAME"), + std::env!("CARGO_PKG_VERSION") + ), ), - ), - 
KeyValue::new("apollo.client.host", hostname()?), - KeyValue::new("apollo.client.uname", get_uname()?), - ]); + KeyValue::new("apollo.client.host", hostname()?), + KeyValue::new("apollo.client.uname", get_uname()?), + ]) + .build(); + + let view_default_aggregation = |i: &Instrument| { + if matches!(i.kind(), InstrumentKind::Histogram) { + Some( + Stream::builder() + .with_aggregation(Aggregation::ExplicitBucketHistogram { + boundaries: default_buckets(), + record_min_max: true, + }) + .build() + .unwrap(), + ) + } else { + None + } + }; + + let view_custom_aggregation = |i: &Instrument| { + if matches!(i.kind(), InstrumentKind::Histogram) { + Some( + Stream::builder() + .with_aggregation(Aggregation::ExplicitBucketHistogram { + boundaries: exponential_buckets(0.001399084909, 1.1, 129).unwrap(), + record_min_max: true, + }) + .build() + .unwrap(), + ) + } else { + None + } + }; builder .with_reader(MeterProviderType::Apollo, default_reader) - .with_resource(MeterProviderType::Apollo, resource.clone()); + .with_resource(MeterProviderType::Apollo, resource.clone()) + .with_view(MeterProviderType::Apollo, view_default_aggregation); builder .with_reader(MeterProviderType::ApolloRealtime, realtime_reader) - .with_resource(MeterProviderType::ApolloRealtime, resource.clone()); + .with_resource(MeterProviderType::ApolloRealtime, resource.clone()) + .with_view(MeterProviderType::ApolloRealtime, view_custom_aggregation); + Ok(()) } diff --git a/apollo-router/src/plugins/telemetry/metrics/mod.rs b/apollo-router/src/plugins/telemetry/metrics/mod.rs index b8d0fac56d..6c46e0c7d9 100644 --- a/apollo-router/src/plugins/telemetry/metrics/mod.rs +++ b/apollo-router/src/plugins/telemetry/metrics/mod.rs @@ -1,43 +1,4 @@ -use opentelemetry_sdk::metrics::Aggregation; -use opentelemetry_sdk::metrics::InstrumentKind; -use opentelemetry_sdk::metrics::reader::AggregationSelector; pub(crate) mod apollo; pub(crate) mod local_type_stats; pub(crate) mod otlp; pub(crate) mod prometheus; - 
-#[derive(Clone, Default, Debug)] -pub(crate) struct CustomAggregationSelector { - boundaries: Vec, - record_min_max: bool, -} - -#[buildstructor::buildstructor] -impl CustomAggregationSelector { - #[builder] - pub(crate) fn new( - boundaries: Vec, - record_min_max: Option, - ) -> CustomAggregationSelector { - Self { - boundaries, - record_min_max: record_min_max.unwrap_or(true), - } - } -} - -impl AggregationSelector for CustomAggregationSelector { - fn aggregation(&self, kind: InstrumentKind) -> Aggregation { - match kind { - InstrumentKind::Counter - | InstrumentKind::UpDownCounter - | InstrumentKind::ObservableCounter - | InstrumentKind::ObservableUpDownCounter => Aggregation::Sum, - InstrumentKind::Gauge | InstrumentKind::ObservableGauge => Aggregation::LastValue, - InstrumentKind::Histogram => Aggregation::ExplicitBucketHistogram { - boundaries: self.boundaries.clone(), - record_min_max: self.record_min_max, - }, - } - } -} diff --git a/apollo-router/src/plugins/telemetry/metrics/otlp.rs b/apollo-router/src/plugins/telemetry/metrics/otlp.rs index 443c135b3c..9861847f2c 100644 --- a/apollo-router/src/plugins/telemetry/metrics/otlp.rs +++ b/apollo-router/src/plugins/telemetry/metrics/otlp.rs @@ -1,12 +1,9 @@ -use opentelemetry_otlp::MetricsExporterBuilder; -use opentelemetry_sdk::metrics::PeriodicReader; -use opentelemetry_sdk::runtime; use tower::BoxError; - +use opentelemetry_sdk::metrics::periodic_reader_with_async_runtime::PeriodicReader; +use opentelemetry_sdk::runtime; use crate::metrics::aggregation::MeterProviderType; use crate::plugins::telemetry::config::Conf; use crate::plugins::telemetry::error_handler::NamedMetricsExporter; -use crate::plugins::telemetry::metrics::CustomAggregationSelector; use crate::plugins::telemetry::otlp::TelemetryDataKind; use crate::plugins::telemetry::reload::metrics::MetricsBuilder; use crate::plugins::telemetry::reload::metrics::MetricsConfigurator; @@ -21,22 +18,12 @@ impl MetricsConfigurator for 
super::super::otlp::Config { } fn configure(&self, builder: &mut MetricsBuilder) -> Result<(), BoxError> { - let exporter_builder: MetricsExporterBuilder = self.exporter(TelemetryDataKind::Metrics)?; - let exporter = exporter_builder.build_metrics_exporter( - (&self.temporality).into(), - Box::new( - CustomAggregationSelector::builder() - .boundaries(builder.metrics_common().buckets.clone()) - .build(), - ), - )?; - + let exporter = self.metric_exporter(TelemetryDataKind::Metrics)?; let named_exporter = NamedMetricsExporter::new(exporter, "otlp"); builder.with_reader( MeterProviderType::Public, PeriodicReader::builder(named_exporter, runtime::Tokio) .with_interval(self.batch_processor.scheduled_delay) - .with_timeout(self.batch_processor.max_export_timeout) .build(), ); diff --git a/apollo-router/src/plugins/telemetry/metrics/prometheus.rs b/apollo-router/src/plugins/telemetry/metrics/prometheus.rs index 125ff7c1e9..7c9fffde76 100644 --- a/apollo-router/src/plugins/telemetry/metrics/prometheus.rs +++ b/apollo-router/src/plugins/telemetry/metrics/prometheus.rs @@ -4,6 +4,7 @@ use std::task::Poll; use futures::future::BoxFuture; use http::StatusCode; use opentelemetry_prometheus::ResourceSelector; +use opentelemetry_sdk::metrics::InstrumentKind; use prometheus::Encoder; use prometheus::Registry; use prometheus::TextEncoder; @@ -15,7 +16,6 @@ use tower_service::Service; use crate::ListenAddr; use crate::metrics::aggregation::MeterProviderType; use crate::plugins::telemetry::config::Conf; -use crate::plugins::telemetry::metrics::CustomAggregationSelector; use crate::plugins::telemetry::reload::metrics::MetricsBuilder; use crate::plugins::telemetry::reload::metrics::MetricsConfigurator; use crate::services::router; @@ -78,18 +78,13 @@ impl MetricsConfigurator for Config { let registry = Registry::new(); let exporter = opentelemetry_prometheus::exporter() - .with_aggregation_selector( - CustomAggregationSelector::builder() - 
.boundaries(builder.metrics_common().buckets.clone()) - .record_min_max(true) - .build(), - ) .with_resource_selector(self.resource_selector) .with_registry(registry.clone()) .build()?; - builder.with_reader(MeterProviderType::Public, exporter); - builder.with_prometheus_registry(registry); + builder + .with_reader(MeterProviderType::Public, exporter) + .with_prometheus_registry(registry); Ok(()) } } diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs index 1da910e7f4..131d27a070 100644 --- a/apollo-router/src/plugins/telemetry/mod.rs +++ b/apollo-router/src/plugins/telemetry/mod.rs @@ -15,7 +15,6 @@ use config_new::connector::instruments::ConnectorInstruments; use config_new::instruments::InstrumentsConfig; use config_new::instruments::StaticInstrument; use config_new::router_overhead; -use error_handler::handle_error; use futures::StreamExt; use futures::future::BoxFuture; use futures::future::ready; @@ -153,6 +152,7 @@ pub(crate) mod dynamic_attribute; mod endpoint; mod error_counter; mod error_handler; +mod otel_layers; mod fmt_layer; pub(crate) mod formatters; mod logging; @@ -167,6 +167,7 @@ mod span_factory; pub(crate) mod tracing; pub(crate) mod utils; + // Tracing consts pub(crate) const CLIENT_NAME: &str = "apollo::telemetry::client_name"; pub(crate) const CLIENT_LIBRARY_NAME: &str = "apollo::telemetry::client_library_name"; @@ -237,7 +238,7 @@ impl LruSizeInstrument { gauge.observe(value.load(std::sync::atomic::Ordering::Relaxed), &[]); } }) - .init(); + .build(); Self { value, @@ -322,9 +323,6 @@ impl PluginPrivate for Telemetry { } } - opentelemetry::global::set_error_handler(handle_error) - .expect("otel error handler lock poisoned, fatal"); - let mut config = init.config; config.instrumentation.spans.update_defaults(); config.instrumentation.instruments.update_defaults(); @@ -339,7 +337,6 @@ impl PluginPrivate for Telemetry { let (activation, custom_endpoints, apollo_metrics_sender) = 
reload::prepare(&init.previous_config, &config)?; - if config.instrumentation.spans.mode == SpanMode::Deprecated { ::tracing::warn!( "telemetry.instrumentation.spans.mode is currently set to 'deprecated', either explicitly or via defaulting. Set telemetry.instrumentation.spans.mode explicitly in your router.yaml to 'spec_compliant' for log and span attributes that follow OpenTelemetry semantic conventions. This option will be defaulted to 'spec_compliant' in a future release and eventually removed altogether" @@ -2456,7 +2453,6 @@ mod tests { "http.response.status_code" = 400, "acme.my_attribute" = "application/json", "error.type" = "Bad Request", - "http.response.status_code" = 400, "network.protocol.version" = "HTTP/1.1" ); } @@ -3036,7 +3032,8 @@ mod tests { .await; plugin.activate(); make_supergraph_request(plugin.as_ref()).await; - u64_histogram!("apollo.test.histo", "it's a test", 1u64); + u64_histogram!("apollo.test.custom", "should have custom buckets", 1u64); + u64_histogram!("apollo.test.global", "should have global buckets", 1u64); assert_prometheus_metrics!(plugin); } .with_metrics() diff --git a/apollo-router/src/plugins/telemetry/otel/layer.rs b/apollo-router/src/plugins/telemetry/otel/layer.rs index 9e8f623656..99d5c22f8a 100644 --- a/apollo-router/src/plugins/telemetry/otel/layer.rs +++ b/apollo-router/src/plugins/telemetry/otel/layer.rs @@ -237,11 +237,11 @@ impl field::Visit for SpanEventVisitor<'_, '_> { field: &tracing_core::Field, value: &(dyn std::error::Error + 'static), ) { - let mut chain: Vec = Vec::new(); + let mut chain: Vec = Vec::new(); let mut next_err = value.source(); while let Some(err) = next_err { - chain.push(err.to_string().into()); + chain.push(err.to_string()); next_err = err.source(); } @@ -250,7 +250,7 @@ impl field::Visit for SpanEventVisitor<'_, '_> { if self.exception_config.record { self.event_builder .attributes - .push(Key::new(FIELD_EXCEPTION_MESSAGE).string(error_msg.clone())); + 
.push(KeyValue::new(FIELD_EXCEPTION_MESSAGE, error_msg.clone())); // NOTE: This is actually not the stacktrace of the exception. This is // the "source chain". It represents the hierarchy of errors from the @@ -258,9 +258,12 @@ impl field::Visit for SpanEventVisitor<'_, '_> { // of the callsites in the code that led to the error happening. // `std::error::Error::backtrace` is a nightly-only API and cannot be // used here until the feature is stabilized. - self.event_builder - .attributes - .push(Key::new(FIELD_EXCEPTION_STACKTRACE).array(chain.clone())); + self.event_builder.attributes.push(KeyValue::new( + FIELD_EXCEPTION_STACKTRACE, + opentelemetry::Value::Array(opentelemetry::Array::String( + chain.clone().into_iter().map(|s| s.into()).collect(), + )), + )); } if self.exception_config.propagate @@ -277,16 +280,20 @@ impl field::Visit for SpanEventVisitor<'_, '_> { // used here until the feature is stabilized. attrs.push(KeyValue::new( FIELD_EXCEPTION_STACKTRACE, - Value::Array(chain.clone().into()), + Value::Array(opentelemetry::Array::String( + chain.clone().into_iter().map(|s| s.into()).collect(), + )), )); } - - self.event_builder - .attributes - .push(Key::new(field.name()).string(error_msg)); self.event_builder .attributes - .push(Key::new(format!("{}.chain", field.name())).array(chain)); + .push(KeyValue::new(field.name(), error_msg)); + self.event_builder.attributes.push(KeyValue::new( + format!("{}.chain", field.name()), + opentelemetry::Value::Array(opentelemetry::Array::String( + chain.clone().into_iter().map(|s| s.into()).collect(), + )), + )); } } @@ -367,7 +374,7 @@ impl field::Visit for SpanAttributeVisitor<'_> { OTEL_STATUS_MESSAGE => { self.span_builder.status = otel::Status::error(format!("{value:?}")) } - _ => self.record(Key::new(field.name()).string(format!("{value:?}"))), + _ => self.record(KeyValue::new(field.name(), format!("{value:?}"))), } } @@ -391,7 +398,7 @@ impl field::Visit for SpanAttributeVisitor<'_> { let error_msg = 
value.to_string(); if self.exception_config.record { - self.record(Key::new(FIELD_EXCEPTION_MESSAGE).string(error_msg.clone())); + self.record(KeyValue::new(FIELD_EXCEPTION_MESSAGE, error_msg.clone())); // NOTE: This is actually not the stacktrace of the exception. This is // the "source chain". It represents the hierarchy of errors from the @@ -399,11 +406,24 @@ impl field::Visit for SpanAttributeVisitor<'_> { // of the callsites in the code that led to the error happening. // `std::error::Error::backtrace` is a nightly-only API and cannot be // used here until the feature is stabilized. - self.record(Key::new(FIELD_EXCEPTION_STACKTRACE).array(chain.clone())); + self.record(KeyValue::new( + FIELD_EXCEPTION_STACKTRACE, + opentelemetry::Value::Array(opentelemetry::Array::String( + chain + .iter() + .map(|s| s.as_str().to_string().into()) + .collect(), + )), + )); } - self.record(Key::new(field.name()).string(error_msg)); - self.record(Key::new(format!("{}.chain", field.name())).array(chain)); + self.record(KeyValue::new(field.name(), error_msg)); + self.record(KeyValue::new( + format!("{}.chain", field.name()), + opentelemetry::Value::Array(opentelemetry::Array::String( + chain.clone().into_iter().collect(), + )), + )); } } @@ -939,9 +959,8 @@ where // Performing read operations before getting a write lock to avoid a deadlock // See https://github.com/tokio-rs/tracing/issues/763 let meta = event.metadata(); - let target = Key::new("target"); - let target = target.string(meta.target()); + let target = KeyValue::new("target", meta.target()); let mut extensions = span.extensions_mut(); let mut otel_data = extensions.get_mut::(); @@ -950,7 +969,7 @@ where let mut otel_event = otel::Event::new( String::new(), SystemTime::now(), - vec![Key::new("level").string(meta.level().as_str()), target], + vec![KeyValue::new("level", meta.level().as_str()), target], 0, ); let mut span_event_visit = SpanEventVisitor { diff --git 
a/apollo-router/src/plugins/telemetry/otel/named_runtime_channel.rs b/apollo-router/src/plugins/telemetry/otel/named_runtime_channel.rs index c49b76e332..b859faba1c 100644 --- a/apollo-router/src/plugins/telemetry/otel/named_runtime_channel.rs +++ b/apollo-router/src/plugins/telemetry/otel/named_runtime_channel.rs @@ -1,7 +1,7 @@ use std::fmt::Debug; +use std::future::Future; use std::time::Duration; -use futures::future::BoxFuture; use opentelemetry_sdk::runtime::Runtime; use opentelemetry_sdk::runtime::RuntimeChannel; use opentelemetry_sdk::runtime::Tokio; @@ -25,18 +25,14 @@ impl NamedTokioRuntime { } impl Runtime for NamedTokioRuntime { - type Interval = ::Interval; - type Delay = ::Delay; - - fn interval(&self, duration: Duration) -> Self::Interval { - self.parent.interval(duration) - } - - fn spawn(&self, future: BoxFuture<'static, ()>) { + fn spawn(&self, future: F) + where + F: Future + Send + 'static, + { self.parent.spawn(future) } - fn delay(&self, duration: Duration) -> Self::Delay { + fn delay(&self, duration: Duration) -> impl Future + Send + 'static { self.parent.delay(duration) } } diff --git a/apollo-router/src/plugins/telemetry/otel/tracer.rs b/apollo-router/src/plugins/telemetry/otel/tracer.rs index 163b9791aa..3b7a565adf 100644 --- a/apollo-router/src/plugins/telemetry/otel/tracer.rs +++ b/apollo-router/src/plugins/telemetry/otel/tracer.rs @@ -156,16 +156,15 @@ fn process_sampling_result( mod tests { use opentelemetry::trace::SpanBuilder; use opentelemetry::trace::SpanId; - use opentelemetry::trace::TracerProvider as _; - use opentelemetry_sdk::trace::Config; + use opentelemetry::trace::TracerProvider; use opentelemetry_sdk::trace::Sampler; - use opentelemetry_sdk::trace::TracerProvider; + use opentelemetry_sdk::trace::TracerProviderBuilder; use super::*; #[test] fn assigns_default_trace_id_if_missing() { - let provider = TracerProvider::default(); + let provider = TracerProviderBuilder::default().build(); let tracer = provider.tracer("test"); 
let mut builder = SpanBuilder::from_name("empty".to_string()); builder.span_id = Some(SpanId::from(1u64)); @@ -210,8 +209,8 @@ mod tests { #[test] fn sampled_context() { for (name, sampler, parent_cx, previous_sampling_result, is_sampled) in sampler_data() { - let provider = TracerProvider::builder() - .with_config(Config::default().with_sampler(sampler)) + let provider = TracerProviderBuilder::default() + .with_sampler(sampler) .build(); let tracer = provider.tracer("test"); let mut builder = SpanBuilder::from_name("parent".to_string()); diff --git a/apollo-router/src/plugins/telemetry/otel_layers.rs b/apollo-router/src/plugins/telemetry/otel_layers.rs new file mode 100644 index 0000000000..4a23f0c5f7 --- /dev/null +++ b/apollo-router/src/plugins/telemetry/otel_layers.rs @@ -0,0 +1,989 @@ +use std::fmt::Debug; +use std::time::Duration; +use std::time::Instant; +use dashmap::DashMap; +use tracing_core::field::Visit; +use tracing_core::metadata::Level; +use tracing_core::Event; +use tracing_core::Field; +use tracing_core::Subscriber; +use tracing_subscriber::layer::Context; +use tracing_subscriber::Layer; + +#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] +enum ErrorType { + Trace, + Metric, + Other, +} + +pub(super) struct OtelErrorLayer { + last_logged: DashMap, +} + +impl OtelErrorLayer { + pub(super) fn new() -> Self { + Self { + last_logged: DashMap::new(), + } + } + + // Allow for map injection to avoid using global map in tests + #[cfg(test)] + fn with_map(last_logged: DashMap) -> Self { + Self { last_logged } + } + + fn threshold() -> Duration { + #[cfg(test)] + { + Duration::from_millis(100) + } + #[cfg(not(test))] + { + Duration::from_secs(10) + } + } + + fn classify(&self, target: &str, msg: &str) -> ErrorType { + if target.contains("metrics") || msg.contains("Metrics error:") { + ErrorType::Metric + } else if target.contains("trace") { + ErrorType::Trace + } else { + ErrorType::Other + } + } + + fn message_prefix(level: Level, error_type: ErrorType) 
-> Option { + let severity_str = match level { + Level::ERROR => "error", + Level::WARN => "warning", + _ => return None, + }; + + let kind_str = match error_type { + ErrorType::Trace => "trace", + ErrorType::Metric => "metric", + ErrorType::Other => "", + }; + + Some(if kind_str.is_empty() { + format!("OpenTelemetry {severity_str} occurred") + } else { + format!("OpenTelemetry {kind_str} {severity_str} occurred") + }) + } + + fn should_log(&self, error_type: ErrorType) -> bool { + let now = Instant::now(); + let threshold = Self::threshold(); + + let last_logged = *self + .last_logged + .entry(error_type) + .and_modify(|last| { + if last.elapsed() > threshold { + *last = now; + } + }) + .or_insert(now); + + last_logged == now + } +} + +#[derive(Default)] +struct MessageVisitor { + message: Option, +} + +impl Visit for MessageVisitor { + fn record_str(&mut self, field: &Field, value: &str) { + if field.name() == "message" { + self.message = Some(value.to_string()); + } + } + + fn record_debug(&mut self, field: &Field, value: &dyn Debug) { + if field.name() == "message" && self.message.is_none() { + self.message = Some(format!("{value:?}")) + } + } +} + +impl Layer for OtelErrorLayer +where + S: Subscriber, +{ + fn on_event(&self, event: &Event<'_>, ctx: Context<'_, S>) { + let meta = event.metadata(); + if !meta.target().starts_with("opentelemetry") { + return; + } + let level = *meta.level(); + match *meta.level() { + Level::ERROR | Level::WARN => {} + _ => return, + } + + // Pull message string out of trace event + let mut visitor = MessageVisitor::default(); + event.record(&mut visitor); + let Some(msg) = visitor.message else { + return; + }; + + let error_type = self.classify(meta.target(), &msg); + + // Keep track of the number of cardinality overflow errors otel emits. This can be removed + // after we introduce a way for users to configure custom cardinality limits. 
+ if msg.contains("Warning: Maximum data points for metric stream exceeded.") { + u64_counter!( + "apollo.router.telemetry.metrics.cardinality_overflow", + "A count of how often a telemetry metric hit the hard cardinality limit", + 1 + ); + } + + // Rate limit repetitive logs + if !self.should_log(error_type) { + return; + } + + // Emit as router logs detached from spans + let Some(message_prefix) = Self::message_prefix(level, error_type) else { + return; + }; + let full_message = format!("{}: {}", message_prefix, msg); + let otel_target = meta.target().to_string(); + let name = meta.name().to_string(); + + let metadata = match level { + Level::ERROR => &OTEL_ERROR_METADATA_ERROR, + Level::WARN => &OTEL_ERROR_METADATA_WARN, + _ => return, + }; + + let fields = metadata.fields(); + let message_field = fields.field("message").expect("message field must exist"); + let otel_target_field = fields + .field("otel.target") + .expect("otel.target field must exist"); + let name_field = fields.field("name").expect("name field must exist"); + let values = [ + (&message_field, Some(&full_message as &dyn tracing::Value)), + (&otel_target_field, Some(&otel_target as &dyn tracing::Value)), + (&name_field, Some(&name as &dyn tracing::Value)), + ]; + let value_set = fields.value_set(&values); + + let new_event = Event::new(metadata, &value_set); + ctx.event(&new_event); + } +} + + +/// Re-emits OpenTelemetry internal `tracing` events (targets starting with "opentelemetry") as +/// router logs, while ensuring we only emit a single non-empty `message` field. This fixes a bug +/// in OTel logging where their `tracing` macros emit two values for the `message` field, one being +/// an empty string (https://github.com/tokio-rs/tracing/issues/3195). +/// +/// We intentionally only re-emit INFO/DEBUG/TRACE. WARN/ERROR are handled by `OtelErrorLayer`, +/// which adds prefixes, classification and rate-limiting. 
+pub(super) struct ReemitOtelEventsLayer; + +// One metadata+callsite per level so we preserve the original verbosity. +// We construct these events explicitly and record them through `Context::event` to avoid +// re-entering the global dispatcher from within `on_event`. + +static OTEL_ERROR_CALLSITE_ERROR: tracing_core::callsite::DefaultCallsite = + tracing_core::callsite::DefaultCallsite::new(&OTEL_ERROR_METADATA_ERROR); +static OTEL_ERROR_METADATA_ERROR: tracing_core::Metadata = tracing_core::metadata! { + name: "otel_internal", + target: "apollo_router::otel_internal", + level: Level::ERROR, + fields: &["message","otel.target", "name"], + callsite: &OTEL_ERROR_CALLSITE_ERROR, + kind: tracing_core::metadata::Kind::EVENT, +}; + + +static OTEL_ERROR_CALLSITE_WARN: tracing_core::callsite::DefaultCallsite = + tracing_core::callsite::DefaultCallsite::new(&OTEL_ERROR_METADATA_WARN); +static OTEL_ERROR_METADATA_WARN: tracing_core::Metadata = tracing_core::metadata! { + name: "otel_internal", + target: "apollo_router::otel_internal", + level: Level::WARN, + fields: &["message","otel.target", "name"], + callsite: &OTEL_ERROR_CALLSITE_WARN, + kind: tracing_core::metadata::Kind::EVENT, +}; + +static OTEL_REEMIT_CALLSITE_INFO: tracing_core::callsite::DefaultCallsite = + tracing_core::callsite::DefaultCallsite::new(&OTEL_REEMIT_METADATA_INFO); +static OTEL_REEMIT_METADATA_INFO: tracing_core::Metadata = tracing_core::metadata! { + name: "otel_internal", + target: "apollo_router::otel_internal", + level: Level::INFO, + fields: &["message", "otel.target", "name"], + callsite: &OTEL_REEMIT_CALLSITE_INFO, + kind: tracing_core::metadata::Kind::EVENT, +}; + +static OTEL_REEMIT_CALLSITE_DEBUG: tracing_core::callsite::DefaultCallsite = + tracing_core::callsite::DefaultCallsite::new(&OTEL_REEMIT_METADATA_DEBUG); +static OTEL_REEMIT_METADATA_DEBUG: tracing_core::Metadata = tracing_core::metadata! 
{ + name: "otel_internal", + target: "apollo_router::otel_internal", + level: Level::DEBUG, + fields: &["message", "otel.target", "name"], + callsite: &OTEL_REEMIT_CALLSITE_DEBUG, + kind: tracing_core::metadata::Kind::EVENT, +}; + +static OTEL_REEMIT_CALLSITE_TRACE: tracing_core::callsite::DefaultCallsite = + tracing_core::callsite::DefaultCallsite::new(&OTEL_REEMIT_METADATA_TRACE); +static OTEL_REEMIT_METADATA_TRACE: tracing_core::Metadata = tracing_core::metadata! { + name: "otel_internal", + target: "apollo_router::otel_internal", + level: Level::TRACE, + fields: &["message", "otel.target", "name"], + callsite: &OTEL_REEMIT_CALLSITE_TRACE, + kind: tracing_core::metadata::Kind::EVENT, +}; + +impl ReemitOtelEventsLayer { + fn metadata_for_level(level: &Level) -> Option<&'static tracing_core::Metadata<'static>> { + match *level { + Level::INFO => Some(&OTEL_REEMIT_METADATA_INFO), + Level::DEBUG => Some(&OTEL_REEMIT_METADATA_DEBUG), + Level::TRACE => Some(&OTEL_REEMIT_METADATA_TRACE), + _ => None, + } + } +} + +impl Layer for ReemitOtelEventsLayer { + fn on_event(&self, event: &Event<'_>, ctx: Context<'_, S>) { + let meta = event.metadata(); + + // Only rewrite OTel's internal logs. + if !meta.target().starts_with("opentelemetry") { + return; + } + + // Only re-emit INFO/DEBUG/TRACE; WARN/ERROR are handled by `OtelErrorLayer`. + let Some(metadata) = Self::metadata_for_level(meta.level()) else { + return; + }; + + // Capture the last non-empty `message` value and ignore empty ones. + struct CaptureMessage { + message: Option, + } + + impl Visit for CaptureMessage { + fn record_str(&mut self, field: &Field, value: &str) { + if field.name() == "message" && !value.is_empty() { + self.message = Some(value.to_string()); + } + } + + fn record_debug(&mut self, field: &Field, value: &dyn Debug) { + if field.name() != "message" { + return; + } + // OTel's implicit message often records as "\"\"". 
+ let mut s = format!("{value:?}"); + if s == "\"\"" || s.is_empty() { + return; + } + // If it's a Debug string literal, remove quotes. + if let (Some(stripped), true) = (s.strip_prefix('"'), s.ends_with('"')) { + if let Some(stripped) = stripped.strip_suffix('"') { + s = stripped.to_string(); + } + } + if !s.is_empty() { + self.message = Some(s); + } + } + } + + let mut visitor = CaptureMessage { message: None }; + event.record(&mut visitor); + + let Some(message) = visitor.message else { + return; + }; + + let otel_target = meta.target().to_string(); + let name = meta.name().to_string(); + let fields = metadata.fields(); + + let message_field = fields.field("message").expect("message field must exist"); + let otel_target_field = fields + .field("otel.target") + .expect("otel.target field must exist"); + let name_field = fields.field("name").expect("name field must exist"); + let values = [ + (&message_field, Some(&message as &dyn tracing::Value)), + (&otel_target_field, Some(&otel_target as &dyn tracing::Value)), + (&name_field, Some(&name as &dyn tracing::Value)), + ]; + let value_set = fields.value_set(&values); + + // Build a real `tracing_core::Event` (callsite is registered via DefaultCallsite), + // then dispatch through the global dispatcher so EnvFilter still applies. 
+ let new_event: Event = if let Some(parent) = event.parent().cloned() { + Event::new_child_of(parent, metadata, &value_set) + } else { + Event::new(metadata, &value_set) + }; + + ctx.event(&new_event) + } +} + + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use dashmap::DashMap; + use tracing_core::Level; + use serde::Deserialize; + use serde_json::Value; + use std::collections::HashMap; + use std::io; + use std::sync::{Arc, Mutex}; + use tracing_subscriber::filter::filter_fn; + use tracing_subscriber::Layer; + use crate::plugins::telemetry::formatters::json::Json; + use tracing_subscriber::layer::SubscriberExt; + + use crate::metrics::FutureMetricsExt; + use crate::plugins::telemetry::fmt_layer::FmtLayer; + use crate::plugins::telemetry::otel_layers::{OtelErrorLayer, ReemitOtelEventsLayer}; + + #[tokio::test] + async fn test_error_layer_throttles_repeated_messages() { + let layer = super::OtelErrorLayer::with_map(DashMap::new()); + assert!( + layer.should_log(super::ErrorType::Metric), + "first metric error should be logged" + ); + assert!( + !layer.should_log(super::ErrorType::Metric), + "second metric error within threshold should be suppressed" + ); + // Wait longer than the test threshold (100ms) so the window expires + tokio::time::sleep(Duration::from_millis(200)).await; + assert!( + layer.should_log(super::ErrorType::Metric), + "metric error after threshold should be logged again" + ); + } + + #[test] + fn test_message_prefix_error_metric() { + let prefix = super::OtelErrorLayer::message_prefix(Level::ERROR, super::ErrorType::Metric) + .expect("prefix should be generated for metric errors"); + + assert_eq!(prefix, "OpenTelemetry metric error occurred"); + } + + #[test] + fn test_message_prefix_error_trace() { + let prefix = super::OtelErrorLayer::message_prefix(Level::ERROR, super::ErrorType::Trace) + .expect("prefix should be generated for trace errors"); + + assert_eq!(prefix, "OpenTelemetry trace error occurred"); + } + + #[test] + fn 
test_message_prefix_error_other() { + let prefix = super::OtelErrorLayer::message_prefix(Level::ERROR, super::ErrorType::Other) + .expect("prefix should be generated for generic errors"); + + assert_eq!(prefix, "OpenTelemetry error occurred"); + } + + #[test] + fn test_message_prefix_warn_metric() { + let prefix = super::OtelErrorLayer::message_prefix(Level::WARN, super::ErrorType::Metric) + .expect("prefix should be generated for metric warnings"); + + assert_eq!(prefix, "OpenTelemetry metric warning occurred"); + } + + #[test] + fn test_message_prefix_warn_trace() { + let prefix = super::OtelErrorLayer::message_prefix(Level::WARN, super::ErrorType::Trace) + .expect("prefix should be generated for trace warnings"); + + assert_eq!(prefix, "OpenTelemetry trace warning occurred"); + } + + #[test] + fn test_message_prefix_warn_other() { + let prefix = super::OtelErrorLayer::message_prefix(Level::WARN, super::ErrorType::Other) + .expect("prefix should be generated for generic warnings"); + + assert_eq!(prefix, "OpenTelemetry warning occurred"); + } + + #[test] + fn test_message_prefix_non_error_levels_return_none() { + assert!( + super::OtelErrorLayer::message_prefix(Level::INFO, super::ErrorType::Metric,).is_none(), + "INFO level should not produce a prefix", + ); + + assert!( + super::OtelErrorLayer::message_prefix(Level::DEBUG, super::ErrorType::Trace,).is_none(), + "DEBUG level should not produce a prefix", + ); + + assert!( + super::OtelErrorLayer::message_prefix(Level::TRACE, super::ErrorType::Other,).is_none(), + "TRACE level should not produce a prefix", + ); + } + + #[tokio::test] + async fn test_cardinality_overflow_1() { + use tracing_subscriber::layer::SubscriberExt; + use tracing_subscriber::registry::Registry; + + async { + let otel_layer = super::OtelErrorLayer::new(); + let subscriber = Registry::default().with(otel_layer); + let _guard = tracing::subscriber::set_default(subscriber); + + let msg = "Metrics error: Warning: Maximum data points for metric 
stream exceeded. \ + Entry added to overflow. Subsequent overflows to same metric until next \ + collect will not be logged."; + + tracing::warn!( + target: "opentelemetry::metrics", + "{msg}" + ); + + assert_counter!("apollo.router.telemetry.metrics.cardinality_overflow", 1); + } + .with_metrics() + .await; + } + + #[tokio::test] + async fn test_cardinality_overflow_2() { + use tracing_subscriber::layer::SubscriberExt; + use tracing_subscriber::registry::Registry; + + async { + let otel_layer = super::OtelErrorLayer::new(); + let subscriber = Registry::default().with(otel_layer); + let _guard = tracing::subscriber::set_default(subscriber); + + let msg = "Warning: Maximum data points for metric stream exceeded. Entry added to overflow."; + + tracing::warn!( + target: "opentelemetry::metrics", + "{msg}" + ); + + assert_counter!("apollo.router.telemetry.metrics.cardinality_overflow", 1); + } + .with_metrics() + .await; + } + + #[derive(Clone)] + struct BufMakeWriter(Arc>>); + + struct BufWriter(Arc>>); + + impl<'a> tracing_subscriber::fmt::MakeWriter<'a> for BufMakeWriter { + type Writer = BufWriter; + + fn make_writer(&'a self) -> Self::Writer { + BufWriter(self.0.clone()) + } + } + + impl io::Write for BufWriter { + fn write(&mut self, buf: &[u8]) -> io::Result { + let mut locked = self.0.lock().expect("lock"); + locked.extend_from_slice(buf); + Ok(buf.len()) + } + + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } + } + + #[derive(Debug, Deserialize)] + struct LogLine { + message: String, + + #[serde(flatten)] + rest: HashMap, + } + + fn take_lines(buf: &Arc>>) -> Vec { + let bytes = std::mem::take(&mut *buf.lock().expect("lock")); + let s = String::from_utf8(bytes).expect("utf8"); + s.lines() + .map(|l| l.to_string()) + .filter(|l| !l.is_empty()) + .collect() + } + + #[test] + fn otel_error_layer_reemits_metric_warn_as_router_log() { + let buf = Arc::new(Mutex::new(Vec::::new())); + let make_writer = BufMakeWriter(buf.clone()); + + let fmt_layer = + 
FmtLayer::new(Json::default(), make_writer).with_filter(filter_fn(|meta| { + !meta.target().starts_with("opentelemetry") + })); + + let subscriber = tracing_subscriber::registry() + .with(fmt_layer) + .with(OtelErrorLayer::new()) + .with(tracing_subscriber::filter::LevelFilter::TRACE); + + tracing::subscriber::with_default(subscriber, || { + tracing::event!( + target: "opentelemetry::metrics", + Level::WARN, + message = "Warning: Maximum data points for metric stream exceeded." + ); + }); + + let lines = take_lines(&buf); + assert_eq!(lines.len(), 1); + + let parsed: LogLine = serde_json::from_str(&lines[0]).expect("valid JSON"); + assert_eq!( + parsed.message, + "OpenTelemetry metric warning occurred: Warning: Maximum data points for metric stream exceeded." + ); + assert_eq!( + parsed.rest.get("otel.target").and_then(|v| v.as_str()), + Some("opentelemetry::metrics"), + "OtelErrorLayer output should include the original OTel target" + ); + } + + #[test] + fn otel_error_layer_reemits_trace_error_as_router_log() { + let buf = Arc::new(Mutex::new(Vec::::new())); + let make_writer = BufMakeWriter(buf.clone()); + + let fmt_layer = + FmtLayer::new(Json::default(), make_writer).with_filter(filter_fn(|meta| { + !meta.target().starts_with("opentelemetry") + })); + + let subscriber = tracing_subscriber::registry() + .with(fmt_layer) + .with(OtelErrorLayer::new()) + .with(tracing_subscriber::filter::LevelFilter::TRACE); + + tracing::subscriber::with_default(subscriber, || { + tracing::event!( + target: "opentelemetry_sdk::trace::span_processor", + Level::ERROR, + message = "export failed" + ); + }); + + let lines = take_lines(&buf); + assert_eq!(lines.len(), 1); + + let parsed: LogLine = serde_json::from_str(&lines[0]).expect("valid JSON"); + assert_eq!(parsed.message, "OpenTelemetry trace error occurred: export failed"); + assert_eq!( + parsed.rest.get("otel.target").and_then(|v| v.as_str()), + Some("opentelemetry_sdk::trace::span_processor") + ); + } + + #[test] + fn 
otel_error_layer_classifies_metric_by_message_when_target_is_generic() { + let buf = Arc::new(Mutex::new(Vec::::new())); + let make_writer = BufMakeWriter(buf.clone()); + + let fmt_layer = + FmtLayer::new(Json::default(), make_writer).with_filter(filter_fn(|meta| { + !meta.target().starts_with("opentelemetry") + })); + + let subscriber = tracing_subscriber::registry() + .with(fmt_layer) + .with(OtelErrorLayer::new()) + .with(tracing_subscriber::filter::LevelFilter::TRACE); + + tracing::subscriber::with_default(subscriber, || { + tracing::event!( + target: "opentelemetry_sdk::something", + Level::WARN, + message = "Metrics error: boom" + ); + }); + + let lines = take_lines(&buf); + assert_eq!(lines.len(), 1); + + let parsed: LogLine = serde_json::from_str(&lines[0]).expect("valid JSON"); + assert_eq!(parsed.message, "OpenTelemetry metric warning occurred: Metrics error: boom"); + } + + #[test] + fn otel_error_layer_ignores_info_level_events() { + let buf = Arc::new(Mutex::new(Vec::::new())); + let make_writer = BufMakeWriter(buf.clone()); + + // Filter out raw OTel targets; INFO isn't re-emitted by OtelErrorLayer, so expect no output. 
+ let fmt_layer = + FmtLayer::new(Json::default(), make_writer).with_filter(filter_fn(|meta| { + !meta.target().starts_with("opentelemetry") + })); + + let subscriber = tracing_subscriber::registry() + .with(fmt_layer) + .with(OtelErrorLayer::new()) + .with(tracing_subscriber::filter::LevelFilter::TRACE); + + tracing::subscriber::with_default(subscriber, || { + tracing::event!( + target: "opentelemetry::metrics", + Level::INFO, + message = "info should be ignored" + ); + }); + + let lines = take_lines(&buf); + assert!(lines.is_empty()); + } + + #[tokio::test] + async fn otel_error_layer_rate_limits_per_error_type_end_to_end() { + let buf = Arc::new(Mutex::new(Vec::::new())); + let make_writer = BufMakeWriter(buf.clone()); + + let fmt_layer = + FmtLayer::new(Json::default(), make_writer).with_filter(filter_fn(|meta| { + !meta.target().starts_with("opentelemetry") + })); + + // Important: use `set_default` (not `with_default`) so the subscriber stays installed across await. + let subscriber = tracing_subscriber::registry() + .with(fmt_layer) + .with(OtelErrorLayer::new()) + .with(tracing_subscriber::filter::LevelFilter::TRACE); + let _guard = tracing::subscriber::set_default(subscriber); + + tracing::event!( + target: "opentelemetry::metrics", + Level::WARN, + message = "metric message 1 should emit" + ); + tracing::event!( + target: "opentelemetry::metrics", + Level::WARN, + message = "metric message 2 should be suppressed" + ); + tracing::event!( + target: "opentelemetry_sdk::trace::span_processor", + Level::WARN, + message = "trace message 1 should emit" + ); + + tokio::time::sleep(Duration::from_millis(200)).await; + + // After window -> metric emits again. 
+ tracing::event!( + target: "opentelemetry::metrics", + Level::WARN, + message = "metric message 3 should emit" + ); + + drop(_guard); + + let lines = take_lines(&buf); + assert_eq!(lines.len(), 3); + + let mut msgs = lines + .iter() + .map(|l| serde_json::from_str::(l).expect("valid JSON").message) + .collect::>(); + msgs.sort(); + + assert_eq!(msgs[0], "OpenTelemetry metric warning occurred: metric message 1 should emit"); + assert_eq!(msgs[1], "OpenTelemetry metric warning occurred: metric message 3 should emit"); + assert_eq!(msgs[2], "OpenTelemetry trace warning occurred: trace message 1 should emit"); + } + + #[test] + fn otel_error_layer_ignores_non_opentelemetry_targets_end_to_end() { + let buf = Arc::new(Mutex::new(Vec::::new())); + let make_writer = BufMakeWriter(buf.clone()); + + // Only allow router-internal otel logs through this formatter (isolates OtelErrorLayer output). + let fmt_layer = + FmtLayer::new(Json::default(), make_writer).with_filter(filter_fn(|meta| { + meta.target() == "apollo_router::otel_internal" + })); + + let subscriber = tracing_subscriber::registry() + .with(fmt_layer) + .with(OtelErrorLayer::new()) + .with(tracing_subscriber::filter::LevelFilter::TRACE); + + tracing::subscriber::with_default(subscriber, || { + tracing::warn!(target: "not_otel", "hello"); + }); + + let lines = take_lines(&buf); + assert!(lines.is_empty()); + } + + #[test] + fn reemits_otel_event_with_single_non_empty_message() { + let buf = Arc::new(Mutex::new(Vec::::new())); + let make_writer = BufMakeWriter(buf.clone()); + + // Match production: hide raw OTel targets from the formatter. 
+ let fmt_layer = + FmtLayer::new(Json::default(), make_writer).with_filter(filter_fn(|meta| { + !meta.target().starts_with("opentelemetry") + })); + + let subscriber = tracing_subscriber::registry() + .with(fmt_layer) + .with(OtelErrorLayer::new()) + .with(ReemitOtelEventsLayer) + .with(tracing_subscriber::filter::LevelFilter::TRACE); + + tracing::subscriber::with_default(subscriber, || { + // This provides two message fields, the explicitly defined `"Last reference dropped"` and + // the implicitly defined `""` empty string. See https://docs.rs/tracing/latest/tracing/macro.event.html + tracing::info!( + target: "opentelemetry_sdk::metrics::registry", + message = "Last reference dropped", + "" + ); + }); + + let lines = take_lines(&buf); + assert_eq!(lines.len(), 1, "expected exactly one re-emitted line"); + + // If there are duplicate `message` keys, derived Deserialize fails with a duplicate-field error. See https://docs.rs/serde/latest/serde/de/trait.Error.html + let parsed: LogLine = + serde_json::from_str(&lines[0]).expect("valid JSON without duplicate keys"); + + assert_eq!(parsed.message, "Last reference dropped"); + assert_eq!( + parsed.rest.get("otel.target").and_then(|v| v.as_str()), + Some("opentelemetry_sdk::metrics::registry") + ); + } + + #[test] + fn does_not_reemit_when_only_empty_message_is_present() { + let buf = Arc::new(Mutex::new(Vec::::new())); + let make_writer = BufMakeWriter(buf.clone()); + + let fmt_layer = + FmtLayer::new(Json::default(), make_writer).with_filter(filter_fn(|meta| { + !meta.target().starts_with("opentelemetry") + })); + + let subscriber = tracing_subscriber::registry() + .with(fmt_layer) + .with(OtelErrorLayer::new()) + .with(ReemitOtelEventsLayer) + .with(tracing_subscriber::filter::LevelFilter::TRACE); + + tracing::subscriber::with_default(subscriber, || { + tracing::info!( + target: "opentelemetry_sdk::metrics::registry", + "" + ); + }); + + let lines = 
take_lines(&buf); + assert!( + lines.is_empty(), + "expected no output for empty-message-only OTel event" + ); + } + + #[test] + fn reemits_otel_event_when_only_implicit_message_is_present() { + let buf = Arc::new(Mutex::new(Vec::::new())); + let make_writer = BufMakeWriter(buf.clone()); + + let fmt_layer = FmtLayer::new(Json::default(), make_writer).with_filter(filter_fn(|meta| { + !meta.target().starts_with("opentelemetry") + })); + + let subscriber = tracing_subscriber::registry() + .with(fmt_layer) + .with(OtelErrorLayer::new()) + .with(ReemitOtelEventsLayer) + .with(tracing_subscriber::filter::LevelFilter::TRACE); + + tracing::subscriber::with_default(subscriber, || { + tracing::info!( + target: "opentelemetry_sdk::metrics::registry", + "Implicit message only" + ); + }); + + let lines = take_lines(&buf); + assert_eq!(lines.len(), 1); + + let parsed: LogLine = serde_json::from_str(&lines[0]).expect("valid JSON"); + assert_eq!(parsed.message, "Implicit message only"); + assert_eq!( + parsed.rest.get("otel.target").and_then(|v| v.as_str()), + Some("opentelemetry_sdk::metrics::registry") + ); + } + + #[test] + fn reemits_otel_event_prefers_implicit_message_when_explicit_message_is_empty() { + let buf = Arc::new(Mutex::new(Vec::::new())); + let make_writer = BufMakeWriter(buf.clone()); + + let fmt_layer = FmtLayer::new(Json::default(), make_writer).with_filter(filter_fn(|meta| { + !meta.target().starts_with("opentelemetry") + })); + + let subscriber = tracing_subscriber::registry() + .with(fmt_layer) + .with(OtelErrorLayer::new()) + .with(ReemitOtelEventsLayer) + .with(tracing_subscriber::filter::LevelFilter::TRACE); + + tracing::subscriber::with_default(subscriber, || { + tracing::info!( + target: "opentelemetry_sdk::metrics::registry", + message = "", // This should be ignored + "This message should be reemitted" + ); + }); + + let lines = take_lines(&buf); + assert_eq!(lines.len(), 1); + + let parsed: LogLine = serde_json::from_str(&lines[0]).expect("valid 
JSON"); + assert_eq!(parsed.message, "This message should be reemitted"); + } + + #[test] + fn reemits_otel_debug_and_trace_events() { + let buf = Arc::new(Mutex::new(Vec::::new())); + let make_writer = BufMakeWriter(buf.clone()); + + let fmt_layer = FmtLayer::new(Json::default(), make_writer).with_filter(filter_fn(|meta| { + !meta.target().starts_with("opentelemetry") + })); + + let subscriber = tracing_subscriber::registry() + .with(fmt_layer) + .with(OtelErrorLayer::new()) + .with(ReemitOtelEventsLayer) + .with(tracing_subscriber::filter::LevelFilter::TRACE); + + tracing::subscriber::with_default(subscriber, || { + tracing::debug!( + target: "opentelemetry_sdk::metrics::registry", + message = "debug message", + "" + ); + tracing::trace!( + target: "opentelemetry_sdk::metrics::registry", + message = "trace message", + "" + ); + }); + + let lines = take_lines(&buf); + assert_eq!(lines.len(), 2); + + let p0: LogLine = serde_json::from_str(&lines[0]).expect("valid JSON"); + let p1: LogLine = serde_json::from_str(&lines[1]).expect("valid JSON"); + + assert!( + (p0.message == "debug message" && p1.message == "trace message") + || (p0.message == "trace message" && p1.message == "debug message") + ); + } + + #[test] + fn warn_is_handled_by_otel_error_layer_not_reemit_layer() { + let buf = Arc::new(Mutex::new(Vec::::new())); + let make_writer = BufMakeWriter(buf.clone()); + + let fmt_layer = FmtLayer::new(Json::default(), make_writer).with_filter(filter_fn(|meta| { + !meta.target().starts_with("opentelemetry") + })); + + let subscriber = tracing_subscriber::registry() + .with(fmt_layer) + .with(OtelErrorLayer::new()) + .with(ReemitOtelEventsLayer) + .with(tracing_subscriber::filter::LevelFilter::TRACE); + + tracing::subscriber::with_default(subscriber, || { + tracing::warn!( + target: "opentelemetry::metrics", + "Metrics error: Warning: Maximum data points for metric stream exceeded." 
+ ); + }); + + let lines = take_lines(&buf); + assert_eq!(lines.len(), 1); + + let parsed: LogLine = serde_json::from_str(&lines[0]).expect("valid JSON"); + assert!(parsed.message.contains("OpenTelemetry") && parsed.message.contains("warning")); + assert_eq!( + parsed.rest.get("otel.target").and_then(|v| v.as_str()), + Some("opentelemetry::metrics") + ); + } + + #[test] + fn does_not_reemit_non_otel_targets() { + let buf = Arc::new(Mutex::new(Vec::::new())); + let make_writer = BufMakeWriter(buf.clone()); + + // No target filter: we want to see the original event. + let fmt_layer = FmtLayer::new(Json::default(), make_writer); + + let subscriber = tracing_subscriber::registry() + .with(fmt_layer) + .with(OtelErrorLayer::new()) + .with(ReemitOtelEventsLayer) + .with(tracing_subscriber::filter::LevelFilter::TRACE); + + tracing::subscriber::with_default(subscriber, || { + tracing::info!(target: "not_otel", "hello"); + }); + + let lines = take_lines(&buf); + assert_eq!(lines.len(), 1); + + let parsed: LogLine = serde_json::from_str(&lines[0]).expect("valid JSON"); + assert_eq!(parsed.message, "hello"); + assert!(!parsed.rest.contains_key("otel.target")); + } +} diff --git a/apollo-router/src/plugins/telemetry/otlp.rs b/apollo-router/src/plugins/telemetry/otlp.rs index ce80e06333..6112c581cf 100644 --- a/apollo-router/src/plugins/telemetry/otlp.rs +++ b/apollo-router/src/plugins/telemetry/otlp.rs @@ -2,11 +2,12 @@ use std::collections::HashMap; use http::Uri; -use opentelemetry_otlp::HttpExporterBuilder; -use opentelemetry_otlp::TonicExporterBuilder; +use opentelemetry_otlp::MetricExporter; +use opentelemetry_otlp::Protocol::Grpc; +use opentelemetry_otlp::SpanExporter; use opentelemetry_otlp::WithExportConfig; -use opentelemetry_sdk::metrics::InstrumentKind; -use opentelemetry_sdk::metrics::reader::TemporalitySelector; +use opentelemetry_otlp::WithHttpConfig; +use opentelemetry_otlp::WithTonicConfig; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; 
@@ -24,9 +25,6 @@ use crate::plugins::telemetry::tracing::BatchProcessorConfig; #[serde(deny_unknown_fields)] #[schemars(rename = "OTLPConfig")] pub(crate) struct Config { - /// Enable otlp - pub(crate) enabled: bool, - /// The endpoint to send data to #[serde(default)] pub(crate) endpoint: Option, @@ -43,6 +41,9 @@ pub(crate) struct Config { #[serde(default)] pub(crate) http: HttpExporter, + /// Enable otlp + pub(crate) enabled: bool, + /// Batch processor settings #[serde(default)] pub(crate) batch_processor: BatchProcessorConfig, @@ -73,16 +74,19 @@ pub(crate) enum TelemetryDataKind { // implementation unifies the processing of endpoints. // // The processing does the following: -// - If an endpoint is not specified, this results in `None` -// - If an endpoint is specified as "default", this results in `""` -// - If an endpoint is `""` or ends with a protocol appropriate suffix, we stop processing +// - If an endpoint is not specified, this results in `None` which will be treated as +// OTEL_EXPORTER_OTLP_GRPC_ENDPOINT_DEFAULT by the OTel SDK +// - If an endpoint is specified as "default" or `""`, this results in `None` which will be treated +// as OTEL_EXPORTER_OTLP_GRPC_ENDPOINT_DEFAULT by the OTel SDK +// - If an endpoint ends with a protocol appropriate suffix, we stop processing // - If we continue processing: // - If an endpoint has no scheme, we prepend "http://" // - If our endpoint has no path, we append a protocol specific suffix // - If it has a path, we return it unmodified // -// Note: "" is the empty string and is thus interpreted by any opentelemetry sdk as indicating that -// the default endpoint should be used. +// Note: Due to a bug, the OTel SDK v0.30.0 no longer recognizes `""` as valid. This is fixed in +// v0.31.0 where it is again recognized as a placeholder for OTEL_EXPORTER_OTLP_GRPC_ENDPOINT_DEFAULT. +// For now, we'll treat `"default"` and `""` the same way, as it has the same result. 
// // If you are interested in learning more about opentelemetry endpoints: // https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/exporter.md @@ -92,115 +96,156 @@ pub(super) fn process_endpoint( kind: &TelemetryDataKind, protocol: &Protocol, ) -> Result, BoxError> { - // If there is no endpoint, None, do no processing because the user must be relying on the - // router processing OTEL environment variables for endpoint. + // If there is no endpoint (None, "", or "default"), do no processing because the user must be + // relying on the router processing OTEL environment variables for endpoint. // If there is an endpoint, Some(value), we must process that value. Most of this processing is // performed to try and remain backwards compatible with previous versions of the router which // depended on "non-standard" behaviour of the opentelemetry_otlp crate. I've tried documenting // each of the outcomes clearly for the benefit of future maintainers. - endpoint - .as_ref() - .map(|v| { - let mut base = if v == "default" { - "".to_string() - } else { - v.to_string() + match endpoint.as_ref() { + None => Ok(None), + Some(v) if v == "default" || v.is_empty() => Ok(None), + Some(v) => { + let mut base = v.to_string(); + // We require a scheme on our endpoint or we can't parse it as a Uri. + // If we don't have one, prepend with "http://" + if !base.starts_with("http") { + base = format!("http://{base}"); + } + // We expect different suffixes by protocol and signal type + let suffix = match protocol { + Protocol::Grpc => "/", + Protocol::Http => match kind { + TelemetryDataKind::Metrics => "/v1/metrics", + TelemetryDataKind::Traces => "/v1/traces", + }, }; - if base.is_empty() { - // We don't want to process empty strings - Ok(base) + if base.ends_with(suffix) { + // Our suffix is in place, all is good + Ok(Some(base)) } else { - // We require a scheme on our endpoint or we can't parse it as a Uri. 
- // If we don't have one, prepend with "http://" - if !base.starts_with("http") { - base = format!("http://{base}"); - } - // We expect different suffixes by protocol and signal type - let suffix = match protocol { - Protocol::Grpc => "/", - Protocol::Http => match kind { - TelemetryDataKind::Metrics => "/v1/metrics", - TelemetryDataKind::Traces => "/v1/traces", - }, - }; - if base.ends_with(suffix) { - // Our suffix is in place, all is good - Ok(base) - } else { - let uri = http::Uri::try_from(&base)?; - // Note: If our endpoint is ":://host:port", then the path will be "/". - // We already ensured that our base does not end with , so we must append - // - if uri.path() == "/" { - // Remove any trailing slash from the base so we don't end up with a - // double slash when concatenating e.g. "http://my-base//v1/metrics" - if base.ends_with("/") { - base.pop(); - } - // We don't have a path, we need to add one - Ok(format!("{base}{suffix}")) - } else { - // We have a path, it doesn't end with , let it pass... - // We could try and enforce the standard here and only let through paths - // which end with the expected suffix. However, I think that would reduce - // backwards compatibility and we should just trust that the user knows - // what they are doing. - Ok(base) + let uri = http::Uri::try_from(&base)?; + // Note: If our endpoint is ":://host:port", then the path will be "/". + // We already ensured that our base does not end with , so we must append + // + if uri.path() == "/" { + // Remove any trailing slash from the base so we don't end up with a + // double slash when concatenating e.g. "http://my-base//v1/metrics" + if base.ends_with("/") { + base.pop(); } + // We don't have a path, we need to add one + Ok(Some(format!("{base}{suffix}"))) + } else { + // We have a path, it doesn't end with , let it pass... + // We could try and enforce the standard here and only let through paths + // which end with the expected suffix. 
However, I think that would reduce + // backwards compatibility and we should just trust that the user knows + // what they are doing. + Ok(Some(base)) } } - }) - .transpose() + } + } } impl Config { - pub(crate) fn exporter + From>( + // We have to write some repetitive code here because OTel's + // TonicExporterBuilder::build_span_exporter and ::build_metric_exporter are protected when they + // shouldn't be. + pub(crate) fn span_exporter(&self, kind: TelemetryDataKind) -> Result { + let endpoint_opt = process_endpoint(&self.endpoint, &kind, &self.protocol)?; + match self.protocol { + Protocol::Grpc => { + let tls_config_opt = self.tls_config(&endpoint_opt)?; + let mut builder = SpanExporter::builder() + .with_tonic() + .with_protocol(Grpc) + .with_timeout(self.batch_processor.max_export_timeout) + .with_metadata(MetadataMap::from_headers(self.grpc.metadata.clone())); + if let Some(tls_config) = tls_config_opt { + builder = builder.with_tls_config(tls_config); + } + if let Some(endpoint) = &endpoint_opt { + builder = builder.with_endpoint(endpoint) + } + Ok(builder.build()?) + } + Protocol::Http => { + let headers = self.http.headers.clone(); + let mut builder = SpanExporter::builder() + .with_http() + .with_protocol(Grpc) + .with_timeout(self.batch_processor.max_export_timeout) + .with_headers(headers); + if let Some(endpoint) = &endpoint_opt { + builder = builder.with_endpoint(endpoint) + } + Ok(builder.build()?) + } + } + } + + pub(crate) fn metric_exporter( &self, kind: TelemetryDataKind, - ) -> Result { + ) -> Result { + let endpoint_opt = process_endpoint(&self.endpoint, &kind, &self.protocol)?; match self.protocol { Protocol::Grpc => { - let endpoint_opt = process_endpoint(&self.endpoint, &kind, &self.protocol)?; // Figure out if we need to set tls config for our exporter - let tls_config_opt = if let Some(endpoint) = &endpoint_opt { - if !endpoint.is_empty() { - let tls_url = Uri::try_from(endpoint)?; - Some(self.grpc.clone().to_tls_config(&tls_url)?) 
- } else { - None - } - } else { - None - }; - - let mut exporter = opentelemetry_otlp::new_exporter() - .tonic() - .with_protocol(opentelemetry_otlp::Protocol::Grpc) + let tls_config_opt = self.tls_config(&endpoint_opt)?; + let mut builder = MetricExporter::builder() + .with_tonic() + .with_protocol(Grpc) .with_timeout(self.batch_processor.max_export_timeout) .with_metadata(MetadataMap::from_headers(self.grpc.metadata.clone())); + if let Some(endpoint) = endpoint_opt { - exporter = exporter.with_endpoint(endpoint); + builder = builder.with_endpoint(endpoint); } if let Some(tls_config) = tls_config_opt { - exporter = exporter.with_tls_config(tls_config); + builder = builder.with_tls_config(tls_config); } - Ok(exporter.into()) + + Ok(builder.build()?) } Protocol::Http => { - let endpoint_opt = process_endpoint(&self.endpoint, &kind, &self.protocol)?; let headers = self.http.headers.clone(); - let mut exporter: HttpExporterBuilder = opentelemetry_otlp::new_exporter() - .http() - .with_protocol(opentelemetry_otlp::Protocol::Grpc) + let temporality = match self.temporality { + Temporality::Cumulative => opentelemetry_sdk::metrics::Temporality::Cumulative, + Temporality::Delta => opentelemetry_sdk::metrics::Temporality::Delta, + }; + let mut builder = MetricExporter::builder() + .with_http() + .with_protocol(Grpc) .with_timeout(self.batch_processor.max_export_timeout) - .with_headers(headers); + .with_headers(headers) + .with_temporality(temporality); if let Some(endpoint) = endpoint_opt { - exporter = exporter.with_endpoint(endpoint); + builder = builder.with_endpoint(endpoint); } - Ok(exporter.into()) + Ok(builder.build()?) } } } + + fn tls_config( + &self, + endpoint_opt: &Option, + ) -> Result, BoxError> { + let tls_config_opt = if let Some(endpoint) = &endpoint_opt { + if !endpoint.is_empty() { + let tls_url = Uri::try_from(endpoint)?; + Some(self.grpc.clone().to_tls_config(&tls_url)?) 
+ } else { + None + } + } else { + None + }; + Ok(tls_config_opt) + } } #[derive(Debug, Clone, Deserialize, Serialize, Default, JsonSchema, PartialEq)] @@ -290,182 +335,10 @@ pub(crate) enum Temporality { Delta, } -pub(crate) struct CustomTemporalitySelector( - pub(crate) opentelemetry_sdk::metrics::data::Temporality, -); - -impl TemporalitySelector for CustomTemporalitySelector { - fn temporality(&self, kind: InstrumentKind) -> opentelemetry_sdk::metrics::data::Temporality { - // Up/down counters should always use cumulative temporality to ensure they are sent as aggregates - // rather than deltas, which prevents drift issues. - // See https://github.com/open-telemetry/opentelemetry-specification/blob/a1c13d59bb7d0fb086df2b3e1eaec9df9efef6cc/specification/metrics/sdk_exporters/otlp.md#additional-configuration for mor information - match kind { - InstrumentKind::UpDownCounter | InstrumentKind::ObservableUpDownCounter => { - opentelemetry_sdk::metrics::data::Temporality::Cumulative - } - _ => self.0, - } - } -} - -impl From<&Temporality> for Box { - fn from(value: &Temporality) -> Self { - Box::new(match value { - Temporality::Cumulative => { - CustomTemporalitySelector(opentelemetry_sdk::metrics::data::Temporality::Cumulative) - } - Temporality::Delta => { - CustomTemporalitySelector(opentelemetry_sdk::metrics::data::Temporality::Delta) - } - }) - } -} - #[cfg(test)] mod tests { - use opentelemetry_sdk::metrics::data::Temporality as SdkTemporality; - use super::*; - #[test] - fn test_updown_counter_temporality_override() { - // Test that up/down counters always get cumulative temporality regardless of configuration - let delta_selector = CustomTemporalitySelector(SdkTemporality::Delta); - let cumulative_selector = CustomTemporalitySelector(SdkTemporality::Cumulative); - - // UpDownCounter should always be cumulative - assert_eq!( - delta_selector.temporality(InstrumentKind::UpDownCounter), - SdkTemporality::Cumulative, - "UpDownCounter should always use cumulative 
temporality even with delta config" - ); - assert_eq!( - cumulative_selector.temporality(InstrumentKind::UpDownCounter), - SdkTemporality::Cumulative, - "UpDownCounter should use cumulative temporality with cumulative config" - ); - - // ObservableUpDownCounter should always be cumulative - assert_eq!( - delta_selector.temporality(InstrumentKind::ObservableUpDownCounter), - SdkTemporality::Cumulative, - "ObservableUpDownCounter should always use cumulative temporality even with delta config" - ); - assert_eq!( - cumulative_selector.temporality(InstrumentKind::ObservableUpDownCounter), - SdkTemporality::Cumulative, - "ObservableUpDownCounter should use cumulative temporality with cumulative config" - ); - } - - #[test] - fn test_counter_temporality_respects_config() { - // Test that regular counters respect the configured temporality - let delta_selector = CustomTemporalitySelector(SdkTemporality::Delta); - let cumulative_selector = CustomTemporalitySelector(SdkTemporality::Cumulative); - - // Counter should respect configuration - assert_eq!( - delta_selector.temporality(InstrumentKind::Counter), - SdkTemporality::Delta, - "Counter should use delta temporality with delta config" - ); - assert_eq!( - cumulative_selector.temporality(InstrumentKind::Counter), - SdkTemporality::Cumulative, - "Counter should use cumulative temporality with cumulative config" - ); - - // ObservableCounter should respect configuration - assert_eq!( - delta_selector.temporality(InstrumentKind::ObservableCounter), - SdkTemporality::Delta, - "ObservableCounter should use delta temporality with delta config" - ); - assert_eq!( - cumulative_selector.temporality(InstrumentKind::ObservableCounter), - SdkTemporality::Cumulative, - "ObservableCounter should use cumulative temporality with cumulative config" - ); - } - - #[test] - fn test_gauge_temporality_respects_config() { - // Test that gauges respect the configured temporality (gauges are not forced to cumulative) - let delta_selector = 
CustomTemporalitySelector(SdkTemporality::Delta); - let cumulative_selector = CustomTemporalitySelector(SdkTemporality::Cumulative); - - // Gauge should respect configuration - assert_eq!( - delta_selector.temporality(InstrumentKind::Gauge), - SdkTemporality::Delta, - "Gauge should use delta temporality with delta config" - ); - assert_eq!( - cumulative_selector.temporality(InstrumentKind::Gauge), - SdkTemporality::Cumulative, - "Gauge should use cumulative temporality with cumulative config" - ); - - // ObservableGauge should respect configuration - assert_eq!( - delta_selector.temporality(InstrumentKind::ObservableGauge), - SdkTemporality::Delta, - "ObservableGauge should use delta temporality with delta config" - ); - assert_eq!( - cumulative_selector.temporality(InstrumentKind::ObservableGauge), - SdkTemporality::Cumulative, - "ObservableGauge should use cumulative temporality with cumulative config" - ); - } - - #[test] - fn test_histogram_temporality_respects_config() { - // Test that histograms respect the configured temporality - let delta_selector = CustomTemporalitySelector(SdkTemporality::Delta); - let cumulative_selector = CustomTemporalitySelector(SdkTemporality::Cumulative); - - // Histogram should respect configuration - assert_eq!( - delta_selector.temporality(InstrumentKind::Histogram), - SdkTemporality::Delta, - "Histogram should use delta temporality with delta config" - ); - assert_eq!( - cumulative_selector.temporality(InstrumentKind::Histogram), - SdkTemporality::Cumulative, - "Histogram should use cumulative temporality with cumulative config" - ); - } - - #[test] - fn endpoint_grpc_defaulting_no_scheme() { - let url = Url::parse("api.apm.com:433").unwrap(); - let exporter = GrpcExporter::default(); - let domain = exporter.default_tls_domain(&url); - assert_eq!(domain, None); - } - - #[test] - fn endpoint_grpc_defaulting_scheme() { - let url = Url::parse("https://api.apm.com:433").unwrap(); - let exporter = GrpcExporter::default(); - let 
domain = exporter.default_tls_domain(&url); - assert_eq!(domain, Some(url.domain().expect("domain was expected")),); - } - - #[test] - fn endpoint_grpc_explicit_domain() { - let url = Url::parse("https://api.apm.com:433").unwrap(); - let exporter = GrpcExporter { - domain_name: Some("foo.bar".to_string()), - ..Default::default() - }; - let domain = exporter.default_tls_domain(&url); - assert_eq!(domain, Some("foo.bar")); - } - #[test] fn test_process_endpoint() { // Traces @@ -474,10 +347,15 @@ mod tests { process_endpoint(&endpoint, &TelemetryDataKind::Traces, &Protocol::Grpc).unwrap(); assert_eq!(endpoint, processed_endpoint); + let endpoint = Some("".to_string()); + let processed_endpoint = + process_endpoint(&endpoint, &TelemetryDataKind::Traces, &Protocol::Grpc).unwrap(); + assert_eq!(None, processed_endpoint); + let endpoint = Some("default".to_string()); let processed_endpoint = process_endpoint(&endpoint, &TelemetryDataKind::Traces, &Protocol::Grpc).unwrap(); - assert_eq!(Some("".to_string()), processed_endpoint); + assert_eq!(None, processed_endpoint); let endpoint = Some("https://api.apm.com:433/v1/traces".to_string()); let processed_endpoint = @@ -540,10 +418,15 @@ mod tests { process_endpoint(&endpoint, &TelemetryDataKind::Metrics, &Protocol::Grpc).unwrap(); assert_eq!(None, processed_endpoint); + let endpoint = Some("".to_string()); + let processed_endpoint = + process_endpoint(&endpoint, &TelemetryDataKind::Traces, &Protocol::Grpc).unwrap(); + assert_eq!(None, processed_endpoint); + let endpoint = Some("default".to_string()); let processed_endpoint = process_endpoint(&endpoint, &TelemetryDataKind::Metrics, &Protocol::Grpc).unwrap(); - assert_eq!(Some("".to_string()), processed_endpoint); + assert_eq!(None, processed_endpoint); let endpoint = Some("https://api.apm.com:433/v1/metrics".to_string()); let processed_endpoint = diff --git a/apollo-router/src/plugins/telemetry/reload/activation.rs b/apollo-router/src/plugins/telemetry/reload/activation.rs 
index 82e993ba60..2abb54c3e8 100644 --- a/apollo-router/src/plugins/telemetry/reload/activation.rs +++ b/apollo-router/src/plugins/telemetry/reload/activation.rs @@ -25,6 +25,7 @@ use std::collections::HashMap; use std::sync::LazyLock; +use opentelemetry::InstrumentationScope; use opentelemetry::propagation::TextMapCompositePropagator; use opentelemetry::trace::TracerProvider; use parking_lot::Mutex; @@ -46,7 +47,7 @@ use crate::plugins::telemetry::reload::otel::reload_fmt; /// then atomically applies them during the activation phase via [`Activation::commit()`]. pub(crate) struct Activation { /// The new tracer provider. None means leave the existing one - new_trace_provider: Option, + new_trace_provider: Option, /// The new tracer propagator. None means leave the existing one new_trace_propagator: Option, @@ -135,7 +136,7 @@ impl Activation { pub(crate) fn with_tracer_provider( &mut self, - tracer_provider: opentelemetry_sdk::trace::TracerProvider, + tracer_provider: opentelemetry_sdk::trace::SdkTracerProvider, ) { self.new_trace_provider = Some(tracer_provider); #[cfg(test)] @@ -191,10 +192,11 @@ impl Activation { && let Some(tracer_provider) = self.new_trace_provider.take() { // Build a new tracer from the provider and hot-swap it into the tracing subscriber - let tracer = tracer_provider - .tracer_builder(GLOBAL_TRACER_NAME) - .with_version(env!("CARGO_PKG_VERSION")) - .build(); + let tracer = tracer_provider.tracer_with_scope( + InstrumentationScope::builder(GLOBAL_TRACER_NAME) + .with_version(env!("CARGO_PKG_VERSION")) + .build(), + ); hot_tracer.reload(tracer); // Install the new provider globally and safely drop the old one in a blocking task diff --git a/apollo-router/src/plugins/telemetry/reload/builder.rs b/apollo-router/src/plugins/telemetry/reload/builder.rs index cc9e39aa2d..18924ae4a9 100644 --- a/apollo-router/src/plugins/telemetry/reload/builder.rs +++ b/apollo-router/src/plugins/telemetry/reload/builder.rs @@ -37,6 +37,7 @@ use 
crate::plugins::telemetry::apollo; use crate::plugins::telemetry::apollo_exporter::Sender; use crate::plugins::telemetry::config::Conf; use crate::plugins::telemetry::config::MetricView; +use crate::plugins::telemetry::config::OTelMetricView; use crate::plugins::telemetry::config_new::cache::CACHE_METRIC; use crate::plugins::telemetry::fmt_layer::create_fmt_layer; use crate::plugins::telemetry::metrics; @@ -99,7 +100,12 @@ impl<'a> Builder<'a> { let mut builder = MetricsBuilder::new(self.config); builder.configure(&self.config.exporters.metrics.prometheus)?; builder.configure(&self.config.exporters.metrics.otlp)?; - builder.configure_views(MeterProviderType::Public)?; + // Only configure views if the customer enabled an exporter. Otherwise, + // `configure_public_views()` would create a Public MeterProvider with nowhere to export + // the metrics + if self.config.exporters.metrics.prometheus.enabled || self.config.exporters.metrics.otlp.enabled { + builder.configure_public_views()?; + } let (prometheus_registry, meter_providers, _) = builder.build(); self.activation @@ -157,18 +163,16 @@ impl<'a> Builder<'a> { // we throw the entity caching operations metric here. This is handled exceptionally // until we move fully from entity caching to response caching which does NOT // necessitate this as it does not touch the safe-listed `operations.*` namespace. 
- builder.with_view( - MeterProviderType::Apollo, - MetricView { - name: String::from(CACHE_METRIC), - rename: None, - description: None, - unit: None, - aggregation: Some(crate::plugins::telemetry::config::MetricAggregation::Drop), - allowed_attribute_keys: None, - } - .try_into()?, - ); + let view: OTelMetricView = MetricView { + name: String::from(CACHE_METRIC), + rename: None, + description: None, + unit: None, + aggregation: Some(crate::plugins::telemetry::config::MetricAggregation::Drop), + allowed_attribute_keys: None, + } + .try_into()?; + builder.with_view(MeterProviderType::Apollo, view); } let (_, meter_providers, sender) = builder.build(); diff --git a/apollo-router/src/plugins/telemetry/reload/metrics.rs b/apollo-router/src/plugins/telemetry/reload/metrics.rs index 00d95f7877..c2fcbac481 100644 --- a/apollo-router/src/plugins/telemetry/reload/metrics.rs +++ b/apollo-router/src/plugins/telemetry/reload/metrics.rs @@ -22,9 +22,10 @@ use ahash::HashMap; use opentelemetry_sdk::Resource; +use opentelemetry_sdk::metrics::{Aggregation, Instrument, InstrumentKind}; use opentelemetry_sdk::metrics::MeterProviderBuilder; use opentelemetry_sdk::metrics::SdkMeterProvider; -use opentelemetry_sdk::metrics::View; +use opentelemetry_sdk::metrics::Stream; use prometheus::Registry; use tower::BoxError; @@ -128,11 +129,14 @@ impl<'a> MetricsBuilder<'a> { self } - pub(crate) fn with_view( + pub(crate) fn with_view( &mut self, meter_provider_type: MeterProviderType, - view: Box, - ) -> &mut Self { + view: T, + ) -> &mut Self + where + T: Fn(&Instrument) -> Option + Send + Sync + 'static, + { let meter_provider = self.meter_provider(meter_provider_type); *meter_provider = std::mem::take(meter_provider).with_view(view); self @@ -172,12 +176,56 @@ impl<'a> MetricsBuilder<'a> { }) } - pub(crate) fn configure_views( - &mut self, - meter_provider_type: MeterProviderType, - ) -> Result<(), BoxError> { + pub(crate) fn configure_public_views(&mut self) -> Result<(), BoxError> { + // 
First apply a "common" view with buckets for those that don't have a custom view defined + let instrument_names_with_custom_views = self + .metrics_common() + .views + .iter() + .map(|v| v.name.clone()) + .collect::>(); + let common_buckets = self.metrics_common().buckets.clone(); + let common_view = move |i: &Instrument| { + if matches!(i.kind(), InstrumentKind::Histogram) && !instrument_names_with_custom_views.contains(&i.name().to_string()) { + Some( + Stream::builder() + .with_aggregation(Aggregation::ExplicitBucketHistogram { + boundaries: common_buckets.clone(), + record_min_max: true, + }) + .build() + .unwrap(), + ) + } else { + None + } + }; + self.with_view(MeterProviderType::Public, common_view); + // Next apply all custom views. If new buckets are not defined by the custom view, we add + // the common buckets to it. for metric_view in self.metrics_common().views.clone() { - self.with_view(meter_provider_type, metric_view.try_into()?); + // MetricView doesn't have access to metric_common, so we insert the default via hook + // here. + let view; + let common_buckets = self.metrics_common().buckets.clone(); + if metric_view.aggregation.is_none() { + view = metric_view.try_into_otel_metric_view_with(move |i, builder| { + if matches!(i.kind(), InstrumentKind::Histogram) { + builder.with_aggregation( + Aggregation::ExplicitBucketHistogram { + boundaries: common_buckets.clone(), + record_min_max: true, + } + ) + } else { + builder + } + })?; + } else { + view = metric_view.try_into()?; + } + + self.with_view(MeterProviderType::Public, view); } Ok(()) } diff --git a/apollo-router/src/plugins/telemetry/reload/otel.rs b/apollo-router/src/plugins/telemetry/reload/otel.rs index 62f44f7c26..e4004f39a4 100644 --- a/apollo-router/src/plugins/telemetry/reload/otel.rs +++ b/apollo-router/src/plugins/telemetry/reload/otel.rs @@ -25,6 +25,7 @@ //! The reload handles enable the activation phase to update telemetry without recreating the //! 
entire subscriber stack, which would require restarting the application. +use std::fmt::Debug; use std::io::IsTerminal; use anyhow::anyhow; @@ -54,6 +55,7 @@ use crate::plugins::telemetry::formatters::text::Text; use crate::plugins::telemetry::otel; use crate::plugins::telemetry::otel::OpenTelemetryLayer; use crate::plugins::telemetry::otel::PreSampledTracer; +use crate::plugins::telemetry::otel_layers::{OtelErrorLayer, ReemitOtelEventsLayer}; use crate::plugins::telemetry::tracing::reload::ReloadTracer; use crate::tracer::TraceId; @@ -78,14 +80,12 @@ static FMT_LAYER_HANDLE: OnceCell< > = OnceCell::new(); pub(crate) fn init_telemetry(log_level: &str) -> anyhow::Result<()> { - let hot_tracer = ReloadTracer::new( - opentelemetry_sdk::trace::TracerProvider::default() - .tracer_builder("noop") - .build(), - ); + let hot_tracer = + ReloadTracer::new(opentelemetry_sdk::trace::SdkTracerProvider::default().tracer("noop")); let opentelemetry_layer = otel::layer().with_tracer(hot_tracer.clone()); - - // We choose json or plain based on tty + // We choose json or plain based on tty. + // We filter out raw opentelemetry logs inside FmtLayer::on_* functions so we can modify and + // show them in otel_layers.rs let fmt = if std::io::stdout().is_terminal() { FmtLayer::new(Text::default(), std::io::stdout).boxed() } else { @@ -106,6 +106,8 @@ pub(crate) fn init_telemetry(log_level: &str) -> anyhow::Result<()> { .with(DynAttributeLayer::new()) .with(opentelemetry_layer) .with(fmt_layer) + .with(OtelErrorLayer::new()) + .with(ReemitOtelEventsLayer) .with(WarnLegacyMetricsLayer) .with(EnvFilter::try_new(log_level)?) 
.try_init()?; @@ -249,4 +251,4 @@ impl Layer for WarnLegacyMetricsLayer { )); } } -} +} \ No newline at end of file diff --git a/apollo-router/src/plugins/telemetry/reload/tracing.rs b/apollo-router/src/plugins/telemetry/reload/tracing.rs index 3b6f2744dd..e8c0183fbf 100644 --- a/apollo-router/src/plugins/telemetry/reload/tracing.rs +++ b/apollo-router/src/plugins/telemetry/reload/tracing.rs @@ -22,10 +22,13 @@ use opentelemetry::propagation::TextMapCompositePropagator; use opentelemetry::propagation::TextMapPropagator; +use opentelemetry_sdk::trace::Sampler; +use opentelemetry_sdk::trace::SdkTracerProvider; use opentelemetry_sdk::trace::SpanProcessor; -use opentelemetry_sdk::trace::TracerProvider; +use opentelemetry_sdk::trace::TracerProviderBuilder; use tower::BoxError; +use crate::_private::telemetry::ConfigResource; use crate::plugins::telemetry::CustomTraceIdPropagator; use crate::plugins::telemetry::config::Conf; use crate::plugins::telemetry::config::Propagation; @@ -37,7 +40,7 @@ use crate::plugins::telemetry::config_new::spans::Spans; pub(crate) struct TracingBuilder<'a> { common: &'a TracingCommon, spans: &'a Spans, - builder: opentelemetry_sdk::trace::Builder, + builder: TracerProviderBuilder, } impl<'a> TracingBuilder<'a> { @@ -45,8 +48,21 @@ impl<'a> TracingBuilder<'a> { Self { common: &config.exporters.tracing.common, spans: &config.instrumentation.spans, - builder: opentelemetry_sdk::trace::TracerProvider::builder() - .with_config((&config.exporters.tracing.common).into()), + builder: opentelemetry_sdk::trace::SdkTracerProvider::builder() + .with_resource(config.exporters.tracing.common.to_resource()) + .with_sampler::(config.exporters.tracing.common.sampler.clone().into()) + .with_max_events_per_span(config.exporters.tracing.common.max_events_per_span) + .with_max_attributes_per_span( + config.exporters.tracing.common.max_attributes_per_span, + ) + .with_max_attributes_per_event( + config.exporters.tracing.common.max_attributes_per_event, + ) + 
.with_max_attributes_per_link( + config.exporters.tracing.common.max_attributes_per_link, + ), + //TODO DD agent sampling + //TODO parent_based_sampler } } @@ -70,7 +86,7 @@ impl<'a> TracingBuilder<'a> { self.builder = builder.with_span_processor(span_processor); } - pub(crate) fn build(self) -> TracerProvider { + pub(crate) fn build(self) -> SdkTracerProvider { self.builder.build() } } @@ -114,9 +130,7 @@ pub(crate) fn create_propagator( } } - propagators.push(Box::< - crate::plugins::telemetry::tracing::datadog_exporter::DatadogPropagator, - >::default()); + propagators.push(Box::::default()); } if propagation.aws_xray { propagators.push(Box::::default()); diff --git a/apollo-router/src/plugins/telemetry/resource.rs b/apollo-router/src/plugins/telemetry/resource.rs index 338e22d8f2..17162abeb5 100644 --- a/apollo-router/src/plugins/telemetry/resource.rs +++ b/apollo-router/src/plugins/telemetry/resource.rs @@ -1,6 +1,5 @@ use std::collections::BTreeMap; use std::env; -use std::time::Duration; use opentelemetry::KeyValue; use opentelemetry_sdk::Resource; @@ -15,7 +14,7 @@ const OTEL_SERVICE_NAME: &str = "OTEL_SERVICE_NAME"; /// Users can always override them via config. 
struct StaticResourceDetector; impl ResourceDetector for StaticResourceDetector { - fn detect(&self, _timeout: Duration) -> Resource { + fn detect(&self) -> Resource { let mut config_resources = vec![]; config_resources.push(KeyValue::new( opentelemetry_semantic_conventions::resource::SERVICE_VERSION, @@ -29,20 +28,24 @@ impl ResourceDetector for StaticResourceDetector { executable_name, )); } - Resource::new(config_resources) + Resource::builder() + .with_attributes(config_resources) + .build() } } struct EnvServiceNameDetector; // Used instead of SdkProvidedResourceDetector impl ResourceDetector for EnvServiceNameDetector { - fn detect(&self, _timeout: Duration) -> Resource { + fn detect(&self) -> Resource { match env::var(OTEL_SERVICE_NAME) { - Ok(service_name) if !service_name.is_empty() => Resource::new(vec![KeyValue::new( - opentelemetry_semantic_conventions::resource::SERVICE_NAME, - service_name, - )]), - Ok(_) | Err(_) => Resource::new(vec![]), // return empty resource + Ok(service_name) if !service_name.is_empty() => Resource::builder() + .with_attribute(KeyValue::new( + opentelemetry_semantic_conventions::resource::SERVICE_NAME, + service_name, + )) + .build(), + Ok(_) | Err(_) => Resource::builder_empty().build(), // return empty resource } } } @@ -62,28 +65,31 @@ pub trait ConfigResource { }; // Last one wins - let resource = Resource::from_detectors( - Duration::from_secs(0), - vec![ - Box::new(StaticResourceDetector), - Box::new(config_resource_detector), - Box::new(EnvResourceDetector::new()), - Box::new(EnvServiceNameDetector), - ], - ); + let detectors: Vec> = vec![ + Box::new(StaticResourceDetector), + Box::new(config_resource_detector), + Box::new(EnvResourceDetector::new()), + Box::new(EnvServiceNameDetector), + ]; + let resource = Resource::builder().with_detectors(&detectors).build(); // Default service name if resource - .get(opentelemetry_semantic_conventions::resource::SERVICE_NAME.into()) + .get(&opentelemetry::Key::from( + 
opentelemetry_semantic_conventions::resource::SERVICE_NAME, + )) .is_none() { let executable_name = executable_name(); - resource.merge(&Resource::new(vec![KeyValue::new( - opentelemetry_semantic_conventions::resource::SERVICE_NAME, - executable_name - .map(|executable_name| format!("{UNKNOWN_SERVICE}:{executable_name}")) - .unwrap_or_else(|| UNKNOWN_SERVICE.to_string()), - )])) + Resource::builder() + .with_detectors(&detectors) + .with_attribute(KeyValue::new( + opentelemetry_semantic_conventions::resource::SERVICE_NAME, + executable_name + .map(|executable_name| format!("{UNKNOWN_SERVICE}:{executable_name}")) + .unwrap_or_else(|| UNKNOWN_SERVICE.to_string()), + )) + .build() } else { resource } @@ -104,7 +110,7 @@ struct ConfigResourceDetector { } impl ResourceDetector for ConfigResourceDetector { - fn detect(&self, _timeout: Duration) -> Resource { + fn detect(&self) -> Resource { let mut config_resources = vec![]; // For config resources last entry wins @@ -136,7 +142,9 @@ impl ResourceDetector for ConfigResourceDetector { service_name.to_string(), )); } - Resource::new(config_resources) + Resource::builder() + .with_attributes(config_resources) + .build() } } diff --git a/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics.snap b/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics.snap index 925e5b8de3..b5982c70c1 100644 --- a/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics.snap +++ b/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics.snap @@ -1,17 +1,18 @@ --- source: apollo-router/src/plugins/telemetry/mod.rs expression: "prometheus_metrics.replace(& format!\n(r#\"service_version=\"{}\"\"#, std :: env! 
(\"CARGO_PKG_VERSION\")),\nr#\"service_version=\"X\"\"#)" +snapshot_kind: text --- -apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="+Inf"} 1 -apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.001"} 0 -apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.005"} 0 -apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.015"} 0 -apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.05"} 0 -apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.1"} 0 -apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.2"} 0 -apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.3"} 0 -apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.4"} 0 -apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.5"} 0 -apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="1"} 1 -apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="10"} 1 -apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="5"} 1 +apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="+Inf"} 1 +apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.001"} 0 
+apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.005"} 0 +apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.015"} 0 +apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.05"} 0 +apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.1"} 0 +apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.2"} 0 +apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.3"} 0 +apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.4"} 0 +apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.5"} 0 +apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="1"} 1 
+apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="10"} 1 +apollo_test_histo_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="5"} 1 diff --git a/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics_custom_buckets_for_specific_metrics.snap b/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics_custom_buckets_for_specific_metrics.snap index 4949edee8a..faad015be9 100644 --- a/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics_custom_buckets_for_specific_metrics.snap +++ b/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics_custom_buckets_for_specific_metrics.snap @@ -1,10 +1,25 @@ --- source: apollo-router/src/plugins/telemetry/mod.rs expression: "prometheus_metrics.replace(& format!\n(r#\"service_version=\"{}\"\"#, std :: env! 
(\"CARGO_PKG_VERSION\")),\nr#\"service_version=\"X\"\"#)" +snapshot_kind: text --- -apollo_test_histo_bucket{otel_scope_name="apollo/router",le="+Inf"} 1 -apollo_test_histo_bucket{otel_scope_name="apollo/router",le="1"} 1 -apollo_test_histo_bucket{otel_scope_name="apollo/router",le="2"} 1 -apollo_test_histo_bucket{otel_scope_name="apollo/router",le="3"} 1 -apollo_test_histo_bucket{otel_scope_name="apollo/router",le="4"} 1 -apollo_test_histo_bucket{otel_scope_name="apollo/router",le="5"} 1 +# HELP apollo_test_global should have global buckets +apollo_test_custom_bucket{otel_scope_name="apollo/router",le="+Inf"} 1 +apollo_test_custom_bucket{otel_scope_name="apollo/router",le="1"} 1 +apollo_test_custom_bucket{otel_scope_name="apollo/router",le="2"} 1 +apollo_test_custom_bucket{otel_scope_name="apollo/router",le="3"} 1 +apollo_test_custom_bucket{otel_scope_name="apollo/router",le="4"} 1 +apollo_test_custom_bucket{otel_scope_name="apollo/router",le="5"} 1 +apollo_test_global_bucket{otel_scope_name="apollo/router",le="+Inf"} 1 +apollo_test_global_bucket{otel_scope_name="apollo/router",le="0.001"} 0 +apollo_test_global_bucket{otel_scope_name="apollo/router",le="0.005"} 0 +apollo_test_global_bucket{otel_scope_name="apollo/router",le="0.015"} 0 +apollo_test_global_bucket{otel_scope_name="apollo/router",le="0.05"} 0 +apollo_test_global_bucket{otel_scope_name="apollo/router",le="0.1"} 0 +apollo_test_global_bucket{otel_scope_name="apollo/router",le="0.2"} 0 +apollo_test_global_bucket{otel_scope_name="apollo/router",le="0.3"} 0 +apollo_test_global_bucket{otel_scope_name="apollo/router",le="0.4"} 0 +apollo_test_global_bucket{otel_scope_name="apollo/router",le="0.5"} 0 +apollo_test_global_bucket{otel_scope_name="apollo/router",le="1"} 1 +apollo_test_global_bucket{otel_scope_name="apollo/router",le="10"} 1 +apollo_test_global_bucket{otel_scope_name="apollo/router",le="5"} 1 diff --git 
a/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics_units_are_included.snap b/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics_units_are_included.snap index b6cf273ce5..d9a613f14d 100644 --- a/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics_units_are_included.snap +++ b/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics_units_are_included.snap @@ -1,30 +1,31 @@ --- source: apollo-router/src/plugins/telemetry/mod.rs expression: "prometheus_metrics.replace(& format!\n(r#\"service_version=\"{}\"\"#, std :: env! (\"CARGO_PKG_VERSION\")),\nr#\"service_version=\"X\"\"#)" +snapshot_kind: text --- -apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="+Inf"} 1 -apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.001"} 0 -apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.005"} 0 -apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.015"} 0 -apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.05"} 0 -apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.1"} 0 -apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.2"} 0 -apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.3"} 0 -apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.4"} 0 -apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.5"} 0 
-apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="1"} 1 -apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="10"} 1 -apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="5"} 1 -apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="+Inf"} 1 -apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.001"} 0 -apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.005"} 0 -apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.015"} 0 -apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.05"} 0 -apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.1"} 0 -apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.2"} 0 -apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.3"} 0 -apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.4"} 0 -apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="0.5"} 0 -apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="1"} 1 -apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="10"} 1 -apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",test_resource="test",le="5"} 1 
+apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="+Inf"} 1 +apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.001"} 0 +apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.005"} 0 +apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.015"} 0 +apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.05"} 0 +apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.1"} 0 +apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.2"} 0 +apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.3"} 0 +apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.4"} 0 
+apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.5"} 0 +apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="1"} 1 +apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="10"} 1 +apollo_test_histo1_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="5"} 1 +apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="+Inf"} 1 +apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.001"} 0 +apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.005"} 0 +apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.015"} 0 +apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.05"} 0 
+apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.1"} 0 +apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.2"} 0 +apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.3"} 0 +apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.4"} 0 +apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="0.5"} 0 +apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="1"} 1 +apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="10"} 1 +apollo_test_histo2_seconds_bucket{otel_scope_name="apollo/router",service_version="X",telemetry_sdk_language="rust",telemetry_sdk_name="opentelemetry",telemetry_sdk_version="0.30.0",test_resource="test",le="5"} 1 diff --git a/apollo-router/src/plugins/telemetry/testdata/prometheus_custom_buckets_specific_metrics.router.yaml b/apollo-router/src/plugins/telemetry/testdata/prometheus_custom_buckets_specific_metrics.router.yaml index 23352c1cf6..1b90c44ed5 100644 --- 
a/apollo-router/src/plugins/telemetry/testdata/prometheus_custom_buckets_specific_metrics.router.yaml +++ b/apollo-router/src/plugins/telemetry/testdata/prometheus_custom_buckets_specific_metrics.router.yaml @@ -7,7 +7,7 @@ telemetry: common: service_name: apollo-router views: - - name: apollo.test.histo + - name: apollo.test.custom unit: seconds description: duration of the http request aggregation: diff --git a/apollo-router/src/plugins/telemetry/tracing/apollo.rs b/apollo-router/src/plugins/telemetry/tracing/apollo.rs index 1e7b67ea62..b882a31d5d 100644 --- a/apollo-router/src/plugins/telemetry/tracing/apollo.rs +++ b/apollo-router/src/plugins/telemetry/tracing/apollo.rs @@ -1,5 +1,6 @@ //! Tracing configuration for apollo telemetry. -use opentelemetry_sdk::trace::BatchSpanProcessor; + +use opentelemetry_sdk::trace::span_processor_with_async_runtime::BatchSpanProcessor; use serde::Serialize; use tower::BoxError; diff --git a/apollo-router/src/plugins/telemetry/tracing/apollo_telemetry.rs b/apollo-router/src/plugins/telemetry/tracing/apollo_telemetry.rs index 8b153f16d8..9bca66f322 100644 --- a/apollo-router/src/plugins/telemetry/tracing/apollo_telemetry.rs +++ b/apollo-router/src/plugins/telemetry/tracing/apollo_telemetry.rs @@ -3,7 +3,7 @@ use std::collections::HashMap; use std::collections::HashSet; use std::io::Cursor; use std::num::NonZeroUsize; -use std::sync::Arc; +use std::sync::Mutex; use std::time::SystemTime; use std::time::SystemTimeError; @@ -11,8 +11,6 @@ use async_trait::async_trait; use base64::Engine as _; use base64::prelude::BASE64_STANDARD; use derivative::Derivative; -use futures::FutureExt; -use futures::future::BoxFuture; use http::HeaderMap; use http::HeaderValue; use http::header::CACHE_CONTROL; @@ -24,17 +22,18 @@ use opentelemetry::Value; use opentelemetry::trace::SpanId; use opentelemetry::trace::SpanKind; use opentelemetry::trace::Status; -use opentelemetry::trace::TraceError; use opentelemetry::trace::TraceId; use 
opentelemetry_sdk::Resource; -use opentelemetry_sdk::export::trace::ExportResult; -use opentelemetry_sdk::export::trace::SpanData; -use opentelemetry_sdk::export::trace::SpanExporter; +use opentelemetry_sdk::error::OTelSdkResult; +use opentelemetry_sdk::trace::SpanData; +use opentelemetry_sdk::trace::SpanExporter; use prost::Message; use rand::Rng; +use rhai::Shared; use serde::de::DeserializeOwned; use serde_json::Value as JSONValue; use thiserror::Error; +use tokio::sync::Mutex as TokioMutex; use tracing::Level; use url::Url; @@ -287,7 +286,7 @@ impl LightSpanData { None => value .attributes .into_iter() - .map(|KeyValue { key, value }| (key, value)) + .map(|KeyValue { key, value, .. }| (key, value)) .collect(), Some(attr_names) => value .attributes @@ -349,13 +348,12 @@ impl LightSpanData { #[derive(Derivative)] #[derivative(Debug)] pub(crate) struct Exporter { - spans_by_parent_id: LruCache>, - /// An externally updateable gauge for "apollo.router.exporter.span.lru.size". - span_lru_size_instrument: LruSizeInstrument, + spans_by_parent_id: Shared>>>, + span_lru_size_instrument: Shared>, #[derivative(Debug = "ignore")] - report_exporter: Option>, + report_exporter: Option>, #[derivative(Debug = "ignore")] - otlp_exporter: Option, + otlp_exporter: Option>>, otlp_tracing_ratio: f64, field_execution_weight: f64, errors_configuration: ErrorsConfiguration, @@ -426,10 +424,10 @@ impl Exporter { LruSizeInstrument::new("apollo.router.exporter.span.lru.size"); Ok(Self { - spans_by_parent_id: LruCache::new(buffer_size), - span_lru_size_instrument, + spans_by_parent_id: Shared::new(Mutex::new(LruCache::new(buffer_size))), + span_lru_size_instrument: Shared::new(Mutex::new(span_lru_size_instrument)), report_exporter: if otlp_tracing_ratio < 1f64 { - Some(Arc::new(ApolloExporter::new( + Some(Shared::new(ApolloExporter::new( endpoint, &batch_processor_config.into(), apollo_key, @@ -442,7 +440,7 @@ impl Exporter { None }, otlp_exporter: if otlp_tracing_ratio > 0f64 { - 
Some(ApolloOtlpExporter::new( + Some(Shared::new(TokioMutex::new(ApolloOtlpExporter::new( otlp_endpoint, otlp_tracing_protocol, batch_processor_config, @@ -450,7 +448,7 @@ impl Exporter { apollo_graph_ref, schema_id, errors_configuration, - )?) + )?))) } else { None }, @@ -479,7 +477,7 @@ impl Exporter { } fn extract_root_traces( - &mut self, + &self, span: &LightSpanData, child_nodes: Vec, ) -> Result, Error> { @@ -575,7 +573,7 @@ impl Exporter { Ok(results) } - fn extract_traces(&mut self, span: LightSpanData) -> Result, Error> { + fn extract_traces(&self, span: LightSpanData) -> Result, Error> { let mut results = vec![]; for node in self.extract_data_from_spans(&span)? { if let TreeData::Request(trace) | TreeData::SubscriptionEvent(trace) = node { @@ -587,56 +585,92 @@ impl Exporter { /// Collects the subtree for a trace by calling pop() on the LRU cache for /// all spans in the tree. - fn pop_spans_for_tree(&mut self, root_span: LightSpanData) -> Vec { + fn pop_spans_for_tree(&self, root_span: LightSpanData) -> Result, String> { let root_span_id = root_span.span_id; - let mut child_spans = match self.spans_by_parent_id.pop(&root_span_id) { - Some(spans) => spans - .into_iter() - .flat_map(|(_, span)| self.pop_spans_for_tree(span)) - .collect(), + + // Acquire lock once and work with it + let mut cache_guard = self + .spans_by_parent_id + .lock() + .map_err(|_| "Failed to acquire spans cache lock")?; + + let child_spans = match cache_guard.pop(&root_span_id) { + Some(spans) => { + // Release the lock before recursion to avoid deadlocks + drop(cache_guard); + + // Recursively collect child spans + let mut all_child_spans = Vec::new(); + for (_, span) in spans { + match self.pop_spans_for_tree(span) { + Ok(mut tree_spans) => all_child_spans.append(&mut tree_spans), + Err(e) => { + tracing::error!("Failed to pop child spans: {}", e); + // Continue with other spans even if one fails + } + } + } + all_child_spans + } None => Vec::new(), }; + let mut spans_for_tree = 
vec![root_span]; - spans_for_tree.append(&mut child_spans); - spans_for_tree + spans_for_tree.extend(child_spans); + Ok(spans_for_tree) } /// Used by the OTLP exporter to build up a complete trace given an initial "root span". /// Iterates over all children and recursively collect the entire subtree. /// For a future iteration, consider using the same algorithm in `groupbytrace` processor, which /// groups based on trace ID instead of connecting recursively by parent ID. - fn group_by_trace(&mut self, span: LightSpanData) -> Vec { - self.pop_spans_for_tree(span) + fn group_by_trace(&self, span: LightSpanData) -> Vec { + self.pop_spans_for_tree(span).unwrap_or_default() } - fn extract_data_from_spans(&mut self, span: &LightSpanData) -> Result, Error> { - let (mut child_nodes, errors) = match self.spans_by_parent_id.pop_entry(&span.span_id) { - Some((_, spans)) => spans - .into_iter() - .map(|(_, span)| { - // If it's an unknown span or a span we don't care here it's better to know it here because as this algo is recursive if we encounter unknown spans it changes the order of spans and break the logics - let unknown = self.include_span_names.contains(span.name.as_ref()); - (self.extract_data_from_spans(&span), unknown) - }) - .fold( - (Vec::new(), Vec::new()), - |(mut oks, mut errors), (next, unknown_span)| { - match next { - Ok(mut children) => { - if unknown_span { - oks.append(&mut children) - } else { - children.append(&mut oks); - oks = children; - } - } - Err(err) => errors.push(err), - } - (oks, errors) - }, - ), - None => (Vec::new(), Vec::new()), + fn extract_data_from_spans(&self, span: &LightSpanData) -> Result, Error> { + // Safe mutex access instead of get_mut().unwrap() + let (mut child_nodes, errors) = match self.spans_by_parent_id.lock() { + Ok(mut cache_guard) => { + match cache_guard.pop_entry(&span.span_id) { + Some((_, spans)) => { + // Release the lock before recursion to avoid deadlocks + drop(cache_guard); + + spans + .into_iter() + .map(|(_, 
span)| { + // If it's an unknown span or a span we don't care here it's better to know it here because as this algo is recursive if we encounter unknown spans it changes the order of spans and break the logics + let unknown = self.include_span_names.contains(span.name.as_ref()); + (self.extract_data_from_spans(&span), unknown) + }) + .fold( + (Vec::new(), Vec::new()), + |(mut oks, mut errors), (next, unknown_span)| { + match next { + Ok(mut children) => { + if unknown_span { + oks.append(&mut children) + } else { + children.append(&mut oks); + oks = children; + } + } + Err(err) => errors.push(err), + } + (oks, errors) + }, + ) + } + None => (Vec::new(), Vec::new()), + } + } + Err(poisoned) => { + tracing::error!("Failed to acquire spans cache lock: {}", poisoned); + return Err(Error::TraceParsingFailed); + } }; + if !errors.is_empty() { return Err(Error::MultipleErrors(errors)); } @@ -1178,7 +1212,10 @@ fn extract_http_data(span: &LightSpanData) -> (Http, Option) { #[async_trait] impl SpanExporter for Exporter { /// Export spans to apollo telemetry - fn export(&mut self, batch: Vec) -> BoxFuture<'static, ExportResult> { + fn export( + &self, + batch: Vec, + ) -> impl std::future::Future + Send { // Exporting to apollo means that we must have complete trace as the entire trace must be built. // We do what we can, and if there are any traces that are not complete then we keep them for the next export event. // We may get spans that simply don't complete. These need to be cleaned up after a period. It's the price of using ftv1. 
@@ -1202,15 +1239,28 @@ impl SpanExporter for Exporter { &self.include_attr_names, &self.include_attr_event_names, ); + if send_otlp { let grouped_trace_spans = self.group_by_trace(root_span); - if let Some(trace) = self - .otlp_exporter - .as_ref() - .expect("otlp exporter required") - .prepare_for_export(grouped_trace_spans) - { - otlp_trace_spans.push(trace); + + // Safe mutex access for otlp_exporter + if let Some(exporter_mutex) = &self.otlp_exporter { + match exporter_mutex.try_lock() { + Ok(exporter) => { + if let Some(trace) = + exporter.prepare_for_export(grouped_trace_spans) + { + otlp_trace_spans.push(trace); + } + } + Err(_) => { + tracing::error!( + "Failed to acquire OTLP exporter lock for preparation" + ); + // Skip this span if we can't acquire the lock + continue; + } + } } } else if send_reports { match self.extract_traces(root_span) { @@ -1239,61 +1289,86 @@ impl SpanExporter for Exporter { // This is sad, but with LRU there is no `get_insert_mut` so a double lookup is required // It is safe to expect the entry to exist as we just inserted it, however capacity of the LRU must not be 0. 
- let len = self - .spans_by_parent_id - .get_or_insert(span.parent_span_id, || { - LruCache::new(NonZeroUsize::new(50).unwrap()) - }) - .len(); - self.spans_by_parent_id - .get_mut(&span.parent_span_id) - .expect("capacity of cache was zero") - .push( - len, - LightSpanData::from_span_data( - span, - &self.include_attr_names, - &self.include_attr_event_names, - ), - ); + + // Thread-safe cache access with proper error handling + match self.spans_by_parent_id.lock() { + Ok(mut cache_guard) => { + // Ensure the inner cache exists + if !cache_guard.contains(&span.parent_span_id) { + cache_guard.put( + span.parent_span_id, + LruCache::new(NonZeroUsize::new(50).unwrap()), + ); + } + + // Now get mutable access to the inner cache + if let Some(inner_cache) = cache_guard.get_mut(&span.parent_span_id) { + let len = inner_cache.len(); + inner_cache.push( + len, + LightSpanData::from_span_data( + span, + &self.include_attr_names, + &self.include_attr_event_names, + ), + ); + } + } + Err(poisoned) => { + tracing::error!("Spans cache mutex poisoned: {}", poisoned); + continue; + } + } } } - // Note this won't be correct anymore if there is any way outside of `.export()` - // to affect the size of the cache. 
- self.span_lru_size_instrument - .update(self.spans_by_parent_id.len() as u64); - - if send_otlp && !otlp_trace_spans.is_empty() { - self.otlp_exporter - .as_mut() - .expect("expected an otel exporter") - .export(otlp_trace_spans.into_iter().flatten().collect()) - } else if send_reports && !traces.is_empty() { - let mut report = telemetry::apollo::Report::default(); - report += SingleReport::Traces(TracesReport { traces }); - let exporter = self - .report_exporter - .as_ref() - .expect("expected an apollo exporter") - .clone(); - async move { - exporter - .submit_report(report) - .await - .map_err(|e| TraceError::ExportFailed(Box::new(e))) + // Update size instrument safely + if let Ok(cache_guard) = self.spans_by_parent_id.lock() { + // Assuming span_lru_size_instrument also needs mutex protection + if let Ok(instrument_guard) = self.span_lru_size_instrument.lock() { + instrument_guard.update(cache_guard.len() as u64); } - .boxed() - } else { - async { ExportResult::Ok(()) }.boxed() + } + + async move { + // Export based on type + if send_otlp && !otlp_trace_spans.is_empty() { + if let Some(exporter_mutex) = &self.otlp_exporter { + let mut exporter = exporter_mutex.lock().await; + return exporter + .export(otlp_trace_spans.into_iter().flatten().collect()) + .await; + } + } else if send_reports && !traces.is_empty() { + let mut report = telemetry::apollo::Report::default(); + report += SingleReport::Traces(TracesReport { traces }); + + if let Some(exporter) = &self.report_exporter { + return exporter.submit_report(report).await.map_err(|e| { + opentelemetry_sdk::error::OTelSdkError::InternalFailure(e.to_string()) + }); + } + } + + Ok(()) } } - fn shutdown(&mut self) { + fn shutdown(&mut self) -> OTelSdkResult { // Currently only handled in the OTLP case. 
- if let Some(exporter) = &mut self.otlp_exporter { - exporter.shutdown() - }; + // Safe shutdown with mutex + if let Some(exporter_mutex) = &mut self.otlp_exporter { + // Use try_lock for shutdown since it's expected to be synchronous + match exporter_mutex.try_lock() { + Ok(mut exporter) => exporter.shutdown(), + Err(_) => { + tracing::warn!("Could not acquire OTLP exporter lock during shutdown"); + Ok(()) + } + } + } else { + Ok(()) + } } fn set_resource(&mut self, _resource: &Resource) { diff --git a/apollo-router/src/plugins/telemetry/tracing/datadog/agent_sampling.rs b/apollo-router/src/plugins/telemetry/tracing/datadog/agent_sampling.rs index 9b523c4b39..02b8cd9453 100644 --- a/apollo-router/src/plugins/telemetry/tracing/datadog/agent_sampling.rs +++ b/apollo-router/src/plugins/telemetry/tracing/datadog/agent_sampling.rs @@ -1,14 +1,14 @@ -use opentelemetry::KeyValue; -use opentelemetry::Value; -use opentelemetry::trace::Link; +use opentelemetry::trace::{Link, TraceState}; use opentelemetry::trace::SamplingDecision; use opentelemetry::trace::SamplingResult; use opentelemetry::trace::SpanKind; use opentelemetry::trace::TraceId; +use opentelemetry::KeyValue; +use opentelemetry_datadog::DatadogTraceState; use opentelemetry_sdk::trace::ShouldSample; -use crate::plugins::telemetry::tracing::datadog_exporter::DatadogTraceState; -use crate::plugins::telemetry::tracing::datadog_exporter::propagator::SamplingPriority; +/// Key for Datadog Trace State Priority Sampling Rate +const PRIORITY_SAMPLING_RATE_KEY: &str = "psr"; /// The Datadog Agent Sampler /// @@ -41,6 +41,13 @@ impl DatadogAgentSampling { parent_based_sampler, } } + + // We used to be able to determine if the propagator had already set the priority based on + // trace_state.priority_sampling being set to None, but that value is now a non-optional boolean. + // Instead, we have to peek at the underlying key-value store. 
+ fn priority_sampling_is_set(&self, trace_state: &TraceState) -> bool { + trace_state.get(PRIORITY_SAMPLING_RATE_KEY).is_some() + } } impl ShouldSample for DatadogAgentSampling { @@ -61,48 +68,36 @@ impl ShouldSample for DatadogAgentSampling { attributes, links, ); + // Override the sampling decision to record and make sure that the trace state is set correctly // if either parent sampling is disabled or it has not been populated by a propagator. // The propagator gets first dibs on setting the trace state, so if it sets it, we don't override it unless we are not parent based. match result.decision { SamplingDecision::Drop | SamplingDecision::RecordOnly => { result.decision = SamplingDecision::RecordOnly; - if !self.parent_based_sampler || result.trace_state.sampling_priority().is_none() { - result.trace_state = result - .trace_state - .with_priority_sampling(SamplingPriority::AutoReject) + if !self.parent_based_sampler || !self.priority_sampling_is_set(&result.trace_state) { + result.trace_state = result.trace_state.with_priority_sampling(false) } } SamplingDecision::RecordAndSample => { - if !self.parent_based_sampler || result.trace_state.sampling_priority().is_none() { - result.trace_state = result - .trace_state - .with_priority_sampling(SamplingPriority::AutoKeep) + if !self.parent_based_sampler || !self.priority_sampling_is_set(&result.trace_state) { + result.trace_state = result.trace_state.with_priority_sampling(true) } } } // We always want to measure result.trace_state = result.trace_state.with_measuring(true); - // We always want to set the sampling.priority attribute in case we are communicating with the agent via otlp. 
- // Reverse engineered from https://github.com/DataDog/datadog-agent/blob/c692f62423f93988b008b669008f9199a5ad196b/pkg/trace/api/otlp.go#L502 - if let Some(priority) = result.trace_state.sampling_priority() { - result.attributes.push(KeyValue::new( - "sampling.priority", - Value::I64(priority.as_i64()), - )); - } else { - tracing::error!("Failed to set trace sampling priority."); - } + // We used to set the sampling.priority attribute here, but now that's handled by the + // DatadogPropagator where the x-datadog-sampling-priority header is set. + result } } + #[cfg(test)] mod tests { use buildstructor::Builder; - use opentelemetry::Context; - use opentelemetry::KeyValue; - use opentelemetry::Value; use opentelemetry::trace::Link; use opentelemetry::trace::SamplingDecision; use opentelemetry::trace::SamplingResult; @@ -113,12 +108,13 @@ mod tests { use opentelemetry::trace::TraceFlags; use opentelemetry::trace::TraceId; use opentelemetry::trace::TraceState; + use opentelemetry::Context; + use opentelemetry::KeyValue; + use opentelemetry_datadog::DatadogTraceState; use opentelemetry_sdk::trace::Sampler; use opentelemetry_sdk::trace::ShouldSample; - + use crate::plugins::telemetry::tracing::datadog::agent_sampling::PRIORITY_SAMPLING_RATE_KEY; use crate::plugins::telemetry::tracing::datadog::DatadogAgentSampling; - use crate::plugins::telemetry::tracing::datadog_exporter::DatadogTraceState; - use crate::plugins::telemetry::tracing::datadog_exporter::propagator::SamplingPriority; #[derive(Debug, Clone, Builder)] struct StubSampler { @@ -163,20 +159,8 @@ mod tests { // Verify that the decision is RecordOnly (converted from Drop) assert_eq!(result.decision, SamplingDecision::RecordOnly); - // Verify that the sampling priority is set to AutoReject - assert_eq!( - result.trace_state.sampling_priority(), - Some(SamplingPriority::AutoReject) - ); - // Verify that the sampling.priority attribute is set correctly - assert!( - result - .attributes - .iter() - .any(|kv| 
kv.key.as_str() == "sampling.priority" - && kv.value == Value::I64(SamplingPriority::AutoReject.as_i64())) - ); - + // Verify that the sampling priority is disabled + assert!(!result.trace_state.priority_sampling_enabled()); // Verify that measuring is enabled assert!(result.trace_state.measuring_enabled()); } @@ -200,20 +184,8 @@ mod tests { // Record only should remain as record only assert_eq!(result.decision, SamplingDecision::RecordOnly); - - // Verify that the sampling priority is set to AutoReject so the trace won't be transmitted to Datadog - assert_eq!( - result.trace_state.sampling_priority(), - Some(SamplingPriority::AutoReject) - ); - assert!( - result - .attributes - .iter() - .any(|kv| kv.key.as_str() == "sampling.priority" - && kv.value == Value::I64(SamplingPriority::AutoReject.as_i64())) - ); - + // Verify that the sampling priority is disabled so the trace won't be transmitted to Datadog + assert!(!result.trace_state.priority_sampling_enabled()); // Verify that measuring is enabled assert!(result.trace_state.measuring_enabled()); } @@ -237,20 +209,8 @@ mod tests { // Record and sample should remain as record and sample assert_eq!(result.decision, SamplingDecision::RecordAndSample); - - // Verify that the sampling priority is set to AutoKeep so the trace will be transmitted to Datadog - assert_eq!( - result.trace_state.sampling_priority(), - Some(SamplingPriority::AutoKeep) - ); - assert!( - result - .attributes - .iter() - .any(|kv| kv.key.as_str() == "sampling.priority" - && kv.value == Value::I64(SamplingPriority::AutoKeep.as_i64())) - ); - + // Verify that the sampling priority is enabled so the trace will be transmitted to Datadog + assert!(result.trace_state.priority_sampling_enabled()); // Verify that measuring is enabled assert!(result.trace_state.measuring_enabled()); } @@ -275,24 +235,14 @@ mod tests { // Record and sample should remain as record and sample assert_eq!(result.decision, SamplingDecision::RecordAndSample); - - // Verify that 
the sampling priority is set to AutoKeep so the trace will be transmitted to Datadog - assert_eq!( - result.trace_state.sampling_priority(), - Some(SamplingPriority::AutoKeep) - ); - assert!( - result - .attributes - .iter() - .any(|kv| kv.key.as_str() == "sampling.priority" - && kv.value == Value::I64(SamplingPriority::AutoKeep.as_i64())) - ); - + // Verify that the sampling priority is enabled so the trace will be transmitted to Datadog + assert!(result.trace_state.priority_sampling_enabled()); // Verify that measuring is enabled assert!(result.trace_state.measuring_enabled()); } + const USER_REJECTED_PSR: &str = "-1"; + #[test] fn test_trace_state_already_populated_record_and_sample() { let sampler = StubSampler::builder() @@ -308,7 +258,9 @@ mod tests { SpanId::from_u64(1), TraceFlags::SAMPLED, true, - TraceState::default().with_priority_sampling(SamplingPriority::UserReject), + TraceState::default() + .insert(PRIORITY_SAMPLING_RATE_KEY, USER_REJECTED_PSR) + .expect("failed to insert value"), ))), TraceId::from_u128(1), "test_span", @@ -319,20 +271,8 @@ mod tests { // Record and sample should remain as record and sample assert_eq!(result.decision, SamplingDecision::RecordAndSample); - // Verify that the sampling priority is not overridden - assert_eq!( - result.trace_state.sampling_priority(), - Some(SamplingPriority::UserReject) - ); - assert!( - result - .attributes - .iter() - .any(|kv| kv.key.as_str() == "sampling.priority" - && kv.value == Value::I64(SamplingPriority::UserReject.as_i64())) - ); - + assert_eq!(result.trace_state.get(PRIORITY_SAMPLING_RATE_KEY).unwrap(), USER_REJECTED_PSR); // Verify that measuring is enabled assert!(result.trace_state.measuring_enabled()); } @@ -352,7 +292,9 @@ mod tests { SpanId::from_u64(1), TraceFlags::default(), true, - TraceState::default().with_priority_sampling(SamplingPriority::UserReject), + TraceState::default() + .insert(PRIORITY_SAMPLING_RATE_KEY, USER_REJECTED_PSR) + .expect("failed to insert value"), ))), 
TraceId::from_u128(1), "test_span", @@ -363,20 +305,8 @@ mod tests { // Drop is converted to RecordOnly assert_eq!(result.decision, SamplingDecision::RecordOnly); - // Verify that the sampling priority is not overridden - assert_eq!( - result.trace_state.sampling_priority(), - Some(SamplingPriority::UserReject) - ); - assert!( - result - .attributes - .iter() - .any(|kv| kv.key.as_str() == "sampling.priority" - && kv.value == Value::I64(SamplingPriority::UserReject.as_i64())) - ); - + assert_eq!(result.trace_state.get(PRIORITY_SAMPLING_RATE_KEY).unwrap(), USER_REJECTED_PSR); // Verify that measuring is enabled assert!(result.trace_state.measuring_enabled()); } diff --git a/apollo-router/src/plugins/telemetry/tracing/datadog/mod.rs b/apollo-router/src/plugins/telemetry/tracing/datadog/mod.rs index 8633e5bf8f..837a035642 100644 --- a/apollo-router/src/plugins/telemetry/tracing/datadog/mod.rs +++ b/apollo-router/src/plugins/telemetry/tracing/datadog/mod.rs @@ -10,17 +10,18 @@ use std::time::Duration; pub(crate) use agent_sampling::DatadogAgentSampling; use ahash::HashMap; use ahash::HashMapExt; -use futures::future::BoxFuture; use http::Uri; use opentelemetry::Key; use opentelemetry::KeyValue; use opentelemetry::Value; use opentelemetry::trace::SpanContext; use opentelemetry::trace::SpanKind; +use opentelemetry_datadog::DatadogTraceState; use opentelemetry_sdk::Resource; -use opentelemetry_sdk::export::trace::ExportResult; -use opentelemetry_sdk::export::trace::SpanData; -use opentelemetry_sdk::export::trace::SpanExporter; +use opentelemetry_sdk::error::OTelSdkResult; +use opentelemetry_sdk::trace::SpanData; +use opentelemetry_sdk::trace::SpanExporter; +use opentelemetry_sdk::trace::span_processor_with_async_runtime::BatchSpanProcessor; use opentelemetry_semantic_conventions::resource::SERVICE_NAME; use opentelemetry_semantic_conventions::resource::SERVICE_VERSION; use schemars::JsonSchema; @@ -46,8 +47,6 @@ use 
crate::plugins::telemetry::reload::tracing::TracingBuilder; use crate::plugins::telemetry::reload::tracing::TracingConfigurator; use crate::plugins::telemetry::tracing::BatchProcessorConfig; use crate::plugins::telemetry::tracing::SpanProcessorExt; -use crate::plugins::telemetry::tracing::datadog_exporter; -use crate::plugins::telemetry::tracing::datadog_exporter::DatadogTraceState; fn default_resource_mappings() -> HashMap { let mut map = HashMap::with_capacity(7); @@ -145,7 +144,7 @@ impl TracingConfigurator for Config { .endpoint .to_full_uri(&Uri::from_static(DEFAULT_ENDPOINT)); - let exporter = datadog_exporter::new_pipeline() + let exporter = opentelemetry_datadog::new_pipeline() .with_agent_endpoint(endpoint.to_string().trim_end_matches('/')) .with(&resource_mappings, |builder, resource_mappings| { let resource_mappings = resource_mappings.clone(); @@ -163,6 +162,7 @@ impl TracingConfigurator for Config { && let Some(KeyValue { key: _, value: Value::String(v), + .. }) = span.attributes.iter().find(|kv| kv.key == *mapping) { return v.as_str(); @@ -188,20 +188,20 @@ impl TracingConfigurator for Config { &span.name }) .with( - &common.resource.get(SERVICE_NAME.into()), + &common.resource.get(&opentelemetry::Key::new(SERVICE_NAME)), |builder, service_name| { // Datadog exporter incorrectly ignores the service name in the resource // Set it explicitly here builder.with_service_name(service_name.as_str()) }, ) - .with(&common.resource.get(ENV_KEY), |builder, env| { + .with(&common.resource.get(&ENV_KEY), |builder, env| { builder.with_env(env.as_str()) }) .with_version( common .resource - .get(SERVICE_VERSION.into()) + .get(&opentelemetry::Key::from(SERVICE_VERSION)) .expect("cargo version is set as a resource default;qed") .to_string(), ) @@ -225,13 +225,11 @@ impl TracingConfigurator for Config { }; let named_exporter = NamedSpanExporter::new(wrapper, "datadog"); - let batch_processor = opentelemetry_sdk::trace::BatchSpanProcessor::builder( - named_exporter, - 
NamedTokioRuntime::new("datadog-tracing"), - ) - .with_batch_config(self.batch_processor.clone().into()) - .build() - .filtered(); + let batch_processor = + BatchSpanProcessor::builder(named_exporter, NamedTokioRuntime::new("datadog-tracing")) + .with_batch_config(self.batch_processor.clone().into()) + .build() + .filtered(); if builder .tracing_common() @@ -247,7 +245,7 @@ impl TracingConfigurator for Config { } struct ExporterWrapper { - delegate: datadog_exporter::DatadogExporter, + delegate: opentelemetry_datadog::DatadogExporter, span_metrics: HashMap, } @@ -258,7 +256,10 @@ impl Debug for ExporterWrapper { } impl SpanExporter for ExporterWrapper { - fn export(&mut self, mut batch: Vec) -> BoxFuture<'static, ExportResult> { + fn export( + &self, + mut batch: Vec, + ) -> impl std::future::Future + Send { // Here we do some special processing of the spans before passing them to the delegate // In particular we default the span.kind to the span kind, and also override the trace measure status if we need to. 
for span in &mut batch { @@ -303,10 +304,10 @@ impl SpanExporter for ExporterWrapper { } self.delegate.export(batch) } - fn shutdown(&mut self) { + fn shutdown(&mut self) -> OTelSdkResult { self.delegate.shutdown() } - fn force_flush(&mut self) -> BoxFuture<'static, ExportResult> { + fn force_flush(&mut self) -> OTelSdkResult { self.delegate.force_flush() } fn set_resource(&mut self, resource: &Resource) { diff --git a/apollo-router/src/plugins/telemetry/tracing/datadog/span_processor.rs b/apollo-router/src/plugins/telemetry/tracing/datadog/span_processor.rs index e362ca967c..03806e8aa5 100644 --- a/apollo-router/src/plugins/telemetry/tracing/datadog/span_processor.rs +++ b/apollo-router/src/plugins/telemetry/tracing/datadog/span_processor.rs @@ -1,9 +1,9 @@ use opentelemetry::Context; use opentelemetry::trace::SpanContext; -use opentelemetry::trace::TraceResult; use opentelemetry_sdk::Resource; -use opentelemetry_sdk::export::trace::SpanData; +use opentelemetry_sdk::error::OTelSdkResult; use opentelemetry_sdk::trace::Span; +use opentelemetry_sdk::trace::SpanData; use opentelemetry_sdk::trace::SpanProcessor; /// When using the Datadog agent we need spans to always be exported. However, the batch span processor will only export spans that are sampled. 
@@ -39,17 +39,21 @@ impl SpanProcessor for DatadogSpanProcessor { self.delegate.on_end(span) } - fn force_flush(&self) -> TraceResult<()> { + fn force_flush(&self) -> OTelSdkResult { self.delegate.force_flush() } - fn shutdown(&self) -> TraceResult<()> { + fn shutdown(&self) -> OTelSdkResult { self.delegate.shutdown() } fn set_resource(&mut self, resource: &Resource) { self.delegate.set_resource(resource) } + + fn shutdown_with_timeout(&self, timeout: std::time::Duration) -> OTelSdkResult { + self.delegate.shutdown_with_timeout(timeout) + } } #[cfg(test)] @@ -89,11 +93,15 @@ mod tests { self.spans.lock().push(span); } - fn force_flush(&self) -> TraceResult<()> { + fn force_flush(&self) -> OTelSdkResult { + Ok(()) + } + + fn shutdown(&self) -> OTelSdkResult { Ok(()) } - fn shutdown(&self) -> TraceResult<()> { + fn shutdown_with_timeout(&self, _timeout: std::time::Duration) -> OTelSdkResult { Ok(()) } } @@ -120,7 +128,7 @@ mod tests { events: SpanEvents::default(), links: SpanLinks::default(), status: Default::default(), - instrumentation_lib: Default::default(), + instrumentation_scope: Default::default(), dropped_attributes_count: 0, }; diff --git a/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/README.md b/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/README.md deleted file mode 100644 index eeb009b68e..0000000000 --- a/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/README.md +++ /dev/null @@ -1,5 +0,0 @@ -This is temporary interning of the datadog exporter until we update otel. -The newest version of the exporter does support setting span metrics, but we -can't upgrade until we upgrade Otel. - -Once otel is upgraded, we can remove this code and use the exporter directly. 
\ No newline at end of file diff --git a/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/intern.rs b/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/intern.rs deleted file mode 100644 index d63fb9a42e..0000000000 --- a/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/intern.rs +++ /dev/null @@ -1,517 +0,0 @@ -use std::cell::RefCell; -use std::hash::BuildHasherDefault; -use std::hash::Hash; - -use indexmap::set::IndexSet; -use opentelemetry::StringValue; -use opentelemetry::Value; -use rmp::encode::RmpWrite; -use rmp::encode::ValueWriteError; - -type InternHasher = ahash::AHasher; - -#[derive(PartialEq)] -pub(crate) enum InternValue<'a> { - RegularString(&'a str), - OpenTelemetryValue(&'a Value), -} - -impl Hash for InternValue<'_> { - fn hash(&self, state: &mut H) { - match &self { - InternValue::RegularString(s) => s.hash(state), - InternValue::OpenTelemetryValue(v) => match v { - Value::Bool(x) => x.hash(state), - Value::I64(x) => x.hash(state), - Value::String(x) => x.hash(state), - Value::F64(x) => x.to_bits().hash(state), - Value::Array(a) => match a { - opentelemetry::Array::Bool(x) => x.hash(state), - opentelemetry::Array::I64(x) => x.hash(state), - opentelemetry::Array::F64(floats) => { - for f in floats { - f.to_bits().hash(state); - } - } - opentelemetry::Array::String(x) => x.hash(state), - }, - }, - } - } -} - -impl Eq for InternValue<'_> {} - -const BOOLEAN_TRUE: &str = "true"; -const BOOLEAN_FALSE: &str = "false"; -const LEFT_SQUARE_BRACKET: u8 = b'['; -const RIGHT_SQUARE_BRACKET: u8 = b']'; -const COMMA: u8 = b','; -const DOUBLE_QUOTE: u8 = b'"'; -const EMPTY_ARRAY: &str = "[]"; - -trait WriteAsLiteral { - fn write_to(&self, buffer: &mut Vec); -} - -impl WriteAsLiteral for bool { - fn write_to(&self, buffer: &mut Vec) { - buffer.extend_from_slice(if *self { BOOLEAN_TRUE } else { BOOLEAN_FALSE }.as_bytes()); - } -} - -impl WriteAsLiteral for i64 { - fn write_to(&self, buffer: &mut Vec) { 
- buffer.extend_from_slice(itoa::Buffer::new().format(*self).as_bytes()); - } -} - -impl WriteAsLiteral for f64 { - fn write_to(&self, buffer: &mut Vec) { - buffer.extend_from_slice(ryu::Buffer::new().format(*self).as_bytes()); - } -} - -impl WriteAsLiteral for StringValue { - fn write_to(&self, buffer: &mut Vec) { - buffer.push(DOUBLE_QUOTE); - buffer.extend_from_slice(self.as_str().as_bytes()); - buffer.push(DOUBLE_QUOTE); - } -} - -impl InternValue<'_> { - pub(crate) fn write_as_str( - &self, - payload: &mut W, - reusable_buffer: &mut Vec, - ) -> Result<(), ValueWriteError> { - match self { - InternValue::RegularString(x) => rmp::encode::write_str(payload, x), - InternValue::OpenTelemetryValue(v) => match v { - Value::Bool(x) => { - rmp::encode::write_str(payload, if *x { BOOLEAN_TRUE } else { BOOLEAN_FALSE }) - } - Value::I64(x) => rmp::encode::write_str(payload, itoa::Buffer::new().format(*x)), - Value::F64(x) => rmp::encode::write_str(payload, ryu::Buffer::new().format(*x)), - Value::String(x) => rmp::encode::write_str(payload, x.as_ref()), - Value::Array(array) => match array { - opentelemetry::Array::Bool(x) => { - Self::write_generic_array(payload, reusable_buffer, x) - } - opentelemetry::Array::I64(x) => { - Self::write_generic_array(payload, reusable_buffer, x) - } - opentelemetry::Array::F64(x) => { - Self::write_generic_array(payload, reusable_buffer, x) - } - opentelemetry::Array::String(x) => { - Self::write_generic_array(payload, reusable_buffer, x) - } - }, - }, - } - } - - fn write_empty_array(payload: &mut W) -> Result<(), ValueWriteError> { - rmp::encode::write_str(payload, EMPTY_ARRAY) - } - - fn write_buffer_as_string( - payload: &mut W, - reusable_buffer: &[u8], - ) -> Result<(), ValueWriteError> { - rmp::encode::write_str_len(payload, reusable_buffer.len() as u32)?; - payload - .write_bytes(reusable_buffer) - .map_err(ValueWriteError::InvalidDataWrite) - } - - fn write_generic_array( - payload: &mut W, - reusable_buffer: &mut Vec, - array: 
&[T], - ) -> Result<(), ValueWriteError> { - if array.is_empty() { - return Self::write_empty_array(payload); - } - - reusable_buffer.clear(); - reusable_buffer.push(LEFT_SQUARE_BRACKET); - - array[0].write_to(reusable_buffer); - - for value in array[1..].iter() { - reusable_buffer.push(COMMA); - value.write_to(reusable_buffer); - } - - reusable_buffer.push(RIGHT_SQUARE_BRACKET); - - Self::write_buffer_as_string(payload, reusable_buffer) - } -} - -pub(crate) struct StringInterner<'a> { - data: IndexSet, BuildHasherDefault>, -} - -impl<'a> StringInterner<'a> { - pub(crate) fn new() -> StringInterner<'a> { - StringInterner { - data: IndexSet::with_capacity_and_hasher(128, BuildHasherDefault::default()), - } - } - - pub(crate) fn intern(&mut self, data: &'a str) -> u32 { - if let Some(idx) = self.data.get_index_of(&InternValue::RegularString(data)) { - return idx as u32; - } - self.data.insert_full(InternValue::RegularString(data)).0 as u32 - } - - pub(crate) fn intern_value(&mut self, data: &'a Value) -> u32 { - if let Some(idx) = self - .data - .get_index_of(&InternValue::OpenTelemetryValue(data)) - { - return idx as u32; - } - self.data - .insert_full(InternValue::OpenTelemetryValue(data)) - .0 as u32 - } - - pub(crate) fn write_dictionary( - &self, - payload: &mut W, - ) -> Result<(), ValueWriteError> { - thread_local! 
{ - static BUFFER: RefCell> = RefCell::new(Vec::with_capacity(4096)); - } - - BUFFER.with(|cell| { - let reusable_buffer = &mut cell.borrow_mut(); - rmp::encode::write_array_len(payload, self.data.len() as u32)?; - for data in self.data.iter() { - data.write_as_str(payload, reusable_buffer)?; - } - - Ok(()) - }) - } -} - -#[cfg(test)] -mod tests { - use opentelemetry::Array; - - use super::*; - - #[test] - fn test_intern() { - let a = "a".to_string(); - let b = "b"; - let c = "c"; - - let mut intern = StringInterner::new(); - let a_idx = intern.intern(a.as_str()); - let b_idx = intern.intern(b); - let c_idx = intern.intern(c); - let d_idx = intern.intern(a.as_str()); - let e_idx = intern.intern(c); - - assert_eq!(a_idx, 0); - assert_eq!(b_idx, 1); - assert_eq!(c_idx, 2); - assert_eq!(d_idx, a_idx); - assert_eq!(e_idx, c_idx); - } - - #[test] - fn test_intern_bool() { - let a = Value::Bool(true); - let b = Value::Bool(false); - let c = "c"; - - let mut intern = StringInterner::new(); - let a_idx = intern.intern_value(&a); - let b_idx = intern.intern_value(&b); - let c_idx = intern.intern(c); - let d_idx = intern.intern_value(&a); - let e_idx = intern.intern(c); - - assert_eq!(a_idx, 0); - assert_eq!(b_idx, 1); - assert_eq!(c_idx, 2); - assert_eq!(d_idx, a_idx); - assert_eq!(e_idx, c_idx); - } - - #[test] - fn test_intern_i64() { - let a = Value::I64(1234567890); - let b = Value::I64(-1234567890); - let c = "c"; - let d = Value::I64(1234567890); - - let mut intern = StringInterner::new(); - let a_idx = intern.intern_value(&a); - let b_idx = intern.intern_value(&b); - let c_idx = intern.intern(c); - let d_idx = intern.intern_value(&a); - let e_idx = intern.intern(c); - let f_idx = intern.intern_value(&d); - - assert_eq!(a_idx, 0); - assert_eq!(b_idx, 1); - assert_eq!(c_idx, 2); - assert_eq!(d_idx, a_idx); - assert_eq!(e_idx, c_idx); - assert_eq!(f_idx, a_idx); - } - - #[test] - fn test_intern_f64() { - let a = Value::F64(123456.7890); - let b = 
Value::F64(-1234567.890); - let c = "c"; - let d = Value::F64(-1234567.890); - - let mut intern = StringInterner::new(); - let a_idx = intern.intern_value(&a); - let b_idx = intern.intern_value(&b); - let c_idx = intern.intern(c); - let d_idx = intern.intern_value(&a); - let e_idx = intern.intern(c); - let f_idx = intern.intern_value(&d); - - assert_eq!(a_idx, 0); - assert_eq!(b_idx, 1); - assert_eq!(c_idx, 2); - assert_eq!(d_idx, a_idx); - assert_eq!(e_idx, c_idx); - assert_eq!(b_idx, f_idx); - } - - #[test] - fn test_intern_array_of_booleans() { - let a = Value::Array(Array::Bool(vec![true, false])); - let b = Value::Array(Array::Bool(vec![false, true])); - let c = "c"; - let d = Value::Array(Array::Bool(vec![])); - let f = Value::Array(Array::Bool(vec![false, true])); - - let mut intern = StringInterner::new(); - let a_idx = intern.intern_value(&a); - let b_idx = intern.intern_value(&b); - let c_idx = intern.intern(c); - let d_idx = intern.intern_value(&a); - let e_idx = intern.intern(c); - let f_idx = intern.intern_value(&d); - let g_idx = intern.intern_value(&f); - - assert_eq!(a_idx, 0); - assert_eq!(b_idx, 1); - assert_eq!(c_idx, 2); - assert_eq!(d_idx, a_idx); - assert_eq!(e_idx, c_idx); - assert_eq!(f_idx, 3); - assert_eq!(g_idx, b_idx); - } - - #[test] - fn test_intern_array_of_i64() { - let a = Value::Array(Array::I64(vec![123, -123])); - let b = Value::Array(Array::I64(vec![-123, 123])); - let c = "c"; - let d = Value::Array(Array::I64(vec![])); - let f = Value::Array(Array::I64(vec![-123, 123])); - - let mut intern = StringInterner::new(); - let a_idx = intern.intern_value(&a); - let b_idx = intern.intern_value(&b); - let c_idx = intern.intern(c); - let d_idx = intern.intern_value(&a); - let e_idx = intern.intern(c); - let f_idx = intern.intern_value(&d); - let g_idx = intern.intern_value(&f); - - assert_eq!(a_idx, 0); - assert_eq!(b_idx, 1); - assert_eq!(c_idx, 2); - assert_eq!(d_idx, a_idx); - assert_eq!(e_idx, c_idx); - assert_eq!(f_idx, 3); - 
assert_eq!(g_idx, b_idx); - } - - #[test] - fn test_intern_array_of_f64() { - let f1 = 123.0f64; - let f2 = 0f64; - - let a = Value::Array(Array::F64(vec![f1, f2])); - let b = Value::Array(Array::F64(vec![f2, f1])); - let c = "c"; - let d = Value::Array(Array::F64(vec![])); - let f = Value::Array(Array::F64(vec![f2, f1])); - - let mut intern = StringInterner::new(); - let a_idx = intern.intern_value(&a); - let b_idx = intern.intern_value(&b); - let c_idx = intern.intern(c); - let d_idx = intern.intern_value(&a); - let e_idx = intern.intern(c); - let f_idx = intern.intern_value(&d); - let g_idx = intern.intern_value(&f); - - assert_eq!(a_idx, 0); - assert_eq!(b_idx, 1); - assert_eq!(c_idx, 2); - assert_eq!(d_idx, a_idx); - assert_eq!(e_idx, c_idx); - assert_eq!(f_idx, 3); - assert_eq!(g_idx, b_idx); - } - - #[test] - fn test_intern_array_of_string() { - let s1 = "a"; - let s2 = "b"; - - let a = Value::Array(Array::String(vec![ - StringValue::from(s1), - StringValue::from(s2), - ])); - let b = Value::Array(Array::String(vec![ - StringValue::from(s2), - StringValue::from(s1), - ])); - let c = "c"; - let d = Value::Array(Array::String(vec![])); - let f = Value::Array(Array::String(vec![ - StringValue::from(s2), - StringValue::from(s1), - ])); - - let mut intern = StringInterner::new(); - let a_idx = intern.intern_value(&a); - let b_idx = intern.intern_value(&b); - let c_idx = intern.intern(c); - let d_idx = intern.intern_value(&a); - let e_idx = intern.intern(c); - let f_idx = intern.intern_value(&d); - let g_idx = intern.intern_value(&f); - - assert_eq!(a_idx, 0); - assert_eq!(b_idx, 1); - assert_eq!(c_idx, 2); - assert_eq!(d_idx, a_idx); - assert_eq!(e_idx, c_idx); - assert_eq!(f_idx, 3); - assert_eq!(g_idx, b_idx); - } - - #[test] - fn test_write_boolean_literal() { - let mut buffer: Vec = vec![]; - - true.write_to(&mut buffer); - - assert_eq!(&buffer[..], b"true"); - - buffer.clear(); - - false.write_to(&mut buffer); - - assert_eq!(&buffer[..], b"false"); - } - - 
#[test] - fn test_write_i64_literal() { - let mut buffer: Vec = vec![]; - - 1234567890i64.write_to(&mut buffer); - - assert_eq!(&buffer[..], b"1234567890"); - - buffer.clear(); - - (-1234567890i64).write_to(&mut buffer); - - assert_eq!(&buffer[..], b"-1234567890"); - } - - #[test] - fn test_write_f64_literal() { - let mut buffer: Vec = vec![]; - - let f1 = 12345.678f64; - let f2 = -12345.678f64; - - f1.write_to(&mut buffer); - - assert_eq!(&buffer[..], format!("{f1}").as_bytes()); - - buffer.clear(); - - f2.write_to(&mut buffer); - - assert_eq!(&buffer[..], format!("{f2}").as_bytes()); - } - - #[test] - fn test_write_string_literal() { - let mut buffer: Vec = vec![]; - - let s1 = StringValue::from("abc"); - let s2 = StringValue::from(""); - - s1.write_to(&mut buffer); - - assert_eq!(&buffer[..], format!("\"{s1}\"").as_bytes()); - - buffer.clear(); - - s2.write_to(&mut buffer); - - assert_eq!(&buffer[..], format!("\"{s2}\"").as_bytes()); - } - - fn test_encoding_intern_value(value: InternValue<'_>) { - let mut expected: Vec = vec![]; - let mut actual: Vec = vec![]; - - let mut buffer = vec![]; - - value.write_as_str(&mut actual, &mut buffer).unwrap(); - - let InternValue::OpenTelemetryValue(value) = value else { - return; - }; - - rmp::encode::write_str(&mut expected, value.as_str().as_ref()).unwrap(); - - assert_eq!(expected, actual); - } - - #[test] - fn test_encode_boolean() { - test_encoding_intern_value(InternValue::OpenTelemetryValue(&Value::Bool(true))); - test_encoding_intern_value(InternValue::OpenTelemetryValue(&Value::Bool(false))); - } - - #[test] - fn test_encode_i64() { - test_encoding_intern_value(InternValue::OpenTelemetryValue(&Value::I64(123))); - test_encoding_intern_value(InternValue::OpenTelemetryValue(&Value::I64(0))); - test_encoding_intern_value(InternValue::OpenTelemetryValue(&Value::I64(-123))); - } - - #[test] - fn test_encode_f64() { - test_encoding_intern_value(InternValue::OpenTelemetryValue(&Value::F64(123.456f64))); - 
test_encoding_intern_value(InternValue::OpenTelemetryValue(&Value::F64(-123.456f64))); - } -} diff --git a/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/mod.rs b/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/mod.rs deleted file mode 100644 index 5bace8a3f9..0000000000 --- a/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/mod.rs +++ /dev/null @@ -1,534 +0,0 @@ -mod intern; -mod model; - -use std::borrow::Cow; -use std::fmt::Debug; -use std::fmt::Formatter; -use std::sync::Arc; -use std::time::Duration; - -use futures::future::BoxFuture; -pub use model::ApiVersion; -pub use model::Error; -pub use model::FieldMappingFn; -use opentelemetry::KeyValue; -use opentelemetry::global; -use opentelemetry::trace::TraceError; -use opentelemetry::trace::TracerProvider; -use opentelemetry_http::HttpClient; -use opentelemetry_http::ResponseExt; -use opentelemetry_sdk::Resource; -use opentelemetry_sdk::export::trace::ExportResult; -use opentelemetry_sdk::export::trace::SpanData; -use opentelemetry_sdk::export::trace::SpanExporter; -use opentelemetry_sdk::resource::ResourceDetector; -use opentelemetry_sdk::resource::SdkProvidedResourceDetector; -use opentelemetry_sdk::runtime::RuntimeChannel; -use opentelemetry_sdk::trace::Config; -use opentelemetry_sdk::trace::Tracer; -use opentelemetry_semantic_conventions as semcov; -use url::Url; - -use self::model::unified_tags::UnifiedTags; -use crate::plugins::telemetry::tracing::datadog_exporter::exporter::model::FieldMapping; - -/// Default Datadog collector endpoint -const DEFAULT_AGENT_ENDPOINT: &str = "http://127.0.0.1:8126"; - -/// Header name used to inform the Datadog agent of the number of traces in the payload -const DATADOG_TRACE_COUNT_HEADER: &str = "X-Datadog-Trace-Count"; - -/// Header name use to inform datadog as to what version -const DATADOG_META_LANG_HEADER: &str = "Datadog-Meta-Lang"; -const DATADOG_META_TRACER_VERSION_HEADER: &str = 
"Datadog-Meta-Tracer-Version"; - -// Struct to hold the mapping between Opentelemetry spans and datadog spans. -pub struct Mapping { - resource: Option, - name: Option, - service_name: Option, -} - -impl Mapping { - pub fn new( - resource: Option, - name: Option, - service_name: Option, - ) -> Self { - Mapping { - resource, - name, - service_name, - } - } - pub fn empty() -> Self { - Self::new(None, None, None) - } -} - -/// Datadog span exporter -pub struct DatadogExporter { - client: Arc, - request_url: http::Uri, - model_config: ModelConfig, - api_version: ApiVersion, - mapping: Mapping, - unified_tags: UnifiedTags, - resource: Option, -} - -impl DatadogExporter { - fn new( - model_config: ModelConfig, - request_url: http::Uri, - api_version: ApiVersion, - client: Arc, - mapping: Mapping, - unified_tags: UnifiedTags, - ) -> Self { - DatadogExporter { - client, - request_url, - model_config, - api_version, - mapping, - unified_tags, - resource: None, - } - } - - fn build_request( - &self, - mut batch: Vec, - ) -> Result>, TraceError> { - let traces: Vec<&[SpanData]> = group_into_traces(&mut batch); - let trace_count = traces.len(); - let data = self.api_version.encode( - &self.model_config, - traces, - &self.mapping, - &self.unified_tags, - self.resource.as_ref(), - )?; - let req = http::Request::builder() - .method(http::Method::POST) - .uri(self.request_url.clone()) - .header(http::header::CONTENT_TYPE, self.api_version.content_type()) - .header(DATADOG_TRACE_COUNT_HEADER, trace_count) - .header(DATADOG_META_LANG_HEADER, "rust") - .header( - DATADOG_META_TRACER_VERSION_HEADER, - env!("CARGO_PKG_VERSION"), - ) - .body(data) - .map_err::(Into::into)?; - - Ok(req) - } -} - -impl Debug for DatadogExporter { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("DatadogExporter") - .field("model_config", &self.model_config) - .field("request_url", &self.request_url) - .field("api_version", &self.api_version) - .field("client", &self.client) - 
.field("resource_mapping", &mapping_debug(&self.mapping.resource)) - .field("name_mapping", &mapping_debug(&self.mapping.name)) - .field( - "service_name_mapping", - &mapping_debug(&self.mapping.service_name), - ) - .finish() - } -} - -/// Create a new Datadog exporter pipeline builder. -pub fn new_pipeline() -> DatadogPipelineBuilder { - DatadogPipelineBuilder::default() -} - -/// Builder for `ExporterConfig` struct. -pub struct DatadogPipelineBuilder { - agent_endpoint: String, - trace_config: Option, - api_version: ApiVersion, - client: Option>, - mapping: Mapping, - unified_tags: UnifiedTags, -} - -impl Default for DatadogPipelineBuilder { - fn default() -> Self { - DatadogPipelineBuilder { - agent_endpoint: DEFAULT_AGENT_ENDPOINT.to_string(), - trace_config: None, - mapping: Mapping::empty(), - api_version: ApiVersion::Version05, - unified_tags: UnifiedTags::new(), - client: None, - } - } -} - -impl Debug for DatadogPipelineBuilder { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("DatadogExporter") - .field("agent_endpoint", &self.agent_endpoint) - .field("trace_config", &self.trace_config) - .field("client", &self.client) - .field("resource_mapping", &mapping_debug(&self.mapping.resource)) - .field("name_mapping", &mapping_debug(&self.mapping.name)) - .field( - "service_name_mapping", - &mapping_debug(&self.mapping.service_name), - ) - .finish() - } -} - -impl DatadogPipelineBuilder { - /// Building a new exporter. - /// - /// This is useful if you are manually constructing a pipeline. 
- pub fn build_exporter(mut self) -> Result { - let (_, service_name) = self.build_config_and_service_name(); - self.build_exporter_with_service_name(service_name) - } - - fn build_config_and_service_name(&mut self) -> (Config, String) { - let service_name = self.unified_tags.service(); - if let Some(service_name) = service_name { - let config = if let Some(mut cfg) = self.trace_config.take() { - cfg.resource = Cow::Owned(Resource::new( - cfg.resource - .iter() - .filter(|(k, _v)| k.as_str() != semcov::resource::SERVICE_NAME) - .map(|(k, v)| KeyValue::new(k.clone(), v.clone())), - )); - cfg - } else { - Config::default().with_resource(Resource::empty()) - }; - (config, service_name) - } else { - let service_name = SdkProvidedResourceDetector - .detect(Duration::from_secs(0)) - .get(semcov::resource::SERVICE_NAME.into()) - .unwrap() - .to_string(); - ( - // use a empty resource to prevent TracerProvider to assign a service name. - Config::default().with_resource(Resource::empty()), - service_name, - ) - } - } - - // parse the endpoint and append the path based on versions. - // keep the query and host the same. - fn build_endpoint(agent_endpoint: &str, version: &str) -> Result { - // build agent endpoint based on version - let mut endpoint = agent_endpoint - .parse::() - .map_err::(Into::into)?; - let mut paths = endpoint - .path_segments() - .map(|c| c.filter(|s| !s.is_empty()).collect::>()) - .unwrap_or_default(); - paths.push(version); - - let path_str = paths.join("/"); - endpoint.set_path(path_str.as_str()); - - Ok(endpoint.as_str().parse().map_err::(Into::into)?) 
- } - - fn build_exporter_with_service_name( - self, - service_name: String, - ) -> Result { - if let Some(client) = self.client { - let model_config = ModelConfig { service_name }; - - let exporter = DatadogExporter::new( - model_config, - Self::build_endpoint(&self.agent_endpoint, self.api_version.path())?, - self.api_version, - client, - self.mapping, - self.unified_tags, - ); - Ok(exporter) - } else { - Err(Error::NoHttpClient.into()) - } - } - - /// Install the Datadog trace exporter pipeline using a simple span processor. - pub fn install_simple(mut self) -> Result { - let (config, service_name) = self.build_config_and_service_name(); - let exporter = self.build_exporter_with_service_name(service_name)?; - let mut provider_builder = - opentelemetry_sdk::trace::TracerProvider::builder().with_simple_exporter(exporter); - provider_builder = provider_builder.with_config(config); - let provider = provider_builder.build(); - let tracer = provider - .tracer_builder("opentelemetry-datadog") - .with_version(env!("CARGO_PKG_VERSION")) - .with_schema_url(semcov::SCHEMA_URL) - .build(); - let _ = global::set_tracer_provider(provider); - Ok(tracer) - } - - /// Install the Datadog trace exporter pipeline using a batch span processor with the specified - /// runtime. 
- pub fn install_batch(mut self, runtime: R) -> Result { - let (config, service_name) = self.build_config_and_service_name(); - let exporter = self.build_exporter_with_service_name(service_name)?; - let mut provider_builder = opentelemetry_sdk::trace::TracerProvider::builder() - .with_batch_exporter(exporter, runtime); - provider_builder = provider_builder.with_config(config); - let provider = provider_builder.build(); - let tracer = provider - .tracer_builder("opentelemetry-datadog") - .with_version(env!("CARGO_PKG_VERSION")) - .with_schema_url(semcov::SCHEMA_URL) - .build(); - let _ = global::set_tracer_provider(provider); - Ok(tracer) - } - - /// Assign the service name under which to group traces - pub fn with_service_name>(mut self, service_name: T) -> Self { - self.unified_tags.set_service(Some(service_name.into())); - self - } - - /// Assign the version under which to group traces - pub fn with_version>(mut self, version: T) -> Self { - self.unified_tags.set_version(Some(version.into())); - self - } - - /// Assign the env under which to group traces - pub fn with_env>(mut self, env: T) -> Self { - self.unified_tags.set_env(Some(env.into())); - self - } - - /// Assign the Datadog collector endpoint. - /// - /// The endpoint of the datadog agent, by default it is `http://127.0.0.1:8126`. - pub fn with_agent_endpoint>(mut self, endpoint: T) -> Self { - self.agent_endpoint = endpoint.into(); - self - } - - /// Choose the http client used by uploader - pub fn with_http_client(mut self, client: T) -> Self { - self.client = Some(Arc::new(client)); - self - } - - /// Assign the SDK trace configuration - pub fn with_trace_config(mut self, config: Config) -> Self { - self.trace_config = Some(config); - self - } - - /// Set version of Datadog trace ingestion API - pub fn with_api_version(mut self, api_version: ApiVersion) -> Self { - self.api_version = api_version; - self - } - - /// Custom the value used for `resource` field in datadog spans. 
- /// See [`FieldMappingFn`] for details. - pub fn with_resource_mapping(mut self, f: F) -> Self - where - F: for<'a> Fn(&'a SpanData, &'a ModelConfig) -> &'a str + Send + Sync + 'static, - { - self.mapping.resource = Some(Arc::new(f)); - self - } - - /// Custom the value used for `name` field in datadog spans. - /// See [`FieldMappingFn`] for details. - pub fn with_name_mapping(mut self, f: F) -> Self - where - F: for<'a> Fn(&'a SpanData, &'a ModelConfig) -> &'a str + Send + Sync + 'static, - { - self.mapping.name = Some(Arc::new(f)); - self - } - - /// Custom the value used for `service_name` field in datadog spans. - /// See [`FieldMappingFn`] for details. - pub fn with_service_name_mapping(mut self, f: F) -> Self - where - F: for<'a> Fn(&'a SpanData, &'a ModelConfig) -> &'a str + Send + Sync + 'static, - { - self.mapping.service_name = Some(Arc::new(f)); - self - } -} - -fn group_into_traces(spans: &mut [SpanData]) -> Vec<&[SpanData]> { - if spans.is_empty() { - return vec![]; - } - - spans.sort_by_key(|x| x.span_context.trace_id().to_bytes()); - - let mut traces: Vec<&[SpanData]> = Vec::with_capacity(spans.len()); - - let mut start = 0; - let mut start_trace_id = spans[start].span_context.trace_id(); - for (idx, span) in spans.iter().enumerate() { - let current_trace_id = span.span_context.trace_id(); - if start_trace_id != current_trace_id { - traces.push(&spans[start..idx]); - start = idx; - start_trace_id = current_trace_id; - } - } - traces.push(&spans[start..]); - traces -} - -async fn send_request( - client: Arc, - request: http::Request>, -) -> ExportResult { - let _ = client.send(request).await?.error_for_status()?; - Ok(()) -} - -impl SpanExporter for DatadogExporter { - /// Export spans to datadog-agent - fn export(&mut self, batch: Vec) -> BoxFuture<'static, ExportResult> { - let request = match self.build_request(batch) { - Ok(req) => req, - Err(err) => return Box::pin(std::future::ready(Err(err))), - }; - - let client = self.client.clone(); - 
Box::pin(send_request(client, request)) - } - - fn set_resource(&mut self, resource: &Resource) { - self.resource = Some(resource.clone()); - } -} - -/// Helper struct to custom the mapping between Opentelemetry spans and datadog spans. -/// -/// This struct will be passed to [`FieldMappingFn`] -#[derive(Default, Debug)] -#[non_exhaustive] -pub struct ModelConfig { - pub service_name: String, -} - -fn mapping_debug(f: &Option) -> String { - if f.is_some() { - "custom mapping" - } else { - "default mapping" - } - .to_string() -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::plugins::telemetry::tracing::datadog_exporter::ApiVersion::Version05; - use crate::plugins::telemetry::tracing::datadog_exporter::exporter::model::tests::get_span; - - #[test] - fn test_out_of_order_group() { - let mut batch = vec![get_span(1, 1, 1), get_span(2, 2, 2), get_span(1, 1, 3)]; - let expected = vec![ - vec![get_span(1, 1, 1), get_span(1, 1, 3)], - vec![get_span(2, 2, 2)], - ]; - - let mut traces = group_into_traces(&mut batch); - // We need to sort the output in order to compare, but this is not required by the Datadog agent - traces.sort_by_key(|t| u128::from_be_bytes(t[0].span_context.trace_id().to_bytes())); - - assert_eq!(traces, expected); - } - - #[test] - fn test_agent_endpoint_with_api_version() { - let with_tail_slash = - DatadogPipelineBuilder::build_endpoint("http://localhost:8126/", Version05.path()); - let without_tail_slash = - DatadogPipelineBuilder::build_endpoint("http://localhost:8126", Version05.path()); - let with_query = DatadogPipelineBuilder::build_endpoint( - "http://localhost:8126?api_key=123", - Version05.path(), - ); - let invalid = DatadogPipelineBuilder::build_endpoint( - "http://localhost:klsajfjksfh", - Version05.path(), - ); - - assert_eq!( - with_tail_slash.unwrap().to_string(), - "http://localhost:8126/v0.5/traces" - ); - assert_eq!( - without_tail_slash.unwrap().to_string(), - "http://localhost:8126/v0.5/traces" - ); - assert_eq!( - 
with_query.unwrap().to_string(), - "http://localhost:8126/v0.5/traces?api_key=123" - ); - assert!(invalid.is_err()) - } - - #[derive(Debug)] - struct DummyClient; - - #[async_trait::async_trait] - impl HttpClient for DummyClient { - async fn send( - &self, - _request: http::Request>, - ) -> Result, opentelemetry_http::HttpError> { - Ok(http::Response::new("dummy response".into())) - } - } - - #[test] - fn test_custom_http_client() { - new_pipeline() - .with_http_client(DummyClient) - .build_exporter() - .unwrap(); - } - - #[test] - fn test_install_simple() { - new_pipeline() - .with_service_name("test_service") - .with_http_client(DummyClient) - .install_simple() - .unwrap(); - } - - #[test] - fn test_install_batch() { - new_pipeline() - .with_service_name("test_service") - .with_http_client(DummyClient) - .install_batch(opentelemetry_sdk::runtime::AsyncStd {}) - .unwrap(); - } -} diff --git a/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/model/mod.rs b/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/model/mod.rs deleted file mode 100644 index dfd3649dd1..0000000000 --- a/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/model/mod.rs +++ /dev/null @@ -1,315 +0,0 @@ -use std::fmt::Debug; - -use http::uri; -use opentelemetry_sdk::Resource; -use opentelemetry_sdk::export::ExportError; -use opentelemetry_sdk::export::trace::SpanData; -use opentelemetry_sdk::export::trace::{self}; -use url::ParseError; - -use self::unified_tags::UnifiedTags; -use super::Mapping; -use crate::plugins::telemetry::tracing::datadog_exporter::ModelConfig; - -pub mod unified_tags; -mod v03; -mod v05; - -// todo: we should follow the same mapping defined in https://github.com/DataDog/datadog-agent/blob/main/pkg/trace/api/otlp.go - -// https://github.com/DataDog/dd-trace-js/blob/c89a35f7d27beb4a60165409376e170eacb194c5/packages/dd-trace/src/constants.js#L4 -static SAMPLING_PRIORITY_KEY: &str = "_sampling_priority_v1"; - -// 
https://github.com/DataDog/datadog-agent/blob/ec96f3c24173ec66ba235bda7710504400d9a000/pkg/trace/traceutil/span.go#L20 -static DD_MEASURED_KEY: &str = "_dd.measured"; - -/// Custom mapping between opentelemetry spans and datadog spans. -/// -/// User can provide custom function to change the mapping. It currently supports customizing the following -/// fields in Datadog span protocol. -/// -/// |field name|default value| -/// |---------------|-------------| -/// |service name| service name configuration from [`ModelConfig`]| -/// |name | opentelemetry instrumentation library name | -/// |resource| opentelemetry name| -/// -/// The function takes a reference to [`SpanData`]() and a reference to [`ModelConfig`]() as parameters. -/// It should return a `&str` which will be used as the value for the field. -/// -/// If no custom mapping is provided. Default mapping detailed above will be used. -/// -/// For example, -/// ```no_run -/// use opentelemetry_datadog::{ApiVersion, new_pipeline}; -/// fn main() -> Result<(), opentelemetry::trace::TraceError> { -/// let tracer = new_pipeline() -/// .with_service_name("my_app") -/// .with_api_version(ApiVersion::Version05) -/// // the custom mapping below will change the all spans' name to datadog spans -/// .with_name_mapping(|span, model_config|{ -/// "datadog spans" -/// }) -/// .with_agent_endpoint("http://localhost:8126") -/// .install_batch(opentelemetry_sdk::runtime::Tokio)?; -/// -/// Ok(()) -/// } -/// ``` -pub type FieldMappingFn = dyn for<'a> Fn(&'a SpanData, &'a ModelConfig) -> &'a str + Send + Sync; - -pub(crate) type FieldMapping = std::sync::Arc; - -// Datadog uses some magic tags in their models. There is no recommended mapping defined in -// opentelemetry spec. Below is default mapping we gonna uses. Users can override it by providing -// their own implementations. 
-fn default_service_name_mapping<'a>(_span: &'a SpanData, config: &'a ModelConfig) -> &'a str { - config.service_name.as_str() -} - -fn default_name_mapping<'a>(span: &'a SpanData, _config: &'a ModelConfig) -> &'a str { - span.instrumentation_lib.name.as_ref() -} - -fn default_resource_mapping<'a>(span: &'a SpanData, _config: &'a ModelConfig) -> &'a str { - span.name.as_ref() -} - -/// Wrap type for errors from opentelemetry datadog exporter -#[allow(clippy::enum_variant_names)] -#[derive(Debug, thiserror::Error)] -pub enum Error { - /// Message pack error - #[error("message pack error")] - MessagePackError, - /// No http client founded. User should provide one or enable features - #[error( - "http client must be set, users can enable reqwest or surf feature to use http client implementation within create" - )] - NoHttpClient, - /// Http requests failed with following errors - #[error(transparent)] - RequestError(#[from] http::Error), - /// The Uri was invalid - #[error("invalid url {0}")] - InvalidUri(String), - /// Other errors - #[error("{0}")] - Other(String), -} - -impl ExportError for Error { - fn exporter_name(&self) -> &'static str { - "datadog" - } -} - -impl From for Error { - fn from(_: rmp::encode::ValueWriteError) -> Self { - Self::MessagePackError - } -} - -impl From for Error { - fn from(err: ParseError) -> Self { - Self::InvalidUri(err.to_string()) - } -} - -impl From for Error { - fn from(err: uri::InvalidUri) -> Self { - Self::InvalidUri(err.to_string()) - } -} - -/// Version of datadog trace ingestion API -#[derive(Debug, Copy, Clone)] -#[non_exhaustive] -pub enum ApiVersion { - /// Version 0.3 - Version03, - /// Version 0.5 - requires datadog-agent v7.22.0 or above - Version05, -} - -impl ApiVersion { - pub(crate) fn path(self) -> &'static str { - match self { - ApiVersion::Version03 => "/v0.3/traces", - ApiVersion::Version05 => "/v0.5/traces", - } - } - - pub(crate) fn content_type(self) -> &'static str { - match self { - ApiVersion::Version03 
=> "application/msgpack", - ApiVersion::Version05 => "application/msgpack", - } - } - - pub(crate) fn encode( - self, - model_config: &ModelConfig, - traces: Vec<&[trace::SpanData]>, - mapping: &Mapping, - unified_tags: &UnifiedTags, - resource: Option<&Resource>, - ) -> Result, Error> { - match self { - Self::Version03 => v03::encode( - model_config, - traces, - |span, config| match &mapping.service_name { - Some(f) => f(span, config), - None => default_service_name_mapping(span, config), - }, - |span, config| match &mapping.name { - Some(f) => f(span, config), - None => default_name_mapping(span, config), - }, - |span, config| match &mapping.resource { - Some(f) => f(span, config), - None => default_resource_mapping(span, config), - }, - resource, - ), - Self::Version05 => v05::encode( - model_config, - traces, - |span, config| match &mapping.service_name { - Some(f) => f(span, config), - None => default_service_name_mapping(span, config), - }, - |span, config| match &mapping.name { - Some(f) => f(span, config), - None => default_name_mapping(span, config), - }, - |span, config| match &mapping.resource { - Some(f) => f(span, config), - None => default_resource_mapping(span, config), - }, - unified_tags, - resource, - ), - } - } -} - -#[cfg(test)] -pub(crate) mod tests { - use std::time::Duration; - use std::time::SystemTime; - - use base64::Engine; - use opentelemetry::KeyValue; - use opentelemetry::trace::SpanContext; - use opentelemetry::trace::SpanId; - use opentelemetry::trace::SpanKind; - use opentelemetry::trace::Status; - use opentelemetry::trace::TraceFlags; - use opentelemetry::trace::TraceId; - use opentelemetry::trace::TraceState; - use opentelemetry_sdk::InstrumentationLibrary; - use opentelemetry_sdk::trace::SpanEvents; - use opentelemetry_sdk::trace::SpanLinks; - use opentelemetry_sdk::{self}; - - use super::*; - - fn get_traces() -> Vec> { - vec![vec![get_span(7, 1, 99)]] - } - - pub(crate) fn get_span(trace_id: u128, parent_span_id: u64, span_id: 
u64) -> trace::SpanData { - let span_context = SpanContext::new( - TraceId::from_u128(trace_id), - SpanId::from_u64(span_id), - TraceFlags::default(), - false, - TraceState::default(), - ); - - let start_time = SystemTime::UNIX_EPOCH; - let end_time = start_time.checked_add(Duration::from_secs(1)).unwrap(); - - let attributes = vec![ - KeyValue::new("span.type", "web"), - KeyValue::new("host.name", "test"), - ]; - let instrumentation_lib = InstrumentationLibrary::builder("component").build(); - - trace::SpanData { - span_context, - parent_span_id: SpanId::from_u64(parent_span_id), - span_kind: SpanKind::Client, - name: "resource".into(), - start_time, - end_time, - attributes, - events: SpanEvents::default(), - links: SpanLinks::default(), - status: Status::Ok, - instrumentation_lib, - dropped_attributes_count: 0, - } - } - - #[test] - fn test_encode_v03() -> Result<(), Box> { - let traces = get_traces(); - let model_config = ModelConfig { - service_name: "service_name".to_string(), - ..Default::default() - }; - let encoded = - base64::engine::general_purpose::STANDARD.encode(ApiVersion::Version03.encode( - &model_config, - traces.iter().map(|x| &x[..]).collect(), - &Mapping::empty(), - &UnifiedTags::new(), - None, - )?); - - assert_eq!( - encoded.as_str(), - "kZGMpHR5cGWjd2Vip3NlcnZpY2Wsc2VydmljZV9uYW1lpG5hbWWpY29tcG9uZW\ - 50qHJlc291cmNlqHJlc291cmNlqHRyYWNlX2lkzwAAAAAAAAAHp3NwYW5faWTPAAAAAAAAAGOpcGFyZW50X2lkzwAAAA\ - AAAAABpXN0YXJ00wAAAAAAAAAAqGR1cmF0aW9u0wAAAAA7msoApWVycm9y0gAAAACkbWV0YYKpc3Bhbi50eXBlo3dlYq\ - lob3N0Lm5hbWWkdGVzdKdtZXRyaWNzgbVfc2FtcGxpbmdfcHJpb3JpdHlfdjHLAAAAAAAAAAA=" - ); - - Ok(()) - } - - #[test] - fn test_encode_v05() -> Result<(), Box> { - let traces = get_traces(); - let model_config = ModelConfig { - service_name: "service_name".to_string(), - ..Default::default() - }; - - let mut unified_tags = UnifiedTags::new(); - unified_tags.set_env(Some(String::from("test-env"))); - unified_tags.set_version(Some(String::from("test-version"))); - 
unified_tags.set_service(Some(String::from("test-service"))); - - let _encoded = - base64::engine::general_purpose::STANDARD.encode(ApiVersion::Version05.encode( - &model_config, - traces.iter().map(|x| &x[..]).collect(), - &Mapping::empty(), - &unified_tags, - None, - )?); - - // TODO: Need someone to generate the expected result or instructions to do so. - // assert_eq!(encoded.as_str(), "kp6jd2VirHNlcnZpY2VfbmFtZaljb21wb25lbnSocmVzb3VyY2WpaG9zdC5uYW\ - // 1lpHRlc3Snc2VydmljZax0ZXN0LXNlcnZpY2WjZW52qHRlc3QtZW52p3ZlcnNpb26sdGVzdC12ZXJzaW9uqXNwYW4udH\ - // lwZbVfc2FtcGxpbmdfcHJpb3JpdHlfdjGRkZzOAAAAAc4AAAACzgAAAAPPAAAAAAAAAAfPAAAAAAAAAGPPAAAAAAAAAA\ - // HTAAAAAAAAAADTAAAAADuaygDSAAAAAIXOAAAABM4AAAAFzgAAAAbOAAAAB84AAAAIzgAAAAnOAAAACs4AAAALzgAAAA\ - // zOAAAAAIHOAAAADcsAAAAAAAAAAM4AAAAA"); - - Ok(()) - } -} diff --git a/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/model/unified_tags.rs b/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/model/unified_tags.rs deleted file mode 100644 index 85bece7e9f..0000000000 --- a/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/model/unified_tags.rs +++ /dev/null @@ -1,85 +0,0 @@ -//! 
Unified tags - See: - -pub struct UnifiedTags { - pub service: UnifiedTagField, - pub env: UnifiedTagField, - pub version: UnifiedTagField, -} - -impl UnifiedTags { - pub fn new() -> Self { - UnifiedTags { - service: UnifiedTagField::new(UnifiedTagEnum::Service), - env: UnifiedTagField::new(UnifiedTagEnum::Env), - version: UnifiedTagField::new(UnifiedTagEnum::Version), - } - } - pub fn set_service(&mut self, service: Option) { - self.service.value = service; - } - pub fn set_version(&mut self, version: Option) { - self.version.value = version; - } - pub fn set_env(&mut self, env: Option) { - self.env.value = env; - } - pub fn service(&self) -> Option { - self.service.value.clone() - } - pub fn compute_attribute_size(&self) -> u32 { - self.service.len() + self.env.len() + self.version.len() - } -} - -pub struct UnifiedTagField { - pub value: Option, - pub kind: UnifiedTagEnum, -} - -impl UnifiedTagField { - pub fn new(kind: UnifiedTagEnum) -> Self { - UnifiedTagField { - value: kind.find_unified_tag_value(), - kind, - } - } - pub fn len(&self) -> u32 { - if self.value.is_some() { - return 1; - } - 0 - } - pub fn get_tag_name(&self) -> &'static str { - self.kind.get_tag_name() - } -} - -pub enum UnifiedTagEnum { - Service, - Version, - Env, -} - -impl UnifiedTagEnum { - fn get_env_variable_name(&self) -> &'static str { - match self { - UnifiedTagEnum::Service => "DD_SERVICE", - UnifiedTagEnum::Version => "DD_VERSION", - UnifiedTagEnum::Env => "DD_ENV", - } - } - fn get_tag_name(&self) -> &'static str { - match self { - UnifiedTagEnum::Service => "service", - UnifiedTagEnum::Version => "version", - UnifiedTagEnum::Env => "env", - } - } - fn find_unified_tag_value(&self) -> Option { - let env_name_to_check = self.get_env_variable_name(); - match std::env::var(env_name_to_check) { - Ok(tag_value) => Some(tag_value.to_lowercase()), - _ => None, - } - } -} diff --git a/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/model/v03.rs 
b/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/model/v03.rs deleted file mode 100644 index e29a4b9c00..0000000000 --- a/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/model/v03.rs +++ /dev/null @@ -1,134 +0,0 @@ -use std::time::SystemTime; - -use opentelemetry::KeyValue; -use opentelemetry::trace::Status; -use opentelemetry_sdk::Resource; -use opentelemetry_sdk::export::trace::SpanData; - -use crate::plugins::telemetry::tracing::datadog_exporter::Error; -use crate::plugins::telemetry::tracing::datadog_exporter::ModelConfig; -use crate::plugins::telemetry::tracing::datadog_exporter::exporter::model::SAMPLING_PRIORITY_KEY; - -pub(crate) fn encode( - model_config: &ModelConfig, - traces: Vec<&[SpanData]>, - get_service_name: S, - get_name: N, - get_resource: R, - resource: Option<&Resource>, -) -> Result, Error> -where - for<'a> S: Fn(&'a SpanData, &'a ModelConfig) -> &'a str, - for<'a> N: Fn(&'a SpanData, &'a ModelConfig) -> &'a str, - for<'a> R: Fn(&'a SpanData, &'a ModelConfig) -> &'a str, -{ - let mut encoded = Vec::new(); - rmp::encode::write_array_len(&mut encoded, traces.len() as u32)?; - - for trace in traces.into_iter() { - rmp::encode::write_array_len(&mut encoded, trace.len() as u32)?; - - for span in trace { - // Safe until the year 2262 when Datadog will need to change their API - let start = span - .start_time - .duration_since(SystemTime::UNIX_EPOCH) - .unwrap() - .as_nanos() as i64; - - let duration = span - .end_time - .duration_since(span.start_time) - .map(|x| x.as_nanos() as i64) - .unwrap_or(0); - - let mut span_type_found = false; - for kv in &span.attributes { - if kv.key.as_str() == "span.type" { - span_type_found = true; - rmp::encode::write_map_len(&mut encoded, 12)?; - rmp::encode::write_str(&mut encoded, "type")?; - rmp::encode::write_str(&mut encoded, kv.value.as_str().as_ref())?; - break; - } - } - - if !span_type_found { - rmp::encode::write_map_len(&mut encoded, 11)?; - } - - // Datadog 
span name is OpenTelemetry component name - see module docs for more information - rmp::encode::write_str(&mut encoded, "service")?; - rmp::encode::write_str(&mut encoded, get_service_name(span, model_config))?; - - rmp::encode::write_str(&mut encoded, "name")?; - rmp::encode::write_str(&mut encoded, get_name(span, model_config))?; - - rmp::encode::write_str(&mut encoded, "resource")?; - rmp::encode::write_str(&mut encoded, get_resource(span, model_config))?; - - rmp::encode::write_str(&mut encoded, "trace_id")?; - rmp::encode::write_u64( - &mut encoded, - u128::from_be_bytes(span.span_context.trace_id().to_bytes()) as u64, - )?; - - rmp::encode::write_str(&mut encoded, "span_id")?; - rmp::encode::write_u64( - &mut encoded, - u64::from_be_bytes(span.span_context.span_id().to_bytes()), - )?; - - rmp::encode::write_str(&mut encoded, "parent_id")?; - rmp::encode::write_u64( - &mut encoded, - u64::from_be_bytes(span.parent_span_id.to_bytes()), - )?; - - rmp::encode::write_str(&mut encoded, "start")?; - rmp::encode::write_i64(&mut encoded, start)?; - - rmp::encode::write_str(&mut encoded, "duration")?; - rmp::encode::write_i64(&mut encoded, duration)?; - - rmp::encode::write_str(&mut encoded, "error")?; - rmp::encode::write_i32( - &mut encoded, - match span.status { - Status::Error { .. 
} => 1, - _ => 0, - }, - )?; - - rmp::encode::write_str(&mut encoded, "meta")?; - rmp::encode::write_map_len( - &mut encoded, - (span.attributes.len() + resource.map(|r| r.len()).unwrap_or(0)) as u32, - )?; - if let Some(resource) = resource { - for (key, value) in resource.iter() { - rmp::encode::write_str(&mut encoded, key.as_str())?; - rmp::encode::write_str(&mut encoded, value.as_str().as_ref())?; - } - } - for KeyValue { key, value } in span.attributes.iter() { - rmp::encode::write_str(&mut encoded, key.as_str())?; - rmp::encode::write_str(&mut encoded, value.as_str().as_ref())?; - } - - rmp::encode::write_str(&mut encoded, "metrics")?; - rmp::encode::write_map_len(&mut encoded, 1)?; - rmp::encode::write_str(&mut encoded, SAMPLING_PRIORITY_KEY)?; - rmp::encode::write_f64( - &mut encoded, - if span.span_context.is_sampled() { - 1.0 - } else { - 0.0 - }, - )?; - } - } - - Ok(encoded) -} diff --git a/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/model/v05.rs b/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/model/v05.rs deleted file mode 100644 index 5bd8f24e0e..0000000000 --- a/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/exporter/model/v05.rs +++ /dev/null @@ -1,284 +0,0 @@ -use std::time::SystemTime; - -use opentelemetry::KeyValue; -use opentelemetry::trace::Status; -use opentelemetry_sdk::Resource; -use opentelemetry_sdk::export::trace::SpanData; - -use super::unified_tags::UnifiedTagField; -use super::unified_tags::UnifiedTags; -use crate::plugins::telemetry::tracing::datadog_exporter::DatadogTraceState; -use crate::plugins::telemetry::tracing::datadog_exporter::Error; -use crate::plugins::telemetry::tracing::datadog_exporter::ModelConfig; -use crate::plugins::telemetry::tracing::datadog_exporter::exporter::intern::StringInterner; -use crate::plugins::telemetry::tracing::datadog_exporter::exporter::model::DD_MEASURED_KEY; -use 
crate::plugins::telemetry::tracing::datadog_exporter::exporter::model::SAMPLING_PRIORITY_KEY; -use crate::plugins::telemetry::tracing::datadog_exporter::propagator::SamplingPriority; - -const SPAN_NUM_ELEMENTS: u32 = 12; -const METRICS_LEN: u32 = 2; -const GIT_META_TAGS_COUNT: u32 = if matches!( - ( - option_env!("DD_GIT_REPOSITORY_URL"), - option_env!("DD_GIT_COMMIT_SHA") - ), - (Some(_), Some(_)) -) { - 2 -} else { - 0 -}; - -// Protocol documentation sourced from https://github.com/DataDog/datadog-agent/blob/c076ea9a1ffbde4c76d35343dbc32aecbbf99cb9/pkg/trace/api/version.go -// -// The payload is an array containing exactly 12 elements: -// -// 1. An array of all unique strings present in the payload (a dictionary referred to by index). -// 2. An array of traces, where each trace is an array of spans. A span is encoded as an array having -// exactly 12 elements, representing all span properties, in this exact order: -// -// 0: Service (uint32) -// 1: Name (uint32) -// 2: Resource (uint32) -// 3: TraceID (uint64) -// 4: SpanID (uint64) -// 5: ParentID (uint64) -// 6: Start (int64) -// 7: Duration (int64) -// 8: Error (int32) -// 9: Meta (map[uint32]uint32) -// 10: Metrics (map[uint32]float64) -// 11: Type (uint32) -// -// Considerations: -// -// - The "uint32" typed values in "Service", "Name", "Resource", "Type", "Meta" and "Metrics" represent -// the index at which the corresponding string is found in the dictionary. If any of the values are the -// empty string, then the empty string must be added into the dictionary. -// -// - None of the elements can be nil. If any of them are unset, they should be given their "zero-value". 
Here -// is an example of a span with all unset values: -// -// 0: 0 // Service is "" (index 0 in dictionary) -// 1: 0 // Name is "" -// 2: 0 // Resource is "" -// 3: 0 // TraceID -// 4: 0 // SpanID -// 5: 0 // ParentID -// 6: 0 // Start -// 7: 0 // Duration -// 8: 0 // Error -// 9: map[uint32]uint32{} // Meta (empty map) -// 10: map[uint32]float64{} // Metrics (empty map) -// 11: 0 // Type is "" -// -// The dictionary in this case would be []string{""}, having only the empty string at index 0. -// -pub(crate) fn encode( - model_config: &ModelConfig, - traces: Vec<&[SpanData]>, - get_service_name: S, - get_name: N, - get_resource: R, - unified_tags: &UnifiedTags, - resource: Option<&Resource>, -) -> Result, Error> -where - for<'a> S: Fn(&'a SpanData, &'a ModelConfig) -> &'a str, - for<'a> N: Fn(&'a SpanData, &'a ModelConfig) -> &'a str, - for<'a> R: Fn(&'a SpanData, &'a ModelConfig) -> &'a str, -{ - let mut interner = StringInterner::new(); - let mut encoded_traces = encode_traces( - &mut interner, - model_config, - get_service_name, - get_name, - get_resource, - &traces, - unified_tags, - resource, - )?; - - let mut payload = Vec::with_capacity(traces.len() * 512); - rmp::encode::write_array_len(&mut payload, 2)?; - - interner.write_dictionary(&mut payload)?; - - payload.append(&mut encoded_traces); - - Ok(payload) -} - -fn write_unified_tags<'a>( - encoded: &mut Vec, - interner: &mut StringInterner<'a>, - unified_tags: &'a UnifiedTags, -) -> Result<(), Error> { - write_unified_tag(encoded, interner, &unified_tags.service)?; - write_unified_tag(encoded, interner, &unified_tags.env)?; - write_unified_tag(encoded, interner, &unified_tags.version)?; - Ok(()) -} - -fn write_unified_tag<'a>( - encoded: &mut Vec, - interner: &mut StringInterner<'a>, - tag: &'a UnifiedTagField, -) -> Result<(), Error> { - if let Some(tag_value) = &tag.value { - rmp::encode::write_u32(encoded, interner.intern(tag.get_tag_name()))?; - rmp::encode::write_u32(encoded, 
interner.intern(tag_value.as_str().as_ref()))?; - } - Ok(()) -} - -fn get_sampling_priority(span: &SpanData) -> f64 { - match span - .span_context - .trace_state() - .sampling_priority() - .unwrap_or_else(|| { - // Datadog sampling has not been set, revert to traceflags - if span.span_context.trace_flags().is_sampled() { - SamplingPriority::AutoKeep - } else { - SamplingPriority::AutoReject - } - }) { - SamplingPriority::UserReject => -1.0, - SamplingPriority::AutoReject => 0.0, - SamplingPriority::AutoKeep => 1.0, - SamplingPriority::UserKeep => 2.0, - } -} - -fn get_measuring(span: &SpanData) -> f64 { - if span.span_context.trace_state().measuring_enabled() { - 1.0 - } else { - 0.0 - } -} - -#[allow(clippy::too_many_arguments)] -fn encode_traces<'interner, S, N, R>( - interner: &mut StringInterner<'interner>, - model_config: &'interner ModelConfig, - get_service_name: S, - get_name: N, - get_resource: R, - traces: &'interner [&[SpanData]], - unified_tags: &'interner UnifiedTags, - resource: Option<&'interner Resource>, -) -> Result, Error> -where - for<'a> S: Fn(&'a SpanData, &'a ModelConfig) -> &'a str, - for<'a> N: Fn(&'a SpanData, &'a ModelConfig) -> &'a str, - for<'a> R: Fn(&'a SpanData, &'a ModelConfig) -> &'a str, -{ - let mut encoded = Vec::new(); - rmp::encode::write_array_len(&mut encoded, traces.len() as u32)?; - - for trace in traces.iter() { - rmp::encode::write_array_len(&mut encoded, trace.len() as u32)?; - - for span in trace.iter() { - // Safe until the year 2262 when Datadog will need to change their API - let start = span - .start_time - .duration_since(SystemTime::UNIX_EPOCH) - .unwrap() - .as_nanos() as i64; - - let duration = span - .end_time - .duration_since(span.start_time) - .map(|x| x.as_nanos() as i64) - .unwrap_or(0); - - let mut span_type = interner.intern(""); - for KeyValue { key, value } in &span.attributes { - if key.as_str() == "span.type" { - span_type = interner.intern_value(value); - break; - } - } - - // Datadog span name is 
OpenTelemetry component name - see module docs for more information - rmp::encode::write_array_len(&mut encoded, SPAN_NUM_ELEMENTS)?; - rmp::encode::write_u32( - &mut encoded, - interner.intern(get_service_name(span, model_config)), - )?; - rmp::encode::write_u32(&mut encoded, interner.intern(get_name(span, model_config)))?; - rmp::encode::write_u32( - &mut encoded, - interner.intern(get_resource(span, model_config)), - )?; - rmp::encode::write_u64( - &mut encoded, - u128::from_be_bytes(span.span_context.trace_id().to_bytes()) as u64, - )?; - rmp::encode::write_u64( - &mut encoded, - u64::from_be_bytes(span.span_context.span_id().to_bytes()), - )?; - rmp::encode::write_u64( - &mut encoded, - u64::from_be_bytes(span.parent_span_id.to_bytes()), - )?; - rmp::encode::write_i64(&mut encoded, start)?; - rmp::encode::write_i64(&mut encoded, duration)?; - rmp::encode::write_i32( - &mut encoded, - match span.status { - Status::Error { .. } => 1, - _ => 0, - }, - )?; - - rmp::encode::write_map_len( - &mut encoded, - (span.attributes.len() + resource.map(|r| r.len()).unwrap_or(0)) as u32 - + unified_tags.compute_attribute_size() - + GIT_META_TAGS_COUNT, - )?; - if let Some(resource) = resource { - for (key, value) in resource.iter() { - rmp::encode::write_u32(&mut encoded, interner.intern(key.as_str()))?; - rmp::encode::write_u32(&mut encoded, interner.intern_value(value))?; - } - } - - write_unified_tags(&mut encoded, interner, unified_tags)?; - - for KeyValue { key, value } in span.attributes.iter() { - rmp::encode::write_u32(&mut encoded, interner.intern(key.as_str()))?; - rmp::encode::write_u32(&mut encoded, interner.intern_value(value))?; - } - - if let (Some(repository_url), Some(commit_sha)) = ( - option_env!("DD_GIT_REPOSITORY_URL"), - option_env!("DD_GIT_COMMIT_SHA"), - ) { - rmp::encode::write_u32(&mut encoded, interner.intern("git.repository_url"))?; - rmp::encode::write_u32(&mut encoded, interner.intern(repository_url))?; - rmp::encode::write_u32(&mut encoded, 
interner.intern("git.commit.sha"))?; - rmp::encode::write_u32(&mut encoded, interner.intern(commit_sha))?; - } - - rmp::encode::write_map_len(&mut encoded, METRICS_LEN)?; - rmp::encode::write_u32(&mut encoded, interner.intern(SAMPLING_PRIORITY_KEY))?; - let sampling_priority = get_sampling_priority(span); - rmp::encode::write_f64(&mut encoded, sampling_priority)?; - - rmp::encode::write_u32(&mut encoded, interner.intern(DD_MEASURED_KEY))?; - let measuring = get_measuring(span); - rmp::encode::write_f64(&mut encoded, measuring)?; - rmp::encode::write_u32(&mut encoded, span_type)?; - } - } - - Ok(encoded) -} diff --git a/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/mod.rs b/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/mod.rs deleted file mode 100644 index f2d5c21aef..0000000000 --- a/apollo-router/src/plugins/telemetry/tracing/datadog_exporter/mod.rs +++ /dev/null @@ -1,569 +0,0 @@ -//! # OpenTelemetry Datadog Exporter -//! -//! An OpenTelemetry datadog exporter implementation -//! -//! See the [Datadog Docs](https://docs.datadoghq.com/agent/) for information on how to run the datadog-agent -//! -//! ## Quirks -//! -//! There are currently some incompatibilities between Datadog and OpenTelemetry, and this manifests -//! as minor quirks to this exporter. -//! -//! Firstly Datadog uses operation_name to describe what OpenTracing would call a component. -//! Or to put it another way, in OpenTracing the operation / span name's are relatively -//! granular and might be used to identify a specific endpoint. In datadog, however, they -//! are less granular - it is expected in Datadog that a service will have single -//! primary span name that is the root of all traces within that service, with an additional piece of -//! metadata called resource_name providing granularity. See [here](https://docs.datadoghq.com/tracing/guide/configuring-primary-operation/) -//! -//! 
The Datadog Golang API takes the approach of using a `resource.name` OpenTelemetry attribute to set the -//! resource_name. See [here](https://github.com/DataDog/dd-trace-go/blob/ecb0b805ef25b00888a2fb62d465a5aa95e7301e/ddtrace/opentracer/tracer.go#L10) -//! -//! Unfortunately, this breaks compatibility with other OpenTelemetry exporters which expect -//! a more granular operation name - as per the OpenTracing specification. -//! -//! This exporter therefore takes a different approach of naming the span with the name of the -//! tracing provider, and using the span name to set the resource_name. This should in most cases -//! lead to the behaviour that users expect. -//! -//! Datadog additionally has a span_type string that alters the rendering of the spans in the web UI. -//! This can be set as the `span.type` OpenTelemetry span attribute. -//! -//! For standard values see [here](https://github.com/DataDog/dd-trace-go/blob/ecb0b805ef25b00888a2fb62d465a5aa95e7301e/ddtrace/ext/app_types.go#L31). -//! -//! If the default mapping is not fit for your use case, you may change some of them by providing [`FieldMappingFn`]s in pipeline. -//! -//! ## Performance -//! -//! For optimal performance, a batch exporter is recommended as the simple exporter will export -//! each span synchronously on drop. You can enable the [`rt-tokio`], [`rt-tokio-current-thread`] -//! or [`rt-async-std`] features and specify a runtime on the pipeline to have a batch exporter -//! configured for you automatically. -//! -//! ```toml -//! [dependencies] -//! opentelemetry = { version = "*", features = ["rt-tokio"] } -//! opentelemetry-datadog = "*" -//! ``` -//! -//! ```no_run -//! # fn main() -> Result<(), opentelemetry::trace::TraceError> { -//! let tracer = opentelemetry_datadog::new_pipeline() -//! .install_batch(opentelemetry_sdk::runtime::Tokio)?; -//! # Ok(()) -//! # } -//! ``` -//! -//! [`rt-tokio`]: https://tokio.rs -//! [`rt-tokio-current-thread`]: https://tokio.rs -//! 
[`rt-async-std`]: https://async.rs -//! -//! ## Bring your own http client -//! -//! Users can choose appropriate http clients to align with their runtime. -//! -//! Based on the feature enabled. The default http client will be different. If user doesn't specific -//! features or enabled `reqwest-blocking-client` feature. The blocking reqwest http client will be used as -//! default client. If `reqwest-client` feature is enabled. The async reqwest http client will be used. If -//! `surf-client` feature is enabled. The surf http client will be used. -//! -//! Note that async http clients may need specific runtime otherwise it will panic. User should make -//! sure the http client is running in appropriate runime. -//! -//! Users can always use their own http clients by implementing `HttpClient` trait. -//! -//! ## Kitchen Sink Full Configuration -//! -//! Example showing how to override all configuration options. See the -//! [`DatadogPipelineBuilder`] docs for details of each option. -//! -//! [`DatadogPipelineBuilder`]: struct.DatadogPipelineBuilder.html -//! -//! ```no_run -//! use opentelemetry::{KeyValue, trace::Tracer}; -//! use opentelemetry_sdk::{trace::{self, RandomIdGenerator, Sampler}, Resource}; -//! use opentelemetry_sdk::export::trace::ExportResult; -//! use opentelemetry::global::shutdown_tracer_provider; -//! use opentelemetry_datadog::{new_pipeline, ApiVersion, Error}; -//! use opentelemetry_http::{HttpClient, HttpError}; -//! use async_trait::async_trait; -//! use bytes::Bytes; -//! use futures_util::io::AsyncReadExt as _; -//! use http::{Request, Response}; -//! use std::convert::TryInto as _; -//! -//! // `reqwest` and `surf` are supported through features, if you prefer an -//! // alternate http client you can add support by implementing `HttpClient` as -//! // shown here. -//! #[derive(Debug)] -//! struct IsahcClient(isahc::HttpClient); -//! -//! #[async_trait] -//! impl HttpClient for IsahcClient { -//! 
async fn send(&self, request: Request>) -> Result, HttpError> { -//! let mut response = self.0.send_async(request).await?; -//! let status = response.status(); -//! let mut bytes = Vec::with_capacity(response.body().len().unwrap_or(0).try_into()?); -//! isahc::AsyncReadResponseExt::copy_to(&mut response, &mut bytes).await?; -//! -//! Ok(Response::builder() -//! .status(response.status()) -//! .body(bytes.into())?) -//! } -//! } -//! -//! fn main() -> Result<(), opentelemetry::trace::TraceError> { -//! let tracer = new_pipeline() -//! .with_service_name("my_app") -//! .with_api_version(ApiVersion::Version05) -//! .with_agent_endpoint("http://localhost:8126") -//! .with_trace_config( -//! trace::config() -//! .with_sampler(Sampler::AlwaysOn) -//! .with_id_generator(RandomIdGenerator::default()) -//! ) -//! .install_batch(opentelemetry_sdk::runtime::Tokio)?; -//! -//! tracer.in_span("doing_work", |cx| { -//! // Traced app logic here... -//! }); -//! -//! shutdown_tracer_provider(); // sending remaining spans before exit -//! -//! Ok(()) -//! } -//! 
``` - -mod exporter; - -#[allow(unused_imports)] -pub use exporter::ApiVersion; -#[allow(unused_imports)] -pub use exporter::DatadogExporter; -#[allow(unused_imports)] -pub use exporter::DatadogPipelineBuilder; -#[allow(unused_imports)] -pub use exporter::Error; -#[allow(unused_imports)] -pub use exporter::FieldMappingFn; -#[allow(unused_imports)] -pub use exporter::ModelConfig; -#[allow(unused_imports)] -pub use exporter::new_pipeline; -#[allow(unused_imports)] -pub use propagator::DatadogPropagator; -#[allow(unused_imports)] -pub use propagator::DatadogTraceState; -#[allow(unused_imports)] -pub use propagator::DatadogTraceStateBuilder; - -pub(crate) mod propagator { - use std::fmt::Display; - - use once_cell::sync::Lazy; - use opentelemetry::Context; - use opentelemetry::propagation::Extractor; - use opentelemetry::propagation::Injector; - use opentelemetry::propagation::TextMapPropagator; - use opentelemetry::propagation::text_map_propagator::FieldIter; - use opentelemetry::trace::SpanContext; - use opentelemetry::trace::SpanId; - use opentelemetry::trace::TraceContextExt; - use opentelemetry::trace::TraceFlags; - use opentelemetry::trace::TraceId; - use opentelemetry::trace::TraceState; - - const DATADOG_TRACE_ID_HEADER: &str = "x-datadog-trace-id"; - const DATADOG_PARENT_ID_HEADER: &str = "x-datadog-parent-id"; - const DATADOG_SAMPLING_PRIORITY_HEADER: &str = "x-datadog-sampling-priority"; - - const TRACE_FLAG_DEFERRED: TraceFlags = TraceFlags::new(0x02); - const TRACE_STATE_PRIORITY_SAMPLING: &str = "psr"; - const TRACE_STATE_MEASURE: &str = "m"; - const TRACE_STATE_TRUE_VALUE: &str = "1"; - const TRACE_STATE_FALSE_VALUE: &str = "0"; - - static DATADOG_HEADER_FIELDS: Lazy<[String; 3]> = Lazy::new(|| { - [ - DATADOG_TRACE_ID_HEADER.to_string(), - DATADOG_PARENT_ID_HEADER.to_string(), - DATADOG_SAMPLING_PRIORITY_HEADER.to_string(), - ] - }); - - #[derive(Default)] - pub struct DatadogTraceStateBuilder { - sampling_priority: SamplingPriority, - measuring: 
Option, - } - - fn boolean_to_trace_state_flag(value: bool) -> &'static str { - if value { - TRACE_STATE_TRUE_VALUE - } else { - TRACE_STATE_FALSE_VALUE - } - } - - fn trace_flag_to_boolean(value: &str) -> bool { - value == TRACE_STATE_TRUE_VALUE - } - - #[allow(clippy::needless_update)] - impl DatadogTraceStateBuilder { - pub fn with_priority_sampling(self, sampling_priority: SamplingPriority) -> Self { - Self { - sampling_priority, - ..self - } - } - - pub fn with_measuring(self, enabled: bool) -> Self { - Self { - measuring: Some(enabled), - ..self - } - } - - pub fn build(self) -> TraceState { - if let Some(measuring) = self.measuring { - let values = [ - (TRACE_STATE_MEASURE, boolean_to_trace_state_flag(measuring)), - ( - TRACE_STATE_PRIORITY_SAMPLING, - &self.sampling_priority.to_string(), - ), - ]; - - TraceState::from_key_value(values).unwrap_or_default() - } else { - let values = [( - TRACE_STATE_PRIORITY_SAMPLING, - &self.sampling_priority.to_string(), - )]; - - TraceState::from_key_value(values).unwrap_or_default() - } - } - } - - pub trait DatadogTraceState { - fn with_measuring(&self, enabled: bool) -> TraceState; - - fn measuring_enabled(&self) -> bool; - - fn with_priority_sampling(&self, sampling_priority: SamplingPriority) -> TraceState; - - fn sampling_priority(&self) -> Option; - } - - impl DatadogTraceState for TraceState { - fn with_measuring(&self, enabled: bool) -> TraceState { - self.insert(TRACE_STATE_MEASURE, boolean_to_trace_state_flag(enabled)) - .unwrap_or_else(|_err| self.clone()) - } - - fn measuring_enabled(&self) -> bool { - self.get(TRACE_STATE_MEASURE) - .map(trace_flag_to_boolean) - .unwrap_or_default() - } - - fn with_priority_sampling(&self, sampling_priority: SamplingPriority) -> TraceState { - self.insert(TRACE_STATE_PRIORITY_SAMPLING, sampling_priority.to_string()) - .unwrap_or_else(|_err| self.clone()) - } - - fn sampling_priority(&self) -> Option { - self.get(TRACE_STATE_PRIORITY_SAMPLING).map(|value| { - 
SamplingPriority::try_from(value).unwrap_or(SamplingPriority::AutoReject) - }) - } - } - - #[derive(Default, Debug, Eq, PartialEq)] - pub(crate) enum SamplingPriority { - UserReject = -1, - #[default] - AutoReject = 0, - AutoKeep = 1, - UserKeep = 2, - } - - impl SamplingPriority { - pub(crate) fn as_i64(&self) -> i64 { - match self { - SamplingPriority::UserReject => -1, - SamplingPriority::AutoReject => 0, - SamplingPriority::AutoKeep => 1, - SamplingPriority::UserKeep => 2, - } - } - } - - impl Display for SamplingPriority { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let value = match self { - SamplingPriority::UserReject => -1, - SamplingPriority::AutoReject => 0, - SamplingPriority::AutoKeep => 1, - SamplingPriority::UserKeep => 2, - }; - write!(f, "{value}") - } - } - - impl SamplingPriority { - pub fn as_str(&self) -> &'static str { - match self { - SamplingPriority::UserReject => "-1", - SamplingPriority::AutoReject => "0", - SamplingPriority::AutoKeep => "1", - SamplingPriority::UserKeep => "2", - } - } - } - - impl TryFrom<&str> for SamplingPriority { - type Error = ExtractError; - - fn try_from(value: &str) -> Result { - match value { - "-1" => Ok(SamplingPriority::UserReject), - "0" => Ok(SamplingPriority::AutoReject), - "1" => Ok(SamplingPriority::AutoKeep), - "2" => Ok(SamplingPriority::UserKeep), - _ => Err(ExtractError::SamplingPriority), - } - } - } - - #[derive(Debug)] - pub(crate) enum ExtractError { - TraceId, - SpanId, - SamplingPriority, - } - - /// Extracts and injects `SpanContext`s into `Extractor`s or `Injector`s using Datadog's header format. 
- /// - /// The Datadog header format does not have an explicit spec, but can be divined from the client libraries, - /// such as [dd-trace-go] - /// - /// ## Example - /// - /// ``` - /// use opentelemetry::global; - /// use opentelemetry_datadog::DatadogPropagator; - /// - /// global::set_text_map_propagator(DatadogPropagator::default()); - /// ``` - /// - /// [dd-trace-go]: https://github.com/DataDog/dd-trace-go/blob/v1.28.0/ddtrace/tracer/textmap.go#L293 - #[derive(Clone, Debug, Default)] - pub struct DatadogPropagator { - _private: (), - } - - fn create_trace_state_and_flags(trace_flags: TraceFlags) -> (TraceState, TraceFlags) { - (TraceState::default(), trace_flags) - } - - impl DatadogPropagator { - /// Creates a new `DatadogPropagator`. - pub fn new() -> Self { - DatadogPropagator::default() - } - - fn extract_trace_id(&self, trace_id: &str) -> Result { - trace_id - .parse::() - .map(|id| TraceId::from(id as u128)) - .map_err(|_| ExtractError::TraceId) - } - - fn extract_span_id(&self, span_id: &str) -> Result { - span_id - .parse::() - .map(SpanId::from) - .map_err(|_| ExtractError::SpanId) - } - - fn extract_span_context( - &self, - extractor: &dyn Extractor, - ) -> Result { - let trace_id = - self.extract_trace_id(extractor.get(DATADOG_TRACE_ID_HEADER).unwrap_or(""))?; - // If we have a trace_id but can't get the parent span, we default it to invalid instead of completely erroring - // out so that the rest of the spans aren't completely lost - let span_id = self - .extract_span_id(extractor.get(DATADOG_PARENT_ID_HEADER).unwrap_or("")) - .unwrap_or(SpanId::INVALID); - let sampling_priority = extractor - .get(DATADOG_SAMPLING_PRIORITY_HEADER) - .unwrap_or("") - .try_into(); - - let sampled = match sampling_priority { - Ok(SamplingPriority::UserReject) | Ok(SamplingPriority::AutoReject) => { - TraceFlags::default() - } - Ok(SamplingPriority::UserKeep) | Ok(SamplingPriority::AutoKeep) => { - TraceFlags::SAMPLED - } - // Treat the sampling as DEFERRED instead 
of erroring on extracting the span context - Err(_) => TRACE_FLAG_DEFERRED, - }; - - let (mut trace_state, trace_flags) = create_trace_state_and_flags(sampled); - if let Ok(sampling_priority) = sampling_priority { - trace_state = trace_state.with_priority_sampling(sampling_priority); - } - - Ok(SpanContext::new( - trace_id, - span_id, - trace_flags, - true, - trace_state, - )) - } - } - - impl TextMapPropagator for DatadogPropagator { - fn inject_context(&self, cx: &Context, injector: &mut dyn Injector) { - let span = cx.span(); - let span_context = span.span_context(); - if span_context.is_valid() { - injector.set( - DATADOG_TRACE_ID_HEADER, - (u128::from_be_bytes(span_context.trace_id().to_bytes()) as u64).to_string(), - ); - injector.set( - DATADOG_PARENT_ID_HEADER, - u64::from_be_bytes(span_context.span_id().to_bytes()).to_string(), - ); - - if span_context.trace_flags() & TRACE_FLAG_DEFERRED != TRACE_FLAG_DEFERRED { - // The sampling priority - let sampling_priority = span_context - .trace_state() - .sampling_priority() - .unwrap_or_else(|| { - if span_context.is_sampled() { - SamplingPriority::AutoKeep - } else { - SamplingPriority::AutoReject - } - }); - injector.set( - DATADOG_SAMPLING_PRIORITY_HEADER, - (sampling_priority as i32).to_string(), - ); - } - } - } - - fn extract_with_context(&self, cx: &Context, extractor: &dyn Extractor) -> Context { - self.extract_span_context(extractor) - .map(|sc| cx.with_remote_span_context(sc)) - .unwrap_or_else(|_| cx.clone()) - } - - fn fields(&self) -> FieldIter<'_> { - FieldIter::new(DATADOG_HEADER_FIELDS.as_ref()) - } - } - - #[cfg(test)] - mod tests { - use std::collections::HashMap; - - use opentelemetry::trace::TraceState; - use opentelemetry_sdk::testing::trace::TestSpan; - - use super::*; - - #[rustfmt::skip] - fn extract_test_data() -> Vec<(Vec<(&'static str, &'static str)>, SpanContext)> { - vec![ - (vec![], SpanContext::empty_context()), - (vec![(DATADOG_SAMPLING_PRIORITY_HEADER, "0")], 
SpanContext::empty_context()), - (vec![(DATADOG_TRACE_ID_HEADER, "garbage")], SpanContext::empty_context()), - (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "garbage")], SpanContext::new(TraceId::from_u128(1234), SpanId::INVALID, TRACE_FLAG_DEFERRED, true, TraceState::default())), - (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "12")], SpanContext::new(TraceId::from_u128(1234), SpanId::from_u64(12), TRACE_FLAG_DEFERRED, true, TraceState::default())), - (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "12"), (DATADOG_SAMPLING_PRIORITY_HEADER, "-1")], SpanContext::new(TraceId::from_u128(1234), SpanId::from_u64(12), TraceFlags::default(), true, DatadogTraceStateBuilder::default().with_priority_sampling(SamplingPriority::UserReject).build())), - (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "12"), (DATADOG_SAMPLING_PRIORITY_HEADER, "0")], SpanContext::new(TraceId::from_u128(1234), SpanId::from_u64(12), TraceFlags::default(), true, DatadogTraceStateBuilder::default().with_priority_sampling(SamplingPriority::AutoReject).build())), - (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "12"), (DATADOG_SAMPLING_PRIORITY_HEADER, "1")], SpanContext::new(TraceId::from_u128(1234), SpanId::from_u64(12), TraceFlags::SAMPLED, true, DatadogTraceStateBuilder::default().with_priority_sampling(SamplingPriority::AutoKeep).build())), - (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "12"), (DATADOG_SAMPLING_PRIORITY_HEADER, "2")], SpanContext::new(TraceId::from_u128(1234), SpanId::from_u64(12), TraceFlags::SAMPLED, true, DatadogTraceStateBuilder::default().with_priority_sampling(SamplingPriority::UserKeep).build())), - ] - } - - #[rustfmt::skip] - fn inject_test_data() -> Vec<(Vec<(&'static str, &'static str)>, SpanContext)> { - vec![ - (vec![], SpanContext::empty_context()), - (vec![], SpanContext::new(TraceId::INVALID, SpanId::INVALID, TRACE_FLAG_DEFERRED, true, 
TraceState::default())), - (vec![], SpanContext::new(TraceId::from_hex("1234").unwrap(), SpanId::INVALID, TRACE_FLAG_DEFERRED, true, TraceState::default())), - (vec![], SpanContext::new(TraceId::from_hex("1234").unwrap(), SpanId::INVALID, TraceFlags::SAMPLED, true, TraceState::default())), - (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "12")], SpanContext::new(TraceId::from_u128(1234), SpanId::from_u64(12), TRACE_FLAG_DEFERRED, true, TraceState::default())), - (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "12"), (DATADOG_SAMPLING_PRIORITY_HEADER, "-1")], SpanContext::new(TraceId::from_u128(1234), SpanId::from_u64(12), TraceFlags::default(), true, DatadogTraceStateBuilder::default().with_priority_sampling(SamplingPriority::UserReject).build())), - (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "12"), (DATADOG_SAMPLING_PRIORITY_HEADER, "0")], SpanContext::new(TraceId::from_u128(1234), SpanId::from_u64(12), TraceFlags::default(), true, DatadogTraceStateBuilder::default().with_priority_sampling(SamplingPriority::AutoReject).build())), - (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "12"), (DATADOG_SAMPLING_PRIORITY_HEADER, "1")], SpanContext::new(TraceId::from_u128(1234), SpanId::from_u64(12), TraceFlags::SAMPLED, true, DatadogTraceStateBuilder::default().with_priority_sampling(SamplingPriority::AutoKeep).build())), - (vec![(DATADOG_TRACE_ID_HEADER, "1234"), (DATADOG_PARENT_ID_HEADER, "12"), (DATADOG_SAMPLING_PRIORITY_HEADER, "2")], SpanContext::new(TraceId::from_u128(1234), SpanId::from_u64(12), TraceFlags::SAMPLED, true, DatadogTraceStateBuilder::default().with_priority_sampling(SamplingPriority::UserKeep).build())), - ] - } - - #[test] - fn test_extract() { - for (header_list, expected) in extract_test_data() { - let map: HashMap = header_list - .into_iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect(); - - let propagator = DatadogPropagator::default(); - let 
context = propagator.extract(&map); - assert_eq!(context.span().span_context(), &expected); - } - } - - #[test] - fn test_extract_empty() { - let map: HashMap = HashMap::new(); - let propagator = DatadogPropagator::default(); - let context = propagator.extract(&map); - assert_eq!(context.span().span_context(), &SpanContext::empty_context()) - } - - #[test] - fn test_extract_with_empty_remote_context() { - let map: HashMap = HashMap::new(); - let propagator = DatadogPropagator::default(); - let context = propagator.extract_with_context(&Context::new(), &map); - assert!(!context.has_active_span()) - } - - #[test] - fn test_inject() { - let propagator = DatadogPropagator::default(); - for (header_values, span_context) in inject_test_data() { - let mut injector: HashMap = HashMap::new(); - propagator.inject_context( - &Context::current_with_span(TestSpan(span_context)), - &mut injector, - ); - - if !header_values.is_empty() { - for (k, v) in header_values.into_iter() { - let injected_value: Option<&String> = injector.get(k); - assert_eq!(injected_value, Some(&v.to_string())); - injector.remove(k); - } - } - assert!(injector.is_empty()); - } - } - } -} diff --git a/apollo-router/src/plugins/telemetry/tracing/mod.rs b/apollo-router/src/plugins/telemetry/tracing/mod.rs index c689bf8d47..c31985e3ae 100644 --- a/apollo-router/src/plugins/telemetry/tracing/mod.rs +++ b/apollo-router/src/plugins/telemetry/tracing/mod.rs @@ -3,12 +3,12 @@ use std::fmt::Formatter; use std::time::Duration; use opentelemetry::Context; -use opentelemetry::trace::TraceResult; use opentelemetry_sdk::Resource; -use opentelemetry_sdk::export::trace::SpanData; +use opentelemetry_sdk::error::OTelSdkResult; use opentelemetry_sdk::trace::BatchConfig; use opentelemetry_sdk::trace::BatchConfigBuilder; use opentelemetry_sdk::trace::Span; +use opentelemetry_sdk::trace::SpanData; use opentelemetry_sdk::trace::SpanProcessor; use schemars::JsonSchema; use serde::Deserialize; @@ -21,7 +21,6 @@ pub(crate) mod 
apollo; pub(crate) mod apollo_telemetry; pub(crate) mod datadog; #[allow(unreachable_pub, dead_code)] -pub(crate) mod datadog_exporter; pub(crate) mod otlp; pub(crate) mod reload; pub(crate) mod zipkin; @@ -59,17 +58,21 @@ impl SpanProcessor for ApolloFilterSpanProcessor { } } - fn force_flush(&self) -> TraceResult<()> { + fn force_flush(&self) -> OTelSdkResult { self.delegate.force_flush() } - fn shutdown(&self) -> TraceResult<()> { + fn shutdown(&self) -> OTelSdkResult { self.delegate.shutdown() } fn set_resource(&mut self, resource: &Resource) { self.delegate.set_resource(resource) } + + fn shutdown_with_timeout(&self, timeout: Duration) -> OTelSdkResult { + self.delegate.shutdown_with_timeout(timeout) + } } trait SpanProcessorExt diff --git a/apollo-router/src/plugins/telemetry/tracing/otlp.rs b/apollo-router/src/plugins/telemetry/tracing/otlp.rs index ea90c0f69d..607dfe84fa 100644 --- a/apollo-router/src/plugins/telemetry/tracing/otlp.rs +++ b/apollo-router/src/plugins/telemetry/tracing/otlp.rs @@ -1,8 +1,8 @@ //! Configuration for Otlp tracing. 
use std::result::Result; -use opentelemetry_otlp::SpanExporterBuilder; -use opentelemetry_sdk::trace::BatchSpanProcessor; +use opentelemetry_otlp::SpanExporter; +use opentelemetry_sdk::trace::span_processor_with_async_runtime::BatchSpanProcessor; use tower::BoxError; use crate::plugins::telemetry::config::Conf; @@ -23,8 +23,8 @@ impl TracingConfigurator for super::super::otlp::Config { } fn configure(&self, builder: &mut TracingBuilder) -> Result<(), BoxError> { - let exporter: SpanExporterBuilder = self.exporter(TelemetryDataKind::Traces)?; - let named_exporter = NamedSpanExporter::new(exporter.build_span_exporter()?, "otlp"); + let exporter: SpanExporter = self.span_exporter(TelemetryDataKind::Traces)?; + let named_exporter = NamedSpanExporter::new(exporter, "otlp"); let batch_span_processor = BatchSpanProcessor::builder(named_exporter, NamedTokioRuntime::new("otlp-tracing")) .with_batch_config(self.batch_processor.clone().into()) diff --git a/apollo-router/src/plugins/telemetry/tracing/zipkin.rs b/apollo-router/src/plugins/telemetry/tracing/zipkin.rs index a25da681fc..1b25cce786 100644 --- a/apollo-router/src/plugins/telemetry/tracing/zipkin.rs +++ b/apollo-router/src/plugins/telemetry/tracing/zipkin.rs @@ -2,7 +2,8 @@ use std::sync::LazyLock; use http::Uri; -use opentelemetry_sdk::trace::BatchSpanProcessor; +use opentelemetry::Key; +use opentelemetry_sdk::trace::span_processor_with_async_runtime::BatchSpanProcessor; use opentelemetry_semantic_conventions::resource::SERVICE_NAME; use schemars::JsonSchema; use serde::Deserialize; @@ -50,18 +51,13 @@ impl TracingConfigurator for Config { tracing::info!("configuring Zipkin tracing: {}", self.batch_processor); let common: opentelemetry_sdk::trace::Config = builder.tracing_common().into(); let endpoint = &self.endpoint.to_full_uri(&DEFAULT_ENDPOINT); - let exporter = opentelemetry_zipkin::new_pipeline() + let exporter = opentelemetry_zipkin::ZipkinExporter::builder() .with_collector_endpoint(endpoint.to_string()) 
.with( - &common.resource.get(SERVICE_NAME.into()), - |builder, service_name| { - // Zipkin exporter incorrectly ignores the service name in the resource - // Set it explicitly here - builder.with_service_name(service_name.as_str()) - }, + &common.resource.get(&Key::from(SERVICE_NAME)), + |builder, _service_name| builder, ) - .with_trace_config(common) - .init_exporter()?; + .build()?; let named_exporter = NamedSpanExporter::new(exporter, "zipkin"); builder.with_span_processor( diff --git a/apollo-router/src/query_planner/caching_query_planner.rs b/apollo-router/src/query_planner/caching_query_planner.rs index 0cf6ca9872..8f27db89f1 100644 --- a/apollo-router/src/query_planner/caching_query_planner.rs +++ b/apollo-router/src/query_planner/caching_query_planner.rs @@ -822,7 +822,7 @@ impl StructHasher { } } fn finalize(self) -> Vec { - self.hasher.finalize().as_slice().into() + self.hasher.finalize().to_vec() } } diff --git a/apollo-router/src/query_planner/query_planner_service.rs b/apollo-router/src/query_planner/query_planner_service.rs index bd2a3b62ae..6737aa6618 100644 --- a/apollo-router/src/query_planner/query_planner_service.rs +++ b/apollo-router/src/query_planner/query_planner_service.rs @@ -109,7 +109,7 @@ fn federation_version_instrument(federation_version: Option) -> ObservableG )], ); }) - .init() + .build() } impl QueryPlannerService { diff --git a/apollo-router/src/services/layers/apq.rs b/apollo-router/src/services/layers/apq.rs index 088709aad3..aed09d580a 100644 --- a/apollo-router/src/services/layers/apq.rs +++ b/apollo-router/src/services/layers/apq.rs @@ -187,7 +187,9 @@ async fn apq_request( fn query_matches_hash(query: &str, hash: &[u8]) -> bool { let mut digest = Sha256::new(); digest.update(query.as_bytes()); - hash == digest.finalize().as_slice() + let binding = digest.finalize(); + let digest_bytes: &[u8] = binding.as_ref(); + hash == digest_bytes } fn redis_key(query_hash: &str) -> String { diff --git a/apollo-router/src/spec/schema.rs 
b/apollo-router/src/spec/schema.rs index 88dd3e4a95..a89f852780 100644 --- a/apollo-router/src/spec/schema.rs +++ b/apollo-router/src/spec/schema.rs @@ -444,7 +444,7 @@ impl QueryHash { hasher.update(query_text); hasher.update(&[0xFF][..]); hasher.update(operation_name.unwrap_or("-")); - Self(hasher.finalize().as_slice().into()) + Self(hasher.finalize().to_vec()) } /// Return the hash as a byte slice. diff --git a/apollo-router/src/tracer.rs b/apollo-router/src/tracer.rs index 13da01fd0d..1c2fca755a 100644 --- a/apollo-router/src/tracer.rs +++ b/apollo-router/src/tracer.rs @@ -118,14 +118,10 @@ mod test { let _guard = TRACING_LOCK.lock(); // Create a tracing layer with the configured tracer - let provider = opentelemetry_sdk::trace::TracerProvider::builder() - .with_simple_exporter( - opentelemetry_stdout::SpanExporter::builder() - .with_writer(std::io::stdout()) - .build(), - ) + let provider = opentelemetry_sdk::trace::SdkTracerProvider::builder() + .with_simple_exporter(opentelemetry_stdout::SpanExporter::default()) .build(); - let tracer = provider.tracer_builder("noop").build(); + let tracer = provider.tracer("noop"); let telemetry = otel::layer().force_sampling().with_tracer(tracer); // Use the tracing subscriber `Registry`, or any other subscriber @@ -145,10 +141,10 @@ mod test { let my_id = TraceId::maybe_new(); assert!(my_id.is_none()); // Create a tracing layer with the configured tracer - let provider = opentelemetry_sdk::trace::TracerProvider::builder() + let provider = opentelemetry_sdk::trace::SdkTracerProvider::builder() .with_simple_exporter(opentelemetry_stdout::SpanExporter::default()) .build(); - let tracer = provider.tracer_builder("noop").build(); + let tracer = provider.tracer("noop"); let telemetry = otel::layer().force_sampling().with_tracer(tracer); // Use the tracing subscriber `Registry`, or any other subscriber // that impls `LookupSpan` @@ -168,10 +164,10 @@ mod test { fn it_correctly_compares_valid_and_valid_trace_id() { let _guard = 
TRACING_LOCK.lock(); // Create a tracing layer with the configured tracer - let provider = opentelemetry_sdk::trace::TracerProvider::builder() + let provider = opentelemetry_sdk::trace::SdkTracerProvider::builder() .with_simple_exporter(opentelemetry_stdout::SpanExporter::default()) .build(); - let tracer = provider.tracer_builder("noop").build(); + let tracer = provider.tracer("noop"); let telemetry = otel::layer().force_sampling().with_tracer(tracer); // Use the tracing subscriber `Registry`, or any other subscriber // that impls `LookupSpan` diff --git a/apollo-router/tests/common.rs b/apollo-router/tests/common.rs index 55f4871afe..2849504961 100644 --- a/apollo-router/tests/common.rs +++ b/apollo-router/tests/common.rs @@ -29,17 +29,15 @@ use opentelemetry::trace::SpanContext; use opentelemetry::trace::TraceContextExt; use opentelemetry::trace::TraceId; use opentelemetry::trace::TracerProvider as OtherTracerProvider; -use opentelemetry_otlp::HttpExporterBuilder; use opentelemetry_otlp::Protocol; -use opentelemetry_otlp::SpanExporterBuilder; use opentelemetry_otlp::WithExportConfig; use opentelemetry_proto::tonic::collector::metrics::v1::ExportMetricsServiceRequest; use opentelemetry_sdk::Resource; +use opentelemetry_sdk::runtime; use opentelemetry_sdk::testing::trace::NoopSpanExporter; use opentelemetry_sdk::trace::BatchConfigBuilder; -use opentelemetry_sdk::trace::BatchSpanProcessor; -use opentelemetry_sdk::trace::Config; -use opentelemetry_sdk::trace::TracerProvider; +use opentelemetry_sdk::trace::SdkTracerProvider; +use opentelemetry_sdk::trace::span_processor_with_async_runtime::BatchSpanProcessor; use opentelemetry_semantic_conventions::resource::SERVICE_NAME; use parking_lot::Mutex; use prost::Message; @@ -208,8 +206,8 @@ pub struct IntegrationTest { telemetry: Telemetry, extra_propagator: Telemetry, - pub _tracer_provider_client: TracerProvider, - pub _tracer_provider_subgraph: TracerProvider, + pub _tracer_provider_client: SdkTracerProvider, + pub 
_tracer_provider_subgraph: SdkTracerProvider, subscriber_client: Dispatch, _subgraph_overrides: HashMap, @@ -372,27 +370,25 @@ pub enum Telemetry { } impl Telemetry { - fn tracer_provider(&self, service_name: &str) -> TracerProvider { - let config = Config::default().with_resource(Resource::new(vec![KeyValue::new( - SERVICE_NAME, - service_name.to_string(), - )])); + fn tracer_provider(&self, service_name: &str) -> SdkTracerProvider { + let resource = Resource::builder() + .with_attributes(vec![KeyValue::new(SERVICE_NAME, service_name.to_string())]) + .build(); match self { Telemetry::Otlp { endpoint: Some(endpoint), - } => TracerProvider::builder() - .with_config(config) + } => SdkTracerProvider::builder() + .with_resource(resource) .with_span_processor( BatchSpanProcessor::builder( - SpanExporterBuilder::Http( - HttpExporterBuilder::default() - .with_endpoint(endpoint) - .with_protocol(Protocol::HttpBinary), - ) - .build_span_exporter() - .expect("otlp pipeline failed"), - opentelemetry_sdk::runtime::Tokio, + opentelemetry_otlp::SpanExporter::builder() + .with_http() + .with_endpoint(endpoint) + .with_protocol(Protocol::HttpBinary) + .build() + .expect("otlp pipeline failed"), + runtime::Tokio, ) .with_batch_config( BatchConfigBuilder::default() @@ -402,15 +398,14 @@ impl Telemetry { .build(), ) .build(), - Telemetry::Datadog => TracerProvider::builder() - .with_config(config) + Telemetry::Datadog => SdkTracerProvider::builder() .with_span_processor( BatchSpanProcessor::builder( opentelemetry_datadog::new_pipeline() .with_service_name(service_name) .build_exporter() .expect("datadog pipeline failed"), - opentelemetry_sdk::runtime::Tokio, + runtime::Tokio, ) .with_batch_config( BatchConfigBuilder::default() @@ -420,15 +415,14 @@ impl Telemetry { .build(), ) .build(), - Telemetry::Zipkin => TracerProvider::builder() - .with_config(config) + Telemetry::Zipkin => SdkTracerProvider::builder() + .with_resource(resource) .with_span_processor( 
BatchSpanProcessor::builder( - opentelemetry_zipkin::new_pipeline() - .with_service_name(service_name) - .init_exporter() + opentelemetry_zipkin::ZipkinExporter::builder() + .build() .expect("zipkin pipeline failed"), - opentelemetry_sdk::runtime::Tokio, + runtime::Tokio, ) .with_batch_config( BatchConfigBuilder::default() @@ -438,8 +432,8 @@ impl Telemetry { .build(), ) .build(), - Telemetry::None | Telemetry::Otlp { endpoint: None } => TracerProvider::builder() - .with_config(config) + Telemetry::None | Telemetry::Otlp { endpoint: None } => SdkTracerProvider::builder() + .with_resource(resource) .with_simple_exporter(NoopSpanExporter::default()) .build(), } @@ -450,22 +444,53 @@ impl Telemetry { match self { Telemetry::Datadog => { - // Get the existing PSR header if it exists. This is because the existing telemetry propagator doesn't support PSR properly yet. - // In testing we are manually setting the PSR header, and we don't want to override it. - let psr = request + // Preserve an explicitly set PSR header (tests often set this manually). + let explicit_psr = request .headers() .get("x-datadog-sampling-priority") .cloned(); + + + // We now have `opentelemetry-datadog`'s `agent-sampling` feature enabled. This causes + // context injection to derive Priority Sampling Rate (PSR) from the `trace_state.psr` + // flag instead of the sampled bit. Our test client spans typically don't set + // `trace_state.psr`, so without intervention the propagator would default PSR to + // AutoReject (0), a behavior change we don't intend for tests. To prevent this + // behavior change, we now manually derive a boolean PSR from the sampled bit when + // `trace_state.psr` is absent. 
+ let ctx = tracing::span::Span::current().context(); + let span = ctx.span(); + let span_context = span.span_context(); + let trace_state_has_psr = span_context + .trace_state() + .get("psr") + .is_some(); + let propagator = opentelemetry_datadog::DatadogPropagator::new(); propagator.inject_context( &ctx, &mut apollo_router::otel_compat::HeaderInjector(request.headers_mut()), ); - if let Some(psr) = psr { + // If PSR was explicitly set by the test, restore that value by overriding the + // propagator's injected value. + if let Some(psr) = explicit_psr { request .headers_mut() .insert("x-datadog-sampling-priority", psr); + } else if !trace_state_has_psr { + // Otherwise, only set the PSR when the span has no `tracestate.psr` set. + if request + .headers() + .contains_key("x-datadog-sampling-priority") + && span_context.is_valid() + { + let psr = if span_context.is_sampled() { "1" } else {"0"}; + request.headers_mut().insert( + "x-datadog-sampling-priority", + psr.parse().expect("x-datadog-sampling-priority must be a valid header value"), + ); + } } } Telemetry::Otlp { .. 
} => { @@ -713,7 +738,7 @@ impl IntegrationTest { } } - fn dispatch(tracer_provider: &TracerProvider) -> Dispatch { + fn dispatch(tracer_provider: &SdkTracerProvider) -> Dispatch { let tracer = tracer_provider.tracer("tracer"); let tracing_layer = tracing_opentelemetry::layer() .with_tracer(tracer) @@ -1541,16 +1566,12 @@ impl IntegrationTest { pub(crate) fn force_flush(&self) { let tracer_provider_client = self._tracer_provider_client.clone(); let tracer_provider_subgraph = self._tracer_provider_subgraph.clone(); - for r in tracer_provider_subgraph.force_flush() { - if let Err(e) = r { - eprintln!("failed to flush subgraph tracer: {e}"); - } + if let Err(e) = tracer_provider_subgraph.force_flush() { + eprintln!("failed to flush subgraph tracer: {e}"); } - for r in tracer_provider_client.force_flush() { - if let Err(e) = r { - eprintln!("failed to flush client tracer: {e}"); - } + if let Err(e) = tracer_provider_client.force_flush() { + eprintln!("failed to flush client tracer: {e}"); } } diff --git a/apollo-router/tests/fixtures/file_upload/default.router.yaml b/apollo-router/tests/fixtures/file_upload/default.router.yaml index 91fce6b5ba..d5f613303b 100644 --- a/apollo-router/tests/fixtures/file_upload/default.router.yaml +++ b/apollo-router/tests/fixtures/file_upload/default.router.yaml @@ -14,4 +14,4 @@ preview_file_uploads: max_file_size: 512kb max_files: 5 include_subgraph_errors: - all: true + all: true \ No newline at end of file diff --git a/apollo-router/tests/integration/lifecycle.rs b/apollo-router/tests/integration/lifecycle.rs index 81dc9743d7..2ddb392ecb 100644 --- a/apollo-router/tests/integration/lifecycle.rs +++ b/apollo-router/tests/integration/lifecycle.rs @@ -201,12 +201,27 @@ async fn test_shutdown_with_idle_connection() -> Result<(), BoxError> { Ok(()) } +fn strip_noisy_otel_shutdown_logs(s: &str) -> String { + let had_trailing_newline = s.ends_with('\n'); + let filtered = s + .lines() + .filter(|line| if 
line.trim().contains(r#""name":"MeterProvider.Drop""#) { false } else { true } ) + .collect::>() + .join("\n"); + if had_trailing_newline && !filtered.is_empty() { + format!("{filtered}\n") + } else { + filtered + } +} + async fn command_output(command: &mut Command) -> String { let output = command.output().await.unwrap(); let success = output.status.success(); let exit_code = output.status.code(); let stderr = String::from_utf8_lossy(&output.stderr); - let stdout = String::from_utf8_lossy(&output.stdout); + let stdout_raw = String::from_utf8_lossy(&output.stdout); + let stdout = strip_noisy_otel_shutdown_logs(&stdout_raw); format!( "Success: {success:?}\n\ Exit code: {exit_code:?}\n\ diff --git a/apollo-router/tests/integration/snapshots/integration_tests__integration__lifecycle__cli_config_experimental.snap b/apollo-router/tests/integration/snapshots/integration_tests__integration__lifecycle__cli_config_experimental.snap index 3addb79d26..910a31a525 100644 --- a/apollo-router/tests/integration/snapshots/integration_tests__integration__lifecycle__cli_config_experimental.snap +++ b/apollo-router/tests/integration/snapshots/integration_tests__integration__lifecycle__cli_config_experimental.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/integration/lifecycle.rs expression: "command_output(Command::new(IntegrationTest::router_location()).arg(\"config\").arg(\"experimental\").env(\"RUST_BACKTRACE\",\n\"\")).await" +snapshot_kind: text --- Success: true Exit code: Some(0) diff --git a/apollo-router/tests/integration/snapshots/integration_tests__integration__lifecycle__cli_config_preview.snap b/apollo-router/tests/integration/snapshots/integration_tests__integration__lifecycle__cli_config_preview.snap index ac8a0f02ea..e47525f953 100644 --- a/apollo-router/tests/integration/snapshots/integration_tests__integration__lifecycle__cli_config_preview.snap +++ b/apollo-router/tests/integration/snapshots/integration_tests__integration__lifecycle__cli_config_preview.snap 
@@ -1,6 +1,7 @@ --- source: apollo-router/tests/integration/lifecycle.rs -expression: "command_output(Command::new(IntegrationTest::router_location()).arg(\"config\").arg(\"preview\").env(\"RUST_BACKTRACE\",\n \"\")).await" +expression: "command_output(Command::new(IntegrationTest::router_location()).arg(\"config\").arg(\"preview\").env(\"RUST_BACKTRACE\",\n\"\")).await" +snapshot_kind: text --- Success: true Exit code: Some(0) diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__batch_send_header-2.snap b/apollo-router/tests/snapshots/apollo_otel_traces__batch_send_header-2.snap index d069ee55b1..df48c6a6a7 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__batch_send_header-2.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__batch_send_header-2.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__batch_send_header.snap b/apollo-router/tests/snapshots/apollo_otel_traces__batch_send_header.snap index d069ee55b1..df48c6a6a7 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__batch_send_header.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__batch_send_header.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: 
unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__batch_trace_id-2.snap b/apollo-router/tests/snapshots/apollo_otel_traces__batch_trace_id-2.snap index fec5016370..12f5f598ea 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__batch_trace_id-2.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__batch_trace_id-2.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__batch_trace_id.snap b/apollo-router/tests/snapshots/apollo_otel_traces__batch_trace_id.snap index fec5016370..12f5f598ea 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__batch_trace_id.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__batch_trace_id.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: 
telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__client_name-2.snap b/apollo-router/tests/snapshots/apollo_otel_traces__client_name-2.snap index 0791fe568d..9dc6ce7114 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__client_name-2.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__client_name-2.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__client_name.snap b/apollo-router/tests/snapshots/apollo_otel_traces__client_name.snap index 0791fe568d..9dc6ce7114 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__client_name.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__client_name.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git 
a/apollo-router/tests/snapshots/apollo_otel_traces__client_version-2.snap b/apollo-router/tests/snapshots/apollo_otel_traces__client_version-2.snap index eaf3ec9fd2..93de78aa8d 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__client_version-2.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__client_version-2.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__client_version.snap b/apollo-router/tests/snapshots/apollo_otel_traces__client_version.snap index eaf3ec9fd2..93de78aa8d 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__client_version.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__client_version.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__condition_else-2.snap b/apollo-router/tests/snapshots/apollo_otel_traces__condition_else-2.snap index 
0c45204836..a6ab795d7a 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__condition_else-2.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__condition_else-2.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__condition_else.snap b/apollo-router/tests/snapshots/apollo_otel_traces__condition_else.snap index 0c45204836..a6ab795d7a 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__condition_else.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__condition_else.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__condition_if-2.snap b/apollo-router/tests/snapshots/apollo_otel_traces__condition_if-2.snap index 2275283e09..418769b575 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__condition_if-2.snap +++ 
b/apollo-router/tests/snapshots/apollo_otel_traces__condition_if-2.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__condition_if.snap b/apollo-router/tests/snapshots/apollo_otel_traces__condition_if.snap index 2275283e09..418769b575 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__condition_if.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__condition_if.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__connector-2.snap b/apollo-router/tests/snapshots/apollo_otel_traces__connector-2.snap index 2ebc89bd7d..470156d671 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__connector-2.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__connector-2.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- 
resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__connector.snap b/apollo-router/tests/snapshots/apollo_otel_traces__connector.snap index 2ebc89bd7d..470156d671 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__connector.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__connector.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__connector_error-2.snap b/apollo-router/tests/snapshots/apollo_otel_traces__connector_error-2.snap index a39dd672c1..f522db517b 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__connector_error-2.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__connector_error-2.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - 
key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__connector_error.snap b/apollo-router/tests/snapshots/apollo_otel_traces__connector_error.snap index a39dd672c1..f522db517b 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__connector_error.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__connector_error.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__non_defer-2.snap b/apollo-router/tests/snapshots/apollo_otel_traces__non_defer-2.snap index 96a3eed566..62f912c907 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__non_defer-2.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__non_defer-2.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 
droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__non_defer.snap b/apollo-router/tests/snapshots/apollo_otel_traces__non_defer.snap index 96a3eed566..62f912c907 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__non_defer.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__non_defer.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__send_header-2.snap b/apollo-router/tests/snapshots/apollo_otel_traces__send_header-2.snap index d090a76689..38386a46d9 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__send_header-2.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__send_header-2.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__send_header.snap 
b/apollo-router/tests/snapshots/apollo_otel_traces__send_header.snap index d090a76689..38386a46d9 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__send_header.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__send_header.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__send_variable_value-2.snap b/apollo-router/tests/snapshots/apollo_otel_traces__send_variable_value-2.snap index a6daaabc5b..8f5255ae29 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__send_variable_value-2.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__send_variable_value-2.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__send_variable_value.snap b/apollo-router/tests/snapshots/apollo_otel_traces__send_variable_value.snap index a6daaabc5b..8f5255ae29 100644 --- 
a/apollo-router/tests/snapshots/apollo_otel_traces__send_variable_value.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__send_variable_value.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__trace_id-2.snap b/apollo-router/tests/snapshots/apollo_otel_traces__trace_id-2.snap index 96a3eed566..62f912c907 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__trace_id-2.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__trace_id-2.snap @@ -1,6 +1,7 @@ --- source: apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/snapshots/apollo_otel_traces__trace_id.snap b/apollo-router/tests/snapshots/apollo_otel_traces__trace_id.snap index 96a3eed566..62f912c907 100644 --- a/apollo-router/tests/snapshots/apollo_otel_traces__trace_id.snap +++ b/apollo-router/tests/snapshots/apollo_otel_traces__trace_id.snap @@ -1,6 +1,7 @@ --- source: 
apollo-router/tests/apollo_otel_traces.rs expression: report +snapshot_kind: text --- resourceSpans: - resource: @@ -23,7 +24,20 @@ resourceSpans: - key: apollo.user.agent value: stringValue: "[redacted]" + - key: service.name + value: + stringValue: unknown_service + - key: telemetry.sdk.language + value: + stringValue: rust + - key: telemetry.sdk.name + value: + stringValue: opentelemetry + - key: telemetry.sdk.version + value: + stringValue: 0.30.0 droppedAttributesCount: 0 + entityRefs: [] scopeSpans: - scope: name: apollo-router diff --git a/apollo-router/tests/telemetry_resource_tests.rs b/apollo-router/tests/telemetry_resource_tests.rs index 2c5c82ea27..6ced0f2bd3 100644 --- a/apollo-router/tests/telemetry_resource_tests.rs +++ b/apollo-router/tests/telemetry_resource_tests.rs @@ -53,7 +53,9 @@ fn test_empty() -> Result<(), Failed> { }; let resource = test_config.to_resource(); let service_name = resource - .get(opentelemetry_semantic_conventions::resource::SERVICE_NAME.into()) + .get(&Key::from_static_str( + opentelemetry_semantic_conventions::resource::SERVICE_NAME, + )) .unwrap(); assert!( service_name @@ -63,17 +65,23 @@ fn test_empty() -> Result<(), Failed> { ); assert!( resource - .get(opentelemetry_semantic_conventions::resource::SERVICE_NAMESPACE.into()) + .get(&Key::from_static_str( + opentelemetry_semantic_conventions::resource::SERVICE_NAMESPACE + )) .is_none() ); assert_eq!( - resource.get(opentelemetry_semantic_conventions::resource::SERVICE_VERSION.into()), + resource.get(&Key::from_static_str( + opentelemetry_semantic_conventions::resource::SERVICE_VERSION + )), Some(std::env!("CARGO_PKG_VERSION").into()) ); assert!( resource - .get(opentelemetry_semantic_conventions::resource::PROCESS_EXECUTABLE_NAME.into()) + .get(&Key::from_static_str( + opentelemetry_semantic_conventions::resource::PROCESS_EXECUTABLE_NAME + )) .expect("expected excutable name") .as_str() .contains("telemetry_resources") @@ -102,15 +110,19 @@ fn test_config_resources() -> 
Result<(), Failed> { }; let resource = test_config.to_resource(); assert_eq!( - resource.get(opentelemetry_semantic_conventions::resource::SERVICE_NAME.into()), + resource.get(&Key::from_static_str( + opentelemetry_semantic_conventions::resource::SERVICE_NAME + )), Some("override-service-name".into()) ); assert_eq!( - resource.get(opentelemetry_semantic_conventions::resource::SERVICE_NAMESPACE.into()), + resource.get(&Key::from_static_str( + opentelemetry_semantic_conventions::resource::SERVICE_NAMESPACE + )), Some("override-namespace".into()) ); assert_eq!( - resource.get(Key::from_static_str("extra-key")), + resource.get(&Key::from_static_str("extra-key")), Some("extra-value".into()) ); Ok(()) @@ -124,11 +136,15 @@ fn test_service_name_service_namespace() -> Result<(), Failed> { }; let resource = test_config.to_resource(); assert_eq!( - resource.get(opentelemetry_semantic_conventions::resource::SERVICE_NAME.into()), + resource.get(&Key::from_static_str( + opentelemetry_semantic_conventions::resource::SERVICE_NAME + )), Some("override-service-name".into()) ); assert_eq!( - resource.get(opentelemetry_semantic_conventions::resource::SERVICE_NAMESPACE.into()), + resource.get(&Key::from_static_str( + opentelemetry_semantic_conventions::resource::SERVICE_NAMESPACE + )), Some("override-namespace".into()) ); Ok(()) @@ -150,7 +166,9 @@ fn test_service_name_override() -> Result<(), Failed> { resources: Default::default(), } .to_resource() - .get(opentelemetry_semantic_conventions::resource::SERVICE_NAME.into()) + .get(&Key::from_static_str( + opentelemetry_semantic_conventions::resource::SERVICE_NAME + )) .unwrap() .as_str() .starts_with("unknown_service:telemetry_resources-") @@ -166,7 +184,9 @@ fn test_service_name_override() -> Result<(), Failed> { )]), } .to_resource() - .get(opentelemetry_semantic_conventions::resource::SERVICE_NAME.into()), + .get(&Key::from_static_str( + opentelemetry_semantic_conventions::resource::SERVICE_NAME + )), Some("yaml-resource".into()) ); 
@@ -180,7 +200,9 @@ fn test_service_name_override() -> Result<(), Failed> { )]), } .to_resource() - .get(opentelemetry_semantic_conventions::resource::SERVICE_NAME.into()), + .get(&Key::from_static_str( + opentelemetry_semantic_conventions::resource::SERVICE_NAME + )), Some("yaml-service-name".into()) ); @@ -198,7 +220,9 @@ fn test_service_name_override() -> Result<(), Failed> { )]), } .to_resource() - .get(opentelemetry_semantic_conventions::resource::SERVICE_NAME.into()), + .get(&Key::from_static_str( + opentelemetry_semantic_conventions::resource::SERVICE_NAME + )), Some("env-resource".into()) ); @@ -216,7 +240,9 @@ fn test_service_name_override() -> Result<(), Failed> { )]), } .to_resource() - .get(opentelemetry_semantic_conventions::resource::SERVICE_NAME.into()), + .get(&Key::from_static_str( + opentelemetry_semantic_conventions::resource::SERVICE_NAME + )), Some("env-service-name".into()) );