Skip to content

Commit

Permalink
[Refactor] Complete metrics overhaul
Browse files Browse the repository at this point in the history
Metrics got an entire overhaul. Instead of relying on a broken
prometheus library to publish our metrics, we now use the
`tracing` library together with OpenTelemetry, bind the two
together, and then publish the result into a prometheus library.

Metrics are now mostly defined through derive macros. This means
that a struct can declare what it wants to export, along with a
help text. The library will decide whether it is able to export it.

Tracing now works by calling `.publish()` on the parent structs.
Those structs need to call `.publish()` on all the child members
they wish to publish data about. If a "group" is requested, use
the `group!()` macro, which under the hood calls `tracing::span`
with some special labels. At primitive layers, it will call the
`publish!()` macro, which will call the `tracing::event!()` macro
under the hood with some special fields set. A custom
`tracing::Subscriber` will intercept all the events and spans
and convert them into a JSON-like object. This object can then
be exported as real JSON or encoded into other formats like
otel/prometheus.

closes: #1164, #650, #384, #209
towards: #206
  • Loading branch information
allada committed Jul 25, 2024
1 parent 3574149 commit a34447d
Show file tree
Hide file tree
Showing 79 changed files with 2,557 additions and 1,468 deletions.
8 changes: 8 additions & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ rust_binary(
deps = [
"//nativelink-config",
"//nativelink-error",
"//nativelink-metric",
"//nativelink-metric-collector",
"//nativelink-proto",
"//nativelink-scheduler",
"//nativelink-service",
Expand All @@ -28,16 +30,22 @@ rust_binary(
"@crates//:futures",
"@crates//:hyper",
"@crates//:mimalloc",
"@crates//:opentelemetry",
"@crates//:opentelemetry-prometheus",
"@crates//:opentelemetry_sdk",
"@crates//:parking_lot",
"@crates//:prometheus",
"@crates//:prometheus-client",
"@crates//:rustls-pemfile",
"@crates//:scopeguard",
"@crates//:serde_json",
"@crates//:serde_json5",
"@crates//:tokio",
"@crates//:tokio-rustls",
"@crates//:tonic",
"@crates//:tower",
"@crates//:tracing",
"@crates//:tracing-subscriber",
],
)

Expand Down
200 changes: 200 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ nativelink-service = { path = "nativelink-service" }
nativelink-store = { path = "nativelink-store" }
nativelink-util = { path = "nativelink-util" }
nativelink-worker = { path = "nativelink-worker" }
nativelink-metric = { path = "nativelink-metric" }
nativelink-metric-collector = { path = "nativelink-metric-collector" }

async-lock = "3.3.0"
axum = "0.6.20"
Expand All @@ -58,3 +60,9 @@ tokio-rustls = "0.25.0"
tonic = { version = "0.11.0", features = ["gzip", "tls"] }
tower = "0.4.13"
tracing = "0.1.40"
opentelemetry_sdk = { version = "0.23.0", features = ["metrics"] }
tracing-subscriber = "0.3.18"
opentelemetry = { version = "0.23.0", features = ["metrics"] }
prometheus = "0.13.4"
opentelemetry-prometheus = "0.16.0"
serde_json = "1.0.120"
1 change: 1 addition & 0 deletions nativelink-error/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ rust_library(
],
visibility = ["//visibility:public"],
deps = [
"//nativelink-metric",
"//nativelink-proto",
"@crates//:hex",
"@crates//:prost",
Expand Down
1 change: 1 addition & 0 deletions nativelink-error/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ autobenches = false

[dependencies]
nativelink-proto = { path = "../nativelink-proto" }
nativelink-metric = { path = "../nativelink-metric" }

hex = "0.4.3"
prost = "0.12.4"
Expand Down
Loading

0 comments on commit a34447d

Please sign in to comment.