Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,12 @@ Options:
[env: OTLP_SERVICE_NAME=]
[default: text-embeddings-inference.server]
--prometheus-port <PORT>
The Prometheus metrics port to listen on
[env: PROMETHEUS_PORT=]
[default: 9000]
--cors-allow-origin <CORS_ALLOW_ORIGIN>
Unused for gRPC servers
Expand Down
3 changes: 2 additions & 1 deletion router/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ pub async fn run(
api_key: Option<String>,
otlp_endpoint: Option<String>,
otlp_service_name: String,
prometheus_port: u16,
cors_allow_origin: Option<Vec<String>>,
) -> Result<()> {
let model_id_path = Path::new(&model_id);
Expand Down Expand Up @@ -314,7 +315,7 @@ pub async fn run(
}
};

let prom_builder = prometheus::prometheus_builer(info.max_input_length)?;
let prom_builder = prometheus::prometheus_builer(addr, prometheus_port, info.max_input_length)?;

#[cfg(all(feature = "grpc", feature = "http"))]
compile_error!("Features `http` and `grpc` cannot be enabled at the same time.");
Expand Down
5 changes: 5 additions & 0 deletions router/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,10 @@ struct Args {
#[clap(default_value = "text-embeddings-inference.server", long, env)]
otlp_service_name: String,

/// The Prometheus metrics port to listen on.
#[clap(default_value = "9000", long, short, env)]
prometheus_port: u16,

/// Unused for gRPC servers
#[clap(long, env)]
cors_allow_origin: Option<Vec<String>>,
Expand Down Expand Up @@ -227,6 +231,7 @@ async fn main() -> Result<()> {
args.api_key,
args.otlp_endpoint,
args.otlp_service_name,
args.prometheus_port,
args.cors_allow_origin,
)
.await?;
Expand Down
12 changes: 11 additions & 1 deletion router/src/prometheus.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
use std::net::SocketAddr;

use metrics_exporter_prometheus::{BuildError, Matcher, PrometheusBuilder};

pub(crate) fn prometheus_builer(max_input_length: usize) -> Result<PrometheusBuilder, BuildError> {
pub(crate) fn prometheus_builer(
addr: SocketAddr,
port: u16,
max_input_length: usize,
) -> Result<PrometheusBuilder, BuildError> {
let mut addr = addr;
addr.set_port(port);

// Duration buckets
let duration_matcher = Matcher::Suffix(String::from("duration"));
let n_duration_buckets = 35;
Expand Down Expand Up @@ -30,6 +39,7 @@ pub(crate) fn prometheus_builer(max_input_length: usize) -> Result<PrometheusBui

// Prometheus handler
PrometheusBuilder::new()
.with_http_listener(addr)
.set_buckets_for_metric(duration_matcher, &duration_buckets)?
.set_buckets_for_metric(input_length_matcher, &input_length_buckets)?
.set_buckets_for_metric(batch_size_matcher, &batch_size_buckets)?
Expand Down