diff --git a/README.md b/README.md index 520ad8d0..0ff17fe0 100644 --- a/README.md +++ b/README.md @@ -307,6 +307,12 @@ Options: [env: OTLP_SERVICE_NAME=] [default: text-embeddings-inference.server] + --prometheus-port + The Prometheus metrics port to listen on + + [env: PROMETHEUS_PORT=] + [default: 9000] + --cors-allow-origin Unused for gRPC servers diff --git a/router/src/lib.rs b/router/src/lib.rs index 49e0581d..66b1e240 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -63,6 +63,7 @@ pub async fn run( api_key: Option, otlp_endpoint: Option, otlp_service_name: String, + prometheus_port: u16, cors_allow_origin: Option>, ) -> Result<()> { let model_id_path = Path::new(&model_id); @@ -314,7 +315,7 @@ pub async fn run( } }; - let prom_builder = prometheus::prometheus_builer(info.max_input_length)?; + let prom_builder = prometheus::prometheus_builer(addr, prometheus_port, info.max_input_length)?; #[cfg(all(feature = "grpc", feature = "http"))] compile_error!("Features `http` and `grpc` cannot be enabled at the same time."); diff --git a/router/src/main.rs b/router/src/main.rs index e4a902d6..19b8ca13 100644 --- a/router/src/main.rs +++ b/router/src/main.rs @@ -164,6 +164,10 @@ struct Args { #[clap(default_value = "text-embeddings-inference.server", long, env)] otlp_service_name: String, + /// The Prometheus metrics port to listen on. 
+ #[clap(default_value = "9000", long, short, env)] + prometheus_port: u16, + /// Unused for gRPC servers #[clap(long, env)] cors_allow_origin: Option>, @@ -227,6 +231,7 @@ async fn main() -> Result<()> { args.api_key, args.otlp_endpoint, args.otlp_service_name, + args.prometheus_port, args.cors_allow_origin, ) .await?; diff --git a/router/src/prometheus.rs b/router/src/prometheus.rs index bded390f..d011efba 100644 --- a/router/src/prometheus.rs +++ b/router/src/prometheus.rs @@ -1,6 +1,15 @@ +use std::net::SocketAddr; + use metrics_exporter_prometheus::{BuildError, Matcher, PrometheusBuilder}; -pub(crate) fn prometheus_builer(max_input_length: usize) -> Result { +pub(crate) fn prometheus_builer( + addr: SocketAddr, + port: u16, + max_input_length: usize, +) -> Result { + let mut addr = addr; + addr.set_port(port); + // Duration buckets let duration_matcher = Matcher::Suffix(String::from("duration")); let n_duration_buckets = 35; @@ -30,6 +39,7 @@ pub(crate) fn prometheus_builer(max_input_length: usize) -> Result