Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .changesets/feat_otel_metrics.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
### Implement metrics for mcp tool and operation counts and durations - @swcollard PR #297

This PR adds metrics to count and measure request duration to events throughout the MCP server

* apollo.mcp.operation.duration
* apollo.mcp.operation.count
* apollo.mcp.tool.duration
* apollo.mcp.tool.count
* apollo.mcp.initialize.count
* apollo.mcp.list_tools.count
* apollo.mcp.get_info.count
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/apollo-mcp-server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ url.workspace = true
chrono = { version = "0.4.41", default-features = false, features = ["now"] }
figment = { version = "0.10.19", features = ["test"] }
insta.workspace = true
opentelemetry_sdk = { version = "0.30.0", features = ["testing"] }
mockito = "1.7.0"
rstest.workspace = true
tokio.workspace = true
Expand Down
119 changes: 117 additions & 2 deletions crates/apollo-mcp-server/src/graphql.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Execute GraphQL operations from an MCP tool

use crate::errors::McpError;
use opentelemetry::{KeyValue, global};
use reqwest::header::{HeaderMap, HeaderValue};
use reqwest_middleware::{ClientBuilder, Extension};
use reqwest_tracing::{OtelName, TracingMiddleware};
Expand Down Expand Up @@ -38,6 +39,9 @@ pub trait Executable {
/// Execute as a GraphQL operation using the endpoint and headers
#[tracing::instrument(skip(self))]
async fn execute(&self, request: Request<'_>) -> Result<CallToolResult, McpError> {
let meter = global::meter("apollo.mcp");
Copy link

Copilot AI Aug 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The meter and histogram instruments are created on every GraphQL operation. Consider creating these once during initialization and reusing them to avoid repeated allocations and improve performance.

Copilot uses AI. Check for mistakes.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this might warrant some merit, if we can manage it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added lazy loading into meter.rs and updated all references to call get_meter() to only initialize this once

let start = std::time::Instant::now();
let mut op_id: Option<String> = None;
let client_metadata = serde_json::json!({
"name": "mcp",
"version": std::env!("CARGO_PKG_VERSION")
Expand All @@ -59,6 +63,7 @@ pub trait Executable {
"clientLibrary": client_metadata,
}),
);
op_id = Some(id.to_string());
} else {
let OperationDetails {
query,
Expand All @@ -74,6 +79,7 @@ pub trait Executable {
);

if let Some(op_name) = operation_name {
op_id = Some(op_name.clone());
request_body.insert(String::from("operationName"), Value::String(op_name));
}
}
Expand All @@ -83,7 +89,7 @@ pub trait Executable {
.with(TracingMiddleware::default())
.build();

client
let result = client
.post(request.endpoint.as_str())
.headers(self.headers(&request.headers))
.body(Value::Object(request_body).to_string())
Expand Down Expand Up @@ -116,7 +122,39 @@ pub trait Executable {
.filter(|value| !matches!(value, Value::Null))
.is_none(),
),
})
});

// Record response metrics
let attributes = vec![
KeyValue::new(
"success",
result.is_ok()
&& result
.as_ref()
.ok()
.map(|r| r.is_error != Some(true))
.unwrap_or(false),
Copy link

Copilot AI Aug 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The success determination logic is duplicated from running.rs. Consider extracting this complex boolean logic into a shared helper function to reduce code duplication and improve maintainability.

Copilot uses AI. Check for mistakes.
),
KeyValue::new("operation.id", op_id.unwrap_or("unknown".to_string())),
KeyValue::new(
"operation.type",
if self.persisted_query_id().is_some() {
"persisted_query"
} else {
"operation"
},
),
];
meter
.f64_histogram("apollo.mcp.operation.duration")
.build()
.record(start.elapsed().as_millis() as f64, &attributes);
meter
.u64_counter("apollo.mcp.operation.count")
.build()
.add(1, &attributes);

result
}
}

Expand All @@ -125,6 +163,11 @@ mod test {
use crate::errors::McpError;
use crate::graphql::{Executable, OperationDetails, Request};
use http::{HeaderMap, HeaderValue};
use opentelemetry::global;
use opentelemetry_sdk::metrics::data::{AggregatedMetrics, MetricData};
use opentelemetry_sdk::metrics::{
InMemoryMetricExporter, MeterProviderBuilder, PeriodicReader,
};
use serde_json::{Map, Value, json};
use url::Url;

Expand Down Expand Up @@ -364,4 +407,76 @@ mod test {
assert!(result.is_error.is_some());
assert!(result.is_error.unwrap());
}

#[tokio::test]
async fn validate_metric_attributes_success_false() {
// given
let exporter = InMemoryMetricExporter::default();
let meter_provider = MeterProviderBuilder::default()
.with_reader(PeriodicReader::builder(exporter.clone()).build())
.build();
global::set_meter_provider(meter_provider.clone());

let mut server = mockito::Server::new_async().await;
let url = Url::parse(server.url().as_str()).unwrap();
let mock_request = Request {
input: json!({}),
endpoint: &url,
headers: HeaderMap::new(),
};

server
.mock("POST", "/")
.with_status(200)
.with_header("content-type", "application/json")
.with_body(json!({ "data": null, "errors": ["an error"] }).to_string())
.expect(1)
.create_async()
.await;

// when
let test_executable = TestExecutableWithPersistedQueryId {};
let result = test_executable.execute(mock_request).await.unwrap();

// then
assert!(result.is_error.is_some());
assert!(result.is_error.unwrap());

// Retrieve the finished metrics from the exporter
let finished_metrics = exporter.get_finished_metrics().unwrap();

// validate the attributes of the apollo.mcp.operation.count counter
for resource_metrics in finished_metrics {
if let Some(scope_metrics) = resource_metrics
.scope_metrics()
.find(|scope_metrics| scope_metrics.scope().name() == "apollo.mcp")
{
for metric in scope_metrics.metrics() {
if metric.name() == "apollo.mcp.operation.count" {
if let AggregatedMetrics::U64(MetricData::Sum(data)) = metric.data() {
for point in data.data_points() {
let attributes = point.attributes();
let mut attr_map = std::collections::HashMap::new();
for kv in attributes {
attr_map.insert(kv.key.as_str(), kv.value.as_str());
}
assert_eq!(
attr_map.get("operation.id").map(|s| s.as_ref()),
Some("mock_operation")
);
assert_eq!(
attr_map.get("operation.type").map(|s| s.as_ref()),
Some("persisted_query")
);
assert_eq!(
attr_map.get("success"),
Some(&std::borrow::Cow::Borrowed("false"))
);
}
}
}
}
}
}
}
}
58 changes: 49 additions & 9 deletions crates/apollo-mcp-server/src/server/states/running.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ use std::sync::Arc;

use apollo_compiler::{Schema, validation::Valid};
use headers::HeaderMapExt as _;
use opentelemetry::Context;
use opentelemetry::trace::FutureExt;
use opentelemetry::{Context, KeyValue, global};
use reqwest::header::HeaderMap;
use rmcp::model::Implementation;
use rmcp::{
Expand Down Expand Up @@ -177,6 +177,11 @@ impl ServerHandler for Running {
_request: InitializeRequestParam,
context: RequestContext<RoleServer>,
) -> Result<InitializeResult, McpError> {
let meter = global::meter("apollo.mcp");
meter
.u64_counter("apollo.mcp.initialize.count")
.build()
.add(1, &[]);
// TODO: how to remove these?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This leftover TODO caught my eye. 😂

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this was from #117
@nicholascioli Do you know what we'd need to clean this up?

let mut peers = self.peers.write().await;
peers.push(context.peer);
Expand All @@ -189,25 +194,28 @@ impl ServerHandler for Running {
request: CallToolRequestParam,
context: RequestContext<RoleServer>,
) -> Result<CallToolResult, McpError> {
let result = match request.name.as_ref() {
let meter = global::meter("apollo.mcp");
let start = std::time::Instant::now();
let tool_name = request.name.clone();
let result = match tool_name.as_ref() {
INTROSPECT_TOOL_NAME => {
self.introspect_tool
.as_ref()
.ok_or(tool_not_found(&request.name))?
.ok_or(tool_not_found(&tool_name))?
.execute(convert_arguments(request)?)
.await
}
SEARCH_TOOL_NAME => {
self.search_tool
.as_ref()
.ok_or(tool_not_found(&request.name))?
.ok_or(tool_not_found(&tool_name))?
.execute(convert_arguments(request)?)
.await
}
EXPLORER_TOOL_NAME => {
self.explorer_tool
.as_ref()
.ok_or(tool_not_found(&request.name))?
.ok_or(tool_not_found(&tool_name))?
.execute(convert_arguments(request)?)
.await
}
Expand All @@ -222,7 +230,7 @@ impl ServerHandler for Running {

self.execute_tool
.as_ref()
.ok_or(tool_not_found(&request.name))?
.ok_or(tool_not_found(&tool_name))?
.execute(graphql::Request {
input: Value::from(request.arguments.clone()),
endpoint: &self.endpoint,
Expand All @@ -233,7 +241,7 @@ impl ServerHandler for Running {
VALIDATE_TOOL_NAME => {
self.validate_tool
.as_ref()
.ok_or(tool_not_found(&request.name))?
.ok_or(tool_not_found(&tool_name))?
.execute(convert_arguments(request)?)
.await
}
Expand All @@ -260,8 +268,8 @@ impl ServerHandler for Running {
.lock()
.await
.iter()
.find(|op| op.as_ref().name == request.name)
.ok_or(tool_not_found(&request.name))?
.find(|op| op.as_ref().name == tool_name)
.ok_or(tool_not_found(&tool_name))?
.execute(graphql_request)
.with_context(Context::current())
.await
Expand All @@ -273,6 +281,28 @@ impl ServerHandler for Running {
health_check.record_rejection();
}

let attributes = vec![
Copy link

Copilot AI Aug 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The attributes vector is created on every tool call. Consider creating the meter and histogram instruments once during initialization and reusing them to avoid repeated allocations.

Copilot uses AI. Check for mistakes.
KeyValue::new(
"success",
result.is_ok()
&& result
.as_ref()
.ok()
.map(|r| r.is_error != Some(true))
.unwrap_or(false),
Copy link

Copilot AI Aug 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The success determination logic is complex and duplicated between files. The nested map/unwrap_or chain could be simplified with a helper function to improve readability and maintainability.

Copilot uses AI. Check for mistakes.
),
KeyValue::new("tool_name", tool_name),
];
// Record response time and status
meter
.f64_histogram("apollo.mcp.tool.duration")
.build()
.record(start.elapsed().as_millis() as f64, &attributes);
meter
.u64_counter("apollo.mcp.tool.count")
.build()
.add(1, &attributes);

result
}

Expand All @@ -282,6 +312,11 @@ impl ServerHandler for Running {
_request: Option<PaginatedRequestParam>,
_context: RequestContext<RoleServer>,
) -> Result<ListToolsResult, McpError> {
let meter = global::meter("apollo.mcp");
meter
.u64_counter("apollo.mcp.list_tools.count")
.build()
.add(1, &[]);
Ok(ListToolsResult {
next_cursor: None,
tools: self
Expand All @@ -300,6 +335,11 @@ impl ServerHandler for Running {
}

fn get_info(&self) -> ServerInfo {
let meter = global::meter("apollo.mcp");
meter
.u64_counter("apollo.mcp.get_info.count")
.build()
.add(1, &[]);
ServerInfo {
server_info: Implementation {
name: "Apollo MCP Server".to_string(),
Expand Down