Skip to content
9 changes: 9 additions & 0 deletions DOCKER.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,21 @@ Dockerfile: https://github.com/Couchbase-Ecosystem/mcp-server-couchbase/blob/mai
- Upsert a document by ID to a specified scope and collection
- Delete a document by ID from a specified scope and collection
- Run a [SQL++ query](https://www.couchbase.com/sqlplusplus/) on a specified scope
- Queries are automatically scoped to the specified bucket and scope, so use collection names directly (e.g., use `SELECT * FROM users` instead of `SELECT * FROM bucket.scope.users`)
- There is an option in the MCP server, `CB_MCP_READ_ONLY_QUERY_MODE` that is set to true by default to disable running SQL++ queries that change the data or the underlying collection structure. Note that the documents can still be updated by ID.
- Get the status of the MCP server
- Check the cluster credentials by connecting to the cluster
- List all indexes in the cluster with their definitions, with optional filtering by bucket, scope, collection and index name.
- Get index recommendations from Couchbase Index Advisor for a given SQL++ query to optimize query performance
- Get cluster health status and list of all running services
- Query performance analysis tools using:
- Get longest running queries by average service time
- Get most frequently executed queries
- Get queries with the largest response sizes
- Get queries with the largest result counts
- Get queries that use a primary index (potential performance concern)
- Get queries that don't use a covering index
- Get queries that are not selective (index scans return many more documents than final result)

## Usage

Expand Down
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ An [MCP](https://modelcontextprotocol.io/) server implementation of Couchbase th
- List all indexes in the cluster with their definitions, with optional filtering by bucket, scope, collection and index name.
- Get index recommendations from Couchbase Index Advisor for a given SQL++ query to optimize query performance
- Get cluster health status and list of all running services
- Query performance analysis tools:
- Get longest running queries by average service time
- Get most frequently executed queries
- Get queries with the largest response sizes
- Get queries with the largest result counts
- Get queries that use a primary index (potential performance concern)
- Get queries that don't use a covering index
- Get queries that are not selective (index scans return many more documents than final result)

## Prerequisites

Expand Down
21 changes: 21 additions & 0 deletions src/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@

# Query tools
from .query import (
get_longest_running_queries,
get_most_frequent_queries,
get_queries_not_selective,
get_queries_not_using_covering_index,
get_queries_using_primary_index,
get_queries_with_large_result_count,
get_queries_with_largest_response_sizes,
get_schema_for_collection,
run_sql_plus_plus_query,
)
Expand Down Expand Up @@ -47,6 +54,13 @@
get_index_advisor_recommendations,
list_indexes,
get_cluster_health_and_services,
get_queries_not_selective,
get_queries_not_using_covering_index,
get_queries_using_primary_index,
get_queries_with_large_result_count,
get_queries_with_largest_response_sizes,
get_longest_running_queries,
get_most_frequent_queries,
]

__all__ = [
Expand All @@ -65,6 +79,13 @@
"get_index_advisor_recommendations",
"list_indexes",
"get_cluster_health_and_services",
"get_queries_not_selective",
"get_queries_not_using_covering_index",
"get_queries_using_primary_index",
"get_queries_with_large_result_count",
"get_queries_with_largest_response_sizes",
"get_longest_running_queries",
"get_most_frequent_queries",
# Convenience
"ALL_TOOLS",
]
256 changes: 255 additions & 1 deletion src/tools/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ def run_sql_plus_plus_query(
raise


# Don't expose this function to the MCP server until we have a use case
def run_cluster_query(ctx: Context, query: str, **kwargs: Any) -> list[dict[str, Any]]:
"""Run a query on the cluster object and return the results as a list of JSON objects."""

Expand All @@ -104,3 +103,258 @@ def run_cluster_query(ctx: Context, query: str, **kwargs: Any) -> list[dict[str,
except Exception as e:
logger.error(f"Error running query: {e}")
raise


def _run_query_tool_with_empty_message(
ctx: Context,
query: str,
*,
limit: int,
empty_message: str,
extra_payload: dict[str, Any] | None = None,
**query_kwargs: Any,
) -> list[dict[str, Any]]:
"""Execute a cluster query with a consistent empty-result response."""
results = run_cluster_query(ctx, query, limit=limit, **query_kwargs)

if results:
return results

payload: dict[str, Any] = {"message": empty_message, "results": []}
if extra_payload:
payload.update(extra_payload)
return [payload]


def get_longest_running_queries(ctx: Context, limit: int = 10) -> list[dict[str, Any]]:
"""Get the N longest running queries from the system:completed_requests catalog.

Args:
limit: Number of queries to return (default: 10)

Returns:
List of queries with their average service time and count
"""
query = """
SELECT statement,
DURATION_TO_STR(avgServiceTime) AS avgServiceTime,
COUNT(1) AS queries
FROM system:completed_requests
WHERE UPPER(statement) NOT LIKE 'INFER %'
AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
AND UPPER(statement) NOT LIKE 'CREATE PRIMARY INDEX%'
AND UPPER(statement) NOT LIKE '% SYSTEM:%'
GROUP BY statement
LETTING avgServiceTime = AVG(STR_TO_DURATION(serviceTime))
ORDER BY avgServiceTime DESC
LIMIT $limit
"""

return _run_query_tool_with_empty_message(
ctx,
query,
limit=limit,
empty_message=(
"No completed queries were available to calculate longest running queries."
),
)


def get_most_frequent_queries(ctx: Context, limit: int = 10) -> list[dict[str, Any]]:
"""Get the N most frequent queries from the system:completed_requests catalog.

Args:
limit: Number of queries to return (default: 10)

Returns:
List of queries with their frequency count
"""
query = """
SELECT statement,
COUNT(1) AS queries
FROM system:completed_requests
WHERE UPPER(statement) NOT LIKE 'INFER %'
Copy link

@kaminijagtiani kaminijagtiani Nov 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For every query that we run via UI, we also run explain and advise on it.
So you might also need to filter out explain and advise for top most frequent queries

AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
AND UPPER(statement) NOT LIKE 'CREATE PRIMARY INDEX%'
AND UPPER(statement) NOT LIKE 'EXPLAIN %'
AND UPPER(statement) NOT LIKE 'ADVISE %'
AND UPPER(statement) NOT LIKE '% SYSTEM:%'
GROUP BY statement
LETTING queries = COUNT(1)
ORDER BY queries DESC
LIMIT $limit
"""

return _run_query_tool_with_empty_message(
ctx,
query,
limit=limit,
empty_message=(
"No completed queries were available to calculate most frequent queries."
),
)


def get_queries_with_largest_response_sizes(
ctx: Context, limit: int = 10
) -> list[dict[str, Any]]:
"""Get queries with the largest response sizes from the system:completed_requests catalog.

Args:
limit: Number of queries to return (default: 10)

Returns:
List of queries with their average result size in bytes, KB, and MB
"""
query = """
SELECT statement,
avgResultSize AS avgResultSizeBytes,
(avgResultSize / 1000) AS avgResultSizeKB,
(avgResultSize / 1000000) AS avgResultSizeMB,
COUNT(1) AS queries
FROM system:completed_requests
WHERE UPPER(statement) NOT LIKE 'INFER %'
AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
AND UPPER(statement) NOT LIKE 'CREATE PRIMARY INDEX%'
AND UPPER(statement) NOT LIKE '% SYSTEM:%'
GROUP BY statement
LETTING avgResultSize = AVG(resultSize)
ORDER BY avgResultSize DESC
LIMIT $limit
"""

return _run_query_tool_with_empty_message(
ctx,
query,
limit=limit,
empty_message=(
"No completed queries were available to calculate response sizes."
),
)


def get_queries_with_large_result_count(
ctx: Context, limit: int = 10
) -> list[dict[str, Any]]:
"""Get queries with the largest result counts from the system:completed_requests catalog.

Args:
limit: Number of queries to return (default: 10)

Returns:
List of queries with their average result count
"""
query = """
SELECT statement,
avgResultCount,
COUNT(1) AS queries
FROM system:completed_requests
WHERE UPPER(statement) NOT LIKE 'INFER %' AND
UPPER(statement) NOT LIKE 'CREATE INDEX%' AND
UPPER(statement) NOT LIKE 'CREATE PRIMARY INDEX%' AND
UPPER(statement) NOT LIKE '% SYSTEM:%'
GROUP BY statement
LETTING avgResultCount = AVG(resultCount)
ORDER BY avgResultCount DESC
LIMIT $limit
"""

return _run_query_tool_with_empty_message(
ctx,
query,
limit=limit,
empty_message=(
"No completed queries were available to calculate result counts."
),
)


def get_queries_using_primary_index(
ctx: Context, limit: int = 10
) -> list[dict[str, Any]]:
"""Get queries that use a primary index from the system:completed_requests catalog.

Args:
limit: Number of queries to return (default: 10)

Returns:
List of queries that use primary indexes, ordered by result count
"""
query = """
SELECT *
FROM system:completed_requests
WHERE phaseCounts.`primaryScan` IS NOT MISSING
AND UPPER(statement) NOT LIKE '% SYSTEM:%'
Comment on lines +286 to +287
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

For consistency with other query-analyzing tools in this file, consider filtering out INFER and CREATE INDEX statements. This will help focus the results on application-level queries.

Suggested change
WHERE phaseCounts.`primaryScan` IS NOT MISSING
AND UPPER(statement) NOT LIKE '% SYSTEM:%'
WHERE phaseCounts.`primaryScan` IS NOT MISSING
AND UPPER(statement) NOT LIKE 'INFER %'
AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
AND UPPER(statement) NOT LIKE '% SYSTEM:%'

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you are adding INFER and SYSTEM, might as well add explain and advise as well- for consistency

ORDER BY resultCount DESC
LIMIT $limit
"""

return _run_query_tool_with_empty_message(
ctx,
query,
limit=limit,
empty_message=(
"No queries using the primary index were found in system:completed_requests."
),
)


def get_queries_not_using_covering_index(
ctx: Context, limit: int = 10
) -> list[dict[str, Any]]:
"""Get queries that don't use a covering index from the system:completed_requests catalog.

Args:
limit: Number of queries to return (default: 10)

Returns:
List of queries that perform index scans but also require fetches (not covering)
"""
query = """
SELECT *
FROM system:completed_requests
WHERE phaseCounts.`indexScan` IS NOT MISSING
AND phaseCounts.`fetch` IS NOT MISSING
AND UPPER(statement) NOT LIKE '% SYSTEM:%'
Comment on lines +316 to +318
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This query is missing filters for INFER and CREATE INDEX statements, which are present in other similar functions. Adding them would provide more consistent and focused results on application queries.

Suggested change
WHERE phaseCounts.`indexScan` IS NOT MISSING
AND phaseCounts.`fetch` IS NOT MISSING
AND UPPER(statement) NOT LIKE '% SYSTEM:%'
WHERE phaseCounts.`indexScan` IS NOT MISSING
AND phaseCounts.`fetch` IS NOT MISSING
AND UPPER(statement) NOT LIKE 'INFER %'
AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
AND UPPER(statement) NOT LIKE '% SYSTEM:%'

ORDER BY resultCount DESC
LIMIT $limit
"""

return _run_query_tool_with_empty_message(
ctx,
query,
limit=limit,
empty_message=(
"No queries that require fetches after index scans were found "
"in system:completed_requests."
),
)


def get_queries_not_selective(ctx: Context, limit: int = 10) -> list[dict[str, Any]]:
"""Get queries that are not very selective from the system:completed_requests catalog.

Args:
limit: Number of queries to return (default: 10)

Returns:
List of queries where index scans return significantly more documents than the final result
"""
query = """
SELECT statement,
AVG(phaseCounts.`indexScan` - resultCount) AS diff
FROM system:completed_requests
WHERE phaseCounts.`indexScan` > resultCount
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This query is missing filters to exclude system-level statements (like INFER, CREATE INDEX, and queries on SYSTEM: keyspaces). Including these filters, as done in other functions in this file, will help to focus on application performance issues.

Suggested change
WHERE phaseCounts.`indexScan` > resultCount
WHERE phaseCounts.`indexScan` > resultCount
AND UPPER(statement) NOT LIKE 'INFER %'
AND UPPER(statement) NOT LIKE 'CREATE INDEX%'
AND UPPER(statement) NOT LIKE '% SYSTEM:%'

GROUP BY statement
ORDER BY diff DESC
LIMIT $limit
"""

return _run_query_tool_with_empty_message(
ctx,
query,
limit=limit,
empty_message=(
"No non-selective queries were found in system:completed_requests."
),
)
Loading