Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
FROM python:3.10-slim

# Set the working directory in the container
WORKDIR /app

# Copy requirements first so the dependency layer stays cached until
# requirements.txt itself changes
COPY requirements.txt .

# Install the Python dependencies; --no-cache-dir keeps pip's download cache
# out of the image
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code into the container at /app.
# NOTE(review): this copies everything in the build context - make sure a local
# .env file is excluded via .dockerignore so credentials are not baked into the image.
COPY . .

# Run the server as an unprivileged user instead of root
RUN useradd --create-home --shell /usr/sbin/nologin appuser
USER appuser

# Document port 8000 for deployments that serve over the network.
# Clients that talk to the server over stdin/stdout (docker run -i) do not need
# this port to be published.
EXPOSE 8000

# Connection settings default to empty strings; supply the real values at
# runtime with `docker run -e ...`
ENV DATABRICKS_HOST="" \
    DATABRICKS_TOKEN="" \
    DATABRICKS_HTTP_PATH=""

# Run main.py when the container launches
CMD ["python", "main.py"]
40 changes: 40 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,46 @@ You can test the MCP server using the inspector by running
npx @modelcontextprotocol/inspector python3 main.py
```

## Configuring with Docker (for MCP Clients like Cursor)

If you are integrating this server with an MCP client (like Cursor), you might configure it using Docker. The client will typically manage running the Docker container based on a configuration file (e.g., `mcp.json`).

A pre-built image is available on Docker Hub and can be pulled using:
```bash
docker pull jordineil/databricks-mcp-server
```

The configuration passes environment variables directly to the Docker container. Here's an example entry that uses the public image name; replace the placeholders with your actual credentials and add the entry alongside any other servers already defined under `mcpServers`:

```json
{
  "mcpServers": {
    "databricks-docker": {
      "command": "docker",
      "args": [
        "run",
        "--rm",
        "-i",
        "-e",
        "DATABRICKS_HOST=<your-databricks-host>",
        "-e",
        "DATABRICKS_TOKEN=<your-databricks-token>",
        "-e",
        "DATABRICKS_HTTP_PATH=<your-databricks-http-path>",
        "jordineil/databricks-mcp-server"
      ]
    }
  }
}
```

- Replace `<your-databricks-host>` with your Databricks host (e.g., `dbc-xyz.cloud.databricks.com`).
- Replace `<your-databricks-token>` with your personal access token.
- Replace `<your-databricks-http-path>` with the HTTP path for your SQL warehouse.

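This method avoids storing secrets in a `.env` file within the project, because the MCP client passes them to the container at runtime. Note that the credentials are still stored in plain text in the client's configuration file (e.g., `mcp.json`), so keep that file out of version control.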

## Available MCP Tools

The following MCP tools are available:
Expand Down
50 changes: 38 additions & 12 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from databricks.sql.client import Connection
from mcp.server.fastmcp import FastMCP
import requests
import logging

# Load environment variables
load_dotenv()
Expand All @@ -17,18 +18,42 @@
# Set up the MCP server
mcp = FastMCP("Databricks API Explorer")

# Module-level cache for the shared Databricks SQL connection.
# Stays None until get_databricks_connection() opens a connection, and is reset
# to None there when a cached connection fails its health check.
_db_connection: Connection | None = None

# Helper function to get a reusable Databricks SQL connection
def get_databricks_connection() -> Connection:
    """Return the shared Databricks SQL connection, opening it on first use.

    A cached connection is probed with ``SELECT 1`` before it is handed out.
    If the probe fails, the stale connection is closed on a best-effort basis
    and a new one is created from the ``DATABRICKS_*`` settings.

    Returns:
        The module-wide connection. Callers should close the cursors they
        create but must not close the connection, because it is reused.

    Raises:
        ValueError: If DATABRICKS_HOST, DATABRICKS_TOKEN or
            DATABRICKS_HTTP_PATH is unset or empty.
    """
    global _db_connection

    if _db_connection is not None:
        try:
            # Cheap round-trip to verify the cached connection still works.
            cursor = _db_connection.cursor()
            try:
                cursor.execute("SELECT 1")
            finally:
                # Release the probe cursor even when the query fails.
                cursor.close()
        except Exception as e:
            logging.warning(
                "Existing connection seems invalid (%s), creating a new one.", e
            )
            try:
                _db_connection.close()
            except Exception:
                # Best effort: the connection was already broken, so a failing
                # close is expected; record it instead of hiding it entirely.
                logging.debug(
                    "Ignoring error while closing stale connection.", exc_info=True
                )
            # Drop the stale handle before reconnecting so a failed reconnect
            # does not leave a dead connection cached.
            _db_connection = None
        else:
            logging.info("Reusing existing Databricks SQL connection.")
            return _db_connection

    if not all([DATABRICKS_HOST, DATABRICKS_TOKEN, DATABRICKS_HTTP_PATH]):
        raise ValueError("Missing required Databricks connection details in .env file")

    logging.info("Creating new Databricks SQL connection.")
    _db_connection = connect(
        server_hostname=DATABRICKS_HOST,
        http_path=DATABRICKS_HTTP_PATH,
        access_token=DATABRICKS_TOKEN,
    )
    return _db_connection

# Helper function for Databricks REST API requests
def databricks_api_request(endpoint: str, method: str = "GET", data: Dict = None) -> Dict:
Expand Down Expand Up @@ -56,28 +81,26 @@ def databricks_api_request(endpoint: str, method: str = "GET", data: Dict = None
@mcp.resource("schema://tables")
def get_schema() -> str:
    """Provide the list of tables in the Databricks SQL warehouse as a resource.

    Returns:
        One line per table in the form
        ``Database: <catalog>, Schema: <schema>, Table: <name>``, joined with
        newlines. If anything fails, an ``Error retrieving tables: ...``
        message is returned instead of raising.
    """
    try:
        # Shared, reusable connection - it must stay open after this call.
        conn = get_databricks_connection()
        cursor = conn.cursor()
        try:
            tables = cursor.tables().fetchall()
        finally:
            # Close the cursor even when the metadata call fails; only the
            # cursor is released, never the shared connection.
            cursor.close()

        return "\n".join(
            f"Database: {table.TABLE_CAT}, Schema: {table.TABLE_SCHEM}, Table: {table.TABLE_NAME}"
            for table in tables
        )
    except Exception as e:
        return f"Error retrieving tables: {str(e)}"

@mcp.tool()
def run_sql_query(sql: str) -> str:
"""Execute SQL queries on Databricks SQL warehouse"""
conn = get_databricks_connection()

try:
conn = get_databricks_connection() # Use the shared connection
cursor = conn.cursor()
result = cursor.execute(sql)

Expand All @@ -87,6 +110,10 @@ def run_sql_query(sql: str) -> str:

# Format the result as a table
rows = result.fetchall()

# Close cursor but not the connection
cursor.close()

if not rows:
return "Query executed successfully. No results returned."

Expand All @@ -99,12 +126,11 @@ def run_sql_query(sql: str) -> str:

return table
else:
# Close cursor but not the connection
cursor.close()
return "Query executed successfully. No results returned."
except Exception as e:
return f"Error executing query: {str(e)}"
finally:
if 'conn' in locals():
conn.close()

@mcp.tool()
def list_jobs() -> str:
Expand Down