JordiNeil · JordiNeil · Apr 24, 2025 · Apr 24, 2025 · Apr 24, 2025
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,29 @@
+FROM python:3.10-slim
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy requirements first to leverage Docker cache
+COPY requirements.txt .
+
+# Install any needed packages specified in requirements.txt
+# Use --no-cache-dir to reduce image size
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Optional: clean up build dependencies to reduce image size (nothing above installs build-essential, so there is currently nothing to purge)
+# RUN apt-get purge -y --auto-remove build-essential
+
+# Copy the rest of the application code into the container at /app
+COPY . .
+
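+# Document port 8000 as the port the server may listen on; EXPOSE itself publishes nothing (use `docker run -p` for that)
+# (MCP servers typically run on port 8000 by default; the README's `docker run -i` example publishes no port)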
+EXPOSE 8000
+
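+# Declare the expected environment variables with empty defaults (override them with docker run -e flags). Note: since these are set (even though empty), load_dotenv() in main.py will by default not replace them with values from a .env file.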
+ENV DATABRICKS_HOST=""
+ENV DATABRICKS_TOKEN=""
+ENV DATABRICKS_HTTP_PATH=""
+
+# Run main.py when the container launches
+CMD ["python", "main.py"]
diff --git a/README.md b/README.md
@@ -66,6 +66,46 @@ You can test the MCP server using the inspector by running
 npx @modelcontextprotocol/inspector python3 main.py
 ```
 
+## Configuring with Docker (for MCP Clients like Cursor)
+
+If you are integrating this server with an MCP client (like Cursor), you might configure it using Docker. The client will typically manage running the Docker container based on a configuration file (e.g., `mcp.json`).
+
+A pre-built image is available on Docker Hub and can be pulled using:
+```bash
+docker pull jordineil/databricks-mcp-server
+```
+
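+The configuration passes environment variables directly to the Docker container. Here's an example structure that uses the public image name; replace the placeholders with your actual credentials, and remove the `// ... other servers ...` marker line (or replace it with further comma-separated entries), because standard JSON does not allow comments: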
+
+```json
+{
+  "mcpServers": {
+    "databricks-docker": {
+      "command": "docker",
+      "args": [
+        "run", 
+        "--rm", 
+        "-i", 
+        "-e",
+        "DATABRICKS_HOST=<your-databricks-host>",
+        "-e",
+        "DATABRICKS_TOKEN=<your-databricks-token>",
+        "-e",
+        "DATABRICKS_HTTP_PATH=<your-databricks-http-path>",
+        "jordineil/databricks-mcp-server" 
+      ]
+    }
+    // ... other servers ...
+  }
+}
+```
+
+- Replace `<your-databricks-host>` with your Databricks host (e.g., `dbc-xyz.cloud.databricks.com`).
+- Replace `<your-databricks-token>` with your personal access token.
+- Replace `<your-databricks-http-path>` with the HTTP path for your SQL warehouse.
+
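+This method avoids storing secrets in a `.env` file within the project, as the MCP client injects them at runtime. Note that the token is still stored in plain text in the client's configuration file (e.g., `mcp.json`), so keep that file out of version control.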
+
 ## Available MCP Tools
 
 The following MCP tools are available:

diff --git a/main.py b/main.py
@@ -5,6 +5,7 @@
 from databricks.sql.client import Connection
 from mcp.server.fastmcp import FastMCP
 import requests
+import logging
 
 # Load environment variables
 load_dotenv()
@@ -17,18 +18,42 @@
 # Set up the MCP server
 mcp = FastMCP("Databricks API Explorer")
 
+# Global variable to hold the Databricks SQL connection
+_db_connection: Connection | None = None
 
-# Helper function to get a Databricks SQL connection
+# Helper function to get a reusable Databricks SQL connection
 def get_databricks_connection() -> Connection:
-    """Create and return a Databricks SQL connection"""
+    """Create and return a reusable Databricks SQL connection."""
+    global _db_connection
+
+    # Check if connection exists and is open
+    if _db_connection is not None:
+        try:
+            # Probe the cached connection with a trivial query; any exception is treated as "connection unusable".
+            # NOTE: this adds one round trip to every reuse, and the check is driver-dependent; adjust if needed.
+            cursor = _db_connection.cursor()
+            cursor.execute("SELECT 1")
+            cursor.close()
+            logging.info("Reusing existing Databricks SQL connection.")
+            return _db_connection
+        except Exception as e:
+            logging.warning(f"Existing connection seems invalid ({e}), creating a new one.")
+            try:
+                _db_connection.close()
+            except Exception:
+                pass # Ignore errors during close if connection was already broken
+            _db_connection = None # Ensure we create a new one
+
     if not all([DATABRICKS_HOST, DATABRICKS_TOKEN, DATABRICKS_HTTP_PATH]):
         raise ValueError("Missing required Databricks connection details in .env file")
 
-    return connect(
+    logging.info("Creating new Databricks SQL connection.")
+    _db_connection = connect(
         server_hostname=DATABRICKS_HOST,
         http_path=DATABRICKS_HTTP_PATH,
         access_token=DATABRICKS_TOKEN
     )
+    return _db_connection
 
 # Helper function for Databricks REST API requests
 def databricks_api_request(endpoint: str, method: str = "GET", data: Dict = None) -> Dict:
@@ -56,28 +81,26 @@ def databricks_api_request(endpoint: str, method: str = "GET", data: Dict = None
 @mcp.resource("schema://tables")
 def get_schema() -> str:
     """Provide the list of tables in the Databricks SQL warehouse as a resource"""
-    conn = get_databricks_connection()
     try:
+        conn = get_databricks_connection() # Use the shared connection
         cursor = conn.cursor()
         tables = cursor.tables().fetchall()
 
         table_info = []
         for table in tables:
             table_info.append(f"Database: {table.TABLE_CAT}, Schema: {table.TABLE_SCHEM}, Table: {table.TABLE_NAME}")
 
+        # Close cursor but not the connection (NOTE: skipped if an exception is raised above, leaving the cursor open on errors)
+        cursor.close()
         return "\n".join(table_info)
     except Exception as e:
         return f"Error retrieving tables: {str(e)}"
-    finally:
-        if 'conn' in locals():
-            conn.close()
 
 @mcp.tool()
 def run_sql_query(sql: str) -> str:
     """Execute SQL queries on Databricks SQL warehouse"""
-    conn = get_databricks_connection()
-
     try:
+        conn = get_databricks_connection() # Use the shared connection
         cursor = conn.cursor()
         result = cursor.execute(sql)
 
@@ -87,6 +110,10 @@ def run_sql_query(sql: str) -> str:
 
             # Format the result as a table
             rows = result.fetchall()
+
+            # Close cursor but not the connection (NOTE: skipped if execute/fetchall raises, leaving the cursor open on errors)
+            cursor.close()
+
             if not rows:
                 return "Query executed successfully. No results returned."
 
@@ -99,12 +126,11 @@ def run_sql_query(sql: str) -> str:
 
             return table
         else:
+            # Close cursor but not the connection
+            cursor.close()
             return "Query executed successfully. No results returned."
     except Exception as e:
         return f"Error executing query: {str(e)}"
-    finally:
-        if 'conn' in locals():
-            conn.close()
 
 @mcp.tool()
 def list_jobs() -> str: