diff --git a/.github/workflows/skills-check.yml b/.github/workflows/skills-check.yml new file mode 100644 index 000000000..4aa4daf8e --- /dev/null +++ b/.github/workflows/skills-check.yml @@ -0,0 +1,16 @@ +name: Skills Version Check + +on: + pull_request: + paths: + - "skills/**" + +jobs: + version-parity: + name: Check skills/versions.json parity + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Verify versions.json matches SKILL.md frontmatter + run: bash skills/check-versions.sh diff --git a/ibis-server/app/dependencies.py b/ibis-server/app/dependencies.py index 5ce90b97f..f6953fe79 100644 --- a/ibis-server/app/dependencies.py +++ b/ibis-server/app/dependencies.py @@ -18,6 +18,9 @@ # Validate the dto by building the specific connection info from the data source def verify_query_dto(data_source: DataSource, dto: QueryDTO): + # Skip inline validation when using a file path; connection info is validated at query time + if dto.connection_file_path: + return # Use data_source.get_connection_info to validate the connection_info # This will ensure the connection_info can be properly parsed for the specific data source data_source.get_connection_info(dto.connection_info, {}) diff --git a/ibis-server/app/model/__init__.py b/ibis-server/app/model/__init__.py index 86ecd0b48..19f9b07c4 100644 --- a/ibis-server/app/model/__init__.py +++ b/ibis-server/app/model/__init__.py @@ -3,12 +3,12 @@ from enum import Enum from typing import Annotated, Any, Literal, Union -from pydantic import BaseModel, Field, SecretStr +from pydantic import BaseModel, Field, SecretStr, model_validator from app.model.error import ErrorCode, WrenError manifest_str_field = Field(alias="manifestStr", description="Base64 manifest") -connection_info_field = Field(alias="connectionInfo") +connection_info_field = Field(alias="connectionInfo", default=None) class BaseConnectionInfo(BaseModel): @@ -26,7 +26,17 @@ def to_key_string(self) -> str: class QueryDTO(BaseModel): sql: str 
manifest_str: str = manifest_str_field - connection_info: dict[str, Any] | ConnectionInfo = connection_info_field + connection_info: dict[str, Any] | ConnectionInfo | None = connection_info_field + connection_file_path: str | None = Field(alias="connectionFilePath", default=None) + + @model_validator(mode="after") + def check_connection_source(self): + if self.connection_info is None and self.connection_file_path is None: + raise WrenError( + ErrorCode.INVALID_CONNECTION_INFO, + "Either connectionInfo or connectionFilePath must be provided", + ) + return self class QueryBigQueryDTO(QueryDTO): @@ -654,7 +664,17 @@ class GcsFileConnectionInfo(BaseConnectionInfo): class ValidateDTO(BaseModel): manifest_str: str = manifest_str_field parameters: dict - connection_info: dict[str, Any] | ConnectionInfo = connection_info_field + connection_info: dict[str, Any] | ConnectionInfo | None = connection_info_field + connection_file_path: str | None = Field(alias="connectionFilePath", default=None) + + @model_validator(mode="after") + def check_connection_source(self): + if self.connection_info is None and self.connection_file_path is None: + raise WrenError( + ErrorCode.INVALID_CONNECTION_INFO, + "Either connectionInfo or connectionFilePath must be provided", + ) + return self class AnalyzeSQLDTO(BaseModel): @@ -674,9 +694,19 @@ class DryPlanDTO(BaseModel): class TranspileDTO(BaseModel): manifest_str: str = manifest_str_field - connection_info: dict[str, Any] | ConnectionInfo = connection_info_field + connection_info: dict[str, Any] | ConnectionInfo | None = connection_info_field + connection_file_path: str | None = Field(alias="connectionFilePath", default=None) sql: str + @model_validator(mode="after") + def check_connection_source(self): + if self.connection_info is None and self.connection_file_path is None: + raise WrenError( + ErrorCode.INVALID_CONNECTION_INFO, + "Either connectionInfo or connectionFilePath must be provided", + ) + return self + class 
ConfigModel(BaseModel): diagnose: bool diff --git a/ibis-server/app/model/metadata/dto.py b/ibis-server/app/model/metadata/dto.py index 685313617..0b76b61b6 100644 --- a/ibis-server/app/model/metadata/dto.py +++ b/ibis-server/app/model/metadata/dto.py @@ -1,10 +1,11 @@ from enum import Enum from typing import Any -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, model_validator from app.model import ConnectionInfo from app.model.data_source import DataSource +from app.model.error import ErrorCode, WrenError class V2MetadataDTO(BaseModel): @@ -16,10 +17,22 @@ class FilterInfo(BaseModel): class MetadataDTO(BaseModel): - connection_info: dict[str, Any] | ConnectionInfo = Field(alias="connectionInfo") + connection_info: dict[str, Any] | ConnectionInfo | None = Field( + alias="connectionInfo", default=None + ) + connection_file_path: str | None = Field(alias="connectionFilePath", default=None) table_limit: int | None = Field(alias="limit", default=None) filter_info: dict[str, Any] | None = Field(alias="filterInfo", default=None) + @model_validator(mode="after") + def check_connection_source(self): + if self.connection_info is None and self.connection_file_path is None: + raise WrenError( + ErrorCode.INVALID_CONNECTION_INFO, + "Either connectionInfo or connectionFilePath must be provided", + ) + return self + class BigQueryFilterInfo(FilterInfo): projects: list["ProjectDatasets"] | None = None diff --git a/ibis-server/app/routers/v2/connector.py b/ibis-server/app/routers/v2/connector.py index 2b1d4dc73..c68f08deb 100644 --- a/ibis-server/app/routers/v2/connector.py +++ b/ibis-server/app/routers/v2/connector.py @@ -45,6 +45,7 @@ execute_validate_with_timeout, get_fallback_message, pushdown_limit, + resolve_connection_info, set_attribute, to_json, update_response_headers, @@ -93,7 +94,7 @@ async def query( if cache_enable: span_name += "_cache_enable" connection_info = data_source.get_connection_info( - dto.connection_info, dict(headers) + 
resolve_connection_info(dto), dict(headers) ) # Convert headers to dict for cache manager headers_dict = dict(headers) if headers else None @@ -232,7 +233,7 @@ async def validate( ) as span: set_attribute(headers, span) connection_info = data_source.get_connection_info( - dto.connection_info, dict(headers) + resolve_connection_info(dto), dict(headers) ) validator = Validator( Connector(data_source, connection_info), @@ -273,7 +274,7 @@ async def get_table_list( ) as span: set_attribute(headers, span) connection_info = data_source.get_connection_info( - dto.connection_info, dict(headers) + resolve_connection_info(dto), dict(headers) ) if isinstance(connection_info, BigQueryProjectConnectionInfo): raise WrenError( @@ -302,7 +303,7 @@ async def get_constraints( ) as span: set_attribute(headers, span) connection_info = data_source.get_connection_info( - dto.connection_info, dict(headers) + resolve_connection_info(dto), dict(headers) ) if isinstance(connection_info, BigQueryProjectConnectionInfo): raise WrenError( @@ -324,7 +325,7 @@ async def get_db_version( headers: Annotated[Headers, Depends(get_wren_headers)] = None, ) -> str: connection_info = data_source.get_connection_info( - dto.connection_info, dict(headers) + resolve_connection_info(dto), dict(headers) ) metadata = MetadataFactory.get_metadata(data_source, connection_info) return await execute_get_version_with_timeout(metadata) @@ -398,7 +399,7 @@ async def model_substitute( ) as span: set_attribute(headers, span) connection_info = data_source.get_connection_info( - dto.connection_info, dict(headers) + resolve_connection_info(dto), dict(headers) ) sql = ModelSubstitute(data_source, dto.manifest_str, headers).substitute( dto.sql, write="trino" diff --git a/ibis-server/app/routers/v3/connector.py b/ibis-server/app/routers/v3/connector.py index 41767d5fc..c44d0d38c 100644 --- a/ibis-server/app/routers/v3/connector.py +++ b/ibis-server/app/routers/v3/connector.py @@ -47,6 +47,7 @@ execute_query_with_timeout, 
execute_validate_with_timeout, pushdown_limit, + resolve_connection_info, safe_strtobool, set_attribute, to_json, @@ -91,7 +92,7 @@ async def query( ) as span: set_attribute(headers, span) connection_info = data_source.get_connection_info( - dto.connection_info, dict(headers) + resolve_connection_info(dto), dict(headers) ) # Convert headers to dict for cache manager headers_dict = dict(headers) if headers else None @@ -357,7 +358,7 @@ async def validate( ) as span: set_attribute(headers, span) connection_info = data_source.get_connection_info( - dto.connection_info, dict(headers) + resolve_connection_info(dto), dict(headers) ) try: validator = Validator( @@ -485,7 +486,7 @@ async def model_substitute( ) as span: set_attribute(headers, span) connection_info = data_source.get_connection_info( - dto.connection_info, dict(headers) + resolve_connection_info(dto), dict(headers) ) try: sql = ModelSubstitute(data_source, dto.manifest_str, headers).substitute( @@ -569,7 +570,7 @@ async def get_table_list( ) as span: set_attribute(headers, span) connection_info = data_source.get_connection_info( - dto.connection_info, dict(headers) + resolve_connection_info(dto), dict(headers) ) metadata = MetadataFactory.get_metadata(data_source, connection_info) filter_info = get_filter_info(data_source, dto.filter_info or {}) @@ -595,7 +596,7 @@ async def get_schema_list( ) as span: set_attribute(headers, span) connection_info = data_source.get_connection_info( - dto.connection_info, dict(headers) + resolve_connection_info(dto), dict(headers) ) metadata = MetadataFactory.get_metadata(data_source, connection_info) filter_info = get_filter_info(data_source, dto.filter_info or {}) diff --git a/ibis-server/app/util.py b/ibis-server/app/util.py index 631eea730..8dce8316d 100644 --- a/ibis-server/app/util.py +++ b/ibis-server/app/util.py @@ -1,5 +1,7 @@ import asyncio import base64 +import json +import pathlib import time try: @@ -41,7 +43,7 @@ class ClickHouseDbError(Exception): 
X_WREN_TIMEZONE, ) from app.model.data_source import DataSource -from app.model.error import DatabaseTimeoutError +from app.model.error import DatabaseTimeoutError, ErrorCode, WrenError from app.model.metadata.bigquery import BigQueryMetadata from app.model.metadata.dto import FilterInfo from app.model.metadata.metadata import Metadata @@ -53,6 +55,44 @@ class ClickHouseDbError(Exception): Wren AI team are appreciate if you can provide the error messages and related logs for us." +def resolve_connection_info(dto) -> dict: + """Return connection info dict from either a file path or the inline DTO field. + + When connectionFilePath is used, CONNECTION_FILE_ROOT must be set to the + directory that is allowed to be read. Requests are rejected if the env var + is absent or the resolved path escapes that directory. + """ + import os + + if getattr(dto, "connection_file_path", None): + allowed_root = os.environ.get("CONNECTION_FILE_ROOT") + if not allowed_root: + raise WrenError( + ErrorCode.INVALID_CONNECTION_INFO, + "connectionFilePath requires the CONNECTION_FILE_ROOT environment variable to be set", + ) + path = pathlib.Path(dto.connection_file_path).resolve() + if not path.is_relative_to(pathlib.Path(allowed_root).resolve()): + raise WrenError( + ErrorCode.INVALID_CONNECTION_INFO, + f"Connection file path is outside the allowed directory: {dto.connection_file_path}", + ) + try: + with open(path) as f: + return json.load(f) + except FileNotFoundError: + raise WrenError( + ErrorCode.INVALID_CONNECTION_INFO, + f"Connection file not found: {dto.connection_file_path}", + ) + except json.JSONDecodeError as e: + raise WrenError( + ErrorCode.INVALID_CONNECTION_INFO, + f"Invalid JSON in connection file: {e}", + ) + return dto.connection_info + + @tracer.start_as_current_span("base64_to_dict", kind=trace.SpanKind.INTERNAL) def base64_to_dict(base64_str: str) -> dict: return orjson.loads(base64.b64decode(base64_str).decode("utf-8")) diff --git 
a/ibis-server/tests/routers/v2/connector/test_clickhouse.py b/ibis-server/tests/routers/v2/connector/test_clickhouse.py index ca46175b2..c8c230b63 100644 --- a/ibis-server/tests/routers/v2/connector/test_clickhouse.py +++ b/ibis-server/tests/routers/v2/connector/test_clickhouse.py @@ -371,10 +371,9 @@ async def test_query_without_connection_info(client, manifest_str): ) assert response.status_code == 422 result = response.json() - assert result["detail"][0] is not None - assert result["detail"][0]["type"] == "missing" - assert result["detail"][0]["loc"] == ["body", "connectionInfo"] - assert result["detail"][0]["msg"] == "Field required" + assert result["errorCode"] == "INVALID_CONNECTION_INFO" + assert "connectionInfo" in result["message"] + assert "connectionFilePath" in result["message"] async def test_query_with_dry_run( diff --git a/ibis-server/tests/routers/v2/connector/test_mssql.py b/ibis-server/tests/routers/v2/connector/test_mssql.py index d82ac8c29..c9b05d212 100644 --- a/ibis-server/tests/routers/v2/connector/test_mssql.py +++ b/ibis-server/tests/routers/v2/connector/test_mssql.py @@ -232,10 +232,9 @@ async def test_query_without_connection_info(client, manifest_str): ) assert response.status_code == 422 result = response.json() - assert result["detail"][0] is not None - assert result["detail"][0]["type"] == "missing" - assert result["detail"][0]["loc"] == ["body", "connectionInfo"] - assert result["detail"][0]["msg"] == "Field required" + assert result["errorCode"] == "INVALID_CONNECTION_INFO" + assert "connectionInfo" in result["message"] + assert "connectionFilePath" in result["message"] async def test_query_with_dry_run(client, manifest_str, mssql: SqlServerContainer): diff --git a/ibis-server/tests/routers/v2/connector/test_mysql.py b/ibis-server/tests/routers/v2/connector/test_mysql.py index 18301a099..3dbb2bbe1 100644 --- a/ibis-server/tests/routers/v2/connector/test_mysql.py +++ b/ibis-server/tests/routers/v2/connector/test_mysql.py @@ -239,10 
+239,9 @@ async def test_query_without_connection_info(client, manifest_str): ) assert response.status_code == 422 result = response.json() - assert result["detail"][0] is not None - assert result["detail"][0]["type"] == "missing" - assert result["detail"][0]["loc"] == ["body", "connectionInfo"] - assert result["detail"][0]["msg"] == "Field required" + assert result["errorCode"] == "INVALID_CONNECTION_INFO" + assert "connectionInfo" in result["message"] + assert "connectionFilePath" in result["message"] async def test_query_with_dry_run(client, manifest_str, mysql: MySqlContainer): diff --git a/ibis-server/tests/routers/v2/connector/test_oracle.py b/ibis-server/tests/routers/v2/connector/test_oracle.py index 1e857d8e2..206f24289 100644 --- a/ibis-server/tests/routers/v2/connector/test_oracle.py +++ b/ibis-server/tests/routers/v2/connector/test_oracle.py @@ -277,10 +277,9 @@ async def test_query_without_connection_info( ) assert response.status_code == 422 result = response.json() - assert result["detail"][0] is not None - assert result["detail"][0]["type"] == "missing" - assert result["detail"][0]["loc"] == ["body", "connectionInfo"] - assert result["detail"][0]["msg"] == "Field required" + assert result["errorCode"] == "INVALID_CONNECTION_INFO" + assert "connectionInfo" in result["message"] + assert "connectionFilePath" in result["message"] async def test_query_with_dry_run(client, manifest_str, oracle: OracleDbContainer): diff --git a/ibis-server/tests/routers/v2/connector/test_postgres.py b/ibis-server/tests/routers/v2/connector/test_postgres.py index 8fee49f9e..6b1fde562 100644 --- a/ibis-server/tests/routers/v2/connector/test_postgres.py +++ b/ibis-server/tests/routers/v2/connector/test_postgres.py @@ -531,10 +531,9 @@ async def test_query_without_connection_info(client, manifest_str): ) assert response.status_code == 422 result = response.json() - assert result["detail"][0] is not None - assert result["detail"][0]["type"] == "missing" - assert 
result["detail"][0]["loc"] == ["body", "connectionInfo"] - assert result["detail"][0]["msg"] == "Field required" + assert result["errorCode"] == "INVALID_CONNECTION_INFO" + assert "connectionInfo" in result["message"] + assert "connectionFilePath" in result["message"] async def test_query_with_dry_run(client, manifest_str, postgres: PostgresContainer): @@ -1089,6 +1088,27 @@ async def test_order_by_nulls_last(client, manifest_str, postgres: PostgresConta assert result["data"][2][0] == "three" +async def test_connection_info_file( + client, manifest_str, postgres: PostgresContainer, tmp_path, monkeypatch +): + monkeypatch.setenv("CONNECTION_FILE_ROOT", str(tmp_path)) + conn_file = tmp_path / "connection.json" + conn_file.write_bytes(orjson.dumps(_to_connection_info(postgres))) + + response = await client.post( + url=f"{base_url}/query", + json={ + "connectionFilePath": str(conn_file), + "manifestStr": manifest_str, + "sql": 'SELECT * FROM "Orders" LIMIT 1', + }, + ) + assert response.status_code == 200 + result = response.json() + assert len(result["columns"]) == len(manifest["models"][0]["columns"]) + assert len(result["data"]) == 1 + + def _to_connection_info(pg: PostgresContainer): return { "host": pg.get_container_host_ip(), diff --git a/ibis-server/tests/routers/v2/connector/test_trino.py b/ibis-server/tests/routers/v2/connector/test_trino.py index 5bdd3b2a7..a5a528d8a 100644 --- a/ibis-server/tests/routers/v2/connector/test_trino.py +++ b/ibis-server/tests/routers/v2/connector/test_trino.py @@ -234,10 +234,9 @@ async def test_query_without_connection_info(client, manifest_str): ) assert response.status_code == 422 result = response.json() - assert result["detail"][0] is not None - assert result["detail"][0]["type"] == "missing" - assert result["detail"][0]["loc"] == ["body", "connectionInfo"] - assert result["detail"][0]["msg"] == "Field required" + assert result["errorCode"] == "INVALID_CONNECTION_INFO" + assert "connectionInfo" in result["message"] + assert 
"connectionFilePath" in result["message"] async def test_query_with_dry_run(client, manifest_str, trino: TrinoContainer): diff --git a/ibis-server/tests/routers/v3/connector/postgres/test_fallback_v2.py b/ibis-server/tests/routers/v3/connector/postgres/test_fallback_v2.py index 93ad3360a..85744f1ad 100644 --- a/ibis-server/tests/routers/v3/connector/postgres/test_fallback_v2.py +++ b/ibis-server/tests/routers/v3/connector/postgres/test_fallback_v2.py @@ -370,6 +370,28 @@ async def test_validate(client, manifest_str, connection_info): assert response.status_code == 422 +async def test_connection_info_file( + client, manifest_str, connection_info, tmp_path, monkeypatch +): + monkeypatch.setenv("CONNECTION_FILE_ROOT", str(tmp_path)) + conn_file = tmp_path / "connection.json" + conn_file.write_bytes(orjson.dumps(connection_info)) + + # v3 will fail with this manifest and fall back to v2 — connectionFilePath must survive the fallback + response = await client.post( + url=f"{base_url}/query", + json={ + "connectionFilePath": str(conn_file), + "manifestStr": manifest_str, + "sql": "SELECT orderkey FROM orders LIMIT 1", + }, + ) + assert response.status_code == 200 + result = response.json() + assert len(result["columns"]) == 1 + assert len(result["data"]) == 1 + + async def test_query_rlac(client, manifest_str, connection_info): response = await client.post( url=f"{base_url}/query", diff --git a/ibis-server/tests/routers/v3/connector/postgres/test_query.py b/ibis-server/tests/routers/v3/connector/postgres/test_query.py index 78638546f..8d6c2d0de 100644 --- a/ibis-server/tests/routers/v3/connector/postgres/test_query.py +++ b/ibis-server/tests/routers/v3/connector/postgres/test_query.py @@ -427,10 +427,9 @@ async def test_query_without_connection_info(client, manifest_str): ) assert response.status_code == 422 result = response.json() - assert result["detail"][0] is not None - assert result["detail"][0]["type"] == "missing" - assert result["detail"][0]["loc"] == ["body", 
"connectionInfo"] - assert result["detail"][0]["msg"] == "Field required" + assert result["errorCode"] == "INVALID_CONNECTION_INFO" + assert "connectionInfo" in result["message"] + assert "connectionFilePath" in result["message"] async def test_query_with_dry_run(client, manifest_str, connection_info): @@ -1299,3 +1298,25 @@ async def test_to_char_numeric(client, manifest_str, connection_info): "formatted_number": "string", "formatted_double": "string", } + + +async def test_connection_info_file( + client, manifest_str, connection_info, tmp_path, monkeypatch +): + monkeypatch.setenv("CONNECTION_FILE_ROOT", str(tmp_path)) + conn_file = tmp_path / "connection.json" + conn_file.write_bytes(orjson.dumps(connection_info)) + + response = await client.post( + url=f"{base_url}/query", + json={ + "connectionFilePath": str(conn_file), + "manifestStr": manifest_str, + "sql": "SELECT * FROM wren.public.orders LIMIT 1", + }, + headers={X_WREN_FALLBACK_DISABLE: "true"}, + ) + assert response.status_code == 200 + result = response.json() + assert len(result["columns"]) == 10 + assert len(result["data"]) == 1 diff --git a/mcp-server/docker/compose.yaml b/mcp-server/docker/compose.yaml index fb341204e..d07c31184 100644 --- a/mcp-server/docker/compose.yaml +++ b/mcp-server/docker/compose.yaml @@ -10,5 +10,6 @@ services: MCP_HOST: "0.0.0.0" MCP_PORT: "9000" WREN_URL: "localhost:8000" + CONNECTION_FILE_ROOT: "/workspace" volumes: - ${MDL_WORKSPACE:-../workspace}:/workspace diff --git a/skills/AUTHORING.md b/skills/AUTHORING.md index 5cd71b130..f8f0081cb 100644 --- a/skills/AUTHORING.md +++ b/skills/AUTHORING.md @@ -100,5 +100,17 @@ After creating a new skill: 1. Add a section to [SKILLS.md](SKILLS.md) describing the skill, its trigger conditions, and reference files. 2. Add a row to the skills table in [README.md](README.md). +3. Add the skill name and version to [versions.json](versions.json). +4. 
Add an entry to [index.json](index.json) with `name`, `version`, `description`, `tags`, `dependencies` (if any), and `repository`. +5. Add the skill to the `ALL_SKILLS` array in [install.sh](install.sh). -Both entries should use the same short trigger description. +Both `versions.json` and `index.json` must stay in sync with the `version` field in the skill's `SKILL.md` frontmatter. Run `bash skills/check-versions.sh` to verify parity before merging — the script validates both files. + +--- + +## Releasing a skill update + +1. Bump `version` in the skill's `SKILL.md` frontmatter. +2. Update the matching version in `versions.json`. +3. Update the matching version in `index.json`. +4. Run `bash skills/check-versions.sh` — must pass before merging. diff --git a/skills/README.md b/skills/README.md index 7680b6e5c..c5fdf1ead 100644 --- a/skills/README.md +++ b/skills/README.md @@ -30,14 +30,15 @@ npx openskills add Canner/wren-engine ### Option 3 — manual copy ```bash -cp -r skills/generate-mdl ~/.claude/skills/ +cp -r skills/wren-usage ~/.claude/skills/ # or all at once: -cp -r skills/generate-mdl skills/wren-project skills/wren-sql skills/wren-mcp-setup skills/wren-quickstart ~/.claude/skills/ +cp -r skills/wren-usage skills/generate-mdl skills/wren-project skills/wren-sql skills/wren-mcp-setup skills/wren-connection-info ~/.claude/skills/ ``` Once installed, invoke a skill by name in your conversation: ```text +/wren-usage /wren-quickstart /generate-mdl /wren-project @@ -45,15 +46,22 @@ Once installed, invoke a skill by name in your conversation: /wren-mcp-setup ``` +> **Tip:** Installing `wren-usage` via `install.sh` automatically installs all its dependencies. 
+> ```bash +> bash skills/install.sh wren-usage +> ``` + ## Available Skills | Skill | Description | |-------|-------------| -| [wren-quickstart](wren-quickstart/SKILL.md) | End-to-end quickstart — install skills, generate MDL, save project, start MCP server, and verify setup | +| [wren-usage](wren-usage/SKILL.md) | **Primary skill** — daily usage guide: query data, manage MDL, connect databases, operate MCP server | +| [wren-quickstart](wren-quickstart/SKILL.md) | End-to-end first-time setup — install skills, generate MDL, save project, start MCP server, verify setup | | [generate-mdl](generate-mdl/SKILL.md) | Generate a Wren MDL manifest from a live database using ibis-server introspection | | [wren-project](wren-project/SKILL.md) | Save, load, and build MDL manifests as version-controlled YAML project directories | | [wren-sql](wren-sql/SKILL.md) | Write and correct SQL queries for Wren Engine — types, date/time, BigQuery dialect, error diagnosis | | [wren-mcp-setup](wren-mcp-setup/SKILL.md) | Set up Wren Engine MCP via Docker, register with Claude Code or other MCP clients, and start querying | +| [wren-connection-info](wren-connection-info/SKILL.md) | Set up data source credentials — produces `connectionFilePath` or inline dict | See [SKILLS.md](SKILLS.md) for full details on each skill. diff --git a/skills/SKILLS.md b/skills/SKILLS.md index bf7a1b2ad..f5a670029 100644 --- a/skills/SKILLS.md +++ b/skills/SKILLS.md @@ -4,6 +4,38 @@ Skills are instruction files that extend AI agents with Wren-specific workflows. --- +## wren-usage + +**File:** [wren-usage/SKILL.md](wren-usage/SKILL.md) + +**Primary entry point** for day-to-day Wren Engine usage. Identifies the user's task and delegates to the appropriate focused skill. Covers SQL queries, MDL management, database connections, and MCP server operations. 
+ +### When to use + +- Writing or debugging SQL queries against a deployed MDL +- Adding or modifying models, columns, or relationships in the MDL +- Changing database credentials or data source +- Rebuilding `target/mdl.json` after project changes +- Restarting or reconfiguring the MCP server +- Any ongoing Wren task after initial setup is complete + +### Dependent skills + +| Skill | Purpose | +|-------|---------| +| `@wren-sql` | Write and debug SQL queries | +| `@wren-connection-info` | Set up or change database credentials | +| `@generate-mdl` | Regenerate MDL from a changed database schema | +| `@wren-project` | Save, load, and build MDL YAML projects | +| `@wren-mcp-setup` | Reconfigure the MCP server | + +> Installing `wren-usage` via `install.sh` automatically installs all dependent skills: +> ```bash +> bash skills/install.sh wren-usage +> ``` + +--- + ## wren-quickstart **File:** [wren-quickstart/SKILL.md](wren-quickstart/SKILL.md) @@ -170,16 +202,17 @@ Sets up Wren Engine MCP server via Docker, registers it with an AI agent (Claude ## Installing a skill ```bash -# Single skill -cp -r skills/generate-mdl ~/.claude/skills/ +# Install wren-usage (auto-installs all dependencies) +bash skills/install.sh wren-usage -# All skills -cp -r skills/* ~/.claude/skills/ +# Or install everything +bash skills/install.sh ``` Then invoke in your AI client: ``` +/wren-usage /generate-mdl /wren-project /wren-sql diff --git a/skills/check-versions.sh b/skills/check-versions.sh new file mode 100755 index 000000000..f6199cf6c --- /dev/null +++ b/skills/check-versions.sh @@ -0,0 +1,68 @@ +#!/usr/bin/env bash +# Verify that skills/versions.json and skills/index.json both match +# the version in each skill's SKILL.md frontmatter. +# Exits non-zero if any mismatch is found. 
+set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +VERSIONS_JSON="$SCRIPT_DIR/versions.json" +INDEX_JSON="$SCRIPT_DIR/index.json" +ERRORS=0 + +while IFS= read -r skill; do + skill_name="${skill//\"/}" + skill_name="${skill_name%%:*}" + skill_name="${skill_name// /}" + + versions_version=$(python3 -c "import json,sys; d=json.load(open('$VERSIONS_JSON')); print(d.get('$skill_name','MISSING'))") + + skill_file="$SCRIPT_DIR/$skill_name/SKILL.md" + if [ ! -f "$skill_file" ]; then + echo "ERROR: $skill_name listed in versions.json but $skill_file not found" >&2 + ERRORS=$((ERRORS + 1)) + continue + fi + + md_version=$(grep -m1 'version:' "$skill_file" | sed 's/.*version: *"\{0,1\}\([^"]*\)"\{0,1\}/\1/' | tr -d ' "') + + if [ "$versions_version" != "$md_version" ]; then + echo "MISMATCH: $skill_name — versions.json=$versions_version, SKILL.md=$md_version" >&2 + ERRORS=$((ERRORS + 1)) + else + echo "OK (versions.json): $skill_name @ $versions_version" + fi + + index_version=$(python3 -c " +import json, sys +skills = json.load(open('$INDEX_JSON')).get('skills', []) +match = next((s['version'] for s in skills if s['name'] == '$skill_name'), 'MISSING') +print(match) +") + + if [ "$index_version" != "$md_version" ]; then + echo "MISMATCH: $skill_name — index.json=$index_version, SKILL.md=$md_version" >&2 + ERRORS=$((ERRORS + 1)) + else + echo "OK (index.json): $skill_name @ $index_version" + fi +done < <(python3 -c " +import json +from pathlib import Path + +root = Path('$SCRIPT_DIR') +versions = set(json.load(open('$VERSIONS_JSON')).keys()) +index = {s['name'] for s in json.load(open('$INDEX_JSON')).get('skills', [])} +skill_dirs = {p.parent.name for p in root.glob('*/SKILL.md')} + +for name in sorted(versions | index | skill_dirs): + print(name) +") + +if [ "$ERRORS" -gt 0 ]; then + echo "" >&2 + echo "Found $ERRORS version mismatch(es). Update versions.json, index.json, or SKILL.md to match." 
>&2 + exit 1 +fi + +echo "" +echo "All skill versions match." diff --git a/skills/generate-mdl/SKILL.md b/skills/generate-mdl/SKILL.md index 71c3a2e17..75bcdd5b9 100644 --- a/skills/generate-mdl/SKILL.md +++ b/skills/generate-mdl/SKILL.md @@ -2,9 +2,10 @@ name: generate-mdl description: Generate a Wren MDL manifest from a database using ibis-server metadata endpoints. Use when a user wants to create or set up a new Wren MDL, scaffold a manifest from an existing database, or onboard a new data source without installing any database drivers locally. compatibility: Requires a running ibis-server (default port 8000). No local database drivers needed. +license: Apache-2.0 metadata: author: wren-engine - version: "1.1" + version: "1.2" --- # Generate Wren MDL @@ -12,10 +13,10 @@ metadata: ## Version check Silently fetch `https://raw.githubusercontent.com/Canner/wren-engine/main/skills/versions.json`. -Compare the `generate-mdl` key with this skill's version (`1.1`). +Compare the `generate-mdl` key with this skill's version (`1.2`). If the remote version is newer, notify the user before proceeding: -> A newer version of the **generate-mdl** skill is available (remote: X.Y, installed: 1.1). +> A newer version of the **generate-mdl** skill is available (remote: X.Y, installed: 1.2). > Update with: > ```bash > curl -fsSL https://raw.githubusercontent.com/Canner/wren-engine/main/skills/install.sh | bash -s -- --force generate-mdl @@ -31,28 +32,30 @@ Generates a Wren MDL manifest by using ibis-server to introspect the database sc Follow these steps in order. Do not skip steps or ask unnecessary questions between them. -### Step 1 — Gather connection info +### Step 1 — Set up data source and connection info -Ask the user for: -1. **Data source type** — one of: `POSTGRES`, `MYSQL`, `MSSQL`, `DUCKDB`, `BIGQUERY`, `SNOWFLAKE`, `CLICKHOUSE`, `TRINO`, `ATHENA`, `ORACLE`, `DATABRICKS` -2. **Connection credentials** — see [Connection info format](#connection-info-format) below -3. 
**Schema filter** (optional) — if the database has many schemas, ask which schema(s) to include +Follow the **wren-connection-info** skill (`skills/wren-connection-info/SKILL.md`) to: +1. Choose the data source type (e.g. `POSTGRES`, `BIGQUERY`, `SNOWFLAKE`, …) +2. Choose connection mode (Mode A: secure file path, or Mode B: inline for testing) +3. Gather credentials and produce either a `connectionFilePath` or inline `connectionInfo` -Do not ask for a SQLAlchemy connection string. Use the structured `connectionInfo` dict instead. +Also ask the user for a **schema filter** (optional) — if the database has many schemas, ask which schema(s) to include. -> **Important:** If the database runs on the host machine and ibis-server runs inside Docker, replace `localhost` / `127.0.0.1` with `host.docker.internal` in the host field. +After this step you will have: +- `data_source`: e.g. `"POSTGRES"` +- Either `connectionFilePath` (Mode A) or `connectionInfo` dict (Mode B) — used in all subsequent API calls ### Step 2 — Fetch table schema -Call the ibis-server metadata endpoint directly: +Call the ibis-server metadata endpoint directly, using the connection output from Step 1: ``` POST http://localhost:8000/v3/connector//metadata/tables Content-Type: application/json -{ - "connectionInfo": { } -} +{ "connectionFilePath": "/abs/path/to/target/connection.json" } + — or — +{ "connectionInfo": { } } ``` ibis-server returns a list of tables with their column names and types. Each table entry has a `properties.schema` field — use it to filter to the user's target schema if specified. @@ -65,9 +68,9 @@ If this fails, report the error and ask the user to correct the credentials. POST http://localhost:8000/v3/connector//metadata/constraints Content-Type: application/json -{ - "connectionInfo": { } -} +{ "connectionFilePath": "/abs/path/to/target/connection.json" } + — or — +{ "connectionInfo": { } } ``` Returns foreign key constraints. 
Use these to build `Relationship` entries in the MDL. If the response is empty (`[]`), infer relationships from column naming conventions (e.g. `order_id` → `orders.id`). @@ -83,8 +86,9 @@ Content-Type: application/json { "sql": "SELECT * FROM <schema>.<table> LIMIT 3", "manifestStr": "<base64-encoded MDL>", - "connectionInfo": { <connection info> } + "connectionFilePath": "/abs/path/to/target/connection.json" } + — or use "connectionInfo": { <connection info> } in Mode B ``` Note: use the raw `schema.table` reference at this stage, since the MDL is not yet deployed. @@ -246,19 +250,4 @@ When in doubt, use `VARCHAR` as a safe fallback. ## Connection info format -Pass to `setup_connection(datasource=..., connectionInfo={...})`: - -``` -POSTGRES : {"host": "...", "port": "5432", "user": "...", "password": "...", "database": "..."} -MYSQL : {"host": "...", "port": "3306", "user": "...", "password": "...", "database": "..."} -MSSQL : {"host": "...", "port": "1433", "user": "...", "password": "...", "database": "..."} -DUCKDB : {"path": "<path to duckdb file>"} -BIGQUERY : {"project": "...", "dataset": "...", "credentials_base64": "..."} -SNOWFLAKE : {"account": "...", "user": "...", "password": "...", "database": "...", "schema": "..."} -CLICKHOUSE : {"host": "...", "port": "8123", "user": "...", "password": "...", "database": "..."} -TRINO : {"host": "...", "port": "8080", "user": "...", "catalog": "...", "schema": "..."} -ORACLE : {"host": "...", "port": "1521", "user": "...", "password": "...", "database": "..."} -DATABRICKS : {"host": "...", "httpPath": "...", "token": "..."} -``` - -The `datasource` value must match the `dataSource` field in the MDL exactly. +See the **wren-connection-info** skill (`skills/wren-connection-info/SKILL.md`) for the full per-connector field reference, secrets policy, and Mode A / Mode B workflow. 
diff --git a/skills/index.json b/skills/index.json new file mode 100644 index 000000000..3e804ef12 --- /dev/null +++ b/skills/index.json @@ -0,0 +1,129 @@ +{ + "name": "wren-engine", + "description": "AI agent skills for Wren Engine — semantic SQL layer and MCP server for 20+ data sources.", + "homepage": "https://wren.ai", + "repository": "https://github.com/Canner/wren-engine", + "license": "Apache-2.0", + "skills": [ + { + "name": "wren-connection-info", + "version": "1.1", + "description": "Set up data source type and connection credentials for Wren Engine.", + "tags": [ + "wren", + "credentials", + "connection", + "database", + "security" + ], + "repository": "https://github.com/Canner/wren-engine/tree/main/skills/wren-connection-info" + }, + { + "name": "generate-mdl", + "version": "1.2", + "description": "Generate a Wren MDL manifest from a live database using ibis-server introspection.", + "tags": [ + "wren", + "mdl", + "database", + "introspection", + "postgres", + "bigquery", + "snowflake", + "mysql", + "clickhouse", + "trino" + ], + "dependencies": [ + "wren-connection-info" + ], + "repository": "https://github.com/Canner/wren-engine/tree/main/skills/generate-mdl" + }, + { + "name": "wren-project", + "version": "1.4", + "description": "Save, load, and build Wren MDL manifests as YAML project directories for version control.", + "tags": [ + "wren", + "mdl", + "yaml", + "version-control", + "project", + "git" + ], + "dependencies": [ + "wren-connection-info" + ], + "repository": "https://github.com/Canner/wren-engine/tree/main/skills/wren-project" + }, + { + "name": "wren-sql", + "version": "1.0", + "description": "Write and correct SQL queries targeting Wren Engine — types, date/time, BigQuery dialect, error diagnosis.", + "tags": [ + "wren", + "sql", + "bigquery", + "array", + "struct", + "datetime", + "mdl", + "text-to-sql" + ], + "repository": "https://github.com/Canner/wren-engine/tree/main/skills/wren-sql" + }, + { + "name": "wren-mcp-setup", + 
"version": "1.1", + "description": "Set up Wren Engine MCP server via Docker and register it with an AI agent.", + "tags": [ + "wren", + "mcp", + "docker", + "claude-code", + "cursor", + "cline" + ], + "repository": "https://github.com/Canner/wren-engine/tree/main/skills/wren-mcp-setup" + }, + { + "name": "wren-quickstart", + "version": "1.0", + "description": "End-to-end quickstart for Wren Engine — from zero to querying.", + "tags": [ + "wren", + "quickstart", + "onboarding", + "mcp", + "docker" + ], + "dependencies": [ + "wren-connection-info", + "generate-mdl", + "wren-project", + "wren-mcp-setup" + ], + "repository": "https://github.com/Canner/wren-engine/tree/main/skills/wren-quickstart" + }, + { + "name": "wren-usage", + "version": "1.0", + "description": "Daily usage guide for Wren Engine — connect, query, manage MDL, and operate the MCP server via AI agents.", + "tags": [ + "wren", + "usage", + "sql", + "mdl", + "mcp" + ], + "dependencies": [ + "wren-connection-info", + "generate-mdl", + "wren-project", + "wren-sql", + "wren-mcp-setup" + ], + "repository": "https://github.com/Canner/wren-engine/tree/main/skills/wren-usage" + } + ] +} diff --git a/skills/install.sh b/skills/install.sh index 59b33db4a..21a1206d6 100755 --- a/skills/install.sh +++ b/skills/install.sh @@ -13,7 +13,7 @@ set -euo pipefail REPO="Canner/wren-engine" BRANCH="${WREN_SKILLS_BRANCH:-main}" DEST="${CLAUDE_SKILLS_DIR:-$HOME/.claude/skills}" -ALL_SKILLS=(generate-mdl wren-project wren-sql wren-mcp-setup wren-quickstart) +ALL_SKILLS=(generate-mdl wren-project wren-sql wren-mcp-setup wren-quickstart wren-connection-info wren-usage) # Parse --force flag and skill list from arguments FORCE=false @@ -50,6 +50,69 @@ if [ -n "${BASH_SOURCE[0]:-}" ] && [ "${BASH_SOURCE[0]}" != "/dev/stdin" ]; then SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" fi +# Locate index.json for dependency resolution (local or remote) +INDEX_JSON="" +if [ -n "$SCRIPT_DIR" ] && [ -f "$SCRIPT_DIR/index.json" 
]; then + INDEX_JSON="$SCRIPT_DIR/index.json" +fi + +# Expand SELECTED_SKILLS to include dependencies declared in index.json. +# Only runs when python3 is available and index.json is accessible. +expand_with_deps() { + local json_file="$1" + shift + local -a input=("$@") + local -a result=() + + skill_in_result() { + local s="$1" + for r in "${result[@]:-}"; do [ "$r" = "$s" ] && return 0; done + return 1 + } + + is_known_skill() { + local s="$1" + for known in "${ALL_SKILLS[@]}"; do [ "$s" = "$known" ] && return 0; done + return 1 + } + + for skill in "${input[@]}"; do + skill_in_result "$skill" || result+=("$skill") + + if [ -n "$json_file" ] && command -v python3 &>/dev/null; then + while IFS= read -r dep; do + [ -z "$dep" ] && continue + is_known_skill "$dep" || continue + if ! skill_in_result "$dep"; then + echo " + $dep (dependency of $skill)" >&2 + result+=("$dep") + fi + done < <(python3 -c " +import json, sys +try: + d = json.load(open(sys.argv[1])) + s = next((x for x in d.get('skills', []) if x['name'] == sys.argv[2]), None) + if s: + for dep in s.get('dependencies', []): + print(dep) +except Exception: + pass +" "$json_file" "$skill" 2>/dev/null) + fi + done + + printf '%s\n' "${result[@]}" +} + +# Only expand deps when installing specific skills (not the full set) +if [ "${#SELECTED_SKILLS[@]}" -lt "${#ALL_SKILLS[@]}" ] && [ -n "$INDEX_JSON" ]; then + EXPANDED=() + while IFS= read -r line; do + [ -n "$line" ] && EXPANDED+=("$line") + done < <(expand_with_deps "$INDEX_JSON" "${SELECTED_SKILLS[@]}") + SELECTED_SKILLS=("${EXPANDED[@]}") +fi + install_from_local() { local src="$1" skill="$2" dest_dir="$3" if [ "$FORCE" = false ] && [ -d "$dest_dir" ]; then diff --git a/skills/versions.json b/skills/versions.json index 6258b9ac6..804788cef 100644 --- a/skills/versions.json +++ b/skills/versions.json @@ -1,7 +1,9 @@ { - "generate-mdl": "1.1", - "wren-project": "1.1", + "generate-mdl": "1.2", + "wren-connection-info": "1.1", + "wren-project": "1.4", 
"wren-sql": "1.0", "wren-mcp-setup": "1.1", - "wren-quickstart": "1.0" + "wren-quickstart": "1.0", + "wren-usage": "1.0" } diff --git a/skills/wren-connection-info/SKILL.md b/skills/wren-connection-info/SKILL.md new file mode 100644 index 000000000..bec3348ca --- /dev/null +++ b/skills/wren-connection-info/SKILL.md @@ -0,0 +1,247 @@ +--- +name: wren-connection-info +description: Set up data source type and connection credentials for Wren Engine. Use at the start of any workflow that connects to a database — produces either a connectionFilePath (secure, default) or an inline connectionInfo dict (opt-in for testing). Trigger before generate-mdl, wren-project, or any ibis-server API call that needs credentials. +license: Apache-2.0 +metadata: + author: wren-engine + version: "1.1" +--- + +# Wren Connection Info + +Sets up the data source type and credentials before any workflow that queries a database. + +--- + +## Step 1 — Choose data source + +Ask the user for their **data source type**: + +| Value | Database | +|-------|----------| +| `POSTGRES` | PostgreSQL | +| `MYSQL` | MySQL / MariaDB | +| `MSSQL` | SQL Server | +| `DUCKDB` | DuckDB | +| `BIGQUERY` | Google BigQuery | +| `SNOWFLAKE` | Snowflake | +| `CLICKHOUSE` | ClickHouse | +| `TRINO` | Trino | +| `ATHENA` | AWS Athena | +| `ORACLE` | Oracle | +| `DATABRICKS` | Databricks | + +> **Docker note**: If the database runs on the host machine and ibis-server runs inside Docker, replace `localhost` / `127.0.0.1` with `host.docker.internal` in the host field. + +--- + +## Step 2 — Choose connection mode + +Two modes are supported. Ask the user which they prefer, or infer from context. + +The chosen mode is recorded as `connection_mode` in `wren_project.yml` so every subsequent workflow knows how to handle credentials. + +### Mode A — Secure (default, recommended for production) + +`connection_mode: security` in `wren_project.yml`. + +The LLM never handles sensitive values. 
ibis-server reads the connection file directly. + +Use this mode by default unless the user explicitly says they are in a test/development environment and willing to share credentials. + +**When `connection_mode: security` is in effect** (either set explicitly or because the field is absent): +- **Never** read `connection.yml` or `target/connection.json` without first asking the user for permission. +- **Never** display or echo the contents of those files. +- If debugging requires connection info, ask the user to share only non-sensitive fields (e.g. `host`, `port`, `database`, `user`) — never passwords, tokens, or keys. + +### Mode B — Inline (opt-in, testing only) + +`connection_mode: inline` in `wren_project.yml`. + +> **How to opt in**: The user must say something like "I'm just testing, you can use my credentials" or "it's a dev environment, here are my connection details". Do not assume this mode. + +In this mode, ask for all fields including sensitive ones and assemble an inline `connectionInfo` dict. + +--- + +## Step 3 — Gather credentials + +Ask for the fields required for the chosen data source. Sensitive fields (marked **secret**) must **never** be filled in by the LLM in Mode A — leave them as `# TODO` comments. + +### PostgreSQL / MySQL / MSSQL / ClickHouse / Oracle + +``` +host: +port: +user: +password: +database: +``` + +Default ports: PostgreSQL `5432`, MySQL `3306`, MSSQL `1433`, ClickHouse `8123`, Oracle `1521` + +### Trino + +``` +host: +port: +user: +catalog: +schema: +``` + +### BigQuery + +``` +project_id: +dataset_id: +credentials_json_string: +``` + +> **BigQuery credentials**: Wren requires the service account JSON as a **base64-encoded string**, not the raw file. +> After downloading `credentials.json` from GCP, run: +> ```bash +> base64 -i credentials.json | tr -d '\n' +> ``` +> Paste the output as the value of `credentials_json_string`. 
+> On Linux: `base64 -w 0 credentials.json` + +### Snowflake + +``` +user: +password: +account: +database: +sf_schema: +``` + +### DuckDB + +``` +url: +``` + +### Athena + +``` +s3_staging_dir: +region: +aws_access_key_id: +aws_secret_access_key: +``` + +### Databricks + +``` +host: +http_path: +access_token: +``` + +**Sensitive fields by connector** — LLM must never populate these in Mode A: + +| Connector | Sensitive fields | +|-----------|-----------------| +| Postgres / MySQL / MSSQL / ClickHouse / Trino / Oracle | `password` | +| BigQuery | `credentials_json_string` | +| Snowflake | `password`, `private_key` | +| Athena | `aws_access_key_id`, `aws_secret_access_key`, `aws_session_token`, `web_identity_token` | +| S3 / Minio / GCS file | `access_key`, `secret_key` | +| Databricks | `access_token`, `client_secret` | +| Canner | `pat` | + +--- + +## Step 4 — Produce output + +### Mode A output + +Write `/connection.yml` with non-sensitive fields filled in and sensitive fields as `# TODO` comments: + +```yaml +# Example: PostgreSQL +host: my-db.example.com +port: 5432 +user: my_user +password: # TODO: fill in your database password +database: my_db +``` + +Then instruct the user: +> Please fill in the sensitive fields in `connection.yml`, then let me know when done. + +Wait for confirmation, then build `target/connection.json`: + +```bash +python -c " +import yaml, json, pathlib +p = pathlib.Path('connection.yml') +d = yaml.safe_load(p.read_text()) +pathlib.Path('target').mkdir(exist_ok=True) +json.dump(d, open('target/connection.json', 'w')) +" +``` + +**Do NOT read or display the contents of `target/connection.json` after building.** + +> **Server requirement:** ibis-server must have the `CONNECTION_FILE_ROOT` environment variable set to the directory containing `target/connection.json`. When running via Docker (the standard deployment), the workspace is mounted at `/workspace` and `CONNECTION_FILE_ROOT=/workspace` is set by default. 
For local dev, set `CONNECTION_FILE_ROOT` to the project root before starting ibis-server. + +Provide to the calling workflow: +- `connectionFilePath`: absolute path to `target/connection.json` — use the **container-internal** path (e.g. `/workspace/target/connection.json`) when ibis-server runs in Docker, or the host path for local dev +- `data_source`: the data source type string (e.g. `"POSTGRES"`) +- `connection_mode`: `"security"` — record this in `wren_project.yml` + +### Mode B output + +Assemble the inline dict directly. Provide to the calling workflow: +- `connectionInfo`: camelCase JSON dict (see [Field mapping](#field-mapping) below) +- `data_source`: the data source type string +- `connection_mode`: `"inline"` — record this in `wren_project.yml` + +--- + +## Field mapping + +When converting `connection.yml` to `target/connection.json`, rename these snake_case keys to camelCase: + +| YAML (snake_case) | JSON (camelCase) | +|-------------------|-----------------| +| `project_id` | `projectId` | +| `dataset_id` | `datasetId` | +| `credentials_json_string` | `credentialsJsonString` | +| `sf_schema` | `sfSchema` | +| `ssl_mode` | `sslMode` | +| `ssl_ca` | `sslCA` | +| `connection_url` | `connectionUrl` | +| `http_path` | `httpPath` | +| `access_token` | `accessToken` | +| `s3_staging_dir` | `s3StagingDir` | +| `aws_access_key_id` | `awsAccessKeyId` | +| `aws_secret_access_key` | `awsSecretAccessKey` | + +Fields without underscores (`host`, `port`, `user`, `password`, `database`, `account`, `url`, `catalog`, `schema`, `region`) remain unchanged. All other snake_case fields should be converted to camelCase for JSON. + +--- + +## Using connection info in API calls + +After this skill completes, use the output in ibis-server API calls: + +**Mode A (file path):** +```json +{ + "connectionFilePath": "/abs/path/to/target/connection.json", + "manifestStr": "...", + "sql": "..." 
+} +``` + +**Mode B (inline):** +```json +{ + "connectionInfo": { "host": "...", "port": "5432", ... }, + "manifestStr": "...", + "sql": "..." +} +``` diff --git a/skills/wren-mcp-setup/SKILL.md b/skills/wren-mcp-setup/SKILL.md index cc4740e0f..18bfd6c70 100644 --- a/skills/wren-mcp-setup/SKILL.md +++ b/skills/wren-mcp-setup/SKILL.md @@ -2,6 +2,7 @@ name: wren-mcp-setup description: Set up Wren Engine MCP server via Docker and register it with an AI agent. Covers pulling the Docker image, running the container with docker run, mounting a workspace, fixing localhost → host.docker.internal for connection info, registering the MCP server in Claude Code (or other MCP clients) using streamable-http transport, and starting a new session to interact with Wren MCP. Trigger when a user wants to run Wren MCP in Docker, configure Claude Code MCP, or connect an AI client to a Dockerized Wren Engine. compatibility: Requires Docker Desktop (or Docker Engine). +license: Apache-2.0 metadata: author: wren-engine version: "1.1" @@ -72,6 +73,7 @@ docker run -d \ -e MCP_HOST=0.0.0.0 \ -e MCP_PORT=9000 \ -e WREN_URL=localhost:8000 \ + -e CONNECTION_FILE_ROOT=/workspace \ -e MDL_PATH=/workspace/target/mdl.json \ -e CONNECTION_INFO_FILE=/workspace/target/connection.json \ -v :/workspace \ @@ -90,6 +92,7 @@ docker run -d \ -e MCP_HOST=0.0.0.0 \ -e MCP_PORT=9000 \ -e WREN_URL=localhost:8000 \ + -e CONNECTION_FILE_ROOT=/workspace \ -e MDL_PATH=/workspace/target/mdl.json \ -e CONNECTION_INFO_FILE=/workspace/target/connection.json \ -v /Users/me/my-mdl-files:/workspace \ diff --git a/skills/wren-project/SKILL.md b/skills/wren-project/SKILL.md index 177ca1b13..8491c0624 100644 --- a/skills/wren-project/SKILL.md +++ b/skills/wren-project/SKILL.md @@ -1,9 +1,10 @@ --- name: wren-project description: Save, load, and build Wren MDL manifests as YAML project directories for version control. 
Use when a user wants to persist an MDL as human-readable YAML files, load a YAML project back into MDL JSON, or compile a YAML project to a deployable mdl.json file. Also manages connection info stored in connection.yml and compiled to target/connection.json. +license: Apache-2.0 metadata: author: wren-engine - version: "1.1" + version: "1.4" --- # MDL Project @@ -11,15 +12,17 @@ metadata: ## Version check Silently fetch `https://raw.githubusercontent.com/Canner/wren-engine/main/skills/versions.json`. -Compare the `wren-project` key with this skill's version (`1.1`). +Compare the `wren-project` key with this skill's version (`1.4`). If the remote version is newer, notify the user before proceeding: -> A newer version of the **wren-project** skill is available (remote: X.Y, installed: 1.1). +> A newer version of the **wren-project** skill is available (remote: X.Y, installed: 1.4). > Update with: > ```bash > curl -fsSL https://raw.githubusercontent.com/Canner/wren-engine/main/skills/install.sh | bash -s -- --force wren-project > ``` + + Then continue with the workflow below regardless of update status. --- @@ -50,7 +53,11 @@ my_project/ └── connection.json # Connection info JSON (camelCase) ``` -> **Security note**: `connection.yml` may contain credentials. Add `target/` and `connection.yml` to `.gitignore` or use environment variable substitution (see `connection.yml` below) before committing. +> **Security note**: `connection.yml` may contain credentials. Add `target/` and `connection.yml` to `.gitignore` before committing. +> +> **Secrets policy (default)**: When generating `connection.yml`, leave sensitive fields empty with a `# TODO` comment — never ask the user for passwords or credentials in this conversation. ibis-server reads `target/connection.json` directly via its `connectionFilePath` parameter, so secrets stay in the file and out of the LLM context. 
+> +> **Testing / inline mode (opt-in)**: If the user explicitly says they are in a test/development environment and willing to share their credentials, you may help construct the full `connection.yml` or `connectionInfo` dict with actual values inline. Always confirm this intent before proceeding — do not assume. --- @@ -66,8 +73,23 @@ version: "1.0" catalog: wren schema: public data_source: POSTGRES +connection_mode: security # "security" | "inline" (default: "security") ``` +`connection_mode` records how the user has chosen to manage connection credentials: + +| Value | Meaning | +|-------|---------| +| `security` | `connection.yml` / `target/connection.json` contain sensitive credentials. **Never read these files without explicit user confirmation.** | +| `inline` | Credentials are provided inline (test/dev environment). Connection files may be read freely. | + +**Security mode rules** — enforced whenever `connection_mode: security` (or the field is absent): + +1. **Never** read `connection.yml` or `target/connection.json` without first asking the user for permission. +2. **Never** display, log, or echo the contents of those files. +3. If debugging requires connection info, ask the user to share only the non-sensitive fields (e.g. `host`, `port`, `database`, `user`) and leave out passwords, tokens, and keys. +4. After building the project, do **not** read `target/connection.json` to verify — confirm success by other means (e.g. checking that the file exists). + ### `models/.yml` One file per model. Example for `orders`: @@ -122,45 +144,7 @@ views: [] Connection parameters for the data source. Field names use **snake_case** in YAML and are converted to **camelCase** in `target/connection.json`. 
-**PostgreSQL / MySQL / MSSQL / ClickHouse / Trino / Oracle:** -```yaml -host: localhost -port: 5432 -user: my_user -password: my_password -database: my_db -``` - -**BigQuery:** -```yaml -project_id: my-gcp-project -dataset_id: my_dataset -credentials_json_string: '{"type":"service_account","project_id":"..."}' -``` - -**Snowflake:** -```yaml -user: my_user -password: my_password -account: my_account -database: my_db -sf_schema: public -``` - -**DuckDB (local file):** -```yaml -url: /path/to/my.duckdb -``` - -**Environment variable substitution** — to avoid committing secrets, reference env vars with `${VAR_NAME}`: -```yaml -host: ${DB_HOST} -port: ${DB_PORT} -user: ${DB_USER} -password: ${DB_PASSWORD} -database: ${DB_NAME} -``` -When building, resolve each `${VAR_NAME}` value from the environment before writing `target/connection.json`. +Follow the **wren-connection-info** skill (`skills/wren-connection-info/SKILL.md`) for the per-connector field reference, secrets policy, and how to generate `connection.yml` with sensitive fields left as `# TODO` comments. --- @@ -168,7 +152,7 @@ When building, resolve each `${VAR_NAME}` value from the environment before writ To assemble a YAML project back into an MDL JSON dict: -1. Read `wren_project.yml` → extract `catalog`, `schema`, `data_source` +1. Read `wren_project.yml` → extract `catalog`, `schema`, `data_source`, `connection_mode` 2. Read every file in `models/*.yml` → collect into `models` list 3. Read `relationships.yml` → extract `relationships` list 4. Read `views.yml` → extract `views` list @@ -188,10 +172,11 @@ To assemble a YAML project back into an MDL JSON dict: To load connection info: -1. Read `connection.yml` -2. Resolve any `${VAR_NAME}` placeholders from environment variables -3. **Rename snake_case keys to camelCase** (see Field mapping section below) -4. Result is a flat JSON object ready to pass as `connectionInfo` to ibis-server APIs +1. Check `connection_mode` from `wren_project.yml`. 
If it is `security` (or absent), **ask the user for permission before reading `connection.yml`**. +2. Read `connection.yml` +3. Resolve any `${VAR_NAME}` placeholders from environment variables +4. **Rename snake_case keys to camelCase** (see Field mapping section below) +5. Result is a flat JSON object ready to pass as `connectionInfo` to ibis-server APIs --- @@ -200,11 +185,14 @@ To load connection info: Same as **Load** above, but write both compiled files: - `/target/mdl.json` — assembled MDL JSON (camelCase) -- `/target/connection.json` — connection info JSON (camelCase, env vars resolved) +- `/target/connection.json` — connection info JSON (camelCase) + +**Important**: If `connection_mode` is `security` (or absent), do NOT read `connection.yml` without user confirmation, and do NOT read or display `target/connection.json` after building — it contains credentials. After building: - Pass `mdl_file_path="/target/mdl.json"` to `deploy()` to activate the MDL -- Pass the contents of `target/connection.json` as the `connectionInfo` field in API requests +- Pass `connectionFilePath="/target/connection.json"` in API requests instead of inline `connectionInfo` + — ibis-server reads the file directly, so secret values never enter the LLM context --- @@ -228,28 +216,38 @@ All other MDL fields (`name`, `type`, `catalog`, `schema`, `table`, `condition`, **Connection fields:** -| YAML field (snake_case) | JSON field (camelCase) | -|-------------------------|------------------------| -| `project_id` | `projectId` | -| `dataset_id` | `datasetId` | -| `credentials_json_string` | `credentialsJsonString` | -| `sf_schema` | `sfSchema` | - -All other connection fields (`host`, `port`, `user`, `password`, `database`, `account`, `url`) are the same in both formats. +See the **wren-connection-info** skill (`skills/wren-connection-info/SKILL.md`) for the full field mapping and secrets policy. --- ## Typical workflow ``` -1. 
Have MDL JSON dict (from generate-mdl skill or manual construction) -2. Save: write wren_project.yml + connection.yml + models/*.yml + relationships.yml + views.yml - (convert camelCase → snake_case) -3. Add target/ and optionally connection.yml to .gitignore -4. Commit project directory to version control -5. Later — Load: read all YAML files, resolve ${ENV_VAR} placeholders in connection.yml, - rename snake_case → camelCase, assemble MDL JSON dict + connection info dict -6. Build: write assembled JSON to target/mdl.json and target/connection.json +1. Set up data source and connection info + Follow the wren-connection-info skill to choose data source type, gather credentials, + and produce connection.yml (sensitive fields as # TODO) + target/connection.json. + Use connectionFilePath="/target/connection.json" in all subsequent API calls. + +2. Generate MDL + Follow the generate-mdl skill to introspect the database and build the MDL JSON dict, + using the connectionFilePath from step 1 for all ibis-server calls. + +3. Save project + Write wren_project.yml + models/*.yml + relationships.yml + views.yml + (convert camelCase → snake_case from the MDL JSON). + Set connection_mode in wren_project.yml based on the user's chosen mode (default: security). + connection.yml was already written in step 1. + +4. Add target/ and connection.yml to .gitignore +5. Commit project directory to version control (without secrets) + +6. Later — Build: read wren_project.yml first to check connection_mode. + If security mode, ask user before reading connection.yml. + Read remaining YAML files, rename snake_case → camelCase, + write target/mdl.json and target/connection.json. + Do NOT read or display target/connection.json (security mode). + 7. 
Deploy: deploy(mdl_file_path="./target/mdl.json") - use target/connection.json as connectionInfo in API requests + use connectionFilePath="/target/connection.json" in API requests + (ibis-server reads the file directly — secrets stay out of this conversation) ``` diff --git a/skills/wren-quickstart/SKILL.md b/skills/wren-quickstart/SKILL.md index aeea880dc..209cef3a1 100644 --- a/skills/wren-quickstart/SKILL.md +++ b/skills/wren-quickstart/SKILL.md @@ -1,7 +1,8 @@ --- name: wren-quickstart -description: End-to-end quickstart for Wren Engine — from zero to querying. Guides the user through installing skills, creating a workspace, generating an MDL from a live database, saving it as a versioned project, starting the Wren MCP Docker container, and verifying the setup with a health check. Trigger when a user wants to set up Wren Engine from scratch, onboard a new data source, or get started with Wren MCP. +description: End-to-end quickstart for Wren Engine — create a workspace, generate an MDL from a live database, save it as a versioned project, start the Wren MCP Docker container, and verify the setup with a health check. Trigger when a user wants to set up Wren Engine from scratch, onboard a new data source, or get started with Wren MCP. Requires dependent skills already installed (use /wren-usage to install them first). compatibility: Requires Docker Desktop (or Docker Engine). No local database drivers needed. +license: Apache-2.0 metadata: author: wren-engine version: "1.0" @@ -25,36 +26,13 @@ Then continue with the workflow below regardless of update status. --- -This skill walks a user through setting up Wren Engine end-to-end — from installing the required skills to running their first query via MCP. Each phase delegates to a focused skill. Follow the steps in order. +This skill walks a user through setting up Wren Engine end-to-end — from creating a workspace to running their first query via MCP. Each phase delegates to a focused skill. 
Follow the steps in order. ---- - -## Phase 1 — Install skills - -Before the workflow can proceed, the user needs the dependent skills installed locally. - -Tell the user to run the install script once: - -```bash -# From a local clone: -bash skills/install.sh - -# Or remotely (no clone required): -curl -fsSL https://raw.githubusercontent.com/Canner/wren-engine/main/skills/install.sh | bash -``` - -This installs all Wren skills (`generate-mdl`, `wren-project`, `wren-sql`, `wren-mcp-setup`, `wren-quickstart`) into `~/.claude/skills/`. - -After installation, the user should **restart their AI client session** so the new skills are loaded. - -> If the user only wants specific skills, they can pass names as arguments: -> ```bash -> bash skills/install.sh generate-mdl wren-project wren-mcp-setup -> ``` +> **Prerequisites:** The dependent skills (`generate-mdl`, `wren-project`, `wren-mcp-setup`, `wren-connection-info`) must be installed. If they are missing, use `/wren-usage` first — it handles skill installation and then routes back here for setup. --- -## Phase 2 — Create a workspace +## Phase 1 — Create a workspace Create a dedicated workspace directory on the host machine. This directory will be mounted into the Docker container, so the container can read and write MDL files. @@ -83,9 +61,9 @@ Recommended workspace layout after the quickstart completes: --- -## Phase 3 — Generate MDL and save project +## Phase 2 — Generate MDL and save project -### 3a — Generate MDL +### 2a — Generate MDL Invoke the **generate-mdl** skill to introspect the user's database and build the MDL manifest: @@ -99,11 +77,11 @@ The generate-mdl skill will: 3. Build the MDL JSON (models, columns, relationships) 4. Validate the manifest with a dry-plan -> **Important:** At this stage ibis-server may not be running yet. If the user has not started a container, proceed to Phase 4 first (start the container), then come back to generate the MDL using the running ibis-server on port 8000. 
+> **Important:** At this stage ibis-server may not be running yet. If the user has not started a container, proceed to Phase 3 first (start the container), then come back to generate the MDL using the running ibis-server on port 8000. > -> Alternatively, if the user already has a running ibis-server, run Phase 3 before Phase 4. +> Alternatively, if the user already has a running ibis-server, run Phase 2 before Phase 3. -### 3b — Save as YAML project +### 2b — Save as YAML project After the MDL is generated, invoke the **wren-project** skill to save it as a versioned YAML project inside the workspace: @@ -128,7 +106,7 @@ The Docker container will auto-load these files at startup. --- -## Phase 4 — Start and register the MCP server +## Phase 3 — Start and register the MCP server Invoke the **wren-mcp-setup** skill to start the Docker container and register the MCP server with the AI client: @@ -148,7 +126,7 @@ The wren-mcp-setup skill will: --- -## Phase 5 — Verify and confirm +## Phase 4 — Verify and confirm Once the MCP server is registered, the user must **start a new session** for the Wren MCP tools to be loaded. Instruct the user to do this now. 
@@ -177,9 +155,9 @@ If the health check fails, follow the troubleshooting steps in the **wren-mcp-se | Phase | Skill | Purpose | |-------|-------|---------| -| 3a | `@generate-mdl` | Introspect database and build MDL JSON | -| 3b | `@wren-project` | Save MDL as YAML project + compile to `target/` | -| 4 | `@wren-mcp-setup` | Start Docker container and register MCP server | +| 2a | `@generate-mdl` | Introspect database and build MDL JSON | +| 2b | `@wren-project` | Save MDL as YAML project + compile to `target/` | +| 3 | `@wren-mcp-setup` | Start Docker container and register MCP server | --- @@ -190,7 +168,7 @@ If the health check fails, follow the troubleshooting steps in the **wren-mcp-se - Check container logs: `docker logs wren-mcp` **generate-mdl fails because ibis-server is not yet running:** -- Start the container first (Phase 4), then return to Phase 3. +- Start the container first (Phase 3), then return to Phase 2. - ibis-server is available at `http://localhost:8000` once the container is up. **MCP tools not available after registration:** diff --git a/skills/wren-usage/SKILL.md b/skills/wren-usage/SKILL.md new file mode 100644 index 000000000..2f341258f --- /dev/null +++ b/skills/wren-usage/SKILL.md @@ -0,0 +1,165 @@ +--- +name: wren-usage +description: Daily usage guide for Wren Engine — connect to a database, write SQL queries, manage MDL projects, and operate the MCP server. Use when a user wants to perform any ongoing Wren task after initial setup. Trigger for: write SQL, query data, update MDL, add a model, change connection, rebuild project, restart MCP server. +license: Apache-2.0 +metadata: + author: wren-engine + version: "1.0" +--- + +# Wren Engine — Usage Guide + +## Version check + +Silently fetch `https://raw.githubusercontent.com/Canner/wren-engine/main/skills/versions.json`. +Compare the `wren-usage` key with this skill's version (`1.0`). 
+If the remote version is newer, notify the user before proceeding:
+
+> A newer version of the **wren-usage** skill is available (remote: X.Y, installed: 1.0).
+> Update with:
+> ```bash
+> curl -fsSL https://raw.githubusercontent.com/Canner/wren-engine/main/skills/install.sh | bash -s -- --force wren-usage
+> ```
+
+Then continue with the workflow below regardless of update status.
+
+---
+
+This skill is your day-to-day reference for working with Wren Engine. It delegates to focused sub-skills for each task.
+
+---
+
+## Step 0 — Install dependent skills (first time only)
+
+Check whether the required skills are already installed in `~/.claude/skills/`. If any are missing, tell the user to run:
+
+```bash
+# Install wren-usage and all its dependencies in one command:
+curl -fsSL https://raw.githubusercontent.com/Canner/wren-engine/main/skills/install.sh | bash -s -- wren-usage
+```
+
+This installs `wren-usage`, `wren-connection-info`, `generate-mdl`, `wren-project`, `wren-sql`, and `wren-mcp-setup` into `~/.claude/skills/`.
+
+After installation, the user must **start a new session** for the new skills to be loaded.
+
+> If the user is setting up Wren Engine for the first time (no running container or deployed MDL yet), use `/wren-quickstart` for a guided end-to-end walkthrough instead.
+
+---
+
+## What do you want to do?
+ +Identify the user's intent and delegate to the appropriate skill: + +| Task | Skill | +|------|-------| +| Write or debug a SQL query | `@wren-sql` | +| Connect to a new database / change credentials | `@wren-connection-info` | +| Generate MDL from an existing database | `@generate-mdl` | +| Save MDL to YAML files (version control) | `@wren-project` | +| Load a saved YAML project / rebuild `target/mdl.json` | `@wren-project` | +| Add a new model or column to the MDL | `@wren-project` | +| Start, reset, or reconfigure the MCP server | `@wren-mcp-setup` | +| First-time setup from scratch | `@wren-quickstart` | + +--- + +## Common workflows + +### Query your data + +Invoke `@wren-sql` to write a SQL query against the deployed MDL. + +Key rules: +- Query MDL model names directly (e.g. `SELECT * FROM orders`) +- Use `CAST` for type conversions, not `::` syntax +- Avoid correlated subqueries — use JOINs or CTEs instead + +```sql +-- Example: revenue by month +SELECT DATE_TRUNC('month', order_date) AS month, + SUM(total) AS revenue +FROM orders +GROUP BY 1 +ORDER BY 1 +``` + +For type-specific patterns (ARRAY, STRUCT, JSON), date/time arithmetic, or BigQuery dialect quirks, invoke `@wren-sql` for full guidance. + +--- + +### Update connection credentials + +Invoke `@wren-connection-info` to: +- Change the data source type or credentials +- Produce a new `connection.yml` + `target/connection.json` +- Switch between `connectionFilePath` (secure) and inline dict + +--- + +### Extend the MDL + +To add a model, column, relationship, or view to an existing project: + +1. Invoke `@wren-project` — **Load** the existing YAML project into an MDL dict +2. Edit the relevant YAML file (e.g. `models/orders.yml`) +3. Invoke `@wren-project` — **Build** to compile updated `target/mdl.json` +4. Call `deploy(mdl_file_path="./target/mdl.json")` to apply the change + +--- + +### Regenerate MDL from database + +When the database schema has changed and the MDL needs to be refreshed: + +1. 
Invoke `@wren-connection-info` — confirm or update credentials +2. Invoke `@generate-mdl` — re-introspect the database and rebuild the MDL JSON +3. Invoke `@wren-project` — **Save** the new MDL as an updated YAML project +4. Invoke `@wren-project` — **Build** to compile `target/mdl.json` +5. Deploy + +--- + +### MCP server operations + +| Operation | Command | +|-----------|---------| +| Check status | `docker ps --filter name=wren-mcp` | +| View logs | `docker logs wren-mcp` | +| Restart | `docker restart wren-mcp` | +| Full reconfigure | Invoke `@wren-mcp-setup` | +| Verify health | `health_check()` via MCP tools | + +--- + +## Quick reference — MCP tools + +| Tool | Purpose | +|------|---------| +| `health_check()` | Verify Wren Engine is reachable | +| `query(sql=...)` | Execute a SQL query against the deployed MDL | +| `deploy(mdl_file_path=...)` | Load a compiled `mdl.json` | +| `setup_connection(...)` | Configure data source credentials | +| `list_remote_tables(...)` | Introspect database schema | +| `mdl_validate_manifest(...)` | Validate an MDL JSON dict | +| `mdl_save_project(...)` | Save MDL as a YAML project | + +--- + +## Troubleshooting quick guide + +**Query fails with "table not found":** +- The MDL may not be deployed. Run `deploy(mdl_file_path="./target/mdl.json")`. +- Check model names match exactly (case-sensitive). + +**Connection error on queries:** +- Verify credentials with `@wren-connection-info`. +- Inside Docker: use `host.docker.internal` instead of `localhost`. + +**MDL changes not reflected:** +- Re-run `@wren-project` **Build** step and re-deploy. + +**MCP tools unavailable:** +- Start a new Claude Code session after registering the MCP server. +- Check: `docker ps --filter name=wren-mcp` and `docker logs wren-mcp`. + +For detailed MCP setup troubleshooting, invoke `@wren-mcp-setup`.