def convert_dlt_to_project(
    duckdb_path: str | Path,
    *,
    project_name: str | None = None,
) -> list[ProjectFile]:
    """Introspect a dlt DuckDB file and generate Wren v2 project files.

    Args:
        duckdb_path: Path to .duckdb file produced by dlt.
        project_name: Project name (defaults to DuckDB filename stem).

    Returns:
        List of ProjectFile ready for write_project_files().

    Raises:
        FileNotFoundError: If ``duckdb_path`` does not exist (raised by
            the introspector when opening the file).
        ValueError: If the same table name appears in more than one schema,
            since model files are keyed on table name only.
    """
    # utcnow() is deprecated since Python 3.12 and returns a *naive*
    # datetime; use an explicit timezone-aware UTC timestamp instead.
    from datetime import datetime, timezone  # noqa: PLC0415

    from wren.dlt_introspect import DltIntrospector  # noqa: PLC0415

    duckdb_path = Path(duckdb_path)
    project_name = project_name or duckdb_path.stem

    with DltIntrospector(duckdb_path) as introspector:
        tables, relationships = introspector.introspect()

    # Guard: same table name in multiple schemas would silently overwrite
    # model files, because the model path uses the table name alone.
    seen: dict[str, str] = {}
    for table in tables:
        prev_schema = seen.setdefault(table.name, table.schema)
        if prev_schema != table.schema:
            raise ValueError(
                f"Duplicate table name across schemas is ambiguous for project "
                f"generation: {table.name!r} appears in {prev_schema!r} "
                f"and {table.schema!r}. Rename one of the tables before importing."
            )

    files: list[ProjectFile] = []

    # ── wren_project.yml ──────────────────────────────────────
    project_config: dict[str, Any] = {
        "schema_version": 2,
        "name": project_name,
        "version": "1.0",
        "catalog": "",
        "schema": "public",
        "data_source": "duckdb",
    }
    files.append(
        ProjectFile(
            relative_path="wren_project.yml",
            content=yaml.dump(
                project_config, default_flow_style=False, sort_keys=False
            ),
        )
    )

    # ── Models ────────────────────────────────────────────────
    for table in tables:
        model: dict[str, Any] = {
            "name": table.name,
            "table_reference": {
                "catalog": table.catalog,
                "schema": table.schema,
                "table": table.name,
            },
            "columns": [
                {
                    "name": col.name,
                    "type": col.normalized_type,
                    "is_calculated": False,
                    "not_null": not col.is_nullable,
                    "properties": {},
                }
                for col in table.columns
            ],
            "cached": False,
            "properties": {"description": "Imported from dlt pipeline"},
        }
        files.append(
            ProjectFile(
                relative_path=f"models/{table.name}/metadata.yml",
                content=yaml.dump(model, default_flow_style=False, sort_keys=False),
            )
        )

    # ── Relationships ─────────────────────────────────────────
    files.append(
        ProjectFile(
            relative_path="relationships.yml",
            content=yaml.dump(
                {"relationships": relationships},
                default_flow_style=False,
                sort_keys=False,
            ),
        )
    )

    # ── Instructions ──────────────────────────────────────────
    # Aware UTC timestamp (includes the +00:00 offset) for provenance.
    now = datetime.now(timezone.utc).isoformat(timespec="seconds")
    files.append(
        ProjectFile(
            relative_path="instructions.md",
            content=(
                "# Instructions\n\n"
                "This project was generated from a dlt DuckDB pipeline.\n"
                f"Source: {duckdb_path}\n"
                f"Generated: {now}\n"
            ),
        )
    )

    return files
@@ def init( Optional[str], typer.Option("--from-mdl", help="Import from MDL JSON file (camelCase)."), ] = None, + from_dlt: Annotated[ + Optional[str], + typer.Option("--from-dlt", help="Import from dlt-produced DuckDB file."), + ] = None, + profile: Annotated[ + Optional[str], + typer.Option( + "--profile", + help="Create a named DuckDB connection profile (requires --from-dlt).", + ), + ] = None, force: Annotated[ bool, typer.Option("--force", help="Overwrite existing project files."), @@ -38,12 +49,23 @@ def init( ) -> None: """Initialize a new Wren project. - Without --from-mdl: scaffolds an empty project structure. + Without flags: scaffolds an empty project structure. With --from-mdl: imports an existing MDL JSON and produces a complete v2 YAML project, ready for `wren context validate/build`. + With --from-dlt: introspects a dlt DuckDB file and generates models, + relationships, and project config automatically. """ project_path = Path(path).expanduser() if path else Path.cwd() + if from_mdl and from_dlt: + typer.echo( + "Error: --from-mdl and --from-dlt cannot be used together.", err=True + ) + raise typer.Exit(1) + if profile and not from_dlt: + typer.echo("Error: --profile requires --from-dlt.", err=True) + raise typer.Exit(1) + if from_mdl: # ── Import from MDL JSON ────────────────────────────── from wren.context import ( # noqa: PLC0415 @@ -77,6 +99,67 @@ def init( typer.echo(f" wren context build --path {project_path}") return + if from_dlt: + # ── Import from dlt DuckDB ──────────────────────────── + from wren.context import ( # noqa: PLC0415 + convert_dlt_to_project, + write_project_files, + ) + + duckdb_path = Path(from_dlt).expanduser() + if not duckdb_path.exists(): + typer.echo(f"Error: {duckdb_path} not found.", err=True) + raise typer.Exit(1) + + try: + files = convert_dlt_to_project(duckdb_path) + except Exception as e: + typer.echo(f"Error reading DuckDB file: {e}", err=True) + raise typer.Exit(1) + + try: + write_project_files(files, 
project_path, force=force) + except SystemExit as e: + typer.echo(str(e), err=True) + raise typer.Exit(1) + + # Count models and relationships from generated files + model_count = sum( + 1 + for f in files + if f.relative_path.startswith("models/") + and f.relative_path.endswith("/metadata.yml") + ) + rel_count = 0 + for f in files: + if f.relative_path == "relationships.yml": + import yaml as _yaml # noqa: PLC0415 + + data = _yaml.safe_load(f.content) or {} + rel_count = len(data.get("relationships", [])) + break + + typer.echo(f"Imported dlt DuckDB to YAML project at {project_path}/") + typer.echo(f" {model_count} models, {rel_count} relationships") + + if profile: + from wren.profile import add_profile # noqa: PLC0415 + + add_profile( + profile, + { + "datasource": "duckdb", + "url": str(duckdb_path.resolve()), + }, + activate=True, + ) + typer.echo(f" Profile '{profile}' created and activated.") + + typer.echo("\nNext steps:") + typer.echo(f" wren context validate --path {project_path}") + typer.echo(f" wren context build --path {project_path}") + return + # ── Scaffold empty project (existing behavior) ──────────── project_file = project_path / "wren_project.yml" if project_file.exists() and not force: diff --git a/wren/src/wren/dlt_introspect.py b/wren/src/wren/dlt_introspect.py new file mode 100644 index 000000000..e6022de33 --- /dev/null +++ b/wren/src/wren/dlt_introspect.py @@ -0,0 +1,192 @@ +"""DuckDB schema introspection for dlt-produced databases. + +Connects READ_ONLY to a .duckdb file produced by a dlt pipeline, +discovers user tables across all schemas, filters dlt internal tables +and columns, and detects parent-child relationships from dlt's +``_dlt_parent_id`` naming convention. 
+""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass, field +from pathlib import Path + +logger = logging.getLogger(__name__) + +# dlt internal table names to exclude (exact match) +_DLT_INTERNAL_TABLES = frozenset({"_dlt_loads", "_dlt_pipeline_state", "_dlt_version"}) + +# dlt internal column names to exclude from model output +# (still read internally for relationship detection) +_DLT_INTERNAL_COLUMNS = frozenset( + {"_dlt_id", "_dlt_parent_id", "_dlt_load_id", "_dlt_list_idx"} +) + + +@dataclass +class DltColumn: + name: str + data_type: str # raw DuckDB type string + normalized_type: str # after sqlglot parse_type() + is_nullable: bool + + +@dataclass +class DltTable: + catalog: str # attached database alias (filename stem) + schema: str # DuckDB schema (e.g. "main", "hubspot_crm") + name: str # table name + columns: list[DltColumn] = field(default_factory=list) + has_dlt_parent_id: bool = False + + +class DltIntrospector: + """Introspect a dlt-produced DuckDB file and return tables + relationships.""" + + def __init__(self, duckdb_path: str | Path): + """Open the DuckDB file READ_ONLY via ATTACH.""" + import duckdb # noqa: PLC0415 + + self._path = Path(duckdb_path) + if not self._path.exists(): + raise FileNotFoundError(f"DuckDB file not found: {self._path}") + + # Use the filename stem as the catalog name for table_reference output, + # but prefix the ATTACH alias to avoid collisions with DuckDB reserved + # catalog names (memory, system, temp, etc.). 
+ self._catalog = self._path.stem + self._alias = "dlt_" + self._catalog + self._con = duckdb.connect() + escaped_path = str(self._path).replace("'", "''") + escaped_alias = self._alias.replace('"', '""') + self._con.execute(f"ATTACH '{escaped_path}' AS \"{escaped_alias}\" (READ_ONLY)") + + # ── Public API ───────────────────────────────────────────────────────── + + def introspect(self) -> tuple[list[DltTable], list[dict]]: + """Return (tables, relationships) for the attached database.""" + tables = self._discover_tables() + relationships = self._detect_relationships(tables) + return tables, relationships + + # ── Internal helpers ─────────────────────────────────────────────────── + + def _discover_tables(self) -> list[DltTable]: + """Query duckdb_tables() / duckdb_columns() and build DltTable list.""" + from wren.type_mapping import parse_type # noqa: PLC0415 + + # duckdb_tables() returns (database_name, schema_name, table_name, ...) + table_rows = self._con.execute( + """ + SELECT schema_name, table_name + FROM duckdb_tables() + WHERE database_name = ? + AND NOT starts_with(table_name, '_dlt_') + ORDER BY schema_name, table_name + """, + [self._alias], + ).fetchall() + + tables: list[DltTable] = [] + for schema, table_name in table_rows: + if table_name in _DLT_INTERNAL_TABLES: + continue + + # duckdb_columns() returns (database_name, schema_name, table_name, + # column_name, data_type, is_nullable, ...) + col_rows = self._con.execute( + """ + SELECT column_name, data_type, is_nullable + FROM duckdb_columns() + WHERE database_name = ? AND schema_name = ? AND table_name = ? 
+ ORDER BY column_index + """, + [self._alias, schema, table_name], + ).fetchall() + + has_parent_id = False + columns: list[DltColumn] = [] + for col_name, raw_type, is_nullable in col_rows: + if col_name == "_dlt_parent_id": + has_parent_id = True + if col_name in _DLT_INTERNAL_COLUMNS: + continue + normalized = parse_type(raw_type, "duckdb") + columns.append( + DltColumn( + name=col_name, + data_type=raw_type, + normalized_type=normalized, + is_nullable=bool(is_nullable), + ) + ) + + tables.append( + DltTable( + catalog=self._catalog, + schema=schema, + name=table_name, + columns=columns, + has_dlt_parent_id=has_parent_id, + ) + ) + + return tables + + def _detect_relationships(self, tables: list[DltTable]) -> list[dict]: + """Detect parent-child relationships from dlt's ``__`` naming convention. + + A child table has ``_dlt_parent_id`` and its name is + ``{parent_name}__{child_suffix}``. We find the longest prefix of + the child's ``__``-split parts that matches a known table name. + """ + table_names = {t.name for t in tables} + relationships: list[dict] = [] + + for table in tables: + if not table.has_dlt_parent_id: + continue + + parts = table.name.split("__") + parent_name: str | None = None + for i in range(len(parts) - 1, 0, -1): + candidate = "__".join(parts[:i]) + if candidate in table_names: + parent_name = candidate + break + + if parent_name is None: + logger.warning( + "Child table '%s' has _dlt_parent_id but no matching parent " + "found — skipping relationship", + table.name, + ) + continue + + child_suffix = table.name[len(parent_name) + 2 :] # strip "parent__" + rel_name = f"{parent_name}_{child_suffix}" + relationships.append( + { + "name": rel_name, + "models": [parent_name, table.name], + "join_type": "ONE_TO_MANY", + "condition": ( + f"{table.name}._dlt_parent_id = {parent_name}._dlt_id" + ), + } + ) + + return relationships + + # ── Context manager ──────────────────────────────────────────────────── + + def close(self) -> None: + """Close 
the DuckDB connection.""" + self._con.close() + + def __enter__(self) -> DltIntrospector: + return self + + def __exit__(self, *args: object) -> None: + self.close() diff --git a/wren/tests/unit/test_dlt_introspect.py b/wren/tests/unit/test_dlt_introspect.py new file mode 100644 index 000000000..18b09aa2b --- /dev/null +++ b/wren/tests/unit/test_dlt_introspect.py @@ -0,0 +1,405 @@ +"""Unit tests for wren.dlt_introspect and context.convert_dlt_to_project.""" + +from __future__ import annotations + +import logging +from pathlib import Path + +import pytest +import yaml +from typer.testing import CliRunner + +from wren.cli import app +from wren.context import convert_dlt_to_project +from wren.dlt_introspect import DltIntrospector + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +runner = CliRunner() + + +@pytest.fixture() +def dlt_duckdb(tmp_path: Path): + """Create a DuckDB file that mimics a dlt pipeline output.""" + duckdb = pytest.importorskip("duckdb") + db_path = tmp_path / "test_pipeline.duckdb" + con = duckdb.connect(str(db_path)) + con.execute(""" + CREATE TABLE hubspot__contacts ( + _dlt_id VARCHAR, + _dlt_load_id VARCHAR, + id BIGINT, + email VARCHAR, + first_name VARCHAR, + created_at TIMESTAMP + ); + CREATE TABLE hubspot__contacts__emails ( + _dlt_id VARCHAR, + _dlt_parent_id VARCHAR, + _dlt_load_id VARCHAR, + _dlt_list_idx BIGINT, + email_address VARCHAR, + email_type VARCHAR + ); + CREATE TABLE _dlt_loads ( + load_id VARCHAR, + status VARCHAR + ); + CREATE TABLE _dlt_pipeline_state ( + version BIGINT, + state VARCHAR + ); + """) + con.close() + return db_path + + +@pytest.fixture() +def empty_duckdb(tmp_path: Path): + """DuckDB file with no user tables.""" + duckdb = pytest.importorskip("duckdb") + db_path = tmp_path / "empty.duckdb" + duckdb.connect(str(db_path)).close() + return db_path + + +@pytest.fixture() +def 
multi_schema_duckdb(tmp_path: Path): + """DuckDB file with tables in multiple schemas.""" + duckdb = pytest.importorskip("duckdb") + db_path = tmp_path / "multi.duckdb" + con = duckdb.connect(str(db_path)) + con.execute(""" + CREATE SCHEMA crm; + CREATE TABLE crm.contacts (id BIGINT, name VARCHAR); + CREATE TABLE orders (order_id BIGINT, amount DOUBLE); + """) + con.close() + return db_path + + +@pytest.fixture() +def orphan_child_duckdb(tmp_path: Path): + """Child table with _dlt_parent_id but no matching parent.""" + duckdb = pytest.importorskip("duckdb") + db_path = tmp_path / "orphan.duckdb" + con = duckdb.connect(str(db_path)) + con.execute(""" + CREATE TABLE orphan__child ( + _dlt_id VARCHAR, + _dlt_parent_id VARCHAR, + value INTEGER + ); + """) + con.close() + return db_path + + +# --------------------------------------------------------------------------- +# DltIntrospector tests +# --------------------------------------------------------------------------- + + +class TestDiscoverTables: + def test_finds_user_tables(self, dlt_duckdb): + with DltIntrospector(dlt_duckdb) as intro: + tables, _ = intro.introspect() + + names = {t.name for t in tables} + assert "hubspot__contacts" in names + assert "hubspot__contacts__emails" in names + + def test_excludes_dlt_internal_tables(self, dlt_duckdb): + with DltIntrospector(dlt_duckdb) as intro: + tables, _ = intro.introspect() + + names = {t.name for t in tables} + assert "_dlt_loads" not in names + assert "_dlt_pipeline_state" not in names + + def test_empty_database_returns_no_tables(self, empty_duckdb): + with DltIntrospector(empty_duckdb) as intro: + tables, rels = intro.introspect() + + assert tables == [] + assert rels == [] + + def test_multiple_schemas_discovered(self, multi_schema_duckdb): + with DltIntrospector(multi_schema_duckdb) as intro: + tables, _ = intro.introspect() + + names = {t.name for t in tables} + assert "contacts" in names + assert "orders" in names + + def test_schema_field_populated(self, 
class TestFilterDltColumns:
    """Internal dlt bookkeeping columns are stripped; user columns survive."""

    @staticmethod
    def _table(db_path, table_name):
        # Introspect once and return the named DltTable.
        with DltIntrospector(db_path) as intro:
            discovered, _ = intro.introspect()
        return next(t for t in discovered if t.name == table_name)

    def test_excludes_dlt_columns_from_contacts(self, dlt_duckdb):
        contacts = self._table(dlt_duckdb, "hubspot__contacts")
        names = {c.name for c in contacts.columns}
        assert "_dlt_id" not in names
        assert "_dlt_load_id" not in names
        # User columns preserved
        assert "id" in names
        assert "email" in names

    def test_excludes_dlt_columns_from_child(self, dlt_duckdb):
        emails = self._table(dlt_duckdb, "hubspot__contacts__emails")
        names = {c.name for c in emails.columns}
        assert "_dlt_parent_id" not in names
        assert "_dlt_list_idx" not in names
        assert "email_address" in names

    def test_has_dlt_parent_id_flag(self, dlt_duckdb):
        with DltIntrospector(dlt_duckdb) as intro:
            discovered, _ = intro.introspect()
        by_name = {t.name: t for t in discovered}
        assert by_name["hubspot__contacts"].has_dlt_parent_id is False
        assert by_name["hubspot__contacts__emails"].has_dlt_parent_id is True
class TestDetectRelationships:
    """Parent-child links inferred from dlt's ``__`` naming + _dlt_parent_id."""

    @staticmethod
    def _introspect(db_path):
        # Open, introspect, and close in one step; return (tables, rels).
        with DltIntrospector(db_path) as intro:
            return intro.introspect()

    def test_detects_parent_child(self, dlt_duckdb):
        _, rels = self._introspect(dlt_duckdb)
        assert len(rels) == 1
        (rel,) = rels
        assert rel["models"] == ["hubspot__contacts", "hubspot__contacts__emails"]
        assert rel["join_type"] == "ONE_TO_MANY"
        assert "_dlt_parent_id" in rel["condition"]
        assert "_dlt_id" in rel["condition"]

    def test_relationship_name(self, dlt_duckdb):
        _, rels = self._introspect(dlt_duckdb)
        assert rels[0]["name"] == "hubspot__contacts_emails"

    def test_no_parent_found_emits_warning(self, orphan_child_duckdb, caplog):
        with caplog.at_level(logging.WARNING, logger="wren.dlt_introspect"):
            _, rels = self._introspect(orphan_child_duckdb)

        assert rels == []
        assert any("no matching parent" in rec.message for rec in caplog.records)

    def test_no_parent_found_skips_relationship(self, orphan_child_duckdb):
        _, rels = self._introspect(orphan_child_duckdb)
        assert rels == []
{f.relative_path for f in files} + assert "wren_project.yml" in paths + + def test_wren_project_yml_content(self, dlt_duckdb): + files = convert_dlt_to_project(dlt_duckdb) + proj = next(f for f in files if f.relative_path == "wren_project.yml") + data = yaml.safe_load(proj.content) + assert data["schema_version"] == 2 + assert data["data_source"] == "duckdb" + assert data["name"] == "test_pipeline" + + def test_custom_project_name(self, dlt_duckdb): + files = convert_dlt_to_project(dlt_duckdb, project_name="my_project") + proj = next(f for f in files if f.relative_path == "wren_project.yml") + data = yaml.safe_load(proj.content) + assert data["name"] == "my_project" + + def test_model_files_generated(self, dlt_duckdb): + files = convert_dlt_to_project(dlt_duckdb) + paths = {f.relative_path for f in files} + assert "models/hubspot__contacts/metadata.yml" in paths + assert "models/hubspot__contacts__emails/metadata.yml" in paths + + def test_model_content(self, dlt_duckdb): + files = convert_dlt_to_project(dlt_duckdb) + meta = next( + f + for f in files + if f.relative_path == "models/hubspot__contacts/metadata.yml" + ) + data = yaml.safe_load(meta.content) + assert data["name"] == "hubspot__contacts" + assert data["table_reference"]["table"] == "hubspot__contacts" + assert data["table_reference"]["schema"] == "main" + col_names = [c["name"] for c in data["columns"]] + assert "id" in col_names + assert "email" in col_names + # dlt columns must be excluded + assert "_dlt_id" not in col_names + assert "_dlt_load_id" not in col_names + + def test_relationships_file_generated(self, dlt_duckdb): + files = convert_dlt_to_project(dlt_duckdb) + paths = {f.relative_path for f in files} + assert "relationships.yml" in paths + + def test_relationships_content(self, dlt_duckdb): + files = convert_dlt_to_project(dlt_duckdb) + rel_file = next(f for f in files if f.relative_path == "relationships.yml") + data = yaml.safe_load(rel_file.content) + rels = data["relationships"] + 
assert len(rels) == 1 + assert rels[0]["join_type"] == "ONE_TO_MANY" + + def test_instructions_file_generated(self, dlt_duckdb): + files = convert_dlt_to_project(dlt_duckdb) + paths = {f.relative_path for f in files} + assert "instructions.md" in paths + + def test_instructions_content(self, dlt_duckdb): + files = convert_dlt_to_project(dlt_duckdb) + inst = next(f for f in files if f.relative_path == "instructions.md") + assert "dlt" in inst.content + assert "test_pipeline" in inst.content + + def test_empty_database_no_models(self, empty_duckdb): + files = convert_dlt_to_project(empty_duckdb) + model_files = [f for f in files if f.relative_path.startswith("models/")] + assert model_files == [] + + def test_missing_file_raises(self, tmp_path): + with pytest.raises(FileNotFoundError): + convert_dlt_to_project(tmp_path / "nonexistent.duckdb") + + +# --------------------------------------------------------------------------- +# CLI integration test +# --------------------------------------------------------------------------- + + +class TestInitFromDlt: + def test_init_from_dlt_creates_project(self, dlt_duckdb, tmp_path): + result = runner.invoke( + app, + ["context", "init", "--from-dlt", str(dlt_duckdb), "--path", str(tmp_path)], + ) + assert result.exit_code == 0, result.output + assert (tmp_path / "wren_project.yml").exists() + assert (tmp_path / "models" / "hubspot__contacts" / "metadata.yml").exists() + assert ( + tmp_path / "models" / "hubspot__contacts__emails" / "metadata.yml" + ).exists() + assert (tmp_path / "relationships.yml").exists() + + def test_init_from_dlt_summary_output(self, dlt_duckdb, tmp_path): + result = runner.invoke( + app, + ["context", "init", "--from-dlt", str(dlt_duckdb), "--path", str(tmp_path)], + ) + assert "2 models" in result.output + assert "1 relationships" in result.output + + def test_init_from_dlt_missing_file(self, tmp_path): + result = runner.invoke( + app, + [ + "context", + "init", + "--from-dlt", + str(tmp_path / 
"missing.duckdb"), + "--path", + str(tmp_path), + ], + ) + assert result.exit_code == 1 + assert "not found" in result.output + + def test_init_from_dlt_and_mdl_mutually_exclusive(self, tmp_path): + result = runner.invoke( + app, + [ + "context", + "init", + "--from-dlt", + "some.duckdb", + "--from-mdl", + "some.json", + "--path", + str(tmp_path), + ], + ) + assert result.exit_code == 1 + assert "cannot be used together" in result.output + + def test_init_from_dlt_refuses_existing_without_force(self, dlt_duckdb, tmp_path): + (tmp_path / "wren_project.yml").write_text("name: existing\n") + result = runner.invoke( + app, + ["context", "init", "--from-dlt", str(dlt_duckdb), "--path", str(tmp_path)], + ) + assert result.exit_code == 1 + assert "already exists" in result.output + + def test_init_from_dlt_force_overwrites(self, dlt_duckdb, tmp_path): + (tmp_path / "wren_project.yml").write_text("name: existing\n") + result = runner.invoke( + app, + [ + "context", + "init", + "--from-dlt", + str(dlt_duckdb), + "--path", + str(tmp_path), + "--force", + ], + ) + assert result.exit_code == 0, result.output