Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 113 additions & 0 deletions wren/src/wren/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,119 @@ def convert_mdl_to_project(mdl_json: dict) -> list[ProjectFile]:
return files


def convert_dlt_to_project(
    duckdb_path: str | Path,
    *,
    project_name: str | None = None,
) -> list[ProjectFile]:
    """Introspect a dlt DuckDB file and generate Wren v2 project files.

    The generated file set consists of ``wren_project.yml``, one
    ``models/<table>/metadata.yml`` per introspected table,
    ``relationships.yml`` and ``instructions.md``.

    Args:
        duckdb_path: Path to .duckdb file produced by dlt.
        project_name: Project name (defaults to DuckDB filename stem).

    Returns:
        List of ProjectFile ready for write_project_files().

    Raises:
        ValueError: If the same table name is found in more than one schema.
            Model files are keyed by table name only, so such tables would
            map to the same ``models/<name>/metadata.yml`` path.
    """
    from datetime import datetime, timezone  # noqa: PLC0415

    from wren.dlt_introspect import DltIntrospector  # noqa: PLC0415

    duckdb_path = Path(duckdb_path)
    project_name = project_name or duckdb_path.stem

    with DltIntrospector(duckdb_path) as introspector:
        tables, relationships = introspector.introspect()

    # Guard: same table name in multiple schemas would silently overwrite
    # model files, because the output path below uses only the table name.
    seen: dict[str, str] = {}
    for table in tables:
        # setdefault returns the schema recorded first for this table name.
        prev_schema = seen.setdefault(table.name, table.schema)
        if prev_schema != table.schema:
            raise ValueError(
                f"Duplicate table name across schemas is ambiguous for project "
                f"generation: {table.name!r} appears in {prev_schema!r} "
                f"and {table.schema!r}. Rename one of the tables before importing."
            )

    files: list[ProjectFile] = []

    # ── wren_project.yml ──────────────────────────────────────
    project_config: dict[str, Any] = {
        "schema_version": 2,
        "name": project_name,
        "version": "1.0",
        "catalog": "",
        "schema": "public",
        "data_source": "duckdb",
    }
    files.append(
        ProjectFile(
            relative_path="wren_project.yml",
            # sort_keys=False keeps the key order declared above.
            content=yaml.dump(
                project_config, default_flow_style=False, sort_keys=False
            ),
        )
    )

    # ── Models ────────────────────────────────────────────────
    for table in tables:
        model: dict[str, Any] = {
            "name": table.name,
            "table_reference": {
                "catalog": table.catalog,
                "schema": table.schema,
                "table": table.name,
            },
            "columns": [
                {
                    "name": col.name,
                    "type": col.normalized_type,
                    "is_calculated": False,
                    "not_null": not col.is_nullable,
                    "properties": {},
                }
                for col in table.columns
            ],
            "cached": False,
            "properties": {"description": "Imported from dlt pipeline"},
        }
        files.append(
            ProjectFile(
                relative_path=f"models/{table.name}/metadata.yml",
                content=yaml.dump(model, default_flow_style=False, sort_keys=False),
            )
        )

    # ── Relationships ─────────────────────────────────────────
    files.append(
        ProjectFile(
            relative_path="relationships.yml",
            content=yaml.dump(
                {"relationships": relationships},
                default_flow_style=False,
                sort_keys=False,
            ),
        )
    )

    # ── Instructions ──────────────────────────────────────────
    # datetime.utcnow() is deprecated (Python 3.12+) and yields a naive
    # value; use an aware UTC timestamp so the emitted ISO string carries
    # an explicit "+00:00" offset.
    now = datetime.now(timezone.utc).isoformat(timespec="seconds")
    files.append(
        ProjectFile(
            relative_path="instructions.md",
            content=(
                "# Instructions\n\n"
                "This project was generated from a dlt DuckDB pipeline.\n"
                f"Source: {duckdb_path}\n"
                f"Generated: {now}\n"
            ),
        )
    )

    return files


def write_project_files(
files: list[ProjectFile],
output_dir: Path,
Expand Down
85 changes: 84 additions & 1 deletion wren/src/wren/context_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,41 @@ def init(
Optional[str],
typer.Option("--from-mdl", help="Import from MDL JSON file (camelCase)."),
] = None,
from_dlt: Annotated[
Optional[str],
typer.Option("--from-dlt", help="Import from dlt-produced DuckDB file."),
] = None,
profile: Annotated[
Optional[str],
typer.Option(
"--profile",
help="Create a named DuckDB connection profile (requires --from-dlt).",
),
] = None,
force: Annotated[
bool,
typer.Option("--force", help="Overwrite existing project files."),
] = False,
) -> None:
"""Initialize a new Wren project.

Without --from-mdl: scaffolds an empty project structure.
Without flags: scaffolds an empty project structure.
With --from-mdl: imports an existing MDL JSON and produces a complete
v2 YAML project, ready for `wren context validate/build`.
With --from-dlt: introspects a dlt DuckDB file and generates models,
relationships, and project config automatically.
"""
project_path = Path(path).expanduser() if path else Path.cwd()

if from_mdl and from_dlt:
typer.echo(
"Error: --from-mdl and --from-dlt cannot be used together.", err=True
)
raise typer.Exit(1)
if profile and not from_dlt:
typer.echo("Error: --profile requires --from-dlt.", err=True)
raise typer.Exit(1)

if from_mdl:
# ── Import from MDL JSON ──────────────────────────────
from wren.context import ( # noqa: PLC0415
Expand Down Expand Up @@ -77,6 +99,67 @@ def init(
typer.echo(f" wren context build --path {project_path}")
return

if from_dlt:
# ── Import from dlt DuckDB ────────────────────────────
from wren.context import ( # noqa: PLC0415
convert_dlt_to_project,
write_project_files,
)

duckdb_path = Path(from_dlt).expanduser()
if not duckdb_path.exists():
typer.echo(f"Error: {duckdb_path} not found.", err=True)
raise typer.Exit(1)

try:
files = convert_dlt_to_project(duckdb_path)
except Exception as e:
typer.echo(f"Error reading DuckDB file: {e}", err=True)
raise typer.Exit(1)

try:
write_project_files(files, project_path, force=force)
except SystemExit as e:
typer.echo(str(e), err=True)
raise typer.Exit(1)

# Count models and relationships from generated files
model_count = sum(
1
for f in files
if f.relative_path.startswith("models/")
and f.relative_path.endswith("/metadata.yml")
)
rel_count = 0
for f in files:
if f.relative_path == "relationships.yml":
import yaml as _yaml # noqa: PLC0415

data = _yaml.safe_load(f.content) or {}
rel_count = len(data.get("relationships", []))
break

typer.echo(f"Imported dlt DuckDB to YAML project at {project_path}/")
typer.echo(f" {model_count} models, {rel_count} relationships")

if profile:
from wren.profile import add_profile # noqa: PLC0415

add_profile(
profile,
{
"datasource": "duckdb",
"url": str(duckdb_path.resolve()),
},
activate=True,
)
typer.echo(f" Profile '{profile}' created and activated.")

typer.echo("\nNext steps:")
typer.echo(f" wren context validate --path {project_path}")
typer.echo(f" wren context build --path {project_path}")
return

# ── Scaffold empty project (existing behavior) ────────────
project_file = project_path / "wren_project.yml"
if project_file.exists() and not force:
Expand Down
Loading
Loading