Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added basic integration with Lakeview Dashboards #66

Merged
merged 3 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = [
"databricks-labs-blueprint>=0.4.2",
"databricks-labs-blueprint[yaml]>=0.4.2",
"databricks-sdk>=0.22.0",
"sqlglot>=22.3.1"
]
Expand Down
82 changes: 82 additions & 0 deletions src/databricks/labs/lsql/dashboards.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import json
from pathlib import Path
from typing import Protocol, ClassVar, runtime_checkable

import sqlglot
import yaml
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.workspace import ExportFormat

from databricks.labs.lsql.lakeview import Dashboard, Page, Query, NamedQuery, ControlFieldEncoding

@runtime_checkable
class _DataclassInstance(Protocol):
__dataclass_fields__: ClassVar[dict]

class Dashboards:
def __init__(self, ws: WorkspaceClient):
self._ws = ws

def get_dashboard(self, dashboard_path: str):
with self._ws.workspace.download(dashboard_path, format=ExportFormat.SOURCE) as f:
raw = f.read().decode('utf-8')
as_dict = json.loads(raw)
return Dashboard.from_dict(as_dict)

def save_to_folder(self, dashboard_path: str, local_path: Path):
local_path.mkdir(parents=True, exist_ok=True)
dashboard = self.get_dashboard(dashboard_path)
better_names = {}
for dataset in dashboard.datasets:
name = dataset.display_name
better_names[dataset.name] = name
query_path = local_path / f"{name}.sql"
sql_query = dataset.query
self._format_sql_file(sql_query, query_path)
lvdash_yml = local_path / "lvdash.yml"
with lvdash_yml.open('w') as f:
first_page = dashboard.pages[0]
self._replace_names(first_page, better_names)
page = first_page.as_dict()
yaml.safe_dump(page, f)
assert True

def _format_sql_file(self, sql_query, query_path):
with query_path.open('w') as f:
try:
for statement in sqlglot.parse(sql_query):
# see https://sqlglot.com/sqlglot/generator.html#Generator
pretty = statement.sql(
dialect='databricks',
normalize=True, # normalize identifiers to lowercase
pretty=True, # format the produced SQL string
normalize_functions='upper', # normalize function names to uppercase
max_text_width=80, # wrap text at 120 characters
)
f.write(f"{pretty};\n")
except sqlglot.ParseError:
f.write(sql_query)

def _replace_names(self, node: _DataclassInstance, better_names: dict[str, str]):
# walk evely dataclass instance recursively and replace names
if isinstance(node, _DataclassInstance):
for field in node.__dataclass_fields__.values():
value = getattr(node, field.name)
if isinstance(value, list):
setattr(node, field.name, [self._replace_names(item, better_names) for item in value])
elif isinstance(value, _DataclassInstance):
setattr(node, field.name, self._replace_names(value, better_names))
if isinstance(node, Query):
node.dataset_name = better_names.get(node.dataset_name, node.dataset_name)
elif isinstance(node, NamedQuery) and node.query:
# 'dashboards/01eeb077e38c17e6ba3511036985960c/datasets/01eeb081882017f6a116991d124d3068_...'
if node.name.startswith('dashboards/'):
parts = [node.query.dataset_name]
for field in node.query.fields:
parts.append(field.name)
new_name = '_'.join(parts)
better_names[node.name] = new_name
node.name = better_names.get(node.name, node.name)
elif isinstance(node, ControlFieldEncoding):
node.query_name = better_names.get(node.query_name, node.query_name)
return node
6 changes: 6 additions & 0 deletions src/databricks/labs/lsql/lakeview/.codegen.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"formatter": "../../../../../../.venv/bin/black . && ../../../../../../.venv/bin/ruff . --fix",
"batch": {
"model.py.tmpl": "model.py"
}
}
1 change: 1 addition & 0 deletions src/databricks/labs/lsql/lakeview/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .model import * # noqa: F401, F403
Loading
Loading