Skip to content

Commit

Permalink
Added basic integration with Lakeview Dashboards (#66)
Browse files Browse the repository at this point in the history
  • Loading branch information
nfx authored May 23, 2024
1 parent 9032c9d commit 1bfcf61
Show file tree
Hide file tree
Showing 9 changed files with 2,439 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = [
"databricks-labs-blueprint>=0.4.2",
"databricks-labs-blueprint[yaml]>=0.4.2",
"databricks-sdk>=0.22.0",
"sqlglot>=22.3.1"
]
Expand Down
82 changes: 82 additions & 0 deletions src/databricks/labs/lsql/dashboards.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import json
from pathlib import Path
from typing import Protocol, ClassVar, runtime_checkable

import sqlglot
import yaml
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.workspace import ExportFormat

from databricks.labs.lsql.lakeview import Dashboard, Page, Query, NamedQuery, ControlFieldEncoding

@runtime_checkable
class _DataclassInstance(Protocol):
    """Structural type for objects that carry dataclass field metadata.

    Because the protocol is runtime-checkable, ``isinstance(obj, _DataclassInstance)``
    succeeds for any object exposing a ``__dataclass_fields__`` attribute.
    """

    # Name -> field mapping; it is iterated via ``.values()`` by the tree walker below.
    __dataclass_fields__: ClassVar[dict]

class Dashboards:
    """Export Lakeview dashboards from a workspace into local files."""

    def __init__(self, ws: WorkspaceClient):
        """Store the workspace client used to download dashboards."""
        self._ws = ws

    def get_dashboard(self, dashboard_path: str):
        """Download a dashboard in source format and parse it.

        Args:
            dashboard_path: Workspace path of the dashboard to download.

        Returns:
            The result of ``Dashboard.from_dict`` applied to the decoded JSON payload.
        """
        with self._ws.workspace.download(dashboard_path, format=ExportFormat.SOURCE) as f:
            raw = f.read().decode('utf-8')
        as_dict = json.loads(raw)
        return Dashboard.from_dict(as_dict)

    def save_to_folder(self, dashboard_path: str, local_path: Path):
        """Write a dashboard's datasets and first page into a local folder.

        Each dataset query is written to ``<display_name>.sql`` and the first page
        of the dashboard is dumped to ``lvdash.yml``. Only ``dashboard.pages[0]``
        is exported; any further pages are ignored.

        Args:
            dashboard_path: Workspace path of the dashboard to download.
            local_path: Target directory; created (with parents) when missing.

        Raises:
            IndexError: If ``dashboard.pages`` is an empty sequence.
        """
        local_path.mkdir(parents=True, exist_ok=True)
        dashboard = self.get_dashboard(dashboard_path)

        # Maps generated identifiers to readable names; shared with _replace_names,
        # which both reads it and adds entries for named queries.
        better_names = {}
        for dataset in dashboard.datasets:
            name = dataset.display_name
            better_names[dataset.name] = name
            self._format_sql_file(dataset.query, local_path / f"{name}.sql")

        # Prepare the page before opening the file, so that a failure here
        # (e.g. no pages at all) does not leave an empty lvdash.yml behind.
        first_page = dashboard.pages[0]
        self._replace_names(first_page, better_names)
        page = first_page.as_dict()

        lvdash_yml = local_path / "lvdash.yml"
        # The payload was decoded as UTF-8, so write it back with the same encoding.
        with lvdash_yml.open('w', encoding='utf-8') as f:
            yaml.safe_dump(page, f)

    def _format_sql_file(self, sql_query, query_path):
        """Pretty-print a SQL query into a file, falling back to the raw text.

        Args:
            sql_query: SQL text, possibly containing several statements.
            query_path: Path-like object with an ``open`` method; it is overwritten.
        """
        try:
            formatted = []
            for statement in sqlglot.parse(sql_query):
                if statement is None:
                    # sqlglot yields None for empty statements (e.g. a stray ';')
                    continue
                # see https://sqlglot.com/sqlglot/generator.html#Generator
                pretty = statement.sql(
                    dialect='databricks',
                    normalize=True,  # normalize identifiers to lowercase
                    pretty=True,  # format the produced SQL string
                    normalize_functions='upper',  # normalize function names to uppercase
                    max_text_width=80,  # wrap text at 80 characters
                )
                formatted.append(f"{pretty};\n")
            content = ''.join(formatted)
        except sqlglot.ParseError:
            # Best effort: keep the query exactly as it was when it cannot be parsed.
            content = sql_query
        # Write once at the end, so the file never mixes formatted and raw output.
        with query_path.open('w', encoding='utf-8') as f:
            f.write(content)

    def _replace_names(self, node: _DataclassInstance, better_names: dict[str, str]):
        """Recursively replace generated names in a dataclass tree, in place.

        Args:
            node: Root of the tree to walk; objects that are not dataclass
                instances are returned untouched.
            better_names: Mapping from current names to replacements. It is
                mutated: renamed named queries are recorded in it.

        Returns:
            The same ``node`` object that was passed in.
        """
        # walk every dataclass instance recursively and replace names
        if isinstance(node, _DataclassInstance):
            for field in node.__dataclass_fields__.values():
                value = getattr(node, field.name)
                if isinstance(value, list):
                    setattr(node, field.name, [self._replace_names(item, better_names) for item in value])
                elif isinstance(value, _DataclassInstance):
                    setattr(node, field.name, self._replace_names(value, better_names))
        # Children are processed first, so a NamedQuery handled below already
        # sees the replaced dataset name on its nested query.
        if isinstance(node, Query):
            node.dataset_name = better_names.get(node.dataset_name, node.dataset_name)
        elif isinstance(node, NamedQuery) and node.query:
            # 'dashboards/01eeb077e38c17e6ba3511036985960c/datasets/01eeb081882017f6a116991d124d3068_...'
            if node.name.startswith('dashboards/'):
                parts = [node.query.dataset_name, *(field.name for field in node.query.fields)]
                better_names[node.name] = '_'.join(parts)
            node.name = better_names.get(node.name, node.name)
        elif isinstance(node, ControlFieldEncoding):
            # NOTE(review): this only picks up a new name if the matching NamedQuery
            # was visited earlier in the walk - confirm against the model's field order.
            node.query_name = better_names.get(node.query_name, node.query_name)
        return node
6 changes: 6 additions & 0 deletions src/databricks/labs/lsql/lakeview/.codegen.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"formatter": "../../../../../../.venv/bin/black . && ../../../../../../.venv/bin/ruff . --fix",
"batch": {
"model.py.tmpl": "model.py"
}
}
1 change: 1 addition & 0 deletions src/databricks/labs/lsql/lakeview/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .model import * # noqa: F401, F403
Loading

0 comments on commit 1bfcf61

Please sign in to comment.