Skip to content

Commit

Permalink
Support header for markdown files (#161)
Browse files Browse the repository at this point in the history
Resolves #159

![Screenshot 2024-06-17 at 14 28 54](https://github.com/databrickslabs/lsql/assets/5946784/be74d43a-6388-4303-a6bb-6f5b907e0018)

- [x] Add unit tests
- [x] Add docs
  • Loading branch information
JCZuurmond authored Jun 19, 2024
1 parent 2c2e5bf commit 83b0a27
Show file tree
Hide file tree
Showing 4 changed files with 435 additions and 118 deletions.
13 changes: 11 additions & 2 deletions docs/dashboards.md
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,17 @@ entities.

Markdown files are used to define text widgets that can populate a dashboard.

The configuration file is written in YAML, and is structured in a way that is easy to read and
write.
The configuration is defined at the top of the file in YAML enclosed by two horizontal rules marked with dashes (---):

``` md
---
order: -1
height: 5
---
# Churn dashboard

Welcome to our churn dashboard! Let me show you around ...
```

[[back to top](#dashboards-as-code)]

Expand Down
212 changes: 157 additions & 55 deletions src/databricks/labs/lsql/dashboards.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import dataclasses
import json
import logging
import re
import shlex
from argparse import ArgumentParser
from dataclasses import dataclass
Expand Down Expand Up @@ -32,6 +33,7 @@
TableEncodingMap,
TableV2Spec,
Widget,
WidgetFrameSpec,
WidgetSpec,
)

Expand Down Expand Up @@ -73,39 +75,44 @@ def from_path(cls, path: Path) -> "DashboardMetadata":
return fallback_metadata


class WidgetMetadata:
def __init__(
self,
path: Path,
order: int | None = None,
width: int = 0,
height: int = 0,
_id: str = "",
):
self.path = path
self.order = order
self.width = width
self.height = height
self.id = _id or path.stem
class BaseHandler:
"""Base file handler.
def as_dict(self) -> dict[str, str]:
body = {"path": self.path.as_posix()}
for attribute in "order", "width", "height", "id":
if attribute in body:
continue
value = getattr(self, attribute)
if value is not None:
body[attribute] = str(value)
return body
Handlers are based on a Python implementation for FrontMatter.
def size(self) -> tuple[int, int]:
return self.width, self.height
Sources:
https://frontmatter.codes/docs/markdown
https://github.com/eyeseast/python-frontmatter/blob/main/frontmatter/default_handlers.py
"""

def is_markdown(self) -> bool:
return self.path.suffix == ".md"
def __init__(self, path: Path) -> None:
    """Store the path of the file this handler reads."""
    self._path = path

def is_query(self) -> bool:
return self.path.suffix == ".sql"
@property
def _content(self) -> str:
    """Return the text of the handled file; the file is read on every access."""
    return self._path.read_text()

def parse_header(self) -> dict[str, str]:
    """Parse the header of the file.

    The file is split with ``split``; only the header part is passed on to ``_parse_header``, the content part
    is ignored.
    """
    header_text, _body = self.split()
    parsed = self._parse_header(header_text)
    return parsed

def _parse_header(self, header: str) -> dict[str, str]:
    """Return an empty mapping: this default implementation ignores the header text."""
    del self, header  # Both arguments are intentionally unused here
    return {}

def split(self) -> tuple[str, str]:
    """Split the file header from the content.

    This default implementation does not recognise a header.

    Returns:
        tuple[str, str]: An empty header followed by the complete file content.
    """
    empty_header = ""
    return empty_header, self._content


class QueryHandler(BaseHandler):
"""Handle query files."""

@staticmethod
def _get_arguments_parser() -> ArgumentParser:
Expand All @@ -114,38 +121,126 @@ def _get_arguments_parser() -> ArgumentParser:
parser.add_argument("-o", "--order", type=int)
parser.add_argument("-w", "--width", type=int)
parser.add_argument("-h", "--height", type=int)
parser.add_argument("-t", "--title", type=str)
parser.add_argument("-d", "--description", type=str)
return parser

def replace_from_arguments(self, arguments: list[str]) -> "WidgetMetadata":
replica = copy.deepcopy(self)
def _parse_header(self, header: str) -> dict[str, str]:
"""Header is an argparse string."""
parser = self._get_arguments_parser()
try:
args = parser.parse_args(arguments)
return vars(parser.parse_args(shlex.split(header)))
except (argparse.ArgumentError, SystemExit) as e:
logger.warning(f"Parsing {arguments}: {e}")
return replica
logger.warning(f"Parsing {self._path}: {e}")
return {}

replica.order = args.order if args.order is not None else self.order
replica.width = args.width or self.width
replica.height = args.height or self.height
replica.id = args.id or self.id
return replica

@classmethod
def from_path(cls, path: Path) -> "WidgetMetadata":
fallback_metadata = cls(path=path)
def split(self) -> tuple[str, str]:
"""Split the query file header from the contents.
The optional header is the first comment at the top of the file.
"""
try:
parsed_query = sqlglot.parse_one(path.read_text(), dialect=sqlglot.dialects.Databricks)
parsed_query = sqlglot.parse_one(self._content, dialect=sqlglot.dialects.Databricks)
except sqlglot.ParseError as e:
logger.warning(f"Parsing {path}: {e}")
return fallback_metadata
logger.warning(f"Parsing {self._path}: {e}")
return "", self._content

if parsed_query.comments is None or len(parsed_query.comments) == 0:
return fallback_metadata
return "", self._content

first_comment = parsed_query.comments[0]
return fallback_metadata.replace_from_arguments(shlex.split(first_comment))
return first_comment.strip(), self._content


class MarkdownHandler(BaseHandler):
    """Handle Markdown files.

    The optional configuration header is YAML front matter: it must be the first non-blank thing in the file and
    is enclosed by two boundary lines consisting of three or more dashes.
    """

    _FRONT_MATTER_BOUNDARY = re.compile(r"^-{3,}\s*$", re.MULTILINE)

    def _parse_header(self, header: str) -> dict[str, str]:
        """Parse the YAML configuration header.

        Returns:
            dict : The parsed header. An empty dictionary is returned when the header is empty, and - after
                logging a warning - when the header is invalid YAML or is not a YAML mapping.
        """
        if not header:
            return {}
        try:
            parsed = yaml.safe_load(header)
        except yaml.YAMLError as e:
            # Same best-effort behaviour as the query handler: warn and continue without a header
            logger.warning(f"Parsing {self._path}: {e}")
            return {}
        if not parsed:
            return {}
        if not isinstance(parsed, dict):
            # A scalar or list cannot be used as widget options
            logger.warning(f"Parsing {self._path}: Header is not a YAML mapping.")
            return {}
        return parsed

    def split(self) -> tuple[str, str]:
        """Split the markdown file header from the contents.

        The header is enclosed by two horizontal lines marked with three (or more) dashes '---', the first of
        which must be the first non-blank line of the file.

        Returns :
            str : The file header, empty when the file has no (complete) header.
            str : The file contents without the header, or the complete file when there is no header.
        """
        content = self._content  # Read the file once; the property reads from disk on every access
        splits = self._FRONT_MATTER_BOUNDARY.split(content, 2)
        if splits[0].strip():
            # Text precedes the first dashed line, so the dashes are horizontal rules (or heading underlines) in
            # the body and not a front matter boundary. Nothing may be dropped from the content.
            return "", content
        if len(splits) == 3:
            _, header, body = splits
            return header.strip(), body.lstrip()
        if len(splits) == 2:
            logger.warning(f"Parsing {self._path}: Missing closing header boundary.")
        return "", content


class WidgetMetadata:
def __init__(
self,
path: Path,
order: int | None = None,
width: int = 0,
height: int = 0,
_id: str = "",
title: str = "",
description: str = "",
):
self._path = path
self.order = order
self.width = width
self.height = height
self.id = _id or path.stem
self.title = title
self.description = description

def is_markdown(self) -> bool:
return self._path.suffix == ".md"

def is_query(self) -> bool:
return self._path.suffix == ".sql"

@property
def handler(self) -> BaseHandler:
handler = BaseHandler
if self.is_markdown():
handler = MarkdownHandler
elif self.is_query():
handler = QueryHandler
return handler(self._path)

@classmethod
def from_dict(cls, *, path: str | Path, **optionals) -> "WidgetMetadata":
path = Path(path)
if "id" in optionals:
optionals["_id"] = optionals["id"]
del optionals["id"]
return cls(path, **optionals)

def as_dict(self) -> dict[str, str]:
exclude_attributes = {
"handler", # Handler is inferred from file extension
"path", # Path is set explicitly below
}
body = {"path": self._path.as_posix()}
for attribute in dir(self):
if attribute.startswith("_") or callable(getattr(self, attribute)) or attribute in exclude_attributes:
continue
value = getattr(self, attribute)
if value is not None:
body[attribute] = str(value)
return body

@classmethod
def from_path(cls, path: Path) -> "WidgetMetadata":
widget_metadata = cls(path=path)
header = widget_metadata.handler.parse_header()
header.pop("path", None)
return cls.from_dict(path=path, **header)

def __repr__(self):
return f"WidgetMetdata<{self._path}>"


class Tile:
Expand Down Expand Up @@ -183,7 +278,8 @@ def place_after(self, position: Position) -> "Tile":

@property
def widget(self) -> Widget:
    """Build the text widget of this tile.

    The text is the content part returned by the handler's ``split``; the header part is not shown.
    """
    # The metadata no longer exposes a public ``path``; the file is read through the handler instead
    _, text = self._widget_metadata.handler.split()
    return Widget(name=self._widget_metadata.id, textbox_spec=text)

@classmethod
Expand All @@ -207,7 +303,7 @@ def _default_size(self) -> tuple[int, int]:

class QueryTile(Tile):
def _get_abstract_syntax_tree(self) -> sqlglot.Expression | None:
query = self._widget_metadata.path.read_text()
_, query = self._widget_metadata.handler.split()
try:
return sqlglot.parse_one(query, dialect=sqlglot.dialects.Databricks)
except sqlglot.ParseError as e:
Expand Down Expand Up @@ -236,7 +332,13 @@ def _find_fields(self) -> list[Field]:
def widget(self) -> Widget:
    """Build the query widget of this tile, including the frame with the title and description."""
    fields = self._find_fields()
    named_query = self._get_named_query(fields)
    frame = WidgetFrameSpec(
        title=self._widget_metadata.title,
        # Test the title itself, parallel to the description below; testing the metadata object is always true
        show_title=self._widget_metadata.title is not None,
        description=self._widget_metadata.description,
        show_description=self._widget_metadata.description is not None,
    )
    # The spec is built once, with the frame
    spec = self._get_spec(fields, frame=frame)
    widget = Widget(name=self._widget_metadata.id, queries=[named_query], spec=spec)
    return widget

Expand All @@ -247,10 +349,10 @@ def _get_named_query(self, fields: list[Field]) -> NamedQuery:
return named_query

@staticmethod
def _get_spec(fields: list[Field]) -> WidgetSpec:
def _get_spec(fields: list[Field], *, frame: WidgetFrameSpec | None = None) -> WidgetSpec:
field_encodings = [RenderFieldEncoding(field_name=field.name) for field in fields]
table_encodings = TableEncodingMap(field_encodings)
spec = TableV2Spec(encodings=table_encodings)
spec = TableV2Spec(encodings=table_encodings, frame=frame)
return spec

def infer_spec_type(self) -> type[WidgetSpec] | None:
Expand All @@ -273,9 +375,9 @@ def _default_size(self) -> tuple[int, int]:
return 1, 3

@staticmethod
def _get_spec(fields: list[Field]) -> CounterSpec:
def _get_spec(fields: list[Field], *, frame: WidgetFrameSpec | None = None) -> CounterSpec:
counter_encodings = CounterFieldEncoding(field_name=fields[0].name, display_name=fields[0].name)
spec = CounterSpec(CounterEncodingMap(value=counter_encodings))
spec = CounterSpec(CounterEncodingMap(value=counter_encodings), frame=frame)
return spec


Expand Down
31 changes: 31 additions & 0 deletions tests/integration/test_dashboards.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,3 +189,34 @@ def test_dashboards_deploys_dashboard_with_invalid_query(ws, make_dashboard, tmp
sdk_dashboard = dashboards.deploy_dashboard(lakeview_dashboard, dashboard_id=sdk_dashboard.dashboard_id)

assert ws.lakeview.get(sdk_dashboard.dashboard_id)


def test_dashboards_deploys_dashboard_with_markdown_header(ws, make_dashboard, tmp_path):
    """Deploy a dashboard with six queries and a markdown file that starts with a YAML header."""
    placeholder_dashboard = make_dashboard()

    for index, stem in enumerate("abcdef"):
        query_file = tmp_path / f"{stem}.sql"
        query_file.write_text(f"SELECT {index} AS count")

    markdown_file = tmp_path / "z_description.md"
    markdown_file.write_text("---\norder: -1\n---\nBelow you see counters.")

    dashboards = Dashboards(ws)
    lakeview_dashboard = dashboards.create_dashboard(tmp_path)
    deployed_dashboard = dashboards.deploy_dashboard(
        lakeview_dashboard,
        dashboard_id=placeholder_dashboard.dashboard_id,
    )

    assert ws.lakeview.get(deployed_dashboard.dashboard_id)


def test_dashboards_deploys_dashboard_with_widget_title_and_description(ws, make_dashboard, tmp_path):
    """Deploy a dashboard with one query whose leading comment passes ``--title`` and ``--description``."""
    placeholder_dashboard = make_dashboard()

    query_file = tmp_path / "counter.sql"
    query_file.write_text("-- --title 'Counting' --description 'The answer to life'\nSELECT 42")

    dashboards = Dashboards(ws)
    lakeview_dashboard = dashboards.create_dashboard(tmp_path)
    deployed_dashboard = dashboards.deploy_dashboard(
        lakeview_dashboard,
        dashboard_id=placeholder_dashboard.dashboard_id,
    )

    assert ws.lakeview.get(deployed_dashboard.dashboard_id)
Loading

0 comments on commit 83b0a27

Please sign in to comment.