Support header for markdown files (#161)

Resolves #159

![Screenshot 2024-06-17 at 14 28 54](https://github.com/databrickslabs/lsql/assets/5946784/be74d43a-6388-4303-a6bb-6f5b907e0018)

- [x] Add unit tests
- [x] Add docs
databrickslabs · Jun 19, 2024 · 83b0a27 · 83b0a27
1 parent 2c2e5bf
commit 83b0a27
Show file tree

Hide file tree

Showing 4 changed files with 435 additions and 118 deletions.
diff --git a/docs/dashboards.md b/docs/dashboards.md
@@ -199,8 +199,17 @@ entities.
 
 Markdown files are used to define text widgets that can populate a dashboard. 
 
-The configuration file is written in YAML, and is structured in a way that is easy to read and 
-write.
+The configuration is defined at the top of the file in YAML enclosed by two horizontal rules marked with dashes (---):
+
+``` md
+---
+order: -1
+height: 5
+---
+# Churn dashboard
+
+Welcome to our churn dashboard! Let me show you around ...
+```
 
 [[back to top](#dashboards-as-code)]
 

diff --git a/src/databricks/labs/lsql/dashboards.py b/src/databricks/labs/lsql/dashboards.py
@@ -3,6 +3,7 @@
 import dataclasses
 import json
 import logging
+import re
 import shlex
 from argparse import ArgumentParser
 from dataclasses import dataclass
@@ -32,6 +33,7 @@
     TableEncodingMap,
     TableV2Spec,
     Widget,
+    WidgetFrameSpec,
     WidgetSpec,
 )
 
@@ -73,39 +75,44 @@ def from_path(cls, path: Path) -> "DashboardMetadata":
             return fallback_metadata
 
 
-class WidgetMetadata:
-    def __init__(
-        self,
-        path: Path,
-        order: int | None = None,
-        width: int = 0,
-        height: int = 0,
-        _id: str = "",
-    ):
-        self.path = path
-        self.order = order
-        self.width = width
-        self.height = height
-        self.id = _id or path.stem
+class BaseHandler:
+    """Base file handler.
 
-    def as_dict(self) -> dict[str, str]:
-        body = {"path": self.path.as_posix()}
-        for attribute in "order", "width", "height", "id":
-            if attribute in body:
-                continue
-            value = getattr(self, attribute)
-            if value is not None:
-                body[attribute] = str(value)
-        return body
+    Handlers are based on a Python implementation for FrontMatter.
 
-    def size(self) -> tuple[int, int]:
-        return self.width, self.height
+    Sources:
+        https://frontmatter.codes/docs/markdown
+        https://github.com/eyeseast/python-frontmatter/blob/main/frontmatter/default_handlers.py
+    """
 
-    def is_markdown(self) -> bool:
-        return self.path.suffix == ".md"
+    def __init__(self, path: Path) -> None:
+        self._path = path
 
-    def is_query(self) -> bool:
-        return self.path.suffix == ".sql"
+    @property
+    def _content(self) -> str:
+        return self._path.read_text()
+
+    def parse_header(self) -> dict[str, str]:
+        """Parse the header of the file."""
+        header, _ = self.split()
+        return self._parse_header(header)
+
+    def _parse_header(self, header: str) -> dict[str, str]:
+        _ = self, header
+        return {}
+
+    def split(self) -> tuple[str, str]:
+        """Split the file header from the content.
+
+        Returns :
+            str : The file header possibly containing arguments.
+            str : The file contents.
+        """
+        return "", self._content
+
+
+class QueryHandler(BaseHandler):
+    """Handle query files."""
 
     @staticmethod
     def _get_arguments_parser() -> ArgumentParser:
@@ -114,38 +121,126 @@ def _get_arguments_parser() -> ArgumentParser:
         parser.add_argument("-o", "--order", type=int)
         parser.add_argument("-w", "--width", type=int)
         parser.add_argument("-h", "--height", type=int)
+        parser.add_argument("-t", "--title", type=str)
+        parser.add_argument("-d", "--description", type=str)
         return parser
 
-    def replace_from_arguments(self, arguments: list[str]) -> "WidgetMetadata":
-        replica = copy.deepcopy(self)
+    def _parse_header(self, header: str) -> dict[str, str]:
+        """Header is an argparse string."""
         parser = self._get_arguments_parser()
         try:
-            args = parser.parse_args(arguments)
+            return vars(parser.parse_args(shlex.split(header)))
         except (argparse.ArgumentError, SystemExit) as e:
-            logger.warning(f"Parsing {arguments}: {e}")
-            return replica
+            logger.warning(f"Parsing {self._path}: {e}")
+            return {}
 
-        replica.order = args.order if args.order is not None else self.order
-        replica.width = args.width or self.width
-        replica.height = args.height or self.height
-        replica.id = args.id or self.id
-        return replica
-
-    @classmethod
-    def from_path(cls, path: Path) -> "WidgetMetadata":
-        fallback_metadata = cls(path=path)
+    def split(self) -> tuple[str, str]:
+        """Split the query file header from the contents.
 
+        The optional header is the first comment at the top of the file.
+        """
         try:
-            parsed_query = sqlglot.parse_one(path.read_text(), dialect=sqlglot.dialects.Databricks)
+            parsed_query = sqlglot.parse_one(self._content, dialect=sqlglot.dialects.Databricks)
         except sqlglot.ParseError as e:
-            logger.warning(f"Parsing {path}: {e}")
-            return fallback_metadata
+            logger.warning(f"Parsing {self._path}: {e}")
+            return "", self._content
 
         if parsed_query.comments is None or len(parsed_query.comments) == 0:
-            return fallback_metadata
+            return "", self._content
 
         first_comment = parsed_query.comments[0]
-        return fallback_metadata.replace_from_arguments(shlex.split(first_comment))
+        return first_comment.strip(), self._content
+
+
+class MarkdownHandler(BaseHandler):
+    """Handle Markdown files."""
+
+    _FRONT_MATTER_BOUNDARY = re.compile(r"^-{3,}\s*$", re.MULTILINE)
+
+    def _parse_header(self, header: str) -> dict[str, str]:
+        """Markdown configuration header is a YAML."""
+        _ = self
+        return yaml.safe_load(header) or {}
+
+    def split(self) -> tuple[str, str]:
+        """Split the markdown file header from the contents.
+
+        The header must open the file and is enclosed by two horizontal rules of three or more dashes ('---').
+        """
+        splits = self._FRONT_MATTER_BOUNDARY.split(self._content, 2)
+        if len(splits) == 3 and not splits[0].strip():  # only blank text may precede the opening rule
+            _, header, content = splits
+            return header.strip(), content.lstrip()
+        if len(splits) == 2 and not splits[0].strip():  # an opening rule at the top without a closing one
+            logger.warning(f"Parsing {self._path}: Missing closing header boundary.")
+        return "", self._content
+
+
+class WidgetMetadata:
+    def __init__(
+        self,
+        path: Path,
+        order: int | None = None,
+        width: int = 0,
+        height: int = 0,
+        _id: str = "",
+        title: str = "",
+        description: str = "",
+    ):
+        self._path = path
+        self.order = order
+        self.width = width
+        self.height = height
+        self.id = _id or path.stem
+        self.title = title
+        self.description = description
+
+    def is_markdown(self) -> bool:
+        return self._path.suffix == ".md"
+
+    def is_query(self) -> bool:
+        return self._path.suffix == ".sql"
+
+    @property
+    def handler(self) -> BaseHandler:
+        handler = BaseHandler
+        if self.is_markdown():
+            handler = MarkdownHandler
+        elif self.is_query():
+            handler = QueryHandler
+        return handler(self._path)
+
+    @classmethod
+    def from_dict(cls, *, path: str | Path, **optionals) -> "WidgetMetadata":
+        path = Path(path)
+        if "id" in optionals:
+            optionals["_id"] = optionals["id"]
+            del optionals["id"]
+        return cls(path, **optionals)
+
+    def as_dict(self) -> dict[str, str]:
+        exclude_attributes = {
+            "handler",  # Handler is inferred from file extension
+            "path",  # Path is set explicitly below
+        }
+        body = {"path": self._path.as_posix()}
+        for attribute in dir(self):
+            if attribute.startswith("_") or callable(getattr(self, attribute)) or attribute in exclude_attributes:
+                continue
+            value = getattr(self, attribute)
+            if value is not None:
+                body[attribute] = str(value)
+        return body
+
+    @classmethod
+    def from_path(cls, path: Path) -> "WidgetMetadata":
+        widget_metadata = cls(path=path)
+        header = widget_metadata.handler.parse_header()
+        header.pop("path", None)
+        return cls.from_dict(path=path, **header)
+
+    def __repr__(self) -> str:
+        return f"WidgetMetadata<{self._path}>"
 
 
 class Tile:
@@ -183,7 +278,8 @@ def place_after(self, position: Position) -> "Tile":
 
     @property
     def widget(self) -> Widget:
-        widget = Widget(name=self._widget_metadata.id, textbox_spec=self._widget_metadata.path.read_text())
+        _, text = self._widget_metadata.handler.split()
+        widget = Widget(name=self._widget_metadata.id, textbox_spec=text)
         return widget
 
     @classmethod
@@ -207,7 +303,7 @@ def _default_size(self) -> tuple[int, int]:
 
 class QueryTile(Tile):
     def _get_abstract_syntax_tree(self) -> sqlglot.Expression | None:
-        query = self._widget_metadata.path.read_text()
+        _, query = self._widget_metadata.handler.split()
         try:
             return sqlglot.parse_one(query, dialect=sqlglot.dialects.Databricks)
         except sqlglot.ParseError as e:
@@ -236,7 +332,13 @@ def _find_fields(self) -> list[Field]:
     def widget(self) -> Widget:
         fields = self._find_fields()
         named_query = self._get_named_query(fields)
-        spec = self._get_spec(fields)
+        frame = WidgetFrameSpec(
+            title=self._widget_metadata.title,
+            show_title=self._widget_metadata.title is not None,  # test the title itself, not the metadata object
+            description=self._widget_metadata.description,
+            show_description=self._widget_metadata.description is not None,
+        )
+        spec = self._get_spec(fields, frame=frame)
         widget = Widget(name=self._widget_metadata.id, queries=[named_query], spec=spec)
         return widget
 
@@ -247,10 +349,10 @@ def _get_named_query(self, fields: list[Field]) -> NamedQuery:
         return named_query
 
     @staticmethod
-    def _get_spec(fields: list[Field]) -> WidgetSpec:
+    def _get_spec(fields: list[Field], *, frame: WidgetFrameSpec | None = None) -> WidgetSpec:
         field_encodings = [RenderFieldEncoding(field_name=field.name) for field in fields]
         table_encodings = TableEncodingMap(field_encodings)
-        spec = TableV2Spec(encodings=table_encodings)
+        spec = TableV2Spec(encodings=table_encodings, frame=frame)
         return spec
 
     def infer_spec_type(self) -> type[WidgetSpec] | None:
@@ -273,9 +375,9 @@ def _default_size(self) -> tuple[int, int]:
         return 1, 3
 
     @staticmethod
-    def _get_spec(fields: list[Field]) -> CounterSpec:
+    def _get_spec(fields: list[Field], *, frame: WidgetFrameSpec | None = None) -> CounterSpec:
         counter_encodings = CounterFieldEncoding(field_name=fields[0].name, display_name=fields[0].name)
-        spec = CounterSpec(CounterEncodingMap(value=counter_encodings))
+        spec = CounterSpec(CounterEncodingMap(value=counter_encodings), frame=frame)
         return spec
 
 

diff --git a/tests/integration/test_dashboards.py b/tests/integration/test_dashboards.py
@@ -189,3 +189,34 @@ def test_dashboards_deploys_dashboard_with_invalid_query(ws, make_dashboard, tmp
     sdk_dashboard = dashboards.deploy_dashboard(lakeview_dashboard, dashboard_id=sdk_dashboard.dashboard_id)
 
     assert ws.lakeview.get(sdk_dashboard.dashboard_id)
+
+
+def test_dashboards_deploys_dashboard_with_markdown_header(ws, make_dashboard, tmp_path):
+    sdk_dashboard = make_dashboard()
+
+    for count, query_name in enumerate("abcdef"):
+        (tmp_path / f"{query_name}.sql").write_text(f"SELECT {count} AS count")
+
+    description = "---\norder: -1\n---\nBelow you see counters."
+    (tmp_path / "z_description.md").write_text(description)
+
+    dashboards = Dashboards(ws)
+    lakeview_dashboard = dashboards.create_dashboard(tmp_path)
+
+    sdk_dashboard = dashboards.deploy_dashboard(lakeview_dashboard, dashboard_id=sdk_dashboard.dashboard_id)
+
+    assert ws.lakeview.get(sdk_dashboard.dashboard_id)
+
+
+def test_dashboards_deploys_dashboard_with_widget_title_and_description(ws, make_dashboard, tmp_path):
+    sdk_dashboard = make_dashboard()
+
+    description = "-- --title 'Counting' --description 'The answer to life'\nSELECT 42"
+    (tmp_path / "counter.sql").write_text(description)
+
+    dashboards = Dashboards(ws)
+    lakeview_dashboard = dashboards.create_dashboard(tmp_path)
+
+    sdk_dashboard = dashboards.deploy_dashboard(lakeview_dashboard, dashboard_id=sdk_dashboard.dashboard_id)
+
+    assert ws.lakeview.get(sdk_dashboard.dashboard_id)