Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 40 additions & 3 deletions bigframes/display/anywidget.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from importlib import resources
import functools
import math
from typing import Any, Dict, Iterator, List, Optional, Type
from typing import Any, Dict, Iterator, List, Optional, Tuple, Type
import uuid

import pandas as pd
Expand All @@ -26,6 +26,7 @@
from bigframes.core import blocks
import bigframes.dataframe
import bigframes.display.html
import bigframes.dtypes as dtypes

# anywidget and traitlets are optional dependencies. We don't want the import of
# this module to fail if they aren't installed, though. Instead, we try to
Expand Down Expand Up @@ -61,6 +62,9 @@ class TableWidget(WIDGET_BASE):
allow_none=True,
).tag(sync=True)
table_html = traitlets.Unicode().tag(sync=True)
sort_column = traitlets.Unicode("").tag(sync=True)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have we considered having multiple columns as a possibility? I think a single column is a good starting point, but I think it's an alternative worth considering, especially when a particular column contains lots of duplicate values, like a "date" column.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that multi-column sorting is particularly valuable when a column has many duplicate values. I would like to get the single column sorting checked in first as a PR. Then check in a second PR for multi-column sorting. This current PR is already complex enough. I prefer two separate PRs as enhancements.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, separate PR makes sense to me, thanks.

sort_ascending = traitlets.Bool(True).tag(sync=True)
orderable_columns = traitlets.List(traitlets.Unicode(), []).tag(sync=True)
_initial_load_complete = traitlets.Bool(False).tag(sync=True)
_batches: Optional[blocks.PandasBatches] = None
_error_message = traitlets.Unicode(allow_none=True, default_value=None).tag(
Expand All @@ -87,15 +91,20 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
self._all_data_loaded = False
self._batch_iter: Optional[Iterator[pd.DataFrame]] = None
self._cached_batches: List[pd.DataFrame] = []
self._last_sort_state: Optional[Tuple[str, bool]] = None
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can guess based on context what these mean, but I think a frozen dataclass would be much easier to understand at a glance.


# respect display options for initial page size
initial_page_size = bigframes.options.display.max_rows

# set traitlets properties that trigger observers
self.page_size = initial_page_size
self.orderable_columns = [
col
for col in dataframe.columns
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should iterate through dtypes directly to get both the type and the column name in one call.

Calling .dtypes for each iteration is making this into an O(n^2) algorithm. The n could be as large as 10,000, where that would definitely make a difference, though I imagine our widget would break well before then.

Aside: could you file an issue to check our limitations on the number of columns from a ux perspective? I imagine we'll need to solve that at some point soon-ish.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Complexity is reduced based on the suggestion
filed b/462525985 as TODO

if dtypes.is_orderable(dataframe.dtypes[col])
]

# len(dataframe) is expensive, since it will trigger a
# SELECT COUNT(*) query. It is a must have however.
# obtain the row counts
# TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
# before we get here so that the count might already be cached.
self._reset_batches_for_new_page_size()
Expand All @@ -113,6 +122,7 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
self.row_count = self._batches.total_rows

# get the initial page
self._get_next_batch()
self._set_table_html()

# Signals to the frontend that the initial data load is complete.
Expand Down Expand Up @@ -235,6 +245,27 @@ def _set_table_html(self) -> None:
)
return

# Apply sorting if a column is selected
df_to_display = self._dataframe
if self.sort_column:
try:
df_to_display = df_to_display.sort_values(
by=self.sort_column, ascending=self.sort_ascending
)
except KeyError:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm afraid of this. KeyError is a relatively common error in Python. Just looking at this code, it's not clear to me that this would always be the case of a missing column. Please check for the column presence explicitly, instead.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should trigger this error to be honest. The user only allows the click on a sortable column name to trigger this part of code. Theoretically, we do not even need to catch this bug, because this exception should never be hit in our design. I will remove this try catch block.

self._error_message = f"Column '{self.sort_column}' not found. Please select a valid column to sort by."
# Revert to unsorted state if sorting fails
self.sort_column = ""

# Reset batches when sorting changes
if self._last_sort_state != (self.sort_column, self.sort_ascending):
self._batches = df_to_display._to_pandas_batches(page_size=self.page_size)
self._cached_batches = []
self._batch_iter = None
self._all_data_loaded = False
self._last_sort_state = (self.sort_column, self.sort_ascending)
self.page = 0 # Reset to first page

start = self.page * self.page_size
end = start + self.page_size

Expand Down Expand Up @@ -270,8 +301,14 @@ def _set_table_html(self) -> None:
self.table_html = bigframes.display.html.render_html(
dataframe=page_data,
table_id=f"table-{self._table_id}",
orderable_columns=self.orderable_columns,
)

@traitlets.observe("sort_column", "sort_ascending")
def _sort_changed(self, _change: Dict[str, Any]):
"""Handler for when sorting parameters change from the frontend."""
self._set_table_html()

@traitlets.observe("page")
def _page_changed(self, _change: Dict[str, Any]) -> None:
"""Handler for when the page number is changed from the frontend."""
Expand Down
11 changes: 9 additions & 2 deletions bigframes/display/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,15 @@
from __future__ import annotations

import html
from typing import Any

import pandas as pd
import pandas.api.types

from bigframes._config import options


def _is_dtype_numeric(dtype) -> bool:
def _is_dtype_numeric(dtype: Any) -> bool:
"""Check if a dtype is numeric for alignment purposes."""
return pandas.api.types.is_numeric_dtype(dtype)

Expand All @@ -33,18 +34,24 @@ def render_html(
*,
dataframe: pd.DataFrame,
table_id: str,
orderable_columns: list[str] | None = None,
) -> str:
"""Render a pandas DataFrame to HTML with specific styling."""
classes = "dataframe table table-striped table-hover"
table_html = [f'<table border="1" class="{classes}" id="{table_id}">']
precision = options.display.precision
orderable_columns = orderable_columns or []

# Render table head
table_html.append(" <thead>")
table_html.append(' <tr style="text-align: left;">')
for col in dataframe.columns:
th_classes = []
if col in orderable_columns:
th_classes.append("sortable")
class_str = f'class="{" ".join(th_classes)}"' if th_classes else ""
table_html.append(
f' <th style="text-align: left;"><div style="resize: horizontal; overflow: auto; box-sizing: border-box; width: 100%; height: 100%; padding: 0.5em;">{html.escape(str(col))}</div></th>'
f' <th style="text-align: left;" {class_str}><div style="resize: horizontal; overflow: auto; box-sizing: border-box; width: 100%; height: 100%; padding: 0.5em;">{html.escape(str(col))}</div></th>'
)
table_html.append(" </tr>")
table_html.append(" </thead>")
Expand Down
16 changes: 15 additions & 1 deletion bigframes/display/table_widget.css
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,20 @@

.bigframes-widget th {
background-color: var(--colab-primary-surface-color, var(--jp-layout-color0));
/* Uncomment once we support sorting: cursor: pointer; */
position: sticky;
top: 0;
z-index: 1;
}

.bigframes-widget th .sort-indicator {
padding-left: 4px;
visibility: hidden;
}

.bigframes-widget th:hover .sort-indicator {
visibility: visible;
}

.bigframes-widget button {
cursor: pointer;
display: inline-block;
Expand All @@ -78,3 +86,9 @@
opacity: 0.65;
pointer-events: none;
}

.bigframes-widget .error-message {
font-family:
-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
font-size: 14px;
}
104 changes: 100 additions & 4 deletions bigframes/display/table_widget.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ const ModelProperty = {
PAGE_SIZE: "page_size",
ROW_COUNT: "row_count",
TABLE_HTML: "table_html",
SORT_COLUMN: "sort_column",
SORT_ASCENDING: "sort_ascending",
ERROR_MESSAGE: "error_message",
ORDERABLE_COLUMNS: "orderable_columns",
};

const Event = {
Expand All @@ -38,7 +42,17 @@ function render({ model, el }) {
// Main container with a unique class for CSS scoping
el.classList.add("bigframes-widget");

// Structure
// Add error message container at the top
const errorContainer = document.createElement("div");
errorContainer.classList.add("error-message");
errorContainer.style.display = "none";
errorContainer.style.color = "red";
errorContainer.style.padding = "8px";
errorContainer.style.marginBottom = "8px";
errorContainer.style.border = "1px solid red";
errorContainer.style.borderRadius = "4px";
errorContainer.style.backgroundColor = "#ffebee";
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you move these styles to the CSS file?


const tableContainer = document.createElement("div");
const footer = document.createElement("div");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


Expand Down Expand Up @@ -126,14 +140,93 @@ function render({ model, el }) {
}
}

/** Updates the HTML in the table container and refreshes button states. */
function handleTableHTMLChange() {
// Note: Using innerHTML is safe here because the content is generated
// by a trusted backend (DataFrame.to_html).
tableContainer.innerHTML = model.get(ModelProperty.TABLE_HTML);

// Get sortable columns from backend
const sortableColumns = model.get(ModelProperty.ORDERABLE_COLUMNS);
const currentSortColumn = model.get(ModelProperty.SORT_COLUMN);
const currentSortAscending = model.get(ModelProperty.SORT_ASCENDING);

// Add click handlers to column headers for sorting
const headers = tableContainer.querySelectorAll("th");
headers.forEach((header) => {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is quite a bit of logic. I would like to get JavaScript-level unit tests setup before merging this PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've set up a comprehensive JavaScript unit testing environment and implemented tests for the TableWidget's interactive sorting functionality.

Testing Framework & Environment:
* Jest: Chosen as the primary testing framework due to its all-in-one nature (runner, assertion library, mocking) and strong community support.
* JSDOM: Utilized within Jest to simulate a browser environment, enabling DOM manipulation and event handling within Node.js.
* Babel: Configured to transpile modern JavaScript (ES modules) used in the widget and tests, ensuring compatibility.

Test Coverage: The table_widget.test.js file now includes unit tests that verify:
* The basic DOM structure generated by the widget.
* The interaction logic for column header clicks, including:
* Initiating ascending sort on the first click.
* Reversing to descending sort on the second click.
* Clearing the sort (returning to unsorted state) on the third click.
* The correct display of sorting indicators (▲ for ascending, ▼ for descending, ● for unsorted).
* Proper interaction with the model.set and model.save_changes methods.

**How to Run**:
To execute these tests from the project root directory:
```bash
cd tests/js
npm install  # Only needed if dependencies haven't been installed or have changed
npm test
```

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! Could you also add a workflow for this to our GitHub Actions folder? https://github.com/googleapis/python-bigquery-dataframes/tree/main/.github/workflows

Something like this:

name: js-tests
on: push
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: Install modules
      working-directory: ./tests/js
      run: npm install
    - name: Run tests
      working-directory: ./tests/js
      run: npm test

const headerDiv = header.querySelector("div");
const columnName = headerDiv.textContent.trim();

// Only add sorting UI for sortable columns
if (columnName && sortableColumns.includes(columnName)) {
header.style.cursor = "pointer";

// Create a span for the indicator
const indicatorSpan = document.createElement("span");
indicatorSpan.classList.add("sort-indicator");
indicatorSpan.style.paddingLeft = "5px";

// Determine sort indicator and initial visibility
let indicator = "●"; // Default: unsorted (dot)
if (currentSortColumn === columnName) {
indicator = currentSortAscending ? "▲" : "▼";
indicatorSpan.style.visibility = "visible"; // Sorted arrows always visible
} else {
indicatorSpan.style.visibility = "hidden"; // Unsorted dot hidden by default
}
indicatorSpan.textContent = indicator;

// Add indicator to the header, replacing the old one if it exists
const existingIndicator = headerDiv.querySelector(".sort-indicator");
if (existingIndicator) {
headerDiv.removeChild(existingIndicator);
}
headerDiv.appendChild(indicatorSpan);

// Add hover effects for unsorted columns only
header.addEventListener("mouseover", () => {
if (currentSortColumn !== columnName) {
indicatorSpan.style.visibility = "visible";
}
});
header.addEventListener("mouseout", () => {
if (currentSortColumn !== columnName) {
indicatorSpan.style.visibility = "hidden";
}
});

// Add click handler for three-state toggle
header.addEventListener(Event.CLICK, () => {
if (currentSortColumn === columnName) {
if (currentSortAscending) {
// Currently ascending → switch to descending
model.set(ModelProperty.SORT_ASCENDING, false);
} else {
// Currently descending → clear sort (back to unsorted)
model.set(ModelProperty.SORT_COLUMN, "");
model.set(ModelProperty.SORT_ASCENDING, true);
}
} else {
// Not currently sorted → sort ascending
model.set(ModelProperty.SORT_COLUMN, columnName);
model.set(ModelProperty.SORT_ASCENDING, true);
}
model.save_changes();
});
}
});

updateButtonStates();
}

// Add error message handler
function handleErrorMessageChange() {
const errorMsg = model.get(ModelProperty.ERROR_MESSAGE);
if (errorMsg) {
errorContainer.textContent = errorMsg;
errorContainer.style.display = "block";
} else {
errorContainer.style.display = "none";
}
}

// Add event listeners
prevPage.addEventListener(Event.CLICK, () => handlePageChange(-1));
nextPage.addEventListener(Event.CLICK, () => handlePageChange(1));
Expand All @@ -145,6 +238,7 @@ function render({ model, el }) {
});
model.on(Event.CHANGE_TABLE_HTML, handleTableHTMLChange);
model.on(`change:${ModelProperty.ROW_COUNT}`, updateButtonStates);
model.on(`change:${ModelProperty.ERROR_MESSAGE}`, handleErrorMessageChange);
model.on(`change:_initial_load_complete`, (val) => {
if (val) {
updateButtonStates();
Expand All @@ -163,11 +257,13 @@ function render({ model, el }) {
footer.appendChild(paginationContainer);
footer.appendChild(pageSizeContainer);

el.appendChild(errorContainer);
el.appendChild(tableContainer);
el.appendChild(footer);

// Initial render
handleTableHTMLChange();
handleErrorMessageChange();
}

export default { render };
Loading