Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
dd4853d
remove expensive len() call
shuoweil Jul 24, 2025
6afa44b
add testcase
shuoweil Jul 24, 2025
878e350
fix a typo
shuoweil Jul 24, 2025
01781df
change how row_count is updated
shuoweil Jul 25, 2025
5496e74
testcase still fails, need to merge in 1888
shuoweil Jul 29, 2025
0b364b0
update the method of using PandasBatches.total_rows
shuoweil Jul 30, 2025
858488d
change tests in read_gbq_colab
shuoweil Aug 1, 2025
91b2c5e
polish comment
shuoweil Aug 1, 2025
0b50d0c
fix a test
shuoweil Aug 6, 2025
7549f74
change code and update more testcase
shuoweil Aug 12, 2025
aa25d1e
remove unneeded except
shuoweil Aug 14, 2025
a8cc856
add assert for total_rows
shuoweil Aug 14, 2025
f6789e5
get actual row_counts
shuoweil Aug 19, 2025
9a5ad86
avoid two query calls
shuoweil Aug 19, 2025
1d70cfd
remove double query when display widget
shuoweil Aug 21, 2025
223183a
get row count directly
shuoweil Sep 13, 2025
ac55b50
restore notebook
shuoweil Sep 16, 2025
b498ad5
restore notebook change
shuoweil Sep 16, 2025
90c5e35
remove duplicated code
shuoweil Sep 18, 2025
fad499c
minor updates
shuoweil Oct 2, 2025
635e821
still have zero total rows issue
shuoweil Oct 3, 2025
9db0a2a
now large dataset can get the correct row counts
shuoweil Oct 3, 2025
f6609cf
benchmark change
shuoweil Oct 3, 2025
dc332ef
revert a benchmark
shuoweil Oct 7, 2025
47193f1
revert executor change
shuoweil Oct 8, 2025
f4b6336
raising a NotImplementedError when the row count is none
shuoweil Oct 9, 2025
085687f
change return type
shuoweil Oct 11, 2025
1767bdc
Revert accidental change of dataframe.ipynb
shuoweil Oct 15, 2025
c9f27a1
remove unnecessary execution in benchmark
shuoweil Oct 15, 2025
e7bbea1
remove row_count check
shuoweil Oct 15, 2025
3bb0114
remove extra execute_result
shuoweil Oct 15, 2025
9e5c8be
remove unnecessary tests
shuoweil Oct 15, 2025
50c92a5
Fix: Address review comments on PandasBatches and docstring
shuoweil Oct 16, 2025
8f33c05
Revert: Revert import change in read_gbq_colab benchmark
shuoweil Oct 16, 2025
9c450f2
Revert: Revert unnecessary changes in read_gbq_colab benchmarks
shuoweil Oct 16, 2025
885bf89
Remove notebooks/Untitled-2.ipynb
shuoweil Oct 16, 2025
9a793e8
Remove notebooks/multimodal/audio_transcribe_partial_ordering.ipynb
shuoweil Oct 16, 2025
37eab08
remove unnecessary change
shuoweil Oct 16, 2025
058b7b7
revert typo
shuoweil Oct 16, 2025
bedfed4
add todo
shuoweil Oct 17, 2025
02cf227
change docstring
shuoweil Oct 17, 2025
af9a4e8
revert changes to tests/benchmark/read_gbq_colab
shuoweil Oct 22, 2025
93dbb4d
merge change
shuoweil Oct 22, 2025
76cdec6
update how we handle invalid row count
shuoweil Oct 24, 2025
d4e1e76
eliminate duplicated flags
shuoweil Oct 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 52 additions & 23 deletions bigframes/display/anywidget.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,14 @@
import pandas as pd

import bigframes
from bigframes.core import blocks
import bigframes.dataframe
import bigframes.display.html

# anywidget and traitlets are optional dependencies. We don't want the import of this
# module to fail if they aren't installed, though. Instead, we try to limit the surface that
# these packages could affect. This makes unit testing easier and ensures we don't
# accidentally make these required packages.
# anywidget and traitlets are optional dependencies. We don't want the import of
# this module to fail if they aren't installed, though. Instead, we try to
# limit the surface that these packages could affect. This makes unit testing
# easier and ensures we don't accidentally make these required packages.
try:
import anywidget
import traitlets
Expand All @@ -46,9 +47,21 @@


class TableWidget(WIDGET_BASE):
"""An interactive, paginated table widget for BigFrames DataFrames.

This widget provides a user-friendly way to display and navigate through
large BigQuery DataFrames within a Jupyter environment.
"""
An interactive, paginated table widget for BigFrames DataFrames.
"""

page = traitlets.Int(0).tag(sync=True)
page_size = traitlets.Int(0).tag(sync=True)
row_count = traitlets.Int(0).tag(sync=True)
table_html = traitlets.Unicode().tag(sync=True)
_initial_load_complete = traitlets.Bool(False).tag(sync=True)
_batches: Optional[blocks.PandasBatches] = None
_error_message = traitlets.Unicode(allow_none=True, default_value=None).tag(
sync=True
)

def __init__(self, dataframe: bigframes.dataframe.DataFrame):
"""Initialize the TableWidget.
Expand All @@ -61,10 +74,11 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
"Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use TableWidget."
)

super().__init__()
self._dataframe = dataframe

# Initialize attributes that might be needed by observers FIRST
super().__init__()

# Initialize attributes that might be needed by observers first
self._table_id = str(uuid.uuid4())
self._all_data_loaded = False
self._batch_iter: Optional[Iterator[pd.DataFrame]] = None
Expand All @@ -73,9 +87,6 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
# respect display options for initial page size
initial_page_size = bigframes.options.display.max_rows

# Initialize data fetching attributes.
self._batches = dataframe._to_pandas_batches(page_size=initial_page_size)

# set traitlets properties that trigger observers
self.page_size = initial_page_size

Expand All @@ -84,12 +95,21 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
# TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
# before we get here so that the count might already be cached.
# TODO(b/452747934): Allow row_count to be None and check to see if
# there are multiple pages and show "page 1 of many" in this case.
self.row_count = self._batches.total_rows or 0
# there are multiple pages and show "page 1 of many" in this case
self._reset_batches_for_new_page_size()
if self._batches is None or self._batches.total_rows is None:
self._error_message = "Could not determine total row count. Data might be unavailable or an error occurred."
self.row_count = 0
else:
self.row_count = self._batches.total_rows

# get the initial page
self._set_table_html()

# Signals to the frontend that the initial data load is complete.
# Also used as a guard to prevent observers from firing during initialization.
self._initial_load_complete = True

@functools.cached_property
def _esm(self):
"""Load JavaScript code from external file."""
Expand All @@ -100,11 +120,6 @@ def _css(self):
"""Load CSS code from external file."""
return resources.read_text(bigframes.display, "table_widget.css")

page = traitlets.Int(0).tag(sync=True)
page_size = traitlets.Int(25).tag(sync=True)
row_count = traitlets.Int(0).tag(sync=True)
table_html = traitlets.Unicode().tag(sync=True)

@traitlets.validate("page")
def _validate_page(self, proposal: Dict[str, Any]) -> int:
"""Validate and clamp the page number to a valid range.
Expand Down Expand Up @@ -171,7 +186,10 @@ def _get_next_batch(self) -> bool:
def _batch_iterator(self) -> Iterator[pd.DataFrame]:
"""Lazily initializes and returns the batch iterator."""
if self._batch_iter is None:
self._batch_iter = iter(self._batches)
if self._batches is None:
self._batch_iter = iter([])
else:
self._batch_iter = iter(self._batches)
return self._batch_iter

@property
Expand All @@ -181,15 +199,22 @@ def _cached_data(self) -> pd.DataFrame:
return pd.DataFrame(columns=self._dataframe.columns)
return pd.concat(self._cached_batches, ignore_index=True)

def _reset_batches_for_new_page_size(self):
def _reset_batches_for_new_page_size(self) -> None:
"""Reset the batch iterator when page size changes."""
self._batches = self._dataframe._to_pandas_batches(page_size=self.page_size)

self._cached_batches = []
self._batch_iter = None
self._all_data_loaded = False

def _set_table_html(self):
def _set_table_html(self) -> None:
"""Sets the current html data based on the current page and page size."""
if self._error_message:
self.table_html = (
f"<div class='bigframes-error-message'>{self._error_message}</div>"
)
return

start = self.page * self.page_size
end = start + self.page_size

Expand All @@ -211,13 +236,17 @@ def _set_table_html(self):
)

@traitlets.observe("page")
def _page_changed(self, _change: Dict[str, Any]):
def _page_changed(self, _change: Dict[str, Any]) -> None:
"""Handler for when the page number is changed from the frontend."""
if not self._initial_load_complete:
return
self._set_table_html()

@traitlets.observe("page_size")
def _page_size_changed(self, _change: Dict[str, Any]):
def _page_size_changed(self, _change: Dict[str, Any]) -> None:
"""Handler for when the page size is changed from the frontend."""
if not self._initial_load_complete:
return
# Reset the page to 0 when page size changes to avoid invalid page states
self.page = 0

Expand Down
6 changes: 6 additions & 0 deletions bigframes/display/table_widget.js
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,12 @@ function render({ model, el }) {
}
});
model.on(Event.CHANGE_TABLE_HTML, handleTableHTMLChange);
model.on(`change:${ModelProperty.ROW_COUNT}`, updateButtonStates);
model.on(`change:_initial_load_complete`, (val) => {
if (val) {
updateButtonStates();
}
});

// Assemble the DOM
paginationContainer.appendChild(prevPage);
Expand Down
68 changes: 48 additions & 20 deletions notebooks/dataframes/anywidget_mode.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -127,12 +127,24 @@
"id": "ce250157",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"✅ Completed. "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9e3e413eb0774a62818c58d217af8488",
"model_id": "aafd4f912b5f42e0896aa5f0c2c62620",
"version_major": 2,
"version_minor": 1
"version_minor": 0
},
"text/plain": [
"TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
Expand Down Expand Up @@ -171,6 +183,18 @@
"id": "6920d49b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"✅ Completed. "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
Expand All @@ -181,17 +205,16 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "df5e93f0d03f45cda67aa6da7f9ef1ae",
"model_id": "5ec0ad9f11874d4f9d8edbc903ee7b5d",
"version_major": 2,
"version_minor": 1
"version_minor": 0
},
"text/plain": [
"TableWidget(page_size=10, row_count=5552452, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
"output_type": "display_data"
}
],
"source": [
Expand Down Expand Up @@ -257,6 +280,20 @@
"id": "a9d5d13a",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"✅ Completed. \n",
" Query processed 171.4 MB in a moment of slot time.\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
Expand All @@ -267,17 +304,16 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a4ec5248708442fabc59c446c78a1304",
"model_id": "651b5aac958c408183775152c2573a03",
"version_major": 2,
"version_minor": 1
"version_minor": 0
},
"text/plain": [
"TableWidget(page_size=10, row_count=5, table_html='<table border=\"1\" class=\"dataframe table table-striped tabl…"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
"output_type": "display_data"
}
],
"source": [
Expand All @@ -287,19 +323,11 @@
"print(f\"Small dataset pages: {math.ceil(small_widget.row_count / small_widget.page_size)}\")\n",
"small_widget"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c4e5836b-c872-4a9c-b9ec-14f6f338176d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"display_name": "3.10.18",
"language": "python",
"name": "python3"
},
Expand All @@ -313,7 +341,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.16"
"version": "3.10.18"
}
},
"nbformat": 4,
Expand Down
Loading