Skip to content

Commit 508deae

Browse files
authored
fix: Improve Anywidget pagination and display for unknown row counts (#2258)
Previously, when the total number of rows (`row_count`) was unknown (e.g., due to deferred computation or errors), the widget incorrectly defaulted it to 0. This resulted in a confusing UI, such as displaying "Page 1 of 0", and allowed users to navigate to empty pages without automatically returning to valid data. Current display strategy for the interactive table widget: * When `row_count` is a positive number (e.g., 50): * Total Rows Display: Shows the exact count, like "50 total rows". * Pagination Display: Shows the page relative to the total rows, like "Page 1 of 50". * Navigation: The "Next" button is disabled only on the final page. * When `row_count` is `None` (unknown): * Total Rows Display: Shows "Total rows unknown". * Pagination Display: Shows the page relative to an unknown total, like "Page 1 of many". * Navigation: The "Next" button is always enabled, allowing you to page forward until the backend determines there is no more data. Fixes #<428238610> 🦕
1 parent 7c062a6 commit 508deae

File tree

4 files changed

+310
-72
lines changed

4 files changed

+310
-72
lines changed

bigframes/display/anywidget.py

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,11 @@ class TableWidget(WIDGET_BASE):
5555

5656
page = traitlets.Int(0).tag(sync=True)
5757
page_size = traitlets.Int(0).tag(sync=True)
58-
row_count = traitlets.Int(0).tag(sync=True)
58+
row_count = traitlets.Union(
59+
[traitlets.Int(), traitlets.Instance(type(None))],
60+
default_value=None,
61+
allow_none=True,
62+
).tag(sync=True)
5963
table_html = traitlets.Unicode().tag(sync=True)
6064
_initial_load_complete = traitlets.Bool(False).tag(sync=True)
6165
_batches: Optional[blocks.PandasBatches] = None
@@ -94,12 +98,17 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
9498
# SELECT COUNT(*) query. It is a must have however.
9599
# TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
96100
# before we get here so that the count might already be cached.
97-
# TODO(b/452747934): Allow row_count to be None and check to see if
98-
# there are multiple pages and show "page 1 of many" in this case
99101
self._reset_batches_for_new_page_size()
100-
if self._batches is None or self._batches.total_rows is None:
101-
self._error_message = "Could not determine total row count. Data might be unavailable or an error occurred."
102-
self.row_count = 0
102+
103+
if self._batches is None:
104+
self._error_message = "Could not retrieve data batches. Data might be unavailable or an error occurred."
105+
self.row_count = None
106+
elif self._batches.total_rows is None:
107+
# Total rows is unknown, this is an expected state.
108+
# TODO(b/461536343): Cheaply discover if we have exactly 1 page.
109+
# There are cases where total rows is not set, but there are no additional
110+
# pages. We could disable the "next" button in these cases.
111+
self.row_count = None
103112
else:
104113
self.row_count = self._batches.total_rows
105114

@@ -131,11 +140,22 @@ def _validate_page(self, proposal: Dict[str, Any]) -> int:
131140
Returns:
132141
The validated and clamped page number as an integer.
133142
"""
134-
135143
value = proposal["value"]
144+
145+
if value < 0:
146+
raise ValueError("Page number cannot be negative.")
147+
148+
# If truly empty or invalid page size, stay on page 0.
149+
# This handles cases where row_count is 0 or page_size is 0, preventing
150+
# division by zero or nonsensical pagination, regardless of row_count being None.
136151
if self.row_count == 0 or self.page_size == 0:
137152
return 0
138153

154+
# If row count is unknown, allow any non-negative page. The previous check
155+
# ensures that invalid page_size (0) is already handled.
156+
if self.row_count is None:
157+
return value
158+
139159
# Calculate the zero-indexed maximum page number.
140160
max_page = max(0, math.ceil(self.row_count / self.page_size) - 1)
141161

@@ -229,6 +249,23 @@ def _set_table_html(self) -> None:
229249
# Get the data for the current page
230250
page_data = cached_data.iloc[start:end]
231251

252+
# Handle case where user navigated beyond available data with unknown row count
253+
is_unknown_count = self.row_count is None
254+
is_beyond_data = self._all_data_loaded and len(page_data) == 0 and self.page > 0
255+
if is_unknown_count and is_beyond_data:
256+
# Calculate the last valid page (zero-indexed)
257+
total_rows = len(cached_data)
258+
if total_rows > 0:
259+
last_valid_page = max(0, math.ceil(total_rows / self.page_size) - 1)
260+
# Navigate back to the last valid page
261+
self.page = last_valid_page
262+
# Recursively call to display the correct page
263+
return self._set_table_html()
264+
else:
265+
# If no data at all, stay on page 0 with empty display
266+
self.page = 0
267+
return self._set_table_html()
268+
232269
# Generate HTML table
233270
self.table_html = bigframes.display.html.render_html(
234271
dataframe=page_data,

bigframes/display/table_widget.js

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,21 @@ function render({ model, el }) {
8585
const rowCount = model.get(ModelProperty.ROW_COUNT);
8686
const pageSize = model.get(ModelProperty.PAGE_SIZE);
8787
const currentPage = model.get(ModelProperty.PAGE);
88-
const totalPages = Math.ceil(rowCount / pageSize);
89-
90-
rowCountLabel.textContent = `${rowCount.toLocaleString()} total rows`;
91-
paginationLabel.textContent = `Page ${(
92-
currentPage + 1
93-
).toLocaleString()} of ${(totalPages || 1).toLocaleString()}`;
94-
prevPage.disabled = currentPage === 0;
95-
nextPage.disabled = currentPage >= totalPages - 1;
88+
89+
if (rowCount === null) {
90+
// Unknown total rows
91+
rowCountLabel.textContent = "Total rows unknown";
92+
paginationLabel.textContent = `Page ${(currentPage + 1).toLocaleString()} of many`;
93+
prevPage.disabled = currentPage === 0;
94+
nextPage.disabled = false; // Allow navigation until we hit the end
95+
} else {
96+
// Known total rows
97+
const totalPages = Math.ceil(rowCount / pageSize);
98+
rowCountLabel.textContent = `${rowCount.toLocaleString()} total rows`;
99+
paginationLabel.textContent = `Page ${(currentPage + 1).toLocaleString()} of ${rowCount.toLocaleString()}`;
100+
prevPage.disabled = currentPage === 0;
101+
nextPage.disabled = currentPage >= totalPages - 1;
102+
}
96103
pageSizeSelect.value = pageSize;
97104
}
98105

notebooks/dataframes/anywidget_mode.ipynb

Lines changed: 97 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -35,16 +35,7 @@
3535
"execution_count": 2,
3636
"id": "ca22f059",
3737
"metadata": {},
38-
"outputs": [
39-
{
40-
"name": "stderr",
41-
"output_type": "stream",
42-
"text": [
43-
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/venv/lib/python3.10/site-packages/google/api_core/_python_version_support.py:266: FutureWarning: You are using a Python version (3.10.15) which Google will stop supporting in new releases of google.api_core once it reaches its end of life (2026-10-04). Please upgrade to the latest Python version, or at least Python 3.11, to continue receiving updates for google.api_core past that date.\n",
44-
" warnings.warn(message, FutureWarning)\n"
45-
]
46-
}
47-
],
38+
"outputs": [],
4839
"source": [
4940
"import bigframes.pandas as bpd"
5041
]
@@ -151,7 +142,7 @@
151142
{
152143
"data": {
153144
"application/vnd.jupyter.widget-view+json": {
154-
"model_id": "47795eaa10f149aeb99574232c0936eb",
145+
"model_id": "8fcad7b7e408422cae71d519cd2d4980",
155146
"version_major": 2,
156147
"version_minor": 1
157148
},
@@ -175,7 +166,7 @@
175166
}
176167
],
177168
"source": [
178-
"df"
169+
"df.set_index(\"name\")"
179170
]
180171
},
181172
{
@@ -214,7 +205,7 @@
214205
{
215206
"data": {
216207
"application/vnd.jupyter.widget-view+json": {
217-
"model_id": "8354ce0f82d3495a9b630dfc362f73ee",
208+
"model_id": "06cb98c577514d5c9654a7792d93f8e6",
218209
"version_major": 2,
219210
"version_minor": 1
220211
},
@@ -293,27 +284,8 @@
293284
{
294285
"data": {
295286
"text/html": [
296-
"\n",
297-
" Query started with request ID bigframes-dev:US.c45952fb-01b4-409c-9da4-f7c5bfc0d47d.<details><summary>SQL</summary><pre>SELECT\n",
298-
"`state` AS `state`,\n",
299-
"`gender` AS `gender`,\n",
300-
"`year` AS `year`,\n",
301-
"`name` AS `name`,\n",
302-
"`number` AS `number`\n",
303-
"FROM\n",
304-
"(SELECT\n",
305-
" *\n",
306-
"FROM (\n",
307-
" SELECT\n",
308-
" `state`,\n",
309-
" `gender`,\n",
310-
" `year`,\n",
311-
" `name`,\n",
312-
" `number`\n",
313-
" FROM `bigquery-public-data.usa_names.usa_1910_2013` FOR SYSTEM_TIME AS OF TIMESTAMP(&#x27;2025-10-30T21:48:48.979701+00:00&#x27;)\n",
314-
") AS `t0`)\n",
315-
"ORDER BY `name` ASC NULLS LAST ,`year` ASC NULLS LAST ,`state` ASC NULLS LAST\n",
316-
"LIMIT 5</pre></details>\n",
287+
"✅ Completed. \n",
288+
" Query processed 171.4 MB in a moment of slot time.\n",
317289
" "
318290
],
319291
"text/plain": [
@@ -333,7 +305,7 @@
333305
{
334306
"data": {
335307
"application/vnd.jupyter.widget-view+json": {
336-
"model_id": "59461286a17d4a42b6be6d9d9c7bf7e3",
308+
"model_id": "1672f826f7a347e38539dbb5fb72cd43",
337309
"version_major": 2,
338310
"version_minor": 1
339311
},
@@ -373,7 +345,7 @@
373345
"data": {
374346
"text/html": [
375347
"✅ Completed. \n",
376-
" Query processed 85.9 kB in 14 seconds of slot time.\n",
348+
" Query processed 85.9 kB in 12 seconds of slot time.\n",
377349
" "
378350
],
379351
"text/plain": [
@@ -387,7 +359,7 @@
387359
"name": "stderr",
388360
"output_type": "stream",
389361
"text": [
390-
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:969: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
362+
"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
391363
"instead of using `db_dtypes` in the future when available in pandas\n",
392364
"(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n",
393365
" warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n"
@@ -408,7 +380,7 @@
408380
{
409381
"data": {
410382
"application/vnd.jupyter.widget-view+json": {
411-
"model_id": "d1794b42579542a8980bd158e521bd3e",
383+
"model_id": "127a2e356b834c18b6f07c58ee2c4228",
412384
"version_major": 2,
413385
"version_minor": 1
414386
},
@@ -443,6 +415,93 @@
443415
" LIMIT 5;\n",
444416
"\"\"\")"
445417
]
418+
},
419+
{
420+
"cell_type": "markdown",
421+
"id": "multi-index-display-markdown",
422+
"metadata": {},
423+
"source": [
424+
"## Display Multi-Index DataFrame in anywidget mode\n",
425+
"This section demonstrates how BigFrames can display a DataFrame with multiple levels of indexing (a \"multi-index\") when using the `anywidget` display mode."
426+
]
427+
},
428+
{
429+
"cell_type": "code",
430+
"execution_count": 11,
431+
"id": "ad7482aa",
432+
"metadata": {},
433+
"outputs": [
434+
{
435+
"data": {
436+
"text/html": [
437+
"✅ Completed. \n",
438+
" Query processed 483.3 GB in 51 minutes of slot time. [<a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:3eace7c0-7776-48d6-925c-965be33d8738&page=queryresults\">Job bigframes-dev:US.3eace7c0-7776-48d6-925c-965be33d8738 details</a>]\n",
439+
" "
440+
],
441+
"text/plain": [
442+
"<IPython.core.display.HTML object>"
443+
]
444+
},
445+
"metadata": {},
446+
"output_type": "display_data"
447+
},
448+
{
449+
"data": {
450+
"text/html": [
451+
"✅ Completed. \n",
452+
" Query processed 124.4 MB in 7 seconds of slot time. [<a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:job_UJ5cx4R1jW5cNxq_1H1x-9-ATfqS&page=queryresults\">Job bigframes-dev:US.job_UJ5cx4R1jW5cNxq_1H1x-9-ATfqS details</a>]\n",
453+
" "
454+
],
455+
"text/plain": [
456+
"<IPython.core.display.HTML object>"
457+
]
458+
},
459+
"metadata": {},
460+
"output_type": "display_data"
461+
},
462+
{
463+
"data": {
464+
"application/vnd.jupyter.widget-view+json": {
465+
"model_id": "3f9652b5fdc0441eac2b05ab36d571d0",
466+
"version_major": 2,
467+
"version_minor": 1
468+
},
469+
"text/plain": [
470+
"TableWidget(page_size=10, row_count=3967869, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
471+
]
472+
},
473+
"metadata": {},
474+
"output_type": "display_data"
475+
},
476+
{
477+
"data": {
478+
"text/html": [],
479+
"text/plain": [
480+
"Computation deferred. Computation will process 513.5 GB"
481+
]
482+
},
483+
"execution_count": 11,
484+
"metadata": {},
485+
"output_type": "execute_result"
486+
}
487+
],
488+
"source": [
489+
"import datetime\n",
490+
"\n",
491+
" # Read the PyPI downloads dataset\n",
492+
"pypi_df = bpd.read_gbq(\"bigquery-public-data.pypi.file_downloads\")\n",
493+
"\n",
494+
"# Filter for the last 7 days to reduce the data size for this example\n",
495+
"seven_days_ago = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=7)\n",
496+
"pypi_df_recent = pypi_df[pypi_df[\"timestamp\"] > seven_days_ago]\n",
497+
" \n",
498+
"# Create a multi-index by grouping by date and project\n",
499+
"pypi_df_recent['date'] = pypi_df_recent['timestamp'].dt.date\n",
500+
"multi_index_df = pypi_df_recent.groupby([\"date\", \"project\"]).size().to_frame(\"downloads\")\n",
501+
" \n",
502+
"# Display the DataFrame with the multi-index\n",
503+
"multi_index_df"
504+
]
446505
}
447506
],
448507
"metadata": {

0 commit comments

Comments
 (0)