fix: Improve Anywidget pagination and display for unknown row counts (#2258)

shuoweil · web-flow · commit 508deae5869e · 2025-11-18T13:23:06.000-06:00
Previously, when the total number of rows (row_count) was unknown (e.g.,
due to deferred computation or errors), it would incorrectly default to
0. This resulted in confusing UI, such as displaying "Page 1 of 0", and
allowed users to navigate to empty pages without automatically returning
to valid data.

Current display strategy for the interactive table widget:

   * When `row_count` is a positive number (e.g., 50):
       * Total Rows Display: Shows the exact count, like "50 total rows".
       * Pagination Display: Shows the page relative to the total rows, like
         "Page 1 of 50".
       * Navigation: The "Next" button is disabled only on the final page.

   * When `row_count` is `None` (unknown):
       * Total Rows Display: Shows "Total rows unknown".
       * Pagination Display: Shows the page relative to an unknown total, like
         "Page 1 of many".
       * Navigation: The "Next" button is always enabled, allowing you to page
         forward until the backend determines there is no more data.

Fixes #<428238610> 🦕
diff --git a/bigframes/display/anywidget.py b/bigframes/display/anywidget.py
@@ -55,7 +55,11 @@ class TableWidget(WIDGET_BASE):
 
     page = traitlets.Int(0).tag(sync=True)
     page_size = traitlets.Int(0).tag(sync=True)
-    row_count = traitlets.Int(0).tag(sync=True)
+    row_count = traitlets.Union(
+        [traitlets.Int(), traitlets.Instance(type(None))],
+        default_value=None,
+        allow_none=True,
+    ).tag(sync=True)
     table_html = traitlets.Unicode().tag(sync=True)
     _initial_load_complete = traitlets.Bool(False).tag(sync=True)
     _batches: Optional[blocks.PandasBatches] = None
@@ -94,12 +98,17 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame):
         # SELECT COUNT(*) query. It is a must have however.
         # TODO(b/428238610): Start iterating over the result of `to_pandas_batches()`
         # before we get here so that the count might already be cached.
-        # TODO(b/452747934): Allow row_count to be None and check to see if
-        # there are multiple pages and show "page 1 of many" in this case
         self._reset_batches_for_new_page_size()
-        if self._batches is None or self._batches.total_rows is None:
-            self._error_message = "Could not determine total row count. Data might be unavailable or an error occurred."
-            self.row_count = 0
+
+        if self._batches is None:
+            self._error_message = "Could not retrieve data batches. Data might be unavailable or an error occurred."
+            self.row_count = None
+        elif self._batches.total_rows is None:
+            # Total rows is unknown, this is an expected state.
+            # TODO(b/461536343): Cheaply discover if we have exactly 1 page.
+            # There are cases where total rows is not set, but there are no additional
+            # pages. We could disable the "next" button in these cases.
+            self.row_count = None
         else:
             self.row_count = self._batches.total_rows
 
@@ -131,11 +140,22 @@ def _validate_page(self, proposal: Dict[str, Any]) -> int:
         Returns:
             The validated and clamped page number as an integer.
         """
-
         value = proposal["value"]
+
+        if value < 0:
+            raise ValueError("Page number cannot be negative.")
+
+        # If truly empty or invalid page size, stay on page 0.
+        # This handles cases where row_count is 0 or page_size is 0, preventing
+        # division by zero or nonsensical pagination, regardless of row_count being None.
         if self.row_count == 0 or self.page_size == 0:
             return 0
 
+        # If row count is unknown, allow any non-negative page. The previous check
+        # ensures that invalid page_size (0) is already handled.
+        if self.row_count is None:
+            return value
+
         # Calculate the zero-indexed maximum page number.
         max_page = max(0, math.ceil(self.row_count / self.page_size) - 1)
 
@@ -229,6 +249,23 @@ def _set_table_html(self) -> None:
         # Get the data for the current page
         page_data = cached_data.iloc[start:end]
 
+        # Handle case where user navigated beyond available data with unknown row count
+        is_unknown_count = self.row_count is None
+        is_beyond_data = self._all_data_loaded and len(page_data) == 0 and self.page > 0
+        if is_unknown_count and is_beyond_data:
+            # Calculate the last valid page (zero-indexed)
+            total_rows = len(cached_data)
+            if total_rows > 0:
+                last_valid_page = max(0, math.ceil(total_rows / self.page_size) - 1)
+                # Navigate back to the last valid page
+                self.page = last_valid_page
+                # Recursively call to display the correct page
+                return self._set_table_html()
+            else:
+                # If no data at all, stay on page 0 with empty display
+                self.page = 0
+                return self._set_table_html()
+
         # Generate HTML table
         self.table_html = bigframes.display.html.render_html(
             dataframe=page_data,
diff --git a/bigframes/display/table_widget.js b/bigframes/display/table_widget.js
@@ -85,14 +85,21 @@ function render({ model, el }) {
 		const rowCount = model.get(ModelProperty.ROW_COUNT);
 		const pageSize = model.get(ModelProperty.PAGE_SIZE);
 		const currentPage = model.get(ModelProperty.PAGE);
-		const totalPages = Math.ceil(rowCount / pageSize);
-
-		rowCountLabel.textContent = `${rowCount.toLocaleString()} total rows`;
-		paginationLabel.textContent = `Page ${(
-			currentPage + 1
-		).toLocaleString()} of ${(totalPages || 1).toLocaleString()}`;
-		prevPage.disabled = currentPage === 0;
-		nextPage.disabled = currentPage >= totalPages - 1;
+
+		if (rowCount === null) {
+			// Unknown total rows
+			rowCountLabel.textContent = "Total rows unknown";
+			paginationLabel.textContent = `Page ${(currentPage + 1).toLocaleString()} of many`;
+			prevPage.disabled = currentPage === 0;
+			nextPage.disabled = false; // Allow navigation until we hit the end
+		} else {
+			// Known total rows
+			const totalPages = Math.ceil(rowCount / pageSize);
+			rowCountLabel.textContent = `${rowCount.toLocaleString()} total rows`;
+			paginationLabel.textContent = `Page ${(currentPage + 1).toLocaleString()} of ${rowCount.toLocaleString()}`;
+			prevPage.disabled = currentPage === 0;
+			nextPage.disabled = currentPage >= totalPages - 1;
+		}
 		pageSizeSelect.value = pageSize;
 	}
 
diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb
@@ -35,16 +35,7 @@
    "execution_count": 2,
    "id": "ca22f059",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/venv/lib/python3.10/site-packages/google/api_core/_python_version_support.py:266: FutureWarning: You are using a Python version (3.10.15) which Google will stop supporting in new releases of google.api_core once it reaches its end of life (2026-10-04). Please upgrade to the latest Python version, or at least Python 3.11, to continue receiving updates for google.api_core past that date.\n",
-      "  warnings.warn(message, FutureWarning)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import bigframes.pandas as bpd"
    ]
@@ -151,7 +142,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "47795eaa10f149aeb99574232c0936eb",
+       "model_id": "8fcad7b7e408422cae71d519cd2d4980",
        "version_major": 2,
        "version_minor": 1
       },
@@ -175,7 +166,7 @@
     }
    ],
    "source": [
-    "df"
+    "df.set_index(\"name\")"
    ]
   },
   {
@@ -214,7 +205,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "8354ce0f82d3495a9b630dfc362f73ee",
+       "model_id": "06cb98c577514d5c9654a7792d93f8e6",
        "version_major": 2,
        "version_minor": 1
       },
@@ -293,27 +284,8 @@
     {
      "data": {
       "text/html": [
-       "\n",
-       "    Query started with request ID bigframes-dev:US.c45952fb-01b4-409c-9da4-f7c5bfc0d47d.<details><summary>SQL</summary><pre>SELECT\n",
-       "`state` AS `state`,\n",
-       "`gender` AS `gender`,\n",
-       "`year` AS `year`,\n",
-       "`name` AS `name`,\n",
-       "`number` AS `number`\n",
-       "FROM\n",
-       "(SELECT\n",
-       "  *\n",
-       "FROM (\n",
-       "  SELECT\n",
-       "    `state`,\n",
-       "    `gender`,\n",
-       "    `year`,\n",
-       "    `name`,\n",
-       "    `number`\n",
-       "  FROM `bigquery-public-data.usa_names.usa_1910_2013` FOR SYSTEM_TIME AS OF TIMESTAMP(&#x27;2025-10-30T21:48:48.979701+00:00&#x27;)\n",
-       ") AS `t0`)\n",
-       "ORDER BY `name` ASC NULLS LAST ,`year` ASC NULLS LAST ,`state` ASC NULLS LAST\n",
-       "LIMIT 5</pre></details>\n",
+       "✅ Completed. \n",
+       "    Query processed 171.4 MB in a moment of slot time.\n",
        "    "
       ],
       "text/plain": [
@@ -333,7 +305,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "59461286a17d4a42b6be6d9d9c7bf7e3",
+       "model_id": "1672f826f7a347e38539dbb5fb72cd43",
        "version_major": 2,
        "version_minor": 1
       },
@@ -373,7 +345,7 @@
      "data": {
       "text/html": [
        "✅ Completed. \n",
-       "    Query processed 85.9 kB in 14 seconds of slot time.\n",
+       "    Query processed 85.9 kB in 12 seconds of slot time.\n",
        "    "
       ],
       "text/plain": [
@@ -387,7 +359,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:969: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
+      "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
       "instead of using `db_dtypes` in the future when available in pandas\n",
       "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n",
       "  warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n"
@@ -408,7 +380,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "d1794b42579542a8980bd158e521bd3e",
+       "model_id": "127a2e356b834c18b6f07c58ee2c4228",
        "version_major": 2,
        "version_minor": 1
       },
@@ -443,6 +415,93 @@
     "  LIMIT 5;\n",
     "\"\"\")"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "multi-index-display-markdown",
+   "metadata": {},
+   "source": [
+    "## Display Multi-Index DataFrame in anywidget mode\n",
+    "This section demonstrates how BigFrames can display a DataFrame with multiple levels of indexing (a \"multi-index\") when using the `anywidget` display mode."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "ad7482aa",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "✅ Completed. \n",
+       "    Query processed 483.3 GB in 51 minutes of slot time. [<a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:3eace7c0-7776-48d6-925c-965be33d8738&page=queryresults\">Job bigframes-dev:US.3eace7c0-7776-48d6-925c-965be33d8738 details</a>]\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "✅ Completed. \n",
+       "    Query processed 124.4 MB in 7 seconds of slot time. [<a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:job_UJ5cx4R1jW5cNxq_1H1x-9-ATfqS&page=queryresults\">Job bigframes-dev:US.job_UJ5cx4R1jW5cNxq_1H1x-9-ATfqS details</a>]\n",
+       "    "
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3f9652b5fdc0441eac2b05ab36d571d0",
+       "version_major": 2,
+       "version_minor": 1
+      },
+      "text/plain": [
+       "TableWidget(page_size=10, row_count=3967869, table_html='<table border=\"1\" class=\"dataframe table table-stripe…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [],
+      "text/plain": [
+       "Computation deferred. Computation will process 513.5 GB"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import datetime\n",
+    "\n",
+    " # Read the PyPI downloads dataset\n",
+    "pypi_df = bpd.read_gbq(\"bigquery-public-data.pypi.file_downloads\")\n",
+    "\n",
+    "# Filter for the last 7 days to reduce the data size for this example\n",
+    "seven_days_ago = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=7)\n",
+    "pypi_df_recent = pypi_df[pypi_df[\"timestamp\"] > seven_days_ago]\n",
+    " \n",
+    "# Create a multi-index by grouping by date and project\n",
+    "pypi_df_recent['date'] = pypi_df_recent['timestamp'].dt.date\n",
+    "multi_index_df = pypi_df_recent.groupby([\"date\", \"project\"]).size().to_frame(\"downloads\")\n",
+    " \n",
+    "# Display the DataFrame with the multi-index\n",
+    "multi_index_df"
+   ]
   }
  ],
  "metadata": {
diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py