From 0dc287e4a92875ff38f7b381094c374858f4868e Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Sat, 8 Nov 2025 03:42:18 +0000 Subject: [PATCH 01/17] feat: Refactor anywidget display to use _ipython_display_ --- README.rst | 13 ++++ bigframes/dataframe.py | 78 +++++++++++++++-------- notebooks/dataframes/anywidget_mode.ipynb | 31 +++------ 3 files changed, 75 insertions(+), 47 deletions(-) diff --git a/README.rst b/README.rst index 84de370652..abf779aeab 100644 --- a/README.rst +++ b/README.rst @@ -110,6 +110,19 @@ For details, see the `third_party directory. +Display Enhancements +-------------------- + +**DataFrame `_repr_html_()` and `_ipython_display_()` Integration** + +The `bigframes.pandas.DataFrame._repr_html_()` method has been updated. When `bpd.options.display.repr_mode` is set to “anywidget”, it will: + +* Wrap the import of `anywidget` in a `try...except ImportError` block. If the dependency is not found, it will issue a `warnings.warn` message and fall back to returning the deferred representation. +* If the import is successful, it instantiates a new `TableWidget`, passing the DataFrame's data. +* Return the widget instance, which Jupyter automatically renders. A new widget instance is created for each `_repr_html_()` call to ensure cell outputs are isolated. + +A new `_ipython_display_()` method has been introduced to handle the actual widget rendering, separating concerns from `_repr_html_()`. + Contact Us ---------- diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 173aa48db8..7f15c6f2e4 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -837,10 +837,12 @@ def _repr_html_(self) -> str: """ opts = bigframes.options.display max_results = opts.max_rows - if opts.repr_mode == "deferred": + # For anywidget mode, return deferred representation + # The actual widget display is handled by _ipython_display_() + if opts.repr_mode in ("deferred", "anywidget"): return formatter.repr_query_job(self._compute_dry_run()) - # Process blob columns first, regardless of display mode + # Process blob columns first for non-deferred modes self._cached() df = self.copy() if bigframes.options.display.blob_display: @@ -855,29 +857,6 @@ def _repr_html_(self) -> str: else: blob_cols = [] - if opts.repr_mode == "anywidget": - try: - from IPython.display import display as ipython_display - - from bigframes import display - - # Always create a new widget instance for each display call - # This ensures that each cell gets its own widget and prevents - # unintended sharing between cells - widget = display.TableWidget(df.copy()) - - ipython_display(widget) - return "" # Return empty string since we used display() - - except (AttributeError, ValueError, ImportError): - # Fallback if anywidget is not available - warnings.warn( - "Anywidget mode is not available. " - "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " - f"Falling back to deferred mode. Error: {traceback.format_exc()}" - ) - return formatter.repr_query_job(self._compute_dry_run()) - # Continue with regular HTML rendering for non-anywidget modes # TODO(swast): pass max_columns and get the true column count back. Maybe # get 1 more column than we have requested so that pandas can add the @@ -937,6 +916,55 @@ def obj_ref_rt_to_html(obj_ref_rt) -> str: html_string += f"[{row_count} rows x {column_count} columns in total]" return html_string + def _ipython_display_(self): + """ + Custom display method for IPython/Jupyter environments. + This is called by IPython's display system when the object is displayed. + """ + opts = bigframes.options.display + + # Only handle widget display in anywidget mode + if opts.repr_mode == "anywidget": + try: + from bigframes import display + + # Process blob columns if needed + self._cached() + df = self.copy() + if bigframes.options.display.blob_display: + blob_cols = [ + series_name + for series_name, series in df.items() + if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE + ] + for col in blob_cols: + df[col] = df[col].blob._get_runtime( + mode="R", with_metadata=True + ) + + # Create and display the widget + widget = display.TableWidget(df) + + # IPython will automatically display the widget + # since we're returning it from _ipython_display_() + from IPython.display import display as ipython_display + + ipython_display(widget) + return # Important: return None to signal we handled display + + except (AttributeError, ValueError, ImportError): + # Fallback: let IPython use _repr_html_() instead + warnings.warn( + "Anywidget mode is not available. " + "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " + f"Falling back to deferred mode. Error: {traceback.format_exc()}" + ) + # Don't return anything - let IPython fall back to _repr_html_() + return + + # For other modes, don't handle display - let IPython use _repr_html_() + return + def __delitem__(self, key: str): df = self.drop(columns=[key]) self._set_block(df._get_block()) diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index f7a4b0e2d6..b300ee1fdb 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -36,6 +36,7 @@ "id": "ca22f059", "metadata": {}, "outputs": [], + "outputs": [], "source": [ "import bigframes.pandas as bpd" ] @@ -143,6 +144,7 @@ "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8fcad7b7e408422cae71d519cd2d4980", + "model_id": "4cc789c49be246bb94967e625986900a", "version_major": 2, "version_minor": 1 }, @@ -152,17 +154,6 @@ }, "metadata": {}, "output_type": "display_data" - }, - { - "data": { - "text/html": [], - "text/plain": [ - "Computation deferred. Computation will process 171.4 MB" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -206,6 +197,7 @@ "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "06cb98c577514d5c9654a7792d93f8e6", + "model_id": "b1089664ce03440d81ef206da1210229", "version_major": 2, "version_minor": 1 }, @@ -284,6 +276,8 @@ { "data": { "text/html": [ + "✅ Completed. \n", + " Query processed 171.4 MB in a moment of slot time.\n", "✅ Completed. \n", " Query processed 171.4 MB in a moment of slot time.\n", " " @@ -306,6 +300,7 @@ "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1672f826f7a347e38539dbb5fb72cd43", + "model_id": "f18925fc13304fb2ae34056f2cb1c68b", "version_major": 2, "version_minor": 1 }, @@ -346,6 +341,7 @@ "text/html": [ "✅ Completed. \n", " Query processed 85.9 kB in 12 seconds of slot time.\n", + " Query processed 85.9 kB in 15 seconds of slot time.\n", " " ], "text/plain": [ @@ -359,6 +355,7 @@ "name": "stderr", "output_type": "stream", "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", @@ -381,6 +378,7 @@ "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "127a2e356b834c18b6f07c58ee2c4228", + "model_id": "2335d3161b704a6da85165dbebf5ca0f", "version_major": 2, "version_minor": 1 }, @@ -390,17 +388,6 @@ }, "metadata": {}, "output_type": "display_data" - }, - { - "data": { - "text/html": [], - "text/plain": [ - "Computation deferred. Computation will process 0 Bytes" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ From 94c05d7c6c533cc4d0b12115afabcc2b64b09d6c Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Sat, 8 Nov 2025 03:53:06 +0000 Subject: [PATCH 02/17] Revert accidental changes to README.rst --- README.rst | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/README.rst b/README.rst index abf779aeab..84de370652 100644 --- a/README.rst +++ b/README.rst @@ -110,19 +110,6 @@ For details, see the `third_party directory. -Display Enhancements --------------------- - -**DataFrame `_repr_html_()` and `_ipython_display_()` Integration** - -The `bigframes.pandas.DataFrame._repr_html_()` method has been updated. When `bpd.options.display.repr_mode` is set to “anywidget”, it will: - -* Wrap the import of `anywidget` in a `try...except ImportError` block. If the dependency is not found, it will issue a `warnings.warn` message and fall back to returning the deferred representation. -* If the import is successful, it instantiates a new `TableWidget`, passing the DataFrame's data. -* Return the widget instance, which Jupyter automatically renders. A new widget instance is created for each `_repr_html_()` call to ensure cell outputs are isolated. - -A new `_ipython_display_()` method has been introduced to handle the actual widget rendering, separating concerns from `_repr_html_()`. - Contact Us ---------- From 1b15ef0fe4ae34df02f6b5e684106e746f7b1c8f Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Sat, 8 Nov 2025 03:56:56 +0000 Subject: [PATCH 03/17] add testcase --- tests/system/small/test_anywidget.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index 99734dc30c..4b411f3794 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -13,6 +13,8 @@ # limitations under the License. +import unittest.mock as mock + import pandas as pd import pytest @@ -684,6 +686,23 @@ def test_widget_with_unknown_row_count_empty_dataframe( assert widget.page == 0 -# TODO(shuowei): Add tests for custom index and multiindex +def test_repr_html_anywidget_fallback(paginated_bf_df: bf.dataframe.DataFrame): + """ + Test that _repr_html_ falls back to deferred mode when anywidget is not available. + """ + with bf.option_context("display.repr_mode", "anywidget"): + # Use a mock to simulate the absence of the 'anywidget' module. + with mock.patch.dict( + "sys.modules", {"anywidget": None, "IPython": mock.MagicMock()} + ): + # The warning is now expected inside the _ipython_display_ call, not _repr_html_ + # The test setup doesn't easily allow capturing warnings from ipython display hooks. + # Instead we focus on the fallback behavior of _repr_html_ + html = paginated_bf_df._repr_html_() + assert "Computation deferred." in html + assert "Computation will process" in html + + +# TODO(b/332316283): Add tests for custom index and multiindex # This may not be necessary for the SQL Cell use case but should be # considered for completeness. From 53953bb4e0dc8628d887bf455948f6502e60b04c Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 11 Nov 2025 23:00:04 +0000 Subject: [PATCH 04/17] ipython_display_ to set fallback mimetypes --- bigframes/dataframe.py | 30 ++++++++++++---------------- tests/system/small/test_anywidget.py | 28 ++++++++++++++++++-------- 2 files changed, 33 insertions(+), 25 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 7f15c6f2e4..1acafc2ef0 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -789,9 +789,7 @@ def __repr__(self) -> str: opts = bigframes.options.display max_results = opts.max_rows - # anywdiget mode uses the same display logic as the "deferred" mode - # for faster execution - if opts.repr_mode in ("deferred", "anywidget"): + if opts.repr_mode == "deferred": return formatter.repr_query_job(self._compute_dry_run()) # TODO(swast): pass max_columns and get the true column count back. Maybe @@ -837,9 +835,7 @@ def _repr_html_(self) -> str: """ opts = bigframes.options.display max_results = opts.max_rows - # For anywidget mode, return deferred representation - # The actual widget display is handled by _ipython_display_() - if opts.repr_mode in ("deferred", "anywidget"): + if opts.repr_mode == "deferred": return formatter.repr_query_job(self._compute_dry_run()) # Process blob columns first for non-deferred modes @@ -916,7 +912,7 @@ def obj_ref_rt_to_html(obj_ref_rt) -> str: html_string += f"[{row_count} rows x {column_count} columns in total]" return html_string - def _ipython_display_(self): + def _repr_mimebundle_(self, include=None, exclude=None): """ Custom display method for IPython/Jupyter environments. This is called by IPython's display system when the object is displayed. @@ -944,26 +940,26 @@ def _ipython_display_(self): # Create and display the widget widget = display.TableWidget(df) + widget_repr = widget._repr_mimebundle_(include=include, exclude=exclude) - # IPython will automatically display the widget - # since we're returning it from _ipython_display_() - from IPython.display import display as ipython_display - - ipython_display(widget) - return # Important: return None to signal we handled display + # Use deferred repr for text/plain of anywidget display. + # This avoids kicking off a query when the user is just + # printing the last expression in a cell. + widget_repr["text/plain"] = repr(self) + widget_repr["text/html"] = self._repr_html_() + return widget_repr except (AttributeError, ValueError, ImportError): # Fallback: let IPython use _repr_html_() instead warnings.warn( "Anywidget mode is not available. " "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " - f"Falling back to deferred mode. Error: {traceback.format_exc()}" + f"Falling back to static HTML. Error: {traceback.format_exc()}" ) # Don't return anything - let IPython fall back to _repr_html_() - return + pass - # For other modes, don't handle display - let IPython use _repr_html_() - return + return {"text/html": self._repr_html_(), "text/plain": repr(self)} def __delitem__(self, key: str): df = self.drop(columns=[key]) diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index 4b411f3794..c15cf90c04 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -686,21 +686,33 @@ def test_widget_with_unknown_row_count_empty_dataframe( assert widget.page == 0 -def test_repr_html_anywidget_fallback(paginated_bf_df: bf.dataframe.DataFrame): +def test_repr_mimebundle_anywidget_fallback(paginated_bf_df: bf.dataframe.DataFrame): """ - Test that _repr_html_ falls back to deferred mode when anywidget is not available. + Test that _repr_mimebundle_ falls back to static html when anywidget is not available. """ with bf.option_context("display.repr_mode", "anywidget"): # Use a mock to simulate the absence of the 'anywidget' module. with mock.patch.dict( "sys.modules", {"anywidget": None, "IPython": mock.MagicMock()} ): - # The warning is now expected inside the _ipython_display_ call, not _repr_html_ - # The test setup doesn't easily allow capturing warnings from ipython display hooks. - # Instead we focus on the fallback behavior of _repr_html_ - html = paginated_bf_df._repr_html_() - assert "Computation deferred." in html - assert "Computation will process" in html + bundle = paginated_bf_df._repr_mimebundle_() + assert "application/vnd.jupyter.widget-view+json" not in bundle + assert "text/html" in bundle + html = bundle["text/html"] + assert "page_1_row_1" in html + assert "page_1_row_2" in html + assert "page_2_row_1" not in html + + +def test_repr_mimebundle_anywidget_success(paginated_bf_df: bf.dataframe.DataFrame): + """ + Test that _repr_mimebundle_ returns a widget view when anywidget is available. + """ + with bf.option_context("display.repr_mode", "anywidget"): + bundle = paginated_bf_df._repr_mimebundle_() + assert "application/vnd.jupyter.widget-view+json" in bundle + assert "text/html" in bundle + assert "text/plain" in bundle # TODO(b/332316283): Add tests for custom index and multiindex From 5397a59b55a3925aeff1e16650122b097b664dfe Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 13 Nov 2025 20:03:18 +0000 Subject: [PATCH 05/17] remove _repr_html_() --- bigframes/dataframe.py | 104 +++--- notebooks/dataframes/anywidget_mode.ipynb | 423 ++++++++++++++++++++-- tests/system/small/test_dataframe.py | 21 +- tests/unit/test_dataframe_polars.py | 21 +- 4 files changed, 469 insertions(+), 100 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 1acafc2ef0..72697de89e 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -827,11 +827,58 @@ def __repr__(self) -> str: lines.append(f"[{row_count} rows x {column_count} columns]") return "\n".join(lines) - def _repr_html_(self) -> str: + def _repr_mimebundle_(self, include=None, exclude=None): + """ + Custom display method for IPython/Jupyter environments. + This is called by IPython's display system when the object is displayed. + """ + opts = bigframes.options.display + + # Only handle widget display in anywidget mode + if opts.repr_mode == "anywidget": + try: + from bigframes import display + + # Process blob columns if needed + self._cached() + df = self.copy() + if bigframes.options.display.blob_display: + blob_cols = [ + series_name + for series_name, series in df.items() + if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE + ] + for col in blob_cols: + df[col] = df[col].blob._get_runtime( + mode="R", with_metadata=True + ) + + # Create and display the widget + widget = display.TableWidget(df) + widget_repr = widget._repr_mimebundle_(include=include, exclude=exclude) + + # Use deferred repr for text/plain of anywidget display. + # This avoids kicking off a query when the user is just + # printing the last expression in a cell. + widget_repr["text/plain"] = repr(self) + widget_repr["text/html"] = self._repr_html_fallback() + return widget_repr + + except (AttributeError, ValueError, ImportError): + # Fallback: let IPython use _repr_html_() instead + warnings.warn( + "Anywidget mode is not available. " + "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " + f"Falling back to static HTML. Error: {traceback.format_exc()}" + ) + # Don't return anything - let IPython fall back to _repr_html_() + pass + + return {"text/html": self._repr_html_fallback(), "text/plain": repr(self)} + + def _repr_html_fallback(self) -> str: """ - Returns an html string primarily for use by notebooks for displaying - a representation of the DataFrame. Displays 20 rows by default since - many notebooks are not configured for large tables. + Generates a static HTML table as a fallback representation. """ opts = bigframes.options.display max_results = opts.max_rows @@ -912,55 +959,6 @@ def obj_ref_rt_to_html(obj_ref_rt) -> str: html_string += f"[{row_count} rows x {column_count} columns in total]" return html_string - def _repr_mimebundle_(self, include=None, exclude=None): - """ - Custom display method for IPython/Jupyter environments. - This is called by IPython's display system when the object is displayed. - """ - opts = bigframes.options.display - - # Only handle widget display in anywidget mode - if opts.repr_mode == "anywidget": - try: - from bigframes import display - - # Process blob columns if needed - self._cached() - df = self.copy() - if bigframes.options.display.blob_display: - blob_cols = [ - series_name - for series_name, series in df.items() - if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE - ] - for col in blob_cols: - df[col] = df[col].blob._get_runtime( - mode="R", with_metadata=True - ) - - # Create and display the widget - widget = display.TableWidget(df) - widget_repr = widget._repr_mimebundle_(include=include, exclude=exclude) - - # Use deferred repr for text/plain of anywidget display. - # This avoids kicking off a query when the user is just - # printing the last expression in a cell. - widget_repr["text/plain"] = repr(self) - widget_repr["text/html"] = self._repr_html_() - return widget_repr - - except (AttributeError, ValueError, ImportError): - # Fallback: let IPython use _repr_html_() instead - warnings.warn( - "Anywidget mode is not available. " - "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " - f"Falling back to static HTML. Error: {traceback.format_exc()}" - ) - # Don't return anything - let IPython fall back to _repr_html_() - pass - - return {"text/html": self._repr_html_(), "text/plain": repr(self)} - def __delitem__(self, key: str): df = self.drop(columns=[key]) self._set_block(df._get_block()) diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index b300ee1fdb..f6cbcf6c58 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -74,11 +74,50 @@ "id": "f289d250", "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 0 Bytes in a moment of slot time.\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stdout", "output_type": "stream", "text": [ - "Computation deferred. Computation will process 171.4 MB\n" + "state gender year name number\n", + " AL F 1910 Vera 71\n", + " AR F 1910 Viola 37\n", + " AR F 1910 Alice 57\n", + " AR F 1910 Edna 95\n", + " AR F 1910 Ollie 40\n", + " CA F 1910 Beatrice 37\n", + " CT F 1910 Marion 36\n", + " CT F 1910 Marie 36\n", + " FL F 1910 Alice 53\n", + " GA F 1910 Thelma 133\n", + "...\n", + "\n", + "[5552452 rows x 5 columns]\n" ] } ], @@ -124,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, "id": "ce250157", "metadata": {}, "outputs": [ @@ -140,20 +179,152 @@ "metadata": {}, "output_type": "display_data" }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:955: UserWarning: Anywidget mode is not available. Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. Falling back to static HTML. Error: Traceback (most recent call last):\n", + " File \"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py\", line 947, in _repr_mimebundle_\n", + " widget_repr[\"text/plain\"] = formatter.to_string(\n", + "AttributeError: module 'bigframes.formatting_helpers' has no attribute 'to_string'\n", + "\n", + " warnings.warn(\n" + ] + }, { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "8fcad7b7e408422cae71d519cd2d4980", - "model_id": "4cc789c49be246bb94967e625986900a", - "version_major": 2, - "version_minor": 1 - }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stategenderyearnamenumber
0ALF1910Vera71
1ARF1910Viola37
2ARF1910Alice57
3ARF1910Edna95
4ARF1910Ollie40
5CAF1910Beatrice37
6CTF1910Marion36
7CTF1910Marie36
8FLF1910Alice53
9GAF1910Thelma133
\n", + "

10 rows × 5 columns

\n", + "
[5552452 rows x 5 columns in total]" + ], "text/plain": [ - "TableWidget(page_size=10, row_count=5552452, table_html='\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
resultgcs_pathissuerlanguagepublication_dateclass_internationalclass_usapplication_numberfiling_datepriority_date_eurepresentative_line_1_euapplicant_line_1inventor_line_1title_line_1number
0{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE29.08.018E04H 6/12<NA>18157874.121.02.201822.02.2017Liedtke & Partner PatentanwälteSHB Hebezeugbau GmbHVOLGER, AlexanderSTEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSEREP 3 366 869 A1
1{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018G06F 11/30<NA>18157347.819.02.201831.03.2017Hoffmann EitleFUJITSU LIMITEDKukihara, KensukeMETHOD EXECUTED BY A COMPUTER, INFORMATION PRO...EP 3 382 553 A1
2{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H01L 21/20<NA>18166536.516.02.2016<NA>Scheider, Sascha et alEV Group E. Thallner GmbHKurz, FlorianVORRICHTUNG ZUM BONDEN VON SUBSTRATENEP 3 382 744 A1
3{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018A01K 31/00<NA>18171005.405.02.201505.02.2014Stork Bamberger PatentanwälteLinco Food Systems A/SThrane, UffeMASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E...EP 3 381 276 A1
4{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H05B 6/12<NA>18165514.303.04.201830.03.2017<NA>BSH Hausger√§te GmbHAcero Acero, JesusVORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNGEP 3 383 141 A2
\n", + "

5 rows × 15 columns

\n", + "[5 rows x 15 columns in total]" + ], + "text/plain": [ + " result \\\n", + "0 {'application_number': None, 'class_internatio... \n", + "1 {'application_number': None, 'class_internatio... \n", + "2 {'application_number': None, 'class_internatio... \n", + "3 {'application_number': None, 'class_internatio... \n", + "4 {'application_number': None, 'class_internatio... \n", + "\n", + " gcs_path issuer language \\\n", + "0 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "1 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "2 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "3 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "\n", + " publication_date class_international class_us application_number \\\n", + "0 29.08.018 E04H 6/12 18157874.1 \n", + "1 03.10.2018 G06F 11/30 18157347.8 \n", + "2 03.10.2018 H01L 21/20 18166536.5 \n", + "3 03.10.2018 A01K 31/00 18171005.4 \n", + "4 03.10.2018 H05B 6/12 18165514.3 \n", + "\n", + " filing_date priority_date_eu representative_line_1_eu \\\n", + "0 21.02.2018 22.02.2017 Liedtke & Partner Patentanw√§lte \n", + "1 19.02.2018 31.03.2017 Hoffmann Eitle \n", + "2 16.02.2016 Scheider, Sascha et al \n", + "3 05.02.2015 05.02.2014 Stork Bamberger Patentanw√§lte \n", + "4 03.04.2018 30.03.2017 \n", + "\n", + " applicant_line_1 inventor_line_1 \\\n", + "0 SHB Hebezeugbau GmbH VOLGER, Alexander \n", + "1 FUJITSU LIMITED Kukihara, Kensuke \n", + "2 EV Group E. Thallner GmbH Kurz, Florian \n", + "3 Linco Food Systems A/S Thrane, Uffe \n", + "4 BSH Hausger√§te GmbH Acero Acero, Jesus \n", + "\n", + " title_line_1 number \n", + "0 STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER EP 3 366 869 A1 \n", + "1 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", + "2 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", + "3 MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", + "4 VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG EP 3 383 141 A2 \n", + "\n", + "[5 rows x 15 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 19d3c67e19..801ef6ed82 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -958,26 +958,23 @@ def test_repr_w_display_options(scalars_dfs, session): assert (executions_post - executions_pre) <= 3 -def test_repr_html_w_all_rows(scalars_dfs, session): +def test_repr_mimebundle_html_w_all_rows(scalars_dfs, session): metrics = session._metrics - scalars_df, _ = scalars_dfs - # get a pandas df of the expected format - df, _ = scalars_df._block.to_pandas() - pandas_df = df.set_axis(scalars_df._block.column_labels, axis=1) - pandas_df.index.name = scalars_df.index.name + scalars_df, scalars_pandas_df = scalars_dfs + + bf_head = scalars_df.head(10) + pd_head = scalars_pandas_df.head(10) executions_pre = metrics.execution_count # When there are 10 or fewer rows, the outputs should be identical except for the extra note. - actual = scalars_df.head(10)._repr_html_() + actual = bf_head._repr_mimebundle_()["text/html"] executions_post = metrics.execution_count with display_options.pandas_repr(bigframes.options.display): - pandas_repr = pandas_df.head(10)._repr_html_() + pandas_repr = pd_head._repr_html_() - expected = ( - pandas_repr - + f"[{len(pandas_df.index)} rows x {len(pandas_df.columns)} columns in total]" - ) + row_count, col_count = bf_head.shape + expected = pandas_repr + f"[{row_count} rows x {col_count} columns in total]" assert actual == expected assert (executions_post - executions_pre) <= 3 diff --git a/tests/unit/test_dataframe_polars.py b/tests/unit/test_dataframe_polars.py index b83380d789..e1cc7023a6 100644 --- a/tests/unit/test_dataframe_polars.py +++ b/tests/unit/test_dataframe_polars.py @@ -737,23 +737,20 @@ def test_join_repr(scalars_dfs): assert actual == expected -def test_repr_html_w_all_rows(scalars_dfs, session): - scalars_df, _ = scalars_dfs - # get a pandas df of the expected format - df, _ = scalars_df._block.to_pandas() - pandas_df = df.set_axis(scalars_df._block.column_labels, axis=1) - pandas_df.index.name = scalars_df.index.name +def test_repr_mimebundle_html_w_all_rows(scalars_dfs, session): + scalars_df, scalars_pandas_df = scalars_dfs + + bf_head = scalars_df.head(10) + pd_head = scalars_pandas_df.head(10) # When there are 10 or fewer rows, the outputs should be identical except for the extra note. - actual = scalars_df.head(10)._repr_html_() + actual = bf_head._repr_mimebundle()["text/html"] with display_options.pandas_repr(bigframes.options.display): - pandas_repr = pandas_df.head(10)._repr_html_() + pandas_repr = pd_head._repr_html_() - expected = ( - pandas_repr - + f"[{len(pandas_df.index)} rows x {len(pandas_df.columns)} columns in total]" - ) + row_count, col_count = bf_head.shape + expected = pandas_repr + f"[{row_count} rows x {col_count} columns in total]" assert actual == expected From 689fa74d0190666f0c60b8be982ae2c783d7f43d Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 14 Nov 2025 00:31:03 +0000 Subject: [PATCH 06/17] Revert "remove _repr_html_()" This reverts commit 61a9903dba4b27ad0b69091f2a4e30d9571c9c9c. --- bigframes/dataframe.py | 104 +++--- notebooks/dataframes/anywidget_mode.ipynb | 418 +--------------------- tests/system/small/test_dataframe.py | 21 +- tests/unit/test_dataframe_polars.py | 21 +- 4 files changed, 95 insertions(+), 469 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 72697de89e..1acafc2ef0 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -827,58 +827,11 @@ def __repr__(self) -> str: lines.append(f"[{row_count} rows x {column_count} columns]") return "\n".join(lines) - def _repr_mimebundle_(self, include=None, exclude=None): - """ - Custom display method for IPython/Jupyter environments. - This is called by IPython's display system when the object is displayed. - """ - opts = bigframes.options.display - - # Only handle widget display in anywidget mode - if opts.repr_mode == "anywidget": - try: - from bigframes import display - - # Process blob columns if needed - self._cached() - df = self.copy() - if bigframes.options.display.blob_display: - blob_cols = [ - series_name - for series_name, series in df.items() - if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE - ] - for col in blob_cols: - df[col] = df[col].blob._get_runtime( - mode="R", with_metadata=True - ) - - # Create and display the widget - widget = display.TableWidget(df) - widget_repr = widget._repr_mimebundle_(include=include, exclude=exclude) - - # Use deferred repr for text/plain of anywidget display. - # This avoids kicking off a query when the user is just - # printing the last expression in a cell. - widget_repr["text/plain"] = repr(self) - widget_repr["text/html"] = self._repr_html_fallback() - return widget_repr - - except (AttributeError, ValueError, ImportError): - # Fallback: let IPython use _repr_html_() instead - warnings.warn( - "Anywidget mode is not available. " - "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " - f"Falling back to static HTML. Error: {traceback.format_exc()}" - ) - # Don't return anything - let IPython fall back to _repr_html_() - pass - - return {"text/html": self._repr_html_fallback(), "text/plain": repr(self)} - - def _repr_html_fallback(self) -> str: + def _repr_html_(self) -> str: """ - Generates a static HTML table as a fallback representation. + Returns an html string primarily for use by notebooks for displaying + a representation of the DataFrame. Displays 20 rows by default since + many notebooks are not configured for large tables. """ opts = bigframes.options.display max_results = opts.max_rows @@ -959,6 +912,55 @@ def obj_ref_rt_to_html(obj_ref_rt) -> str: html_string += f"[{row_count} rows x {column_count} columns in total]" return html_string + def _repr_mimebundle_(self, include=None, exclude=None): + """ + Custom display method for IPython/Jupyter environments. + This is called by IPython's display system when the object is displayed. + """ + opts = bigframes.options.display + + # Only handle widget display in anywidget mode + if opts.repr_mode == "anywidget": + try: + from bigframes import display + + # Process blob columns if needed + self._cached() + df = self.copy() + if bigframes.options.display.blob_display: + blob_cols = [ + series_name + for series_name, series in df.items() + if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE + ] + for col in blob_cols: + df[col] = df[col].blob._get_runtime( + mode="R", with_metadata=True + ) + + # Create and display the widget + widget = display.TableWidget(df) + widget_repr = widget._repr_mimebundle_(include=include, exclude=exclude) + + # Use deferred repr for text/plain of anywidget display. + # This avoids kicking off a query when the user is just + # printing the last expression in a cell. + widget_repr["text/plain"] = repr(self) + widget_repr["text/html"] = self._repr_html_() + return widget_repr + + except (AttributeError, ValueError, ImportError): + # Fallback: let IPython use _repr_html_() instead + warnings.warn( + "Anywidget mode is not available. " + "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " + f"Falling back to static HTML. Error: {traceback.format_exc()}" + ) + # Don't return anything - let IPython fall back to _repr_html_() + pass + + return {"text/html": self._repr_html_(), "text/plain": repr(self)} + def __delitem__(self, key: str): df = self.drop(columns=[key]) self._set_block(df._get_block()) diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index f6cbcf6c58..83e1684174 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -74,50 +74,11 @@ "id": "f289d250", "metadata": {}, "outputs": [ - { - "data": { - "text/html": [ - "✅ Completed. " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 0 Bytes in a moment of slot time.\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "name": "stdout", "output_type": "stream", "text": [ - "state gender year name number\n", - " AL F 1910 Vera 71\n", - " AR F 1910 Viola 37\n", - " AR F 1910 Alice 57\n", - " AR F 1910 Edna 95\n", - " AR F 1910 Ollie 40\n", - " CA F 1910 Beatrice 37\n", - " CT F 1910 Marion 36\n", - " CT F 1910 Marie 36\n", - " FL F 1910 Alice 53\n", - " GA F 1910 Thelma 133\n", - "...\n", - "\n", - "[5552452 rows x 5 columns]\n" + "Computation deferred. Computation will process 171.4 MB\n" ] } ], @@ -163,7 +124,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "id": "ce250157", "metadata": {}, "outputs": [ @@ -179,152 +140,19 @@ "metadata": {}, "output_type": "display_data" }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py:955: UserWarning: Anywidget mode is not available. Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. Falling back to static HTML. Error: Traceback (most recent call last):\n", - " File \"/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dataframe.py\", line 947, in _repr_mimebundle_\n", - " widget_repr[\"text/plain\"] = formatter.to_string(\n", - "AttributeError: module 'bigframes.formatting_helpers' has no attribute 'to_string'\n", - "\n", - " warnings.warn(\n" - ] - }, { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
stategenderyearnamenumber
0ALF1910Vera71
1ARF1910Viola37
2ARF1910Alice57
3ARF1910Edna95
4ARF1910Ollie40
5CAF1910Beatrice37
6CTF1910Marion36
7CTF1910Marie36
8FLF1910Alice53
9GAF1910Thelma133
\n", - "

10 rows × 5 columns

\n", - "
[5552452 rows x 5 columns in total]" - ], + "application/vnd.jupyter.widget-view+json": { + "model_id": "4cc789c49be246bb94967e625986900a", + "version_major": 2, + "version_minor": 1 + }, "text/plain": [ - "state gender year name number\n", - " AL F 1910 Vera 71\n", - " AR F 1910 Viola 37\n", - " AR F 1910 Alice 57\n", - " AR F 1910 Edna 95\n", - " AR F 1910 Ollie 40\n", - " CA F 1910 Beatrice 37\n", - " CT F 1910 Marion 36\n", - " CT F 1910 Marie 36\n", - " FL F 1910 Alice 53\n", - " GA F 1910 Thelma 133\n", - "...\n", - "\n", - "[5552452 rows x 5 columns]" + "TableWidget(page_size=10, row_count=5552452, table_html='" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "✅ Completed. " - ], + "application/vnd.jupyter.widget-view+json": { + "model_id": "2335d3161b704a6da85165dbebf5ca0f", + "version_major": 2, + "version_minor": 1 + }, "text/plain": [ - "" + "TableWidget(page_size=10, row_count=5, table_html='
\n", - "\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
resultgcs_pathissuerlanguagepublication_dateclass_internationalclass_usapplication_numberfiling_datepriority_date_eurepresentative_line_1_euapplicant_line_1inventor_line_1title_line_1number
0{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE29.08.018E04H 6/12<NA>18157874.121.02.201822.02.2017Liedtke & Partner PatentanwälteSHB Hebezeugbau GmbHVOLGER, AlexanderSTEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSEREP 3 366 869 A1
1{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018G06F 11/30<NA>18157347.819.02.201831.03.2017Hoffmann EitleFUJITSU LIMITEDKukihara, KensukeMETHOD EXECUTED BY A COMPUTER, INFORMATION PRO...EP 3 382 553 A1
2{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H01L 21/20<NA>18166536.516.02.2016<NA>Scheider, Sascha et alEV Group E. Thallner GmbHKurz, FlorianVORRICHTUNG ZUM BONDEN VON SUBSTRATENEP 3 382 744 A1
3{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018A01K 31/00<NA>18171005.405.02.201505.02.2014Stork Bamberger PatentanwälteLinco Food Systems A/SThrane, UffeMASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E...EP 3 381 276 A1
4{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H05B 6/12<NA>18165514.303.04.201830.03.2017<NA>BSH Hausger√§te GmbHAcero Acero, JesusVORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNGEP 3 383 141 A2
\n", - "

5 rows × 15 columns

\n", - "[5 rows x 15 columns in total]" - ], - "text/plain": [ - " result \\\n", - "0 {'application_number': None, 'class_internatio... \n", - "1 {'application_number': None, 'class_internatio... \n", - "2 {'application_number': None, 'class_internatio... \n", - "3 {'application_number': None, 'class_internatio... \n", - "4 {'application_number': None, 'class_internatio... \n", - "\n", - " gcs_path issuer language \\\n", - "0 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", - "1 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", - "2 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", - "3 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", - "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", - "\n", - " publication_date class_international class_us application_number \\\n", - "0 29.08.018 E04H 6/12 18157874.1 \n", - "1 03.10.2018 G06F 11/30 18157347.8 \n", - "2 03.10.2018 H01L 21/20 18166536.5 \n", - "3 03.10.2018 A01K 31/00 18171005.4 \n", - "4 03.10.2018 H05B 6/12 18165514.3 \n", - "\n", - " filing_date priority_date_eu representative_line_1_eu \\\n", - "0 21.02.2018 22.02.2017 Liedtke & Partner Patentanw√§lte \n", - "1 19.02.2018 31.03.2017 Hoffmann Eitle \n", - "2 16.02.2016 Scheider, Sascha et al \n", - "3 05.02.2015 05.02.2014 Stork Bamberger Patentanw√§lte \n", - "4 03.04.2018 30.03.2017 \n", - "\n", - " applicant_line_1 inventor_line_1 \\\n", - "0 SHB Hebezeugbau GmbH VOLGER, Alexander \n", - "1 FUJITSU LIMITED Kukihara, Kensuke \n", - "2 EV Group E. Thallner GmbH Kurz, Florian \n", - "3 Linco Food Systems A/S Thrane, Uffe \n", - "4 BSH Hausger√§te GmbH Acero Acero, Jesus \n", - "\n", - " title_line_1 number \n", - "0 STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER EP 3 366 869 A1 \n", - "1 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", - "2 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", - "3 MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", - "4 VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG EP 3 383 141 A2 \n", - "\n", - "[5 rows x 15 columns]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 801ef6ed82..19d3c67e19 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -958,23 +958,26 @@ def test_repr_w_display_options(scalars_dfs, session): assert (executions_post - executions_pre) <= 3 -def test_repr_mimebundle_html_w_all_rows(scalars_dfs, session): +def test_repr_html_w_all_rows(scalars_dfs, session): metrics = session._metrics - scalars_df, scalars_pandas_df = scalars_dfs - - bf_head = scalars_df.head(10) - pd_head = scalars_pandas_df.head(10) + scalars_df, _ = scalars_dfs + # get a pandas df of the expected format + df, _ = scalars_df._block.to_pandas() + pandas_df = df.set_axis(scalars_df._block.column_labels, axis=1) + pandas_df.index.name = scalars_df.index.name executions_pre = metrics.execution_count # When there are 10 or fewer rows, the outputs should be identical except for the extra note. - actual = bf_head._repr_mimebundle_()["text/html"] + actual = scalars_df.head(10)._repr_html_() executions_post = metrics.execution_count with display_options.pandas_repr(bigframes.options.display): - pandas_repr = pd_head._repr_html_() + pandas_repr = pandas_df.head(10)._repr_html_() - row_count, col_count = bf_head.shape - expected = pandas_repr + f"[{row_count} rows x {col_count} columns in total]" + expected = ( + pandas_repr + + f"[{len(pandas_df.index)} rows x {len(pandas_df.columns)} columns in total]" + ) assert actual == expected assert (executions_post - executions_pre) <= 3 diff --git a/tests/unit/test_dataframe_polars.py b/tests/unit/test_dataframe_polars.py index e1cc7023a6..b83380d789 100644 --- a/tests/unit/test_dataframe_polars.py +++ b/tests/unit/test_dataframe_polars.py @@ -737,20 +737,23 @@ def test_join_repr(scalars_dfs): assert actual == expected -def test_repr_mimebundle_html_w_all_rows(scalars_dfs, session): - scalars_df, scalars_pandas_df = scalars_dfs - - bf_head = scalars_df.head(10) - pd_head = scalars_pandas_df.head(10) +def test_repr_html_w_all_rows(scalars_dfs, session): + scalars_df, _ = scalars_dfs + # get a pandas df of the expected format + df, _ = scalars_df._block.to_pandas() + pandas_df = df.set_axis(scalars_df._block.column_labels, axis=1) + pandas_df.index.name = scalars_df.index.name # When there are 10 or fewer rows, the outputs should be identical except for the extra note. - actual = bf_head._repr_mimebundle()["text/html"] + actual = scalars_df.head(10)._repr_html_() with display_options.pandas_repr(bigframes.options.display): - pandas_repr = pd_head._repr_html_() + pandas_repr = pandas_df.head(10)._repr_html_() - row_count, col_count = bf_head.shape - expected = pandas_repr + f"[{row_count} rows x {col_count} columns in total]" + expected = ( + pandas_repr + + f"[{len(pandas_df.index)} rows x {len(pandas_df.columns)} columns in total]" + ) assert actual == expected From 6b407d406415c4043bba77d5c66e1a507965996e Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 14 Nov 2025 04:34:53 +0000 Subject: [PATCH 07/17] change the fallback function name to better reflect the change --- bigframes/dataframe.py | 24 +- notebooks/dataframes/anywidget_mode.ipynb | 394 +++++++++++++++++++++- tests/system/small/test_anywidget.py | 8 +- tests/system/small/test_dataframe.py | 3 +- tests/unit/test_dataframe_polars.py | 3 +- 5 files changed, 408 insertions(+), 24 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 1acafc2ef0..1a8fdc6b80 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -827,7 +827,7 @@ def __repr__(self) -> str: lines.append(f"[{row_count} rows x {column_count} columns]") return "\n".join(lines) - def _repr_html_(self) -> str: + def _repr_html_fallback_(self) -> str: """ Returns an html string primarily for use by notebooks for displaying a representation of the DataFrame. Displays 20 rows by default since @@ -940,26 +940,36 @@ def _repr_mimebundle_(self, include=None, exclude=None): # Create and display the widget widget = display.TableWidget(df) - widget_repr = widget._repr_mimebundle_(include=include, exclude=exclude) + widget_repr_result = widget._repr_mimebundle_( + include=include, exclude=exclude + ) + + # Handle both tuple (data, metadata) and dict returns + if isinstance(widget_repr_result, tuple): + widget_repr = dict( + widget_repr_result[0] + ) # Extract data dict from tuple + else: + widget_repr = dict(widget_repr_result) # Use deferred repr for text/plain of anywidget display. # This avoids kicking off a query when the user is just # printing the last expression in a cell. - widget_repr["text/plain"] = repr(self) - widget_repr["text/html"] = self._repr_html_() + widget_repr["text/plain"] = repr(df) + widget_repr["text/html"] = self._repr_html_fallback_() return widget_repr except (AttributeError, ValueError, ImportError): - # Fallback: let IPython use _repr_html_() instead + # Fallback: let IPython use _repr_html_fallback_() instead warnings.warn( "Anywidget mode is not available. " "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " f"Falling back to static HTML. Error: {traceback.format_exc()}" ) - # Don't return anything - let IPython fall back to _repr_html_() + # Don't return anything - let IPython fall back to _repr_html_fallback_() pass - return {"text/html": self._repr_html_(), "text/plain": repr(self)} + return {"text/html": self._repr_html_fallback_(), "text/plain": repr(self)} def __delitem__(self, key: str): df = self.drop(columns=[key]) diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index 83e1684174..ed5c4ff52f 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -74,11 +74,50 @@ "id": "f289d250", "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. \n", + " Query processed 0 Bytes in a moment of slot time.\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "name": "stdout", "output_type": "stream", "text": [ - "Computation deferred. Computation will process 171.4 MB\n" + "state gender year name number\n", + " AL F 1910 Annie 482\n", + " AL F 1910 Myrtle 104\n", + " AR F 1910 Lillian 56\n", + " CT F 1910 Anne 38\n", + " CT F 1910 Frances 45\n", + " FL F 1910 Margaret 53\n", + " GA F 1910 Mae 73\n", + " GA F 1910 Beatrice 96\n", + " GA F 1910 Lola 47\n", + " IA F 1910 Viola 49\n", + "...\n", + "\n", + "[5552452 rows x 5 columns]\n" ] } ], @@ -143,16 +182,142 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4cc789c49be246bb94967e625986900a", + "model_id": "cb4a27e1d96444bfb641510919f67a3a", "version_major": 2, "version_minor": 1 }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stategenderyearnamenumber
0ALF1910Annie482
1ALF1910Myrtle104
2ARF1910Lillian56
3CTF1910Anne38
4CTF1910Frances45
5FLF1910Margaret53
6GAF1910Mae73
7GAF1910Beatrice96
8GAF1910Lola47
9IAF1910Viola49
\n", + "

10 rows × 5 columns

\n", + "
[5552452 rows x 5 columns in total]" + ], "text/plain": [ - "TableWidget(page_size=10, row_count=5552452, table_html='Job bigframes-dev:US.job_NKlR3QmJIAgNiezHGjjstO6ii2Qc details]\n", " " ], "text/plain": [ @@ -370,19 +535,228 @@ "metadata": {}, "output_type": "display_data" }, + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "✅ Completed. " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "2335d3161b704a6da85165dbebf5ca0f", + "model_id": "a307efb2635a4a10a9f6d0fc75c52c67", "version_major": 2, "version_minor": 1 }, + "text/html": [ + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
resultgcs_pathissuerlanguagepublication_dateclass_internationalclass_usapplication_numberfiling_datepriority_date_eurepresentative_line_1_euapplicant_line_1inventor_line_1title_line_1number
0{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H05B 6/12<NA>18165514.303.04.201830.03.2017<NA>BSH Hausger√§te GmbHAcero Acero, JesusVORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNGEP 3 383 141 A2
1{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE29.08.018E04H 6/12<NA>18157874.121.02.201822.02.2017Liedtke & Partner PatentanwälteSHB Hebezeugbau GmbHVOLGER, AlexanderSTEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSEREP 3 366 869 A1
2{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H01L 21/20<NA>18166536.516.02.2016<NA>Scheider, Sascha et alEV Group E. Thallner GmbHKurz, FlorianVORRICHTUNG ZUM BONDEN VON SUBSTRATENEP 3 382 744 A1
3{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018G06F 11/30<NA>18157347.819.02.201831.03.2017Hoffmann EitleFUJITSU LIMITEDKukihara, KensukeMETHOD EXECUTED BY A COMPUTER, INFORMATION PRO...EP 3 382 553 A1
4{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018A01K 31/00<NA>18171005.405.02.201505.02.2014Stork Bamberger PatentanwälteLinco Food Systems A/SThrane, UffeMASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E...EP 3 381 276 A1
\n", + "

5 rows × 15 columns

\n", + "[5 rows x 15 columns in total]" + ], "text/plain": [ - "TableWidget(page_size=10, row_count=5, table_html=' 18165514.3 03.04.2018 \n", + " 29.08.018 E04H 6/12 18157874.1 21.02.2018 \n", + " 03.10.2018 H01L 21/20 18166536.5 16.02.2016 \n", + " 03.10.2018 G06F 11/30 18157347.8 19.02.2018 \n", + " 03.10.2018 A01K 31/00 18171005.4 05.02.2015 \n", + "\n", + "priority_date_eu representative_line_1_eu applicant_line_1 \\\n", + " 30.03.2017 BSH Hausgeräte GmbH \n", + " 22.02.2017 Liedtke & Partner Patentanwälte SHB Hebezeugbau GmbH \n", + " Scheider, Sascha et al EV Group E. Thallner GmbH \n", + " 31.03.2017 Hoffmann Eitle FUJITSU LIMITED \n", + " 05.02.2014 Stork Bamberger Patentanwälte Linco Food Systems A/S \n", + "\n", + " inventor_line_1 title_line_1 \\\n", + "Acero Acero, Jesus VORRICHTUNG ZUR INDUKTIVEN ENERGIEÜBERTRAGUNG \n", + " VOLGER, Alexander STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER \n", + " Kurz, Florian VORRICHTUNG ZUM BONDEN VON SUBSTRATEN \n", + " Kukihara, Kensuke METHOD EXECUTED BY A COMPUTER, INFORMATION PROC... \n", + " Thrane, Uffe MASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER EI... \n", + "\n", + " number \n", + "EP 3 383 141 A2 \n", + "EP 3 366 869 A1 \n", + "EP 3 382 744 A1 \n", + "EP 3 382 553 A1 \n", + "EP 3 381 276 A1 \n", + "\n", + "[5 rows x 15 columns]" ] }, + "execution_count": 10, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index c15cf90c04..a5beb35fd1 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -690,11 +690,9 @@ def test_repr_mimebundle_anywidget_fallback(paginated_bf_df: bf.dataframe.DataFr """ Test that _repr_mimebundle_ falls back to static html when anywidget is not available. """ - with bf.option_context("display.repr_mode", "anywidget"): - # Use a mock to simulate the absence of the 'anywidget' module. - with mock.patch.dict( - "sys.modules", {"anywidget": None, "IPython": mock.MagicMock()} - ): + with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2): + # Mock the ANYWIDGET_INSTALLED flag to simulate absence of anywidget + with mock.patch("bigframes.display.anywidget.ANYWIDGET_INSTALLED", False): bundle = paginated_bf_df._repr_mimebundle_() assert "application/vnd.jupyter.widget-view+json" not in bundle assert "text/html" in bundle diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 19d3c67e19..4ae6ff1421 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -968,7 +968,8 @@ def test_repr_html_w_all_rows(scalars_dfs, session): executions_pre = metrics.execution_count # When there are 10 or fewer rows, the outputs should be identical except for the extra note. - actual = scalars_df.head(10)._repr_html_() + bundle = scalars_df.head(10)._repr_mimebundle_() + actual = bundle["text/html"] executions_post = metrics.execution_count with display_options.pandas_repr(bigframes.options.display): diff --git a/tests/unit/test_dataframe_polars.py b/tests/unit/test_dataframe_polars.py index b83380d789..49c8ff6b77 100644 --- a/tests/unit/test_dataframe_polars.py +++ b/tests/unit/test_dataframe_polars.py @@ -745,7 +745,8 @@ def test_repr_html_w_all_rows(scalars_dfs, session): pandas_df.index.name = scalars_df.index.name # When there are 10 or fewer rows, the outputs should be identical except for the extra note. - actual = scalars_df.head(10)._repr_html_() + bundle = scalars_df.head(10)._repr_mimebundle_() + actual = bundle["text/html"] with display_options.pandas_repr(bigframes.options.display): pandas_repr = pandas_df.head(10)._repr_html_() From 3cd960ee42cc005a0591a4b66ce62273f6260cd5 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 14 Nov 2025 04:54:23 +0000 Subject: [PATCH 08/17] code refactor --- bigframes/dataframe.py | 75 ++++---- notebooks/dataframes/anywidget_mode.ipynb | 214 +++++++++++----------- tests/system/small/test_dataframe.py | 2 +- tests/unit/test_dataframe_polars.py | 2 +- 4 files changed, 147 insertions(+), 146 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 1a8fdc6b80..1f26e2fcb2 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -815,6 +815,7 @@ def __repr__(self) -> str: repr_string = pandas_df.to_string(**to_string_kwargs) # Modify the end of the string to reflect count. + # Remove pandas' default row/column summary to add our own. lines = repr_string.split("\n") pattern = re.compile("\\[[0-9]+ rows x [0-9]+ columns\\]") if pattern.match(lines[-1]): @@ -912,52 +913,52 @@ def obj_ref_rt_to_html(obj_ref_rt) -> str: html_string += f"[{row_count} rows x {column_count} columns in total]" return html_string + def _get_anywidget_bundle(self, include=None, exclude=None): + """ + Helper method to create and return the anywidget mimebundle. + This function encapsulates the logic for anywidget display. + """ + from bigframes import display + + # Process blob columns if needed + self._cached() + df = self.copy() + if bigframes.options.display.blob_display: + blob_cols = [ + series_name + for series_name, series in df.items() + if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE + ] + for col in blob_cols: + df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True) + + # Create and display the widget + widget = display.TableWidget(df) + widget_repr_result = widget._repr_mimebundle_(include=include, exclude=exclude) + + # Handle both tuple (data, metadata) and dict returns + if isinstance(widget_repr_result, tuple): + widget_repr = dict(widget_repr_result[0]) # Extract data dict from tuple + else: + widget_repr = dict(widget_repr_result) + + # Use deferred repr for text/plain of anywidget display. + # This avoids kicking off a query when the user is just + # printing the last expression in a cell. + widget_repr["text/plain"] = repr(df) + widget_repr["text/html"] = self._repr_html_fallback_() + return widget_repr + def _repr_mimebundle_(self, include=None, exclude=None): """ Custom display method for IPython/Jupyter environments. This is called by IPython's display system when the object is displayed. """ opts = bigframes.options.display - # Only handle widget display in anywidget mode if opts.repr_mode == "anywidget": try: - from bigframes import display - - # Process blob columns if needed - self._cached() - df = self.copy() - if bigframes.options.display.blob_display: - blob_cols = [ - series_name - for series_name, series in df.items() - if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE - ] - for col in blob_cols: - df[col] = df[col].blob._get_runtime( - mode="R", with_metadata=True - ) - - # Create and display the widget - widget = display.TableWidget(df) - widget_repr_result = widget._repr_mimebundle_( - include=include, exclude=exclude - ) - - # Handle both tuple (data, metadata) and dict returns - if isinstance(widget_repr_result, tuple): - widget_repr = dict( - widget_repr_result[0] - ) # Extract data dict from tuple - else: - widget_repr = dict(widget_repr_result) - - # Use deferred repr for text/plain of anywidget display. - # This avoids kicking off a query when the user is just - # printing the last expression in a cell. - widget_repr["text/plain"] = repr(df) - widget_repr["text/html"] = self._repr_html_fallback_() - return widget_repr + return self._get_anywidget_bundle(include=include, exclude=exclude) except (AttributeError, ValueError, ImportError): # Fallback: let IPython use _repr_html_fallback_() instead diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index ed5c4ff52f..24a45c88a8 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -105,16 +105,16 @@ "output_type": "stream", "text": [ "state gender year name number\n", - " AL F 1910 Annie 482\n", - " AL F 1910 Myrtle 104\n", - " AR F 1910 Lillian 56\n", - " CT F 1910 Anne 38\n", - " CT F 1910 Frances 45\n", - " FL F 1910 Margaret 53\n", - " GA F 1910 Mae 73\n", - " GA F 1910 Beatrice 96\n", - " GA F 1910 Lola 47\n", - " IA F 1910 Viola 49\n", + " AL F 1910 Vera 71\n", + " AR F 1910 Viola 37\n", + " AR F 1910 Alice 57\n", + " AR F 1910 Edna 95\n", + " AR F 1910 Ollie 40\n", + " CA F 1910 Beatrice 37\n", + " CT F 1910 Marion 36\n", + " CT F 1910 Marie 36\n", + " FL F 1910 Alice 53\n", + " GA F 1910 Thelma 133\n", "...\n", "\n", "[5552452 rows x 5 columns]\n" @@ -182,7 +182,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "cb4a27e1d96444bfb641510919f67a3a", + "model_id": "dcf0f4199d86493cb30ec2c94bbcbd78", "version_major": 2, "version_minor": 1 }, @@ -218,80 +218,80 @@ " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", "
ALF1910Annie482Vera71
1ALARF1910Myrtle104Viola37
2ARF1910Lillian56Alice57
3CTARF1910Anne38Edna95
4CTARF1910Frances45Ollie40
5FLCAF1910Margaret53Beatrice37
6GACTF1910Mae73Marion36
7GACTF1910Beatrice96Marie36
8GAFLF1910Lola47Alice53
9IAGAF1910Viola49Thelma133
\n", @@ -300,16 +300,16 @@ ], "text/plain": [ "state gender year name number\n", - " AL F 1910 Annie 482\n", - " AL F 1910 Myrtle 104\n", - " AR F 1910 Lillian 56\n", - " CT F 1910 Anne 38\n", - " CT F 1910 Frances 45\n", - " FL F 1910 Margaret 53\n", - " GA F 1910 Mae 73\n", - " GA F 1910 Beatrice 96\n", - " GA F 1910 Lola 47\n", - " IA F 1910 Viola 49\n", + " AL F 1910 Vera 71\n", + " AR F 1910 Viola 37\n", + " AR F 1910 Alice 57\n", + " AR F 1910 Edna 95\n", + " AR F 1910 Ollie 40\n", + " CA F 1910 Beatrice 37\n", + " CT F 1910 Marion 36\n", + " CT F 1910 Marie 36\n", + " FL F 1910 Alice 53\n", + " GA F 1910 Thelma 133\n", "...\n", "\n", "[5552452 rows x 5 columns]" @@ -360,7 +360,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "3d186029fa6c47309543526642dda588", + "model_id": "cea93652992346149c8cf05ea647f522", "version_major": 2, "version_minor": 1 }, @@ -462,7 +462,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "48f2d276428942398ea7d09635c02a46", + "model_id": "b3137e47084a4040bf37425b55b2a3b6", "version_major": 2, "version_minor": 1 }, @@ -502,7 +502,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 85.9 kB in 25 seconds of slot time. [Job bigframes-dev:US.job_NKlR3QmJIAgNiezHGjjstO6ii2Qc details]\n", + " Query processed 85.9 kB in 12 seconds of slot time.\n", " " ], "text/plain": [ @@ -572,7 +572,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a307efb2635a4a10a9f6d0fc75c52c67", + "model_id": "92cd2243b8ce4713a58bd48a139213f0", "version_major": 2, "version_minor": 1 }, @@ -619,24 +619,6 @@ " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", " DE\n", - " 03.10.2018\n", - " H05B 6/12\n", - " <NA>\n", - " 18165514.3\n", - " 03.04.2018\n", - " 30.03.2017\n", - " <NA>\n", - " BSH Hausger√§te GmbH\n", - " Acero Acero, Jesus\n", - " VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG\n", - " EP 3 383 141 A2\n", - " \n", - " \n", - " 1\n", - " {'application_number': None, 'class_internatio...\n", - " gs://gcs-public-data--labeled-patents/espacene...\n", - " EU\n", - " DE\n", " 29.08.018\n", " E04H 6/12\n", " <NA>\n", @@ -650,6 +632,24 @@ " EP 3 366 869 A1\n", " \n", " \n", + " 1\n", + " {'application_number': None, 'class_internatio...\n", + " gs://gcs-public-data--labeled-patents/espacene...\n", + " EU\n", + " DE\n", + " 03.10.2018\n", + " H05B 6/12\n", + " <NA>\n", + " 18165514.3\n", + " 03.04.2018\n", + " 30.03.2017\n", + " <NA>\n", + " BSH Hausger√§te GmbH\n", + " Acero Acero, Jesus\n", + " VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG\n", + " EP 3 383 141 A2\n", + " \n", + " \n", " 2\n", " {'application_number': None, 'class_internatio...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", @@ -709,47 +709,47 @@ "[5 rows x 15 columns in total]" ], "text/plain": [ - " result \\\n", - "{'application_number': None, 'class_internation... \n", - "{'application_number': None, 'class_internation... \n", - "{'application_number': None, 'class_internation... \n", - "{'application_number': None, 'class_internation... \n", - "{'application_number': None, 'class_internation... \n", + " result \\\n", + "0 {'application_number': None, 'class_internatio... \n", + "1 {'application_number': None, 'class_internatio... \n", + "2 {'application_number': None, 'class_internatio... \n", + "3 {'application_number': None, 'class_internatio... \n", + "4 {'application_number': None, 'class_internatio... \n", "\n", - " gcs_path issuer language \\\n", - "gs://gcs-public-data--labeled-patents/espacenet... EU DE \n", - "gs://gcs-public-data--labeled-patents/espacenet... EU DE \n", - "gs://gcs-public-data--labeled-patents/espacenet... EU DE \n", - "gs://gcs-public-data--labeled-patents/espacenet... EU DE \n", - "gs://gcs-public-data--labeled-patents/espacenet... EU DE \n", + " gcs_path issuer language \\\n", + "0 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "1 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "2 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "3 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", "\n", - "publication_date class_international class_us application_number filing_date \\\n", - " 03.10.2018 H05B 6/12 18165514.3 03.04.2018 \n", - " 29.08.018 E04H 6/12 18157874.1 21.02.2018 \n", - " 03.10.2018 H01L 21/20 18166536.5 16.02.2016 \n", - " 03.10.2018 G06F 11/30 18157347.8 19.02.2018 \n", - " 03.10.2018 A01K 31/00 18171005.4 05.02.2015 \n", + " publication_date class_international class_us application_number \\\n", + "0 29.08.018 E04H 6/12 18157874.1 \n", + "1 03.10.2018 H05B 6/12 18165514.3 \n", + "2 03.10.2018 H01L 21/20 18166536.5 \n", + "3 03.10.2018 G06F 11/30 18157347.8 \n", + "4 03.10.2018 A01K 31/00 18171005.4 \n", "\n", - "priority_date_eu representative_line_1_eu applicant_line_1 \\\n", - " 30.03.2017 BSH Hausger√§te GmbH \n", - " 22.02.2017 Liedtke & Partner Patentanw√§lte SHB Hebezeugbau GmbH \n", - " Scheider, Sascha et al EV Group E. Thallner GmbH \n", - " 31.03.2017 Hoffmann Eitle FUJITSU LIMITED \n", - " 05.02.2014 Stork Bamberger Patentanw√§lte Linco Food Systems A/S \n", + " filing_date priority_date_eu representative_line_1_eu \\\n", + "0 21.02.2018 22.02.2017 Liedtke & Partner Patentanw√§lte \n", + "1 03.04.2018 30.03.2017 \n", + "2 16.02.2016 Scheider, Sascha et al \n", + "3 19.02.2018 31.03.2017 Hoffmann Eitle \n", + "4 05.02.2015 05.02.2014 Stork Bamberger Patentanw√§lte \n", "\n", - " inventor_line_1 title_line_1 \\\n", - "Acero Acero, Jesus VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG \n", - " VOLGER, Alexander STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER \n", - " Kurz, Florian VORRICHTUNG ZUM BONDEN VON SUBSTRATEN \n", - " Kukihara, Kensuke METHOD EXECUTED BY A COMPUTER, INFORMATION PROC... \n", - " Thrane, Uffe MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER EI... \n", + " applicant_line_1 inventor_line_1 \\\n", + "0 SHB Hebezeugbau GmbH VOLGER, Alexander \n", + "1 BSH Hausger√§te GmbH Acero Acero, Jesus \n", + "2 EV Group E. Thallner GmbH Kurz, Florian \n", + "3 FUJITSU LIMITED Kukihara, Kensuke \n", + "4 Linco Food Systems A/S Thrane, Uffe \n", "\n", - " number \n", - "EP 3 383 141 A2 \n", - "EP 3 366 869 A1 \n", - "EP 3 382 744 A1 \n", - "EP 3 382 553 A1 \n", - "EP 3 381 276 A1 \n", + " title_line_1 number \n", + "0 STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER EP 3 366 869 A1 \n", + "1 VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG EP 3 383 141 A2 \n", + "2 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", + "3 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", + "4 MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", "\n", "[5 rows x 15 columns]" ] diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index 4ae6ff1421..6b3f9a2c13 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -958,7 +958,7 @@ def test_repr_w_display_options(scalars_dfs, session): assert (executions_post - executions_pre) <= 3 -def test_repr_html_w_all_rows(scalars_dfs, session): +def test_mimebundle_html_repr_w_all_rows(scalars_dfs, session): metrics = session._metrics scalars_df, _ = scalars_dfs # get a pandas df of the expected format diff --git a/tests/unit/test_dataframe_polars.py b/tests/unit/test_dataframe_polars.py index 49c8ff6b77..39dbacd087 100644 --- a/tests/unit/test_dataframe_polars.py +++ b/tests/unit/test_dataframe_polars.py @@ -737,7 +737,7 @@ def test_join_repr(scalars_dfs): assert actual == expected -def test_repr_html_w_all_rows(scalars_dfs, session): +def test_mimebundle_html_repr_w_all_rows(scalars_dfs, session): scalars_df, _ = scalars_dfs # get a pandas df of the expected format df, _ = scalars_df._block.to_pandas() From b0f15255c21932ad0131b77bb6b8e7df1dee623d Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 14 Nov 2025 05:05:49 +0000 Subject: [PATCH 09/17] fix mypy --- bigframes/streaming/dataframe.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bigframes/streaming/dataframe.py b/bigframes/streaming/dataframe.py index 7dc9e964bc..2e3b1accd1 100644 --- a/bigframes/streaming/dataframe.py +++ b/bigframes/streaming/dataframe.py @@ -291,13 +291,13 @@ def __repr__(self, *args, **kwargs): __repr__.__doc__ = _curate_df_doc(inspect.getdoc(dataframe.DataFrame.__repr__)) - def _repr_html_(self, *args, **kwargs): - return _return_type_wrapper(self._df._repr_html_, StreamingDataFrame)( + def _repr_html_fallback_(self, *args, **kwargs): + return _return_type_wrapper(self._df._repr_html_fallback_, StreamingDataFrame)( *args, **kwargs ) - _repr_html_.__doc__ = _curate_df_doc( - inspect.getdoc(dataframe.DataFrame._repr_html_) + _repr_html_fallback_.__doc__ = _curate_df_doc( + inspect.getdoc(dataframe.DataFrame._repr_html_fallback_) ) @property From 7e478952f0c9441dc5c0bf3a4d8adff90860df25 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Fri, 14 Nov 2025 06:27:13 +0000 Subject: [PATCH 10/17] Revert unncessary change --- bigframes/dataframe.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 1f26e2fcb2..b25851622a 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -789,7 +789,9 @@ def __repr__(self) -> str: opts = bigframes.options.display max_results = opts.max_rows - if opts.repr_mode == "deferred": + # anywdiget mode uses the same display logic as the "deferred" mode + # for faster execution + if opts.repr_mode in ("deferred", "anywidget"): return formatter.repr_query_job(self._compute_dry_run()) # TODO(swast): pass max_columns and get the true column count back. Maybe @@ -815,7 +817,6 @@ def __repr__(self) -> str: repr_string = pandas_df.to_string(**to_string_kwargs) # Modify the end of the string to reflect count. - # Remove pandas' default row/column summary to add our own. lines = repr_string.split("\n") pattern = re.compile("\\[[0-9]+ rows x [0-9]+ columns\\]") if pattern.match(lines[-1]): From 736929a0defbb0bfc32f4b9ca5f36f7a483562b0 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 18 Nov 2025 21:11:28 +0000 Subject: [PATCH 11/17] use test/plain for print(df) --- bigframes/dataframe.py | 10 +- notebooks/dataframes/anywidget_mode.ipynb | 298 ++++++---------------- tests/system/small/test_anywidget.py | 21 +- 3 files changed, 101 insertions(+), 228 deletions(-) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index b25851622a..97fd22e1c2 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -789,9 +789,7 @@ def __repr__(self) -> str: opts = bigframes.options.display max_results = opts.max_rows - # anywdiget mode uses the same display logic as the "deferred" mode - # for faster execution - if opts.repr_mode in ("deferred", "anywidget"): + if opts.repr_mode == "deferred": return formatter.repr_query_job(self._compute_dry_run()) # TODO(swast): pass max_columns and get the true column count back. Maybe @@ -944,9 +942,9 @@ def _get_anywidget_bundle(self, include=None, exclude=None): widget_repr = dict(widget_repr_result) # Use deferred repr for text/plain of anywidget display. - # This avoids kicking off a query when the user is just - # printing the last expression in a cell. - widget_repr["text/plain"] = repr(df) + # This ensures consistency with __repr__ and avoids unnecessary query execution + # when the user is just printing the last expression in a cell. + widget_repr["text/plain"] = formatter.repr_query_job(df._compute_dry_run()) widget_repr["text/html"] = self._repr_html_fallback_() return widget_repr diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index 24a45c88a8..0f7c45d8ce 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -36,7 +36,6 @@ "id": "ca22f059", "metadata": {}, "outputs": [], - "outputs": [], "source": [ "import bigframes.pandas as bpd" ] @@ -70,7 +69,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "f289d250", "metadata": {}, "outputs": [ @@ -104,17 +103,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "state gender year name number\n", - " AL F 1910 Vera 71\n", - " AR F 1910 Viola 37\n", - " AR F 1910 Alice 57\n", - " AR F 1910 Edna 95\n", - " AR F 1910 Ollie 40\n", - " CA F 1910 Beatrice 37\n", - " CT F 1910 Marion 36\n", - " CT F 1910 Marie 36\n", - " FL F 1910 Alice 53\n", - " GA F 1910 Thelma 133\n", + "state gender year name number\n", + " AL F 1910 Cora 61\n", + " AL F 1910 Anna 74\n", + " AR F 1910 Willie 132\n", + " CO F 1910 Anna 42\n", + " FL F 1910 Louise 70\n", + " GA F 1910 Catherine 57\n", + " IL F 1910 Jessie 43\n", + " IN F 1910 Anna 100\n", + " IN F 1910 Pauline 77\n", + " IN F 1910 Beulah 39\n", "...\n", "\n", "[5552452 rows x 5 columns]\n" @@ -182,6 +181,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { + "model_id": "5384f97c565f4de9814893c5d39a27e9", "model_id": "dcf0f4199d86493cb30ec2c94bbcbd78", "version_major": 2, "version_minor": 1 @@ -218,80 +218,80 @@ " AL\n", " F\n", " 1910\n", - " Vera\n", - " 71\n", + " Cora\n", + " 61\n", " \n", " \n", " 1\n", - " AR\n", + " AL\n", " F\n", " 1910\n", - " Viola\n", - " 37\n", + " Anna\n", + " 74\n", " \n", " \n", " 2\n", " AR\n", " F\n", " 1910\n", - " Alice\n", - " 57\n", + " Willie\n", + " 132\n", " \n", " \n", " 3\n", - " AR\n", + " CO\n", " F\n", " 1910\n", - " Edna\n", - " 95\n", + " Anna\n", + " 42\n", " \n", " \n", " 4\n", - " AR\n", + " FL\n", " F\n", " 1910\n", - " Ollie\n", - " 40\n", + " Louise\n", + " 70\n", " \n", " \n", " 5\n", - " CA\n", + " GA\n", " F\n", " 1910\n", - " Beatrice\n", - " 37\n", + " Catherine\n", + " 57\n", " \n", " \n", " 6\n", - " CT\n", + " IL\n", " F\n", " 1910\n", - " Marion\n", - " 36\n", + " Jessie\n", + " 43\n", " \n", " \n", " 7\n", - " CT\n", + " IN\n", " F\n", " 1910\n", - " Marie\n", - " 36\n", + " Anna\n", + " 100\n", " \n", " \n", " 8\n", - " FL\n", + " IN\n", " F\n", " 1910\n", - " Alice\n", - " 53\n", + " Pauline\n", + " 77\n", " \n", " \n", " 9\n", - " GA\n", + " IN\n", " F\n", " 1910\n", - " Thelma\n", - " 133\n", + " Beulah\n", + " 39\n", " \n", " \n", "\n", @@ -299,20 +299,7 @@ "[5552452 rows x 5 columns in total]" ], "text/plain": [ - "state gender year name number\n", - " AL F 1910 Vera 71\n", - " AR F 1910 Viola 37\n", - " AR F 1910 Alice 57\n", - " AR F 1910 Edna 95\n", - " AR F 1910 Ollie 40\n", - " CA F 1910 Beatrice 37\n", - " CT F 1910 Marion 36\n", - " CT F 1910 Marie 36\n", - " FL F 1910 Alice 53\n", - " GA F 1910 Thelma 133\n", - "...\n", - "\n", - "[5552452 rows x 5 columns]" + "Computation deferred. Computation will process 171.4 MB" ] }, "execution_count": 6, @@ -321,7 +308,7 @@ } ], "source": [ - "df.set_index(\"name\")" + "df" ] }, { @@ -360,6 +347,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { + "model_id": "bb32dbe7265e43dca0e8c341be36be84", "model_id": "cea93652992346149c8cf05ea647f522", "version_major": 2, "version_minor": 1 @@ -439,8 +427,6 @@ { "data": { "text/html": [ - "✅ Completed. \n", - " Query processed 171.4 MB in a moment of slot time.\n", "✅ Completed. \n", " Query processed 171.4 MB in a moment of slot time.\n", " " @@ -462,6 +448,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { + "model_id": "f4dbd2b24e9a43e2a8003638e427ee1f", "model_id": "b3137e47084a4040bf37425b55b2a3b6", "version_major": 2, "version_minor": 1 @@ -502,6 +489,7 @@ "data": { "text/html": [ "✅ Completed. \n", + " Query processed 85.9 kB in 30 seconds of slot time.\n", " Query processed 85.9 kB in 12 seconds of slot time.\n", " " ], @@ -516,7 +504,6 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", "instead of using `db_dtypes` in the future when available in pandas\n", "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", @@ -535,6 +522,16 @@ "metadata": {}, "output_type": "display_data" }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", + "instead of using `db_dtypes` in the future when available in pandas\n", + "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", + " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" + ] + }, { "data": { "text/html": [ @@ -559,20 +556,10 @@ "metadata": {}, "output_type": "display_data" }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/dtypes.py:987: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n", - "instead of using `db_dtypes` in the future when available in pandas\n", - "(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n", - " warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n" - ] - }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "92cd2243b8ce4713a58bd48a139213f0", + "model_id": "fe0abfd9a9bd4ee8aac06a95631d7f7b", "version_major": 2, "version_minor": 1 }, @@ -638,24 +625,6 @@ " EU\n", " DE\n", " 03.10.2018\n", - " H05B 6/12\n", - " <NA>\n", - " 18165514.3\n", - " 03.04.2018\n", - " 30.03.2017\n", - " <NA>\n", - " BSH Hausger√§te GmbH\n", - " Acero Acero, Jesus\n", - " VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG\n", - " EP 3 383 141 A2\n", - " \n", - " \n", - " 2\n", - " {'application_number': None, 'class_internatio...\n", - " gs://gcs-public-data--labeled-patents/espacene...\n", - " EU\n", - " DE\n", - " 03.10.2018\n", " H01L 21/20\n", " <NA>\n", " 18166536.5\n", @@ -668,7 +637,7 @@ " EP 3 382 744 A1\n", " \n", " \n", - " 3\n", + " 2\n", " {'application_number': None, 'class_internatio...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", @@ -686,7 +655,7 @@ " EP 3 382 553 A1\n", " \n", " \n", - " 4\n", + " 3\n", " {'application_number': None, 'class_internatio...\n", " gs://gcs-public-data--labeled-patents/espacene...\n", " EU\n", @@ -703,55 +672,31 @@ " MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E...\n", " EP 3 381 276 A1\n", " \n", + " \n", + " 4\n", + " {'application_number': None, 'class_internatio...\n", + " gs://gcs-public-data--labeled-patents/espacene...\n", + " EU\n", + " DE\n", + " 03.10.2018\n", + " H05B 6/12\n", + " <NA>\n", + " 18165514.3\n", + " 03.04.2018\n", + " 30.03.2017\n", + " <NA>\n", + " BSH Hausger√§te GmbH\n", + " Acero Acero, Jesus\n", + " VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG\n", + " EP 3 383 141 A2\n", + " \n", " \n", "\n", "

5 rows × 15 columns

\n", "[5 rows x 15 columns in total]" ], "text/plain": [ - " result \\\n", - "0 {'application_number': None, 'class_internatio... \n", - "1 {'application_number': None, 'class_internatio... \n", - "2 {'application_number': None, 'class_internatio... \n", - "3 {'application_number': None, 'class_internatio... \n", - "4 {'application_number': None, 'class_internatio... \n", - "\n", - " gcs_path issuer language \\\n", - "0 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", - "1 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", - "2 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", - "3 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", - "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", - "\n", - " publication_date class_international class_us application_number \\\n", - "0 29.08.018 E04H 6/12 18157874.1 \n", - "1 03.10.2018 H05B 6/12 18165514.3 \n", - "2 03.10.2018 H01L 21/20 18166536.5 \n", - "3 03.10.2018 G06F 11/30 18157347.8 \n", - "4 03.10.2018 A01K 31/00 18171005.4 \n", - "\n", - " filing_date priority_date_eu representative_line_1_eu \\\n", - "0 21.02.2018 22.02.2017 Liedtke & Partner Patentanw√§lte \n", - "1 03.04.2018 30.03.2017 \n", - "2 16.02.2016 Scheider, Sascha et al \n", - "3 19.02.2018 31.03.2017 Hoffmann Eitle \n", - "4 05.02.2015 05.02.2014 Stork Bamberger Patentanw√§lte \n", - "\n", - " applicant_line_1 inventor_line_1 \\\n", - "0 SHB Hebezeugbau GmbH VOLGER, Alexander \n", - "1 BSH Hausger√§te GmbH Acero Acero, Jesus \n", - "2 EV Group E. Thallner GmbH Kurz, Florian \n", - "3 FUJITSU LIMITED Kukihara, Kensuke \n", - "4 Linco Food Systems A/S Thrane, Uffe \n", - "\n", - " title_line_1 number \n", - "0 STEUERUNGSSYSTEM F√úR AUTOMATISCHE PARKH√ÑUSER EP 3 366 869 A1 \n", - "1 VORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNG EP 3 383 141 A2 \n", - "2 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", - "3 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", - "4 MASTH√ÑHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", - "\n", - "[5 rows x 15 columns]" + "Computation deferred. Computation will process 0 Bytes" ] }, "execution_count": 10, @@ -771,93 +716,6 @@ " LIMIT 5;\n", "\"\"\")" ] - }, - { - "cell_type": "markdown", - "id": "multi-index-display-markdown", - "metadata": {}, - "source": [ - "## Display Multi-Index DataFrame in anywidget mode\n", - "This section demonstrates how BigFrames can display a DataFrame with multiple levels of indexing (a \"multi-index\") when using the `anywidget` display mode." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "ad7482aa", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 483.3 GB in 51 minutes of slot time. [Job bigframes-dev:US.3eace7c0-7776-48d6-925c-965be33d8738 details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "✅ Completed. \n", - " Query processed 124.4 MB in 7 seconds of slot time. [Job bigframes-dev:US.job_UJ5cx4R1jW5cNxq_1H1x-9-ATfqS details]\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "3f9652b5fdc0441eac2b05ab36d571d0", - "version_major": 2, - "version_minor": 1 - }, - "text/plain": [ - "TableWidget(page_size=10, row_count=3967869, table_html=' seven_days_ago]\n", - " \n", - "# Create a multi-index by grouping by date and project\n", - "pypi_df_recent['date'] = pypi_df_recent['timestamp'].dt.date\n", - "multi_index_df = pypi_df_recent.groupby([\"date\", \"project\"]).size().to_frame(\"downloads\")\n", - " \n", - "# Display the DataFrame with the multi-index\n", - "multi_index_df" - ] } ], "metadata": { diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index a5beb35fd1..a85cc40857 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -686,7 +686,9 @@ def test_widget_with_unknown_row_count_empty_dataframe( assert widget.page == 0 -def test_repr_mimebundle_anywidget_fallback(paginated_bf_df: bf.dataframe.DataFrame): +def test_repr_mimebundle_should_fallback_to_html_if_anywidget_is_unavailable( + paginated_bf_df: bf.dataframe.DataFrame, +): """ Test that _repr_mimebundle_ falls back to static html when anywidget is not available. """ @@ -702,7 +704,9 @@ def test_repr_mimebundle_anywidget_fallback(paginated_bf_df: bf.dataframe.DataFr assert "page_2_row_1" not in html -def test_repr_mimebundle_anywidget_success(paginated_bf_df: bf.dataframe.DataFrame): +def test_repr_mimebundle_should_return_widget_view_if_anywidget_is_available( + paginated_bf_df: bf.dataframe.DataFrame, +): """ Test that _repr_mimebundle_ returns a widget view when anywidget is available. """ @@ -713,6 +717,19 @@ def test_repr_mimebundle_anywidget_success(paginated_bf_df: bf.dataframe.DataFra assert "text/plain" in bundle +def test_repr_in_anywidget_mode_should_not_be_deferred( + paginated_bf_df: bf.dataframe.DataFrame, +): + """ + Test that repr(df) is not deferred in anywidget mode. + This is to ensure that print(df) works as expected. + """ + with bf.option_context("display.repr_mode", "anywidget"): + representation = repr(paginated_bf_df) + assert "Computation deferred" not in representation + assert "page_1_row_1" in representation + + # TODO(b/332316283): Add tests for custom index and multiindex # This may not be necessary for the SQL Cell use case but should be # considered for completeness. From eb61fc0b2fe18087d498d2f4f8deaf70430799a9 Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Tue, 18 Nov 2025 23:43:38 +0000 Subject: [PATCH 12/17] fix failed testcase --- bigframes/core/indexes/base.py | 2 +- notebooks/dataframes/anywidget_mode.ipynb | 128 +++++++++++----------- tests/system/small/test_progress_bar.py | 9 +- 3 files changed, 72 insertions(+), 67 deletions(-) diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index 41b32d99e4..8d87801767 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -378,7 +378,7 @@ def __repr__(self) -> __builtins__.str: max_results = opts.max_rows # anywdiget mode uses the same display logic as the "deferred" mode # for faster execution - if opts.repr_mode in ("deferred", "anywidget"): + if opts.repr_mode == "deferred": _, dry_run_query_job = self._block._compute_dry_run() return formatter.repr_query_job(dry_run_query_job) diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index 0f7c45d8ce..2b9b0a1fa2 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -69,7 +69,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "f289d250", "metadata": {}, "outputs": [ @@ -104,16 +104,16 @@ "output_type": "stream", "text": [ "state gender year name number\n", - " AL F 1910 Cora 61\n", - " AL F 1910 Anna 74\n", - " AR F 1910 Willie 132\n", - " CO F 1910 Anna 42\n", - " FL F 1910 Louise 70\n", - " GA F 1910 Catherine 57\n", - " IL F 1910 Jessie 43\n", - " IN F 1910 Anna 100\n", - " IN F 1910 Pauline 77\n", - " IN F 1910 Beulah 39\n", + " AL F 1910 Hazel 51\n", + " AL F 1910 Lucy 76\n", + " AR F 1910 Nellie 39\n", + " AR F 1910 Lena 40\n", + " CO F 1910 Thelma 36\n", + " CO F 1910 Ruth 68\n", + " CT F 1910 Elizabeth 86\n", + " DC F 1910 Mary 80\n", + " FL F 1910 Annie 101\n", + " FL F 1910 Alma 39\n", "...\n", "\n", "[5552452 rows x 5 columns]\n" @@ -218,79 +218,79 @@ " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -559,7 +559,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "fe0abfd9a9bd4ee8aac06a95631d7f7b", + "model_id": "54007f0106044ce49520b92a4662192f", "version_major": 2, "version_minor": 1 }, @@ -625,16 +625,16 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -661,16 +661,16 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -679,16 +679,16 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
ALF1910Cora61Hazel51
1ALF1910Anna74Lucy76
2ARF1910Willie132Nellie39
3COARF1910Anna42Lena40
4FLCOF1910Louise70Thelma36
5GACOF1910Catherine57Ruth68
6ILCTF1910Jessie43Elizabeth86
7INDCF1910Anna100Mary80
8INFLF1910Pauline77Annie101
9INFLF1910BeulahAlma39
EUDE03.10.2018H01L 21/20H05B 6/12<NA>18166536.516.02.201618165514.303.04.201830.03.2017<NA>Scheider, Sascha et alEV Group E. Thallner GmbHKurz, FlorianVORRICHTUNG ZUM BONDEN VON SUBSTRATENEP 3 382 744 A1BSH Hausger√§te GmbHAcero Acero, JesusVORRICHTUNG ZUR INDUKTIVEN ENERGIE√úBERTRAGUNGEP 3 383 141 A2
2EUDE03.10.2018A01K 31/00H01L 21/20<NA>18171005.405.02.201505.02.2014Stork Bamberger PatentanwälteLinco Food Systems A/SThrane, UffeMASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E...EP 3 381 276 A118166536.516.02.2016<NA>Scheider, Sascha et alEV Group E. Thallner GmbHKurz, FlorianVORRICHTUNG ZUM BONDEN VON SUBSTRATENEP 3 382 744 A1
4EUDE03.10.2018H05B 6/12<NA>18165514.303.04.201830.03.2017A01K 31/00<NA>BSH Hausgeräte GmbHAcero Acero, JesusVORRICHTUNG ZUR INDUKTIVEN ENERGIEÜBERTRAGUNGEP 3 383 141 A218171005.405.02.201505.02.2014Stork Bamberger PatentanwälteLinco Food Systems A/SThrane, UffeMASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E...EP 3 381 276 A1
\n", diff --git a/tests/system/small/test_progress_bar.py b/tests/system/small/test_progress_bar.py index 0c9c4070f4..d726bfde2c 100644 --- a/tests/system/small/test_progress_bar.py +++ b/tests/system/small/test_progress_bar.py @@ -153,7 +153,9 @@ def test_repr_anywidget_dataframe(penguins_df_default_index: bf.dataframe.DataFr pytest.importorskip("anywidget") with bf.option_context("display.repr_mode", "anywidget"): actual_repr = repr(penguins_df_default_index) - assert EXPECTED_DRY_RUN_MESSAGE in actual_repr + assert "species" in actual_repr + assert "island" in actual_repr + assert "[344 rows x 7 columns]" in actual_repr def test_repr_anywidget_index(penguins_df_default_index: bf.dataframe.DataFrame): @@ -161,4 +163,7 @@ def test_repr_anywidget_index(penguins_df_default_index: bf.dataframe.DataFrame) with bf.option_context("display.repr_mode", "anywidget"): index = penguins_df_default_index.index actual_repr = repr(index) - assert EXPECTED_DRY_RUN_MESSAGE in actual_repr + # In non-interactive environments, should still get a useful summary. + assert "Index" in actual_repr + assert "0, 1, 2, 3, 4" in actual_repr + assert "dtype='Int64'" in actual_repr From 3079a3c3b2e5352b3ef76029170c8e7cc1a327dc Mon Sep 17 00:00:00 2001 From: Shuowei Li Date: Thu, 20 Nov 2025 20:29:05 +0000 Subject: [PATCH 13/17] reuse available data and optimize error handling --- bigframes/core/indexes/base.py | 2 - bigframes/dataframe.py | 132 +++- bigframes/streaming/dataframe.py | 8 +- notebooks/dataframes/anywidget_mode.ipynb | 751 ++++++++++++++-------- tests/system/small/test_anywidget.py | 84 +-- 5 files changed, 673 insertions(+), 304 deletions(-) diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index 8d87801767..9576ca8e18 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -376,8 +376,6 @@ def __repr__(self) -> __builtins__.str: # metadata, like we do with DataFrame. opts = bigframes.options.display max_results = opts.max_rows - # anywdiget mode uses the same display logic as the "deferred" mode - # for faster execution if opts.repr_mode == "deferred": _, dry_run_query_job = self._block._compute_dry_run() return formatter.repr_query_job(dry_run_query_job) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 97fd22e1c2..6f19abc98d 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -827,7 +827,7 @@ def __repr__(self) -> str: lines.append(f"[{row_count} rows x {column_count} columns]") return "\n".join(lines) - def _repr_html_fallback_(self) -> str: + def _repr_html_fallback(self) -> str: """ Returns an html string primarily for use by notebooks for displaying a representation of the DataFrame. Displays 20 rows by default since @@ -941,11 +941,34 @@ def _get_anywidget_bundle(self, include=None, exclude=None): else: widget_repr = dict(widget_repr_result) - # Use deferred repr for text/plain of anywidget display. - # This ensures consistency with __repr__ and avoids unnecessary query execution - # when the user is just printing the last expression in a cell. - widget_repr["text/plain"] = formatter.repr_query_job(df._compute_dry_run()) - widget_repr["text/html"] = self._repr_html_fallback_() + # At this point, we have already executed the query as part of the + # widget construction. Let's use the information available to render + # the HTML and plain text versions. + widget_repr["text/html"] = widget.table_html + + # Re-create the text representation from what we know. + opts = bigframes.options.display + with display_options.pandas_repr(opts): + import pandas.io.formats + + # safe to mutate this, this dict is owned by this code, and does not affect global config + to_string_kwargs = ( + pandas.io.formats.format.get_dataframe_repr_params() # type: ignore + ) + if not self._has_index: + to_string_kwargs.update({"index": False}) + repr_string = widget._cached_data.to_string(**to_string_kwargs) + + lines = repr_string.split("\n") + row_count = widget.row_count + if row_count is not None and row_count > len(widget._cached_data): + lines.append("...") + + lines.append("") + column_count = len(self.columns) + lines.append(f"[{row_count or '?'} rows x {column_count} columns]") + widget_repr["text/plain"] = "\n".join(lines) + return widget_repr def _repr_mimebundle_(self, include=None, exclude=None): @@ -959,17 +982,106 @@ def _repr_mimebundle_(self, include=None, exclude=None): try: return self._get_anywidget_bundle(include=include, exclude=exclude) - except (AttributeError, ValueError, ImportError): - # Fallback: let IPython use _repr_html_fallback_() instead + except ImportError: + # Fallback: let IPython use _repr_html_fallback() instead warnings.warn( "Anywidget mode is not available. " "Please `pip install anywidget traitlets` or `pip install 'bigframes[anywidget]'` to use interactive tables. " f"Falling back to static HTML. Error: {traceback.format_exc()}" ) - # Don't return anything - let IPython fall back to _repr_html_fallback_() + # Don't return anything - let IPython fall back to _repr_html_fallback() pass - return {"text/html": self._repr_html_fallback_(), "text/plain": repr(self)} + # In non-anywidget mode, fetch data once and use it for both HTML + # and plain text representations to avoid multiple queries. + opts = bigframes.options.display + max_results = opts.max_rows + + # Process blob columns first, logic from _repr_html_fallback + self._cached() + df = self.copy() + if bigframes.options.display.blob_display: + blob_cols = [ + series_name + for series_name, series in df.items() + if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE + ] + for col in blob_cols: + df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True) + else: + blob_cols = [] + + pandas_df, row_count, query_job = df._block.retrieve_repr_request_results( + max_results + ) + self._set_internal_query_job(query_job) + column_count = len(pandas_df.columns) + + # Generate HTML representation + with display_options.pandas_repr(opts): + if bigframes.options.display.blob_display and blob_cols: + + def obj_ref_rt_to_html(obj_ref_rt) -> str: + obj_ref_rt_json = json.loads(obj_ref_rt) + obj_ref_details = obj_ref_rt_json["objectref"]["details"] + if "gcs_metadata" in obj_ref_details: + gcs_metadata = obj_ref_details["gcs_metadata"] + content_type = typing.cast( + str, gcs_metadata.get("content_type", "") + ) + if content_type.startswith("image"): + size_str = "" + if bigframes.options.display.blob_display_width: + size_str = f' width="{bigframes.options.display.blob_display_width}"' + if bigframes.options.display.blob_display_height: + size_str = ( + size_str + + f' height="{bigframes.options.display.blob_display_height}"' + ) + url = obj_ref_rt_json["access_urls"]["read_url"] + return f'' + + return f'uri: {obj_ref_rt_json["objectref"]["uri"]}, authorizer: {obj_ref_rt_json["objectref"]["authorizer"]}' + + formatters = {blob_col: obj_ref_rt_to_html for blob_col in blob_cols} + with pandas.option_context("display.max_colwidth", None): + html_string = pandas_df.to_html( + escape=False, + notebook=True, + max_rows=pandas.get_option("display.max_rows"), + max_cols=pandas.get_option("display.max_columns"), + show_dimensions=pandas.get_option("display.show_dimensions"), + formatters=formatters, # type: ignore + ) + else: + html_string = pandas_df._repr_html_() # type:ignore + + html_string += f"[{row_count} rows x {column_count} columns in total]" + + # Generate text representation + with display_options.pandas_repr(opts): + import pandas.io.formats + + to_string_kwargs = ( + pandas.io.formats.format.get_dataframe_repr_params() # type: ignore + ) + if not self._has_index: + to_string_kwargs.update({"index": False}) + repr_string = pandas_df.to_string(**to_string_kwargs) + + lines = repr_string.split("\n") + pattern = re.compile("\\[[0-9]+ rows x [0-9]+ columns\\]") + if pattern.match(lines[-1]): + lines = lines[:-2] + + if row_count > len(lines) - 1: + lines.append("...") + + lines.append("") + lines.append(f"[{row_count} rows x {column_count} columns]") + text_representation = "\n".join(lines) + + return {"text/html": html_string, "text/plain": text_representation} def __delitem__(self, key: str): df = self.drop(columns=[key]) diff --git a/bigframes/streaming/dataframe.py b/bigframes/streaming/dataframe.py index 2e3b1accd1..1f1792d61f 100644 --- a/bigframes/streaming/dataframe.py +++ b/bigframes/streaming/dataframe.py @@ -291,13 +291,13 @@ def __repr__(self, *args, **kwargs): __repr__.__doc__ = _curate_df_doc(inspect.getdoc(dataframe.DataFrame.__repr__)) - def _repr_html_fallback_(self, *args, **kwargs): - return _return_type_wrapper(self._df._repr_html_fallback_, StreamingDataFrame)( + def _repr_html_fallback(self, *args, **kwargs): + return _return_type_wrapper(self._df._repr_html_fallback, StreamingDataFrame)( *args, **kwargs ) - _repr_html_fallback_.__doc__ = _curate_df_doc( - inspect.getdoc(dataframe.DataFrame._repr_html_fallback_) + _repr_html_fallback.__doc__ = _curate_df_doc( + inspect.getdoc(dataframe.DataFrame._repr_html_fallback) ) @property diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index 2b9b0a1fa2..49ae884455 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 11, "id": "d10bfca4", "metadata": {}, "outputs": [], @@ -32,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 12, "id": "ca22f059", "metadata": {}, "outputs": [], @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 13, "id": "1bc5aaf3", "metadata": {}, "outputs": [], @@ -69,10 +69,21 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 14, "id": "f289d250", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: TimeTravelCacheWarning: Reading cached table from 2025-11-20 20:12:26.389598+00:00 to avoid\n", + "incompatibilies with previous reads of this table. To read the latest\n", + "version, set `use_cache=False` or close the current session with\n", + "Session.close() or bigframes.pandas.close_session().\n", + " return method(*args, **kwargs)\n" + ] + }, { "data": { "text/html": [ @@ -104,16 +115,16 @@ "output_type": "stream", "text": [ "state gender year name number\n", - " AL F 1910 Hazel 51\n", - " AL F 1910 Lucy 76\n", - " AR F 1910 Nellie 39\n", - " AR F 1910 Lena 40\n", - " CO F 1910 Thelma 36\n", - " CO F 1910 Ruth 68\n", - " CT F 1910 Elizabeth 86\n", - " DC F 1910 Mary 80\n", - " FL F 1910 Annie 101\n", - " FL F 1910 Alma 39\n", + " AL F 1910 Cora 61\n", + " AL F 1910 Anna 74\n", + " AR F 1910 Willie 132\n", + " CO F 1910 Anna 42\n", + " FL F 1910 Louise 70\n", + " GA F 1910 Catherine 57\n", + " IL F 1910 Jessie 43\n", + " IN F 1910 Anna 100\n", + " IN F 1910 Pauline 77\n", + " IN F 1910 Beulah 39\n", "...\n", "\n", "[5552452 rows x 5 columns]\n" @@ -135,7 +146,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 15, "id": "42bb02ab", "metadata": {}, "outputs": [ @@ -162,7 +173,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 16, "id": "ce250157", "metadata": {}, "outputs": [ @@ -187,122 +198,210 @@ "version_minor": 1 }, "text/html": [ - "
\n", - "\n", - "\n", + "
\n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - "
stategenderyearnamenumber
state
gender
year
name
number
0ALF1910Hazel51\n", + " AL\n", + " \n", + " F\n", + " \n", + " 1910\n", + " \n", + " Hazel\n", + " \n", + " 51\n", + "
1ALF1910Lucy76\n", + " AL\n", + " \n", + " F\n", + " \n", + " 1910\n", + " \n", + " Lucy\n", + " \n", + " 76\n", + "
2ARF1910Nellie39\n", + " AR\n", + " \n", + " F\n", + " \n", + " 1910\n", + " \n", + " Nellie\n", + " \n", + " 39\n", + "
3ARF1910Lena40\n", + " AR\n", + " \n", + " F\n", + " \n", + " 1910\n", + " \n", + " Lena\n", + " \n", + " 40\n", + "
4COF1910Thelma36\n", + " CO\n", + " \n", + " F\n", + " \n", + " 1910\n", + " \n", + " Thelma\n", + " \n", + " 36\n", + "
5COF1910Ruth68\n", + " CO\n", + " \n", + " F\n", + " \n", + " 1910\n", + " \n", + " Ruth\n", + " \n", + " 68\n", + "
6CTF1910Elizabeth86\n", + " CT\n", + " \n", + " F\n", + " \n", + " 1910\n", + " \n", + " Elizabeth\n", + " \n", + " 86\n", + "
7DCF1910Mary80\n", + " DC\n", + " \n", + " F\n", + " \n", + " 1910\n", + " \n", + " Mary\n", + " \n", + " 80\n", + "
8FLF1910Annie101\n", + " FL\n", + " \n", + " F\n", + " \n", + " 1910\n", + " \n", + " Annie\n", + " \n", + " 101\n", + "
9FLF1910Alma39\n", + " FL\n", + " \n", + " F\n", + " \n", + " 1910\n", + " \n", + " Alma\n", + " \n", + " 39\n", + "
\n", - "

10 rows × 5 columns

\n", - "
[5552452 rows x 5 columns in total]" + "" ], "text/plain": [ - "Computation deferred. Computation will process 171.4 MB" + "state gender year name number\n", + " AL F 1910 Hazel 51\n", + " AL F 1910 Lucy 76\n", + " AR F 1910 Nellie 39\n", + " AR F 1910 Lena 40\n", + " CO F 1910 Thelma 36\n", + " CO F 1910 Ruth 68\n", + " CT F 1910 Elizabeth 86\n", + " DC F 1910 Mary 80\n", + " FL F 1910 Annie 101\n", + " FL F 1910 Alma 39\n", + "\n", + "[10 rows x 5 columns]\n", + "...\n", + "\n", + "[5552452 rows x 5 columns]" ] }, - "execution_count": 6, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -321,7 +420,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 17, "id": "6920d49b", "metadata": {}, "outputs": [ @@ -356,7 +455,7 @@ "TableWidget(page_size=10, row_count=5552452, table_html='\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - "
gs://gcs-public-data--labeled-patents/espacene...EUDE29.08.018E04H 6/12<NA>18157874.121.02.201822.02.2017Liedtke & Partner PatentanwälteSHB Hebezeugbau GmbHVOLGER, AlexanderSTEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSEREP 3 366 869 A1\n", + " {'application_number': None, 'class_international': None, 'filing_date': None, 'publication_date': None, 'full_response': '{}', 'status': 'INVALID_ARGUMENT: Invalid field in objectref details, only a JSON object named gcs_metadata is allowed [type.googleapis.com/util.MessageSetPayload=\\'[dremel.DremelErrorWithDetails] { argument_error { query_error { } } debug_info { error_message_template: "Invalid field in objectref details, only a JSON object named $0 is allowed" error_id: 3270173750 } }\\']'}\n", + " \n", + " gs://gcs-public-data--labeled-patents/espacenet_de73.pdf\n", + " \n", + " EU\n", + " \n", + " DE\n", + " \n", + " 03.10.2018\n", + " \n", + " H05B 6/12\n", + " \n", + " <NA>\n", + " \n", + " 18165514.3\n", + " \n", + " 03.04.2018\n", + " \n", + " 30.03.2017\n", + " \n", + " <NA>\n", + " \n", + " BSH Hausgeräte GmbH\n", + " \n", + " Acero Acero, Jesus\n", + " \n", + " VORRICHTUNG ZUR INDUKTIVEN ENERGIEÜBERTRAGUNG\n", + " \n", + " EP 3 383 141 A2\n", + "
1{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H05B 6/12<NA>18165514.303.04.201830.03.2017<NA>BSH Hausgeräte GmbHAcero Acero, JesusVORRICHTUNG ZUR INDUKTIVEN ENERGIEÜBERTRAGUNGEP 3 383 141 A2\n", + " {'application_number': None, 'class_international': None, 'filing_date': None, 'publication_date': None, 'full_response': '{}', 'status': 'INVALID_ARGUMENT: Invalid field in objectref details, only a JSON object named gcs_metadata is allowed [type.googleapis.com/util.MessageSetPayload=\\'[dremel.DremelErrorWithDetails] { argument_error { query_error { } } debug_info { error_message_template: "Invalid field in objectref details, only a JSON object named $0 is allowed" error_id: 3270173750 } }\\']'}\n", + " \n", + " gs://gcs-public-data--labeled-patents/espacenet_de2.pdf\n", + " \n", + " EU\n", + " \n", + " DE\n", + " \n", + " 29.08.018\n", + " \n", + " E04H 6/12\n", + " \n", + " <NA>\n", + " \n", + " 18157874.1\n", + " \n", + " 21.02.2018\n", + " \n", + " 22.02.2017\n", + " \n", + " Liedtke & Partner Patentanwälte\n", + " \n", + " SHB Hebezeugbau GmbH\n", + " \n", + " VOLGER, Alexander\n", + " \n", + " STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER\n", + " \n", + " EP 3 366 869 A1\n", + "
2{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018G06F 11/30<NA>18157347.819.02.201831.03.2017Hoffmann EitleFUJITSU LIMITEDKukihara, KensukeMETHOD EXECUTED BY A COMPUTER, INFORMATION PRO...EP 3 382 553 A1\n", + " {'application_number': None, 'class_international': None, 'filing_date': None, 'publication_date': None, 'full_response': '{}', 'status': 'INVALID_ARGUMENT: Invalid field in objectref details, only a JSON object named gcs_metadata is allowed [type.googleapis.com/util.MessageSetPayload=\\'[dremel.DremelErrorWithDetails] { argument_error { query_error { } } debug_info { error_message_template: "Invalid field in objectref details, only a JSON object named $0 is allowed" error_id: 3270173750 } }\\']'}\n", + " \n", + " gs://gcs-public-data--labeled-patents/espacenet_de70.pdf\n", + " \n", + " EU\n", + " \n", + " DE\n", + " \n", + " 03.10.2018\n", + " \n", + " H01L 21/20\n", + " \n", + " <NA>\n", + " \n", + " 18166536.5\n", + " \n", + " 16.02.2016\n", + " \n", + " <NA>\n", + " \n", + " Scheider, Sascha et al\n", + " \n", + " EV Group E. Thallner GmbH\n", + " \n", + " Kurz, Florian\n", + " \n", + " VORRICHTUNG ZUM BONDEN VON SUBSTRATEN\n", + " \n", + " EP 3 382 744 A1\n", + "
3{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018H01L 21/20<NA>18166536.516.02.2016<NA>Scheider, Sascha et alEV Group E. Thallner GmbHKurz, FlorianVORRICHTUNG ZUM BONDEN VON SUBSTRATENEP 3 382 744 A1\n", + " {'application_number': None, 'class_international': None, 'filing_date': None, 'publication_date': None, 'full_response': '{}', 'status': 'INVALID_ARGUMENT: Invalid field in objectref details, only a JSON object named gcs_metadata is allowed [type.googleapis.com/util.MessageSetPayload=\\'[dremel.DremelErrorWithDetails] { argument_error { query_error { } } debug_info { error_message_template: "Invalid field in objectref details, only a JSON object named $0 is allowed" error_id: 3270173750 } }\\']'}\n", + " \n", + " gs://gcs-public-data--labeled-patents/espacenet_de5.pdf\n", + " \n", + " EU\n", + " \n", + " DE\n", + " \n", + " 03.10.2018\n", + " \n", + " G06F 11/30\n", + " \n", + " <NA>\n", + " \n", + " 18157347.8\n", + " \n", + " 19.02.2018\n", + " \n", + " 31.03.2017\n", + " \n", + " Hoffmann Eitle\n", + " \n", + " FUJITSU LIMITED\n", + " \n", + " Kukihara, Kensuke\n", + " \n", + " METHOD EXECUTED BY A COMPUTER, INFORMATION PROCESSING APPARATUS AND\n", + " \n", + " EP 3 382 553 A1\n", + "
4{'application_number': None, 'class_internatio...gs://gcs-public-data--labeled-patents/espacene...EUDE03.10.2018A01K 31/00<NA>18171005.405.02.201505.02.2014Stork Bamberger PatentanwälteLinco Food Systems A/SThrane, UffeMASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E...EP 3 381 276 A1\n", + " {'application_number': None, 'class_international': None, 'filing_date': None, 'publication_date': None, 'full_response': '{}', 'status': 'INVALID_ARGUMENT: Invalid field in objectref details, only a JSON object named gcs_metadata is allowed [type.googleapis.com/util.MessageSetPayload=\\'[dremel.DremelErrorWithDetails] { argument_error { query_error { } } debug_info { error_message_template: "Invalid field in objectref details, only a JSON object named $0 is allowed" error_id: 3270173750 } }\\']'}\n", + " \n", + " gs://gcs-public-data--labeled-patents/espacenet_de56.pdf\n", + " \n", + " EU\n", + " \n", + " DE\n", + " \n", + " 03.10.2018\n", + " \n", + " A01K 31/00\n", + " \n", + " <NA>\n", + " \n", + " 18171005.4\n", + " \n", + " 05.02.2015\n", + " \n", + " 05.02.2014\n", + " \n", + " Stork Bamberger Patentanwälte\n", + " \n", + " Linco Food Systems A/S\n", + " \n", + " Thrane, Uffe\n", + " \n", + " MASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER EINHEIT UND EINER ANORDNUNG\n", + " \n", + " EP 3 381 276 A1\n", + "
\n", - "

5 rows × 15 columns

\n", - "[5 rows x 15 columns in total]" + "" ], "text/plain": [ - "Computation deferred. Computation will process 0 Bytes" + " result \\\n", + "0 {'application_number': None, 'class_internatio... \n", + "1 {'application_number': None, 'class_internatio... \n", + "2 {'application_number': None, 'class_internatio... \n", + "3 {'application_number': None, 'class_internatio... \n", + "4 {'application_number': None, 'class_internatio... \n", + "\n", + " gcs_path issuer language \\\n", + "0 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "1 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "2 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "3 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", + "\n", + " publication_date class_international class_us application_number \\\n", + "0 03.10.2018 H05B 6/12 18165514.3 \n", + "1 29.08.018 E04H 6/12 18157874.1 \n", + "2 03.10.2018 H01L 21/20 18166536.5 \n", + "3 03.10.2018 G06F 11/30 18157347.8 \n", + "4 03.10.2018 A01K 31/00 18171005.4 \n", + "\n", + " filing_date priority_date_eu representative_line_1_eu \\\n", + "0 03.04.2018 30.03.2017 \n", + "1 21.02.2018 22.02.2017 Liedtke & Partner Patentanwälte \n", + "2 16.02.2016 Scheider, Sascha et al \n", + "3 19.02.2018 31.03.2017 Hoffmann Eitle \n", + "4 05.02.2015 05.02.2014 Stork Bamberger Patentanwälte \n", + "\n", + " applicant_line_1 inventor_line_1 \\\n", + "0 BSH Hausgeräte GmbH Acero Acero, Jesus \n", + "1 SHB Hebezeugbau GmbH VOLGER, Alexander \n", + "2 EV Group E. Thallner GmbH Kurz, Florian \n", + "3 FUJITSU LIMITED Kukihara, Kensuke \n", + "4 Linco Food Systems A/S Thrane, Uffe \n", + "\n", + " title_line_1 number \n", + "0 VORRICHTUNG ZUR INDUKTIVEN ENERGIEÜBERTRAGUNG EP 3 383 141 A2 \n", + "1 STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER EP 3 366 869 A1 \n", + "2 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", + "3 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", + "4 MASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n", + "\n", + "[5 rows x 15 columns]\n", + "\n", + "[5 rows x 15 columns]" ] }, - "execution_count": 10, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } diff --git a/tests/system/small/test_anywidget.py b/tests/system/small/test_anywidget.py index a85cc40857..072c1ddfda 100644 --- a/tests/system/small/test_anywidget.py +++ b/tests/system/small/test_anywidget.py @@ -179,12 +179,14 @@ def test_widget_initialization_should_calculate_total_row_count( assert widget.row_count == EXPECTED_ROW_COUNT -def test_widget_initialization_should_set_default_pagination( +def test_widget_initialization_should_default_to_page_zero( table_widget, ): - """A TableWidget should initialize with page 0 and the correct page size.""" - # The `table_widget` fixture already creates the widget. - # Assert its state. + """ + Given a new TableWidget, when it is initialized, + then its page number should default to 0. + """ + assert table_widget.page == 0 assert table_widget.page_size == EXPECTED_PAGE_SIZE @@ -231,8 +233,8 @@ def test_widget_navigation_should_display_correct_page( _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) -def test_widget_navigation_should_raise_error_for_negative_input( - table_widget, paginated_pandas_df: pd.DataFrame +def test_setting_negative_page_should_raise_error( + table_widget, ): """ Given a widget, when a negative page number is set, @@ -242,19 +244,20 @@ def test_widget_navigation_should_raise_error_for_negative_input( table_widget.page = -1 -def test_widget_navigation_should_clamp_to_last_page_for_out_of_bounds_input( +def test_setting_page_beyond_max_should_clamp_to_last_page( table_widget, paginated_pandas_df: pd.DataFrame ): """ - Given a widget, when a page number greater than the max is set, + Given a widget, + when a page number greater than the max is set, then the page number should be clamped to the last valid page. """ - expected_slice = paginated_pandas_df.iloc[4:6] + expected_slice = paginated_pandas_df.iloc[4:6] # Last page data - table_widget.page = 100 + table_widget.page = 100 # Set page far beyond the total of 3 pages html = table_widget.table_html - assert table_widget.page == 2 + assert table_widget.page == 2 # Page is clamped to the last valid page (0-indexed) _assert_html_matches_pandas_slice(html, expected_slice, paginated_pandas_df) @@ -304,65 +307,74 @@ def test_widget_with_few_rows_should_display_all_rows(small_widget, small_pandas _assert_html_matches_pandas_slice(html, small_pandas_df, small_pandas_df) -def test_widget_with_few_rows_should_have_only_one_page(small_widget): +def test_navigation_beyond_last_page_should_be_clamped(small_widget): """ - Given a DataFrame smaller than the page size, the widget should - clamp page navigation, effectively having only one page. + Given a DataFrame smaller than the page size, + when navigating beyond the last page, + then the page should be clamped to the last valid page (page 0). """ assert small_widget.page == 0 - # Attempt to navigate past the end - small_widget.page = 1 + small_widget.page = 1 # Attempt to navigate past the end - # Should be clamped back to the only valid page - assert small_widget.page == 0 + assert small_widget.page == 0 # Should be clamped back to the only valid page -def test_widget_page_size_should_be_immutable_after_creation( +def test_global_options_change_should_not_affect_existing_widget_page_size( paginated_bf_df: bf.dataframe.DataFrame, ): """ - A widget's page size should be fixed on creation and not be affected - by subsequent changes to global options. + Given an existing widget, + when global display options are changed, + then the widget's page size should remain unchanged. """ with bf.option_context("display.repr_mode", "anywidget", "display.max_rows", 2): from bigframes.display import TableWidget widget = TableWidget(paginated_bf_df) assert widget.page_size == 2 - - # Navigate to second page to ensure widget is in a non-default state - widget.page = 1 + widget.page = 1 # a non-default state assert widget.page == 1 - # Change global max_rows - widget should not be affected - bf.options.display.max_rows = 10 + bf.options.display.max_rows = 10 # Change global setting - assert widget.page_size == 2 # Should remain unchanged - assert widget.page == 1 # Should remain on same page + assert widget.page_size == 2 # Should remain unchanged + assert widget.page == 1 # Page should not be reset -def test_empty_widget_should_have_zero_row_count(empty_bf_df: bf.dataframe.DataFrame): - """Given an empty DataFrame, the widget's row count should be 0.""" +def test_widget_with_empty_dataframe_should_have_zero_row_count( + empty_bf_df: bf.dataframe.DataFrame, +): + """ + Given an empty DataFrame, + when a widget is created from it, + then its row_count should be 0. + """ + with bf.option_context("display.repr_mode", "anywidget"): from bigframes.display import TableWidget widget = TableWidget(empty_bf_df) - assert widget.row_count == 0 + assert widget.row_count == 0 -def test_empty_widget_should_render_table_headers(empty_bf_df: bf.dataframe.DataFrame): - """Given an empty DataFrame, the widget should still render table headers.""" +def test_widget_with_empty_dataframe_should_render_table_headers( + empty_bf_df: bf.dataframe.DataFrame, +): + """ + Given an empty DataFrame, + when a widget is created from it, + then its HTML representation should still render the table headers. + """ with bf.option_context("display.repr_mode", "anywidget"): from bigframes.display import TableWidget widget = TableWidget(empty_bf_df) - html = widget.table_html - assert " Date: Thu, 20 Nov 2025 22:41:16 +0000 Subject: [PATCH 14/17] update the testcase due to my optimization reduce the query calls number --- notebooks/dataframes/anywidget_mode.ipynb | 140 +++++++++++----------- tests/system/small/test_ipython.py | 2 +- 2 files changed, 69 insertions(+), 73 deletions(-) diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index 49ae884455..8a04e941ea 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 11, + "execution_count": 22, "id": "d10bfca4", "metadata": {}, "outputs": [], @@ -32,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 23, "id": "ca22f059", "metadata": {}, "outputs": [], @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 24, "id": "1bc5aaf3", "metadata": {}, "outputs": [], @@ -69,7 +69,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 25, "id": "f289d250", "metadata": {}, "outputs": [ @@ -114,17 +114,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "state gender year name number\n", - " AL F 1910 Cora 61\n", - " AL F 1910 Anna 74\n", - " AR F 1910 Willie 132\n", - " CO F 1910 Anna 42\n", - " FL F 1910 Louise 70\n", - " GA F 1910 Catherine 57\n", - " IL F 1910 Jessie 43\n", - " IN F 1910 Anna 100\n", - " IN F 1910 Pauline 77\n", - " IN F 1910 Beulah 39\n", + "state gender year name number\n", + " AL F 1910 Vera 71\n", + " AR F 1910 Viola 37\n", + " AR F 1910 Alice 57\n", + " AR F 1910 Edna 95\n", + " AR F 1910 Ollie 40\n", + " CA F 1910 Beatrice 37\n", + " CT F 1910 Marion 36\n", + " CT F 1910 Marie 36\n", + " FL F 1910 Alice 53\n", + " GA F 1910 Thelma 133\n", "...\n", "\n", "[5552452 rows x 5 columns]\n" @@ -146,7 +146,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 26, "id": "42bb02ab", "metadata": {}, "outputs": [ @@ -173,7 +173,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 27, "id": "ce250157", "metadata": {}, "outputs": [ @@ -192,13 +192,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "5384f97c565f4de9814893c5d39a27e9", - "model_id": "dcf0f4199d86493cb30ec2c94bbcbd78", + "model_id": "90cd21ebae0840c2bcc39b539da4ad0a", "version_major": 2, "version_minor": 1 }, "text/html": [ - "\n", + "
\n", " \n", " \n", " \n", @@ -220,15 +219,15 @@ " 1910\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", @@ -254,10 +253,10 @@ " 1910\n", " \n", " \n", " \n", " \n", " \n", @@ -271,15 +270,15 @@ " 1910\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", @@ -322,15 +321,15 @@ " 1910\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", @@ -356,15 +355,15 @@ " 1910\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
state
\n", - " Hazel\n", + " Vera\n", " \n", - " 51\n", + " 71\n", "
\n", - " AL\n", + " AR\n", " \n", " F\n", @@ -237,10 +236,10 @@ " 1910\n", " \n", - " Lucy\n", + " Viola\n", " \n", - " 76\n", + " 37\n", "
\n", - " Nellie\n", + " Alice\n", " \n", - " 39\n", + " 57\n", "
\n", - " Lena\n", + " Edna\n", " \n", - " 40\n", + " 95\n", "
\n", - " CO\n", + " AR\n", " \n", " F\n", @@ -288,15 +287,15 @@ " 1910\n", " \n", - " Thelma\n", + " Ollie\n", " \n", - " 36\n", + " 40\n", "
\n", - " CO\n", + " CA\n", " \n", " F\n", @@ -305,10 +304,10 @@ " 1910\n", " \n", - " Ruth\n", + " Beatrice\n", " \n", - " 68\n", + " 37\n", "
\n", - " Elizabeth\n", + " Marion\n", " \n", - " 86\n", + " 36\n", "
\n", - " DC\n", + " CT\n", " \n", " F\n", @@ -339,10 +338,10 @@ " 1910\n", " \n", - " Mary\n", + " Marie\n", " \n", - " 80\n", + " 36\n", "
\n", - " Annie\n", + " Alice\n", " \n", - " 101\n", + " 53\n", "
\n", - " FL\n", + " GA\n", " \n", " F\n", @@ -373,27 +372,27 @@ " 1910\n", " \n", - " Alma\n", + " Thelma\n", " \n", - " 39\n", + " 133\n", "
" ], "text/plain": [ - "state gender year name number\n", - " AL F 1910 Hazel 51\n", - " AL F 1910 Lucy 76\n", - " AR F 1910 Nellie 39\n", - " AR F 1910 Lena 40\n", - " CO F 1910 Thelma 36\n", - " CO F 1910 Ruth 68\n", - " CT F 1910 Elizabeth 86\n", - " DC F 1910 Mary 80\n", - " FL F 1910 Annie 101\n", - " FL F 1910 Alma 39\n", + "state gender year name number\n", + " AL F 1910 Vera 71\n", + " AR F 1910 Viola 37\n", + " AR F 1910 Alice 57\n", + " AR F 1910 Edna 95\n", + " AR F 1910 Ollie 40\n", + " CA F 1910 Beatrice 37\n", + " CT F 1910 Marion 36\n", + " CT F 1910 Marie 36\n", + " FL F 1910 Alice 53\n", + " GA F 1910 Thelma 133\n", "\n", "[10 rows x 5 columns]\n", "...\n", @@ -401,7 +400,7 @@ "[5552452 rows x 5 columns]" ] }, - "execution_count": 16, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -420,7 +419,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 28, "id": "6920d49b", "metadata": {}, "outputs": [ @@ -446,8 +445,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "bb32dbe7265e43dca0e8c341be36be84", - "model_id": "cea93652992346149c8cf05ea647f522", + "model_id": "a16e1564457c4ba9822c2c952d1ac855", "version_major": 2, "version_minor": 1 }, @@ -455,7 +453,7 @@ "TableWidget(page_size=10, row_count=5552452, table_html='". We may need to implement a full-fledged repr module to better support types not in pandas. - if bigframes.options.display.blob_display and blob_cols: - - def obj_ref_rt_to_html(obj_ref_rt) -> str: - obj_ref_rt_json = json.loads(obj_ref_rt) - obj_ref_details = obj_ref_rt_json["objectref"]["details"] - if "gcs_metadata" in obj_ref_details: - gcs_metadata = obj_ref_details["gcs_metadata"] - content_type = typing.cast( - str, gcs_metadata.get("content_type", "") - ) - if content_type.startswith("image"): - size_str = "" - if bigframes.options.display.blob_display_width: - size_str = f' width="{bigframes.options.display.blob_display_width}"' - if bigframes.options.display.blob_display_height: - size_str = ( - size_str - + f' height="{bigframes.options.display.blob_display_height}"' - ) - url = obj_ref_rt_json["access_urls"]["read_url"] - return f'' - - return f'uri: {obj_ref_rt_json["objectref"]["uri"]}, authorizer: {obj_ref_rt_json["objectref"]["authorizer"]}' - - formatters = {blob_col: obj_ref_rt_to_html for blob_col in blob_cols} - - # set max_colwidth so not to truncate the image url - with pandas.option_context("display.max_colwidth", None): - max_rows = pandas.get_option("display.max_rows") - max_cols = pandas.get_option("display.max_columns") - show_dimensions = pandas.get_option("display.show_dimensions") - html_string = pandas_df.to_html( - escape=False, - notebook=True, - max_rows=max_rows, - max_cols=max_cols, - show_dimensions=show_dimensions, - formatters=formatters, # type: ignore - ) - else: - # _repr_html_ stub is missing so mypy thinks it's a Series. Ignore mypy. - html_string = pandas_df._repr_html_() # type:ignore + return self._create_html_representation( + pandas_df, row_count, column_count, blob_cols + ) - html_string += f"[{row_count} rows x {column_count} columns in total]" - return html_string + def _process_blob_columns(self) -> tuple[DataFrame, list[str]]: + """Process blob columns for display.""" + self._cached() + df = self + blob_cols = [] + if bigframes.options.display.blob_display: + blob_cols = [ + series_name + for series_name, series in self.items() + if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE + ] + if blob_cols: + df = self.copy() + for col in blob_cols: + df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True) + return df, blob_cols def _get_anywidget_bundle(self, include=None, exclude=None): """ @@ -919,17 +876,7 @@ def _get_anywidget_bundle(self, include=None, exclude=None): """ from bigframes import display - # Process blob columns if needed - self._cached() - df = self.copy() - if bigframes.options.display.blob_display: - blob_cols = [ - series_name - for series_name, series in df.items() - if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE - ] - for col in blob_cols: - df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True) + df, _ = self._process_blob_columns() # Create and display the widget widget = display.TableWidget(df) @@ -946,7 +893,16 @@ def _get_anywidget_bundle(self, include=None, exclude=None): # the HTML and plain text versions. widget_repr["text/html"] = widget.table_html - # Re-create the text representation from what we know. + widget_repr["text/plain"] = self._create_text_representation( + widget._cached_data, widget.row_count + ) + + return widget_repr + + def _create_text_representation( + self, pandas_df: pandas.DataFrame, total_rows: typing.Optional[int] + ) -> str: + """Create a text representation of the DataFrame.""" opts = bigframes.options.display with display_options.pandas_repr(opts): import pandas.io.formats @@ -957,19 +913,20 @@ def _get_anywidget_bundle(self, include=None, exclude=None): ) if not self._has_index: to_string_kwargs.update({"index": False}) - repr_string = widget._cached_data.to_string(**to_string_kwargs) + + # We add our own dimensions string, so don't want pandas to. + to_string_kwargs.update({"show_dimensions": False}) + repr_string = pandas_df.to_string(**to_string_kwargs) lines = repr_string.split("\n") - row_count = widget.row_count - if row_count is not None and row_count > len(widget._cached_data): + + if total_rows is not None and total_rows > len(pandas_df): lines.append("...") lines.append("") column_count = len(self.columns) - lines.append(f"[{row_count or '?'} rows x {column_count} columns]") - widget_repr["text/plain"] = "\n".join(lines) - - return widget_repr + lines.append(f"[{total_rows or '?'} rows x {column_count} columns]") + return "\n".join(lines) def _repr_mimebundle_(self, include=None, exclude=None): """ @@ -997,19 +954,7 @@ def _repr_mimebundle_(self, include=None, exclude=None): opts = bigframes.options.display max_results = opts.max_rows - # Process blob columns first, logic from _repr_html_fallback - self._cached() - df = self.copy() - if bigframes.options.display.blob_display: - blob_cols = [ - series_name - for series_name, series in df.items() - if series.dtype == bigframes.dtypes.OBJ_REF_DTYPE - ] - for col in blob_cols: - df[col] = df[col].blob._get_runtime(mode="R", with_metadata=True) - else: - blob_cols = [] + df, blob_cols = self._process_blob_columns() pandas_df, row_count, query_job = df._block.retrieve_repr_request_results( max_results @@ -1017,7 +962,23 @@ def _repr_mimebundle_(self, include=None, exclude=None): self._set_internal_query_job(query_job) column_count = len(pandas_df.columns) - # Generate HTML representation + html_string = self._create_html_representation( + pandas_df, row_count, column_count, blob_cols + ) + + text_representation = self._create_text_representation(pandas_df, row_count) + + return {"text/html": html_string, "text/plain": text_representation} + + def _create_html_representation( + self, + pandas_df: pandas.DataFrame, + row_count: int, + column_count: int, + blob_cols: list[str], + ) -> str: + """Create an HTML representation of the DataFrame.""" + opts = bigframes.options.display with display_options.pandas_repr(opts): if bigframes.options.display.blob_display and blob_cols: @@ -1057,31 +1018,7 @@ def obj_ref_rt_to_html(obj_ref_rt) -> str: html_string = pandas_df._repr_html_() # type:ignore html_string += f"[{row_count} rows x {column_count} columns in total]" - - # Generate text representation - with display_options.pandas_repr(opts): - import pandas.io.formats - - to_string_kwargs = ( - pandas.io.formats.format.get_dataframe_repr_params() # type: ignore - ) - if not self._has_index: - to_string_kwargs.update({"index": False}) - repr_string = pandas_df.to_string(**to_string_kwargs) - - lines = repr_string.split("\n") - pattern = re.compile("\\[[0-9]+ rows x [0-9]+ columns\\]") - if pattern.match(lines[-1]): - lines = lines[:-2] - - if row_count > len(lines) - 1: - lines.append("...") - - lines.append("") - lines.append(f"[{row_count} rows x {column_count} columns]") - text_representation = "\n".join(lines) - - return {"text/html": html_string, "text/plain": text_representation} + return html_string def __delitem__(self, key: str): df = self.drop(columns=[key]) diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index 8a04e941ea..072e9eb505 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 22, + "execution_count": 1, "id": "d10bfca4", "metadata": {}, "outputs": [], @@ -32,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 2, "id": "ca22f059", "metadata": {}, "outputs": [], @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 3, "id": "1bc5aaf3", "metadata": {}, "outputs": [], @@ -69,21 +69,10 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 4, "id": "f289d250", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/google/home/shuowei/src/python-bigquery-dataframes/bigframes/core/log_adapter.py:182: TimeTravelCacheWarning: Reading cached table from 2025-11-20 20:12:26.389598+00:00 to avoid\n", - "incompatibilies with previous reads of this table. To read the latest\n", - "version, set `use_cache=False` or close the current session with\n", - "Session.close() or bigframes.pandas.close_session().\n", - " return method(*args, **kwargs)\n" - ] - }, { "data": { "text/html": [ @@ -146,7 +135,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 5, "id": "42bb02ab", "metadata": {}, "outputs": [ @@ -173,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 6, "id": "ce250157", "metadata": {}, "outputs": [ @@ -192,12 +181,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "90cd21ebae0840c2bcc39b539da4ad0a", + "model_id": "c94ad42fd4eb45f7833bf5f571be3ffe", "version_major": 2, "version_minor": 1 }, "text/html": [ - "
\n", + "
\n", " \n", " \n", " \n", @@ -219,15 +208,15 @@ " 1910\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", @@ -253,15 +242,15 @@ " 1910\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
state
\n", - " Vera\n", + " Cora\n", " \n", - " 71\n", + " 61\n", "
\n", - " AR\n", + " AL\n", " \n", " F\n", @@ -236,10 +225,10 @@ " 1910\n", " \n", - " Viola\n", + " Anna\n", " \n", - " 37\n", + " 74\n", "
\n", - " Alice\n", + " Willie\n", " \n", - " 57\n", + " 132\n", "
\n", - " AR\n", + " CO\n", " \n", " F\n", @@ -270,15 +259,15 @@ " 1910\n", " \n", - " Edna\n", + " Anna\n", " \n", - " 95\n", + " 42\n", "
\n", - " AR\n", + " FL\n", " \n", " F\n", @@ -287,15 +276,15 @@ " 1910\n", " \n", - " Ollie\n", + " Louise\n", " \n", - " 40\n", + " 70\n", "
\n", - " CA\n", + " GA\n", " \n", " F\n", @@ -304,15 +293,15 @@ " 1910\n", " \n", - " Beatrice\n", + " Catherine\n", " \n", - " 37\n", + " 57\n", "
\n", - " CT\n", + " IL\n", " \n", " F\n", @@ -321,15 +310,15 @@ " 1910\n", " \n", - " Marion\n", + " Jessie\n", " \n", - " 36\n", + " 43\n", "
\n", - " CT\n", + " IN\n", " \n", " F\n", @@ -338,15 +327,15 @@ " 1910\n", " \n", - " Marie\n", + " Anna\n", " \n", - " 36\n", + " 100\n", "
\n", - " FL\n", + " IN\n", " \n", " F\n", @@ -355,15 +344,15 @@ " 1910\n", " \n", - " Alice\n", + " Pauline\n", " \n", - " 53\n", + " 77\n", "
\n", - " GA\n", + " IN\n", " \n", " F\n", @@ -372,35 +361,33 @@ " 1910\n", " \n", - " Thelma\n", + " Beulah\n", " \n", - " 133\n", + " 39\n", "
" ], "text/plain": [ - "state gender year name number\n", - " AL F 1910 Vera 71\n", - " AR F 1910 Viola 37\n", - " AR F 1910 Alice 57\n", - " AR F 1910 Edna 95\n", - " AR F 1910 Ollie 40\n", - " CA F 1910 Beatrice 37\n", - " CT F 1910 Marion 36\n", - " CT F 1910 Marie 36\n", - " FL F 1910 Alice 53\n", - " GA F 1910 Thelma 133\n", - "\n", - "[10 rows x 5 columns]\n", + "state gender year name number\n", + " AL F 1910 Cora 61\n", + " AL F 1910 Anna 74\n", + " AR F 1910 Willie 132\n", + " CO F 1910 Anna 42\n", + " FL F 1910 Louise 70\n", + " GA F 1910 Catherine 57\n", + " IL F 1910 Jessie 43\n", + " IN F 1910 Anna 100\n", + " IN F 1910 Pauline 77\n", + " IN F 1910 Beulah 39\n", "...\n", "\n", "[5552452 rows x 5 columns]" ] }, - "execution_count": 27, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -419,7 +406,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 7, "id": "6920d49b", "metadata": {}, "outputs": [ @@ -445,7 +432,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "a16e1564457c4ba9822c2c952d1ac855", + "model_id": "8a98c765463d4a56a92d07101917cd0e", "version_major": 2, "version_minor": 1 }, @@ -453,7 +440,7 @@ "TableWidget(page_size=10, row_count=5552452, table_html=' Date: Fri, 21 Nov 2025 23:16:25 +0000 Subject: [PATCH 17/17] notebook change --- notebooks/dataframes/anywidget_mode.ipynb | 175 +++++++++++----------- 1 file changed, 87 insertions(+), 88 deletions(-) diff --git a/notebooks/dataframes/anywidget_mode.ipynb b/notebooks/dataframes/anywidget_mode.ipynb index d7325725d4..7f319945ed 100644 --- a/notebooks/dataframes/anywidget_mode.ipynb +++ b/notebooks/dataframes/anywidget_mode.ipynb @@ -103,17 +103,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "state gender year name number\n", - " AL F 1910 Vera 71\n", - " AR F 1910 Viola 37\n", - " AR F 1910 Alice 57\n", - " AR F 1910 Edna 95\n", - " AR F 1910 Ollie 40\n", - " CA F 1910 Beatrice 37\n", - " CT F 1910 Marion 36\n", - " CT F 1910 Marie 36\n", - " FL F 1910 Alice 53\n", - " GA F 1910 Thelma 133\n", + "state gender year name number\n", + " AL F 1910 Hazel 51\n", + " AL F 1910 Lucy 76\n", + " AR F 1910 Nellie 39\n", + " AR F 1910 Lena 40\n", + " CO F 1910 Thelma 36\n", + " CO F 1910 Ruth 68\n", + " CT F 1910 Elizabeth 86\n", + " DC F 1910 Mary 80\n", + " FL F 1910 Annie 101\n", + " FL F 1910 Alma 39\n", "...\n", "\n", "[5552452 rows x 5 columns]\n" @@ -181,12 +181,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c94ad42fd4eb45f7833bf5f571be3ffe", + "model_id": "43e82cd0caf54e3cb9bb40afb4959e01", "version_major": 2, "version_minor": 1 }, "text/html": [ - "
\n", + "
\n", " \n", " \n", " \n", @@ -208,15 +208,15 @@ " 1910\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", @@ -242,15 +242,15 @@ " 1910\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
state
\n", - " Cora\n", + " Vera\n", " \n", - " 61\n", + " 71\n", "
\n", - " AL\n", + " AR\n", " \n", " F\n", @@ -225,10 +225,10 @@ " 1910\n", " \n", - " Anna\n", + " Viola\n", " \n", - " 74\n", + " 37\n", "
\n", - " Willie\n", + " Alice\n", " \n", - " 132\n", + " 57\n", "
\n", - " CO\n", + " AR\n", " \n", " F\n", @@ -259,15 +259,15 @@ " 1910\n", " \n", - " Anna\n", + " Edna\n", " \n", - " 42\n", + " 95\n", "
\n", - " FL\n", + " AR\n", " \n", " F\n", @@ -276,15 +276,15 @@ " 1910\n", " \n", - " Louise\n", + " Ollie\n", " \n", - " 70\n", + " 40\n", "
\n", - " GA\n", + " CA\n", " \n", " F\n", @@ -293,15 +293,15 @@ " 1910\n", " \n", - " Catherine\n", + " Beatrice\n", " \n", - " 57\n", + " 37\n", "
\n", - " IL\n", + " CT\n", " \n", " F\n", @@ -310,15 +310,15 @@ " 1910\n", " \n", - " Jessie\n", + " Marion\n", " \n", - " 43\n", + " 36\n", "
\n", - " IN\n", + " CT\n", " \n", " F\n", @@ -327,15 +327,15 @@ " 1910\n", " \n", - " Anna\n", + " Marie\n", " \n", - " 100\n", + " 36\n", "
\n", - " IN\n", + " FL\n", " \n", " F\n", @@ -344,15 +344,15 @@ " 1910\n", " \n", - " Pauline\n", + " Alice\n", " \n", - " 77\n", + " 53\n", "
\n", - " IN\n", + " GA\n", " \n", " F\n", @@ -361,27 +361,27 @@ " 1910\n", " \n", - " Beulah\n", + " Thelma\n", " \n", - " 39\n", + " 133\n", "
" ], "text/plain": [ - "state gender year name number\n", - " AL F 1910 Cora 61\n", - " AL F 1910 Anna 74\n", - " AR F 1910 Willie 132\n", - " CO F 1910 Anna 42\n", - " FL F 1910 Louise 70\n", - " GA F 1910 Catherine 57\n", - " IL F 1910 Jessie 43\n", - " IN F 1910 Anna 100\n", - " IN F 1910 Pauline 77\n", - " IN F 1910 Beulah 39\n", + "state gender year name number\n", + " AL F 1910 Vera 71\n", + " AR F 1910 Viola 37\n", + " AR F 1910 Alice 57\n", + " AR F 1910 Edna 95\n", + " AR F 1910 Ollie 40\n", + " CA F 1910 Beatrice 37\n", + " CT F 1910 Marion 36\n", + " CT F 1910 Marie 36\n", + " FL F 1910 Alice 53\n", + " GA F 1910 Thelma 133\n", "...\n", "\n", "[5552452 rows x 5 columns]" @@ -394,7 +394,6 @@ ], "source": [ "df" - "df" ] }, { @@ -433,7 +432,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8a98c765463d4a56a92d07101917cd0e", + "model_id": "24937ff614ac4e73bd34ab0b2c11d664", "version_major": 2, "version_minor": 1 }, @@ -533,7 +532,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ae9c63f402b849af93f5803fc11fc9a5", + "model_id": "c2c1a5ce2e4249c185ce2f5023facecb", "version_major": 2, "version_minor": 1 }, @@ -573,7 +572,7 @@ "data": { "text/html": [ "✅ Completed. \n", - " Query processed 85.9 kB in 15 seconds of slot time.\n", + " Query processed 85.9 kB in 11 seconds of slot time.\n", " " ], "text/plain": [ @@ -618,12 +617,12 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c8ac0daf5fd54eca9760632e63cb5b08", + "model_id": "aff7260d6de148a08556231ab5649e30", "version_major": 2, "version_minor": 1 }, "text/html": [ - "\n", + "
\n", " \n", " \n", " \n", @@ -649,7 +648,7 @@ " {'application_number': None, 'class_international': None, 'filing_date': None, 'publication_date': None, 'full_response': '{}', 'status': 'INVALID_ARGUMENT: Invalid field in objectref details, only a JSON object named gcs_metadata is allowed [type.googleapis.com/util.MessageSetPayload=\\'[dremel.DremelErrorWithDetails] { argument_error { query_error { } } debug_info { error_message_template: "Invalid field in objectref details, only a JSON object named $0 is allowed" error_id: 3270173750 } }\\']'}\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", @@ -696,7 +695,7 @@ " {'application_number': None, 'class_international': None, 'filing_date': None, 'publication_date': None, 'full_response': '{}', 'status': 'INVALID_ARGUMENT: Invalid field in objectref details, only a JSON object named gcs_metadata is allowed [type.googleapis.com/util.MessageSetPayload=\\'[dremel.DremelErrorWithDetails] { argument_error { query_error { } } debug_info { error_message_template: "Invalid field in objectref details, only a JSON object named $0 is allowed" error_id: 3270173750 } }\\']'}\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", @@ -898,29 +897,29 @@ "4 gs://gcs-public-data--labeled-patents/espacene... EU DE \n", "\n", " publication_date class_international class_us application_number \\\n", - "0 29.08.018 E04H 6/12 18157874.1 \n", - "1 03.10.2018 H05B 6/12 18165514.3 \n", + "0 03.10.2018 H05B 6/12 18165514.3 \n", + "1 29.08.018 E04H 6/12 18157874.1 \n", "2 03.10.2018 H01L 21/20 18166536.5 \n", "3 03.10.2018 G06F 11/30 18157347.8 \n", "4 03.10.2018 A01K 31/00 18171005.4 \n", "\n", " filing_date priority_date_eu representative_line_1_eu \\\n", - "0 21.02.2018 22.02.2017 Liedtke & Partner Patentanwälte \n", - "1 03.04.2018 30.03.2017 \n", + "0 03.04.2018 30.03.2017 \n", + "1 21.02.2018 22.02.2017 Liedtke & Partner Patentanwälte \n", "2 16.02.2016 Scheider, Sascha et al \n", "3 19.02.2018 31.03.2017 Hoffmann Eitle \n", "4 05.02.2015 05.02.2014 Stork Bamberger Patentanwälte \n", "\n", " applicant_line_1 inventor_line_1 \\\n", - "0 SHB Hebezeugbau GmbH VOLGER, Alexander \n", - "1 BSH Hausgeräte GmbH Acero Acero, Jesus \n", + "0 BSH Hausgeräte GmbH Acero Acero, Jesus \n", + "1 SHB Hebezeugbau GmbH VOLGER, Alexander \n", "2 EV Group E. Thallner GmbH Kurz, Florian \n", "3 FUJITSU LIMITED Kukihara, Kensuke \n", "4 Linco Food Systems A/S Thrane, Uffe \n", "\n", " title_line_1 number \n", - "0 STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER EP 3 366 869 A1 \n", - "1 VORRICHTUNG ZUR INDUKTIVEN ENERGIEÜBERTRAGUNG EP 3 383 141 A2 \n", + "0 VORRICHTUNG ZUR INDUKTIVEN ENERGIEÜBERTRAGUNG EP 3 383 141 A2 \n", + "1 STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER EP 3 366 869 A1 \n", "2 VORRICHTUNG ZUM BONDEN VON SUBSTRATEN EP 3 382 744 A1 \n", "3 METHOD EXECUTED BY A COMPUTER, INFORMATION PRO... EP 3 382 553 A1 \n", "4 MASTHÄHNCHENCONTAINER ALS BESTANDTEIL EINER E... EP 3 381 276 A1 \n",
result
\n", - " gs://gcs-public-data--labeled-patents/espacenet_de2.pdf\n", + " gs://gcs-public-data--labeled-patents/espacenet_de73.pdf\n", " \n", " EU\n", @@ -658,37 +657,37 @@ " DE\n", " \n", - " 29.08.018\n", + " 03.10.2018\n", " \n", - " E04H 6/12\n", + " H05B 6/12\n", " \n", " <NA>\n", " \n", - " 18157874.1\n", + " 18165514.3\n", " \n", - " 21.02.2018\n", + " 03.04.2018\n", " \n", - " 22.02.2017\n", + " 30.03.2017\n", " \n", - " Liedtke & Partner Patentanwälte\n", + " <NA>\n", " \n", - " SHB Hebezeugbau GmbH\n", + " BSH Hausgeräte GmbH\n", " \n", - " VOLGER, Alexander\n", + " Acero Acero, Jesus\n", " \n", - " STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER\n", + " VORRICHTUNG ZUR INDUKTIVEN ENERGIEÜBERTRAGUNG\n", " \n", - " EP 3 366 869 A1\n", + " EP 3 383 141 A2\n", "
\n", - " gs://gcs-public-data--labeled-patents/espacenet_de73.pdf\n", + " gs://gcs-public-data--labeled-patents/espacenet_de2.pdf\n", " \n", " EU\n", @@ -705,37 +704,37 @@ " DE\n", " \n", - " 03.10.2018\n", + " 29.08.018\n", " \n", - " H05B 6/12\n", + " E04H 6/12\n", " \n", " <NA>\n", " \n", - " 18165514.3\n", + " 18157874.1\n", " \n", - " 03.04.2018\n", + " 21.02.2018\n", " \n", - " 30.03.2017\n", + " 22.02.2017\n", " \n", - " <NA>\n", + " Liedtke & Partner Patentanwälte\n", " \n", - " BSH Hausgeräte GmbH\n", + " SHB Hebezeugbau GmbH\n", " \n", - " Acero Acero, Jesus\n", + " VOLGER, Alexander\n", " \n", - " VORRICHTUNG ZUR INDUKTIVEN ENERGIEÜBERTRAGUNG\n", + " STEUERUNGSSYSTEM FÜR AUTOMATISCHE PARKHÄUSER\n", " \n", - " EP 3 383 141 A2\n", + " EP 3 366 869 A1\n", "