Commit 36fb4ef

Merge branch 'main' into b374307132-information_schema

2 parents fc194af + ccd7c07

604 files changed: 40,216 additions, 12,819 deletions


.gitignore

Lines changed: 1 addition & 0 deletions

@@ -62,3 +62,4 @@ system_tests/local_test_setup
 # Make sure a generated file isn't accidentally committed.
 pylintrc
 pylintrc.test
+dummy.pkl
.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions

@@ -20,7 +20,7 @@ repos:
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer
-        exclude: "^tests/unit/core/compile/sqlglot/snapshots"
+        exclude: "^tests/unit/core/compile/sqlglot/.*snapshots"
       - id: check-yaml
   - repo: https://github.com/pycqa/isort
     rev: 5.12.0
@@ -43,7 +43,7 @@ repos:
         exclude: "^third_party"
         args: ["--check-untyped-defs", "--explicit-package-bases", "--ignore-missing-imports"]
   - repo: https://github.com/biomejs/pre-commit
-    rev: v2.0.2
+    rev: v2.2.4
     hooks:
       - id: biome-check
         files: '\.(js|css)$'
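(The broadened `.*snapshots` pattern presumably lets the end-of-file fixer skip snapshot directories anywhere under the sqlglot test tree, not only the top-level `snapshots` folder.)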

CHANGELOG.md

Lines changed: 306 additions & 1 deletion
Large diffs are not rendered by default.

GEMINI.md

Lines changed: 147 additions & 0 deletions

New file:

# Contribution guidelines, tailored for LLM agents

## Testing

We use `nox` to instrument our tests.

- To test your changes, run unit tests with `nox`:

  ```bash
  nox -r -s unit
  ```

- To run a single unit test:

  ```bash
  nox -r -s unit-3.13 -- -k <name of test>
  ```

- To run system tests, you can execute:

  ```bash
  # Run all system tests
  nox -r -s system

  # Run a single system test
  nox -r -s system-3.13 -- -k <name of test>
  ```

- The codebase must have better coverage after each change than it had
  previously. You can test coverage via `nox -s unit system cover` (takes a
  long time).
## Code Style

- We use the automatic code formatter `black`. You can run it using
  the nox session `format`. This will eliminate many lint errors. Run via:

  ```bash
  nox -r -s format
  ```

- PEP8 compliance is required, with exceptions defined in the linter
  configuration. If you have `nox` installed, you can test that you have not
  introduced any non-compliant code via:

  ```bash
  nox -r -s lint
  ```

- When writing tests, use the idiomatic "pytest" style.
## Documentation

If a method or property implements the same interface as a third-party
package such as pandas or scikit-learn, place the relevant docstring in the
corresponding `third_party/bigframes_vendored/package_name` directory, not in
the `bigframes` directory. Implementations may be placed in the `bigframes`
directory, though.

### Testing code samples

Code samples are very important for accurate documentation. We use the
"doctest" framework to ensure the samples function as expected. After adding a
code sample, please verify it by running doctest. To run the doctests for just
a single method, refer to the following example:

```bash
pytest --doctest-modules bigframes/pandas/__init__.py::bigframes.pandas.cut
```
## Tips for implementing common BigFrames features

### Adding a scalar operator

For an example, see commit
[c5b7fdae74a22e581f7705bc0cf5390e928f4425](https://github.com/googleapis/python-bigquery-dataframes/commit/c5b7fdae74a22e581f7705bc0cf5390e928f4425).

To add a new scalar operator, follow these steps (a condensed sketch appears
after this list):

1. **Define the operation dataclass:**
   - In `bigframes/operations/`, find the relevant file (e.g., `geo_ops.py` for geography functions) or create a new one.
   - Create a new dataclass inheriting from `base_ops.UnaryOp` for unary
     operators, `base_ops.BinaryOp` for binary operators, `base_ops.TernaryOp`
     for ternary operators, or `base_ops.NaryOp` for operators with many
     arguments. Note that these operators count the number of column-like
     arguments. A function that takes only a single column but several literal
     values would still be a `UnaryOp`.
   - Define the `name` of the operation and any parameters it requires.
   - Implement the `output_type` method to specify the data type of the result.

2. **Export the new operation:**
   - In `bigframes/operations/__init__.py`, import your new operation dataclass and add it to the `__all__` list.

3. **Implement the user-facing function (pandas-like):**
   - Identify the canonical function from pandas, geopandas, awkward array, or
     another popular Python package that this operator implements.
   - Find the corresponding class in BigFrames. For example, the implementation
     for most geopandas.GeoSeries methods is in
     `bigframes/geopandas/geoseries.py`. Pandas Series methods are implemented
     in `bigframes/series.py` or one of the accessors, such as `StringMethods`
     in `bigframes/operations/strings.py`.
   - Create the user-facing function that will be called by users (e.g., `length`).
   - If the SQL method differs from pandas or geopandas in a way that can't be
     made the same, raise a `NotImplementedError` with an appropriate message
     and link to the feedback form.
   - Add the docstring to the corresponding file in
     `third_party/bigframes_vendored`, modeled after pandas / geopandas.

4. **Implement the user-facing function (SQL-like):**
   - In `bigframes/bigquery/_operations/`, find the relevant file (e.g., `geo.py`) or create a new one.
   - Create the user-facing function that will be called by users (e.g., `st_length`).
   - This function should take a `Series` for any column-like inputs, plus any other parameters.
   - Inside the function, call `series._apply_unary_op`,
     `series._apply_binary_op`, or similar, passing the operation dataclass you
     created.
   - Add a comprehensive docstring with examples.
   - In `bigframes/bigquery/__init__.py`, import your new user-facing function and add it to the `__all__` list.

5. **Implement the compilation logic:**
   - In `bigframes/core/compile/scalar_op_compiler.py`:
     - If the BigQuery function has a direct equivalent in Ibis, you can often reuse an existing Ibis method.
     - If not, define a new Ibis UDF using `@ibis_udf.scalar.builtin` to map to the specific BigQuery function signature.
     - Create a new compiler implementation function (e.g., `geo_length_op_impl`).
     - Register this function to your operation dataclass using `@scalar_op_compiler.register_unary_op` or `@scalar_op_compiler.register_binary_op`.
     - This implementation will translate the BigQuery DataFrames operation into the appropriate Ibis expression.

6. **Add tests:**
   - Add system tests in the `tests/system/` directory to verify the end-to-end
     functionality of the new operator. Test various inputs, including edge
     cases and `NULL` values.

     Where possible, run the same test code against pandas or GeoPandas and
     compare that the outputs are the same (except for dtypes if BigFrames
     differs from pandas).
   - If you are overriding a pandas or GeoPandas property, add a unit test to
     ensure the correct behavior (e.g., raising `NotImplementedError` if the
     functionality is not supported).
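A minimal, hypothetical sketch of steps 1, 4, and 5 for an imaginary unary
`geo_foo` operator. All names below (`GeoFooOp`, `st_foo`, `geo_foo_op_impl`,
`st_foo_udf`) are illustrative stand-ins, not real BigFrames code; only the
base classes, helpers, and decorators named in the steps above are assumed to
exist.

```python
import dataclasses

import bigframes.dtypes as dtypes
from bigframes.operations import base_ops


# Step 1 (bigframes/operations/geo_ops.py): the operation dataclass.
@dataclasses.dataclass(frozen=True)
class GeoFooOp(base_ops.UnaryOp):
    name = "geo_foo"

    def output_type(self, *input_types):
        # Hypothetical: ST_FOO returns FLOAT64 regardless of input type.
        return dtypes.FLOAT_DTYPE


# Step 4 (bigframes/bigquery/_operations/geo.py): the SQL-like entry point.
def st_foo(series):
    """Hypothetical wrapper around a BigQuery ST_FOO function."""
    return series._apply_unary_op(GeoFooOp())


# Step 5 (bigframes/core/compile/scalar_op_compiler.py), shown as comments
# because the registration decorator and Ibis UDF live in that module:
#
# @scalar_op_compiler.register_unary_op(GeoFooOp)
# def geo_foo_op_impl(x):
#     return st_foo_udf(x)  # st_foo_udf defined via @ibis_udf.scalar.builtin
```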
## Constraints

- Only add git commits. Do not change git history.
- Follow the spec file for development.
- Check off items in the "Acceptance criteria" and "Detailed steps" sections
  with `[x]` as they are completed.
- Refer back to the spec after each step.

bigframes/_config/auth.py

Lines changed: 57 additions & 0 deletions

New file:

# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import threading
from typing import Optional

import google.auth.credentials
import google.auth.transport.requests
import pydata_google_auth

_SCOPES = ["https://www.googleapis.com/auth/cloud-platform"]

# Put the lock here rather than in BigQueryOptions so that BigQueryOptions
# remains deepcopy-able.
_AUTH_LOCK = threading.Lock()
_cached_credentials: Optional[google.auth.credentials.Credentials] = None
_cached_project_default: Optional[str] = None


def get_default_credentials_with_project() -> tuple[
    google.auth.credentials.Credentials, Optional[str]
]:
    global _AUTH_LOCK, _cached_credentials, _cached_project_default

    with _AUTH_LOCK:
        if _cached_credentials is not None:
            return _cached_credentials, _cached_project_default

        _cached_credentials, _cached_project_default = pydata_google_auth.default(
            scopes=_SCOPES, use_local_webserver=False
        )

        # Ensure an access token is available.
        _cached_credentials.refresh(google.auth.transport.requests.Request())

        return _cached_credentials, _cached_project_default


def reset_default_credentials_and_project():
    global _AUTH_LOCK, _cached_credentials, _cached_project_default

    with _AUTH_LOCK:
        _cached_credentials = None
        _cached_project_default = None
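A quick usage sketch of the cache above (behavior inferred from the new
module's code): the first call runs `pydata_google_auth.default` and refreshes
a token, later calls return the cached pair, and the reset function forces the
next call to re-authenticate.

```python
from bigframes._config import auth

creds, project = auth.get_default_credentials_with_project()  # auth flow + token refresh
creds_again, _ = auth.get_default_credentials_with_project()  # served from the cache
assert creds is creds_again

auth.reset_default_credentials_and_project()  # next call re-authenticates
```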

bigframes/_config/display_options.py

Lines changed: 10 additions & 1 deletion

@@ -26,11 +26,16 @@
 class DisplayOptions:
     __doc__ = vendored_pandas_config.display_options_doc

+    # Options borrowed from pandas.
     max_columns: int = 20
-    max_rows: int = 25
+    max_rows: int = 10
+    precision: int = 6
+
+    # Options unique to BigQuery DataFrames.
     progress_bar: Optional[str] = "auto"
     repr_mode: Literal["head", "deferred", "anywidget"] = "head"

+    max_colwidth: Optional[int] = 50
     max_info_columns: int = 100
     max_info_rows: Optional[int] = 200000
     memory_usage: bool = True
@@ -48,10 +53,14 @@ def pandas_repr(display_options: DisplayOptions):
     so that we don't override pandas behavior.
     """
     with pd.option_context(
+        "display.max_colwidth",
+        display_options.max_colwidth,
         "display.max_columns",
         display_options.max_columns,
         "display.max_rows",
         display_options.max_rows,
+        "display.precision",
+        display_options.precision,
         "display.show_dimensions",
         True,
     ) as pandas_context:
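A small sketch of what the new fields do (the `bigframes.pandas` options
accessor path is an assumption; the fields themselves are the ones added
above): each one is forwarded to the matching pandas display option whenever a
repr is rendered via `pandas_repr`.

```python
import bigframes.pandas as bpd

# Assumed accessor path for the DisplayOptions fields added in this commit.
bpd.options.display.max_rows = 10      # new default (was 25)
bpd.options.display.precision = 6      # forwarded to "display.precision"
bpd.options.display.max_colwidth = 50  # forwarded to "display.max_colwidth"
```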

bigframes/_importing.py

Lines changed: 8 additions & 3 deletions

@@ -14,6 +14,7 @@
 import importlib
 from types import ModuleType

+import numpy
 from packaging import version

 # Keep this in sync with setup.py
@@ -22,9 +23,13 @@

 def import_polars() -> ModuleType:
     polars_module = importlib.import_module("polars")
-    imported_version = version.Version(polars_module.build_info()["version"])
-    if imported_version < POLARS_MIN_VERSION:
+    # Check for necessary methods instead of the version number because we
+    # can't trust the polars version until
+    # https://github.com/pola-rs/polars/issues/23940 is fixed.
+    try:
+        polars_module.lit(numpy.int64(100), dtype=polars_module.Int64())
+    except TypeError:
         raise ImportError(
-            f"Imported polars version: {imported_version} is below the minimum version: {POLARS_MIN_VERSION}"
+            f"Imported polars version is likely below the minimum version: {POLARS_MIN_VERSION}"
         )
     return polars_module

bigframes/bigquery/__init__.py

Lines changed: 49 additions & 28 deletions

@@ -16,6 +16,9 @@
 such as array functions:
 https://cloud.google.com/bigquery/docs/reference/standard-sql/array_functions. """

+import sys
+
+from bigframes.bigquery._operations import ai
 from bigframes.bigquery._operations.approx_agg import approx_top_count
 from bigframes.bigquery._operations.array import (
     array_agg,
@@ -29,6 +32,9 @@
 )
 from bigframes.bigquery._operations.geo import (
     st_area,
+    st_buffer,
+    st_centroid,
+    st_convexhull,
     st_difference,
     st_distance,
     st_intersection,
@@ -45,44 +51,59 @@
     json_value,
     json_value_array,
     parse_json,
+    to_json,
+    to_json_string,
 )
 from bigframes.bigquery._operations.search import create_vector_index, vector_search
 from bigframes.bigquery._operations.sql import sql_scalar
 from bigframes.bigquery._operations.struct import struct
+from bigframes.core import log_adapter

-__all__ = [
+_functions = [
     # approximate aggregate ops
-    "approx_top_count",
+    approx_top_count,
     # array ops
-    "array_length",
-    "array_agg",
-    "array_to_string",
+    array_agg,
+    array_length,
+    array_to_string,
+    # datetime ops
+    unix_micros,
+    unix_millis,
+    unix_seconds,
     # geo ops
-    "st_area",
-    "st_difference",
-    "st_distance",
-    "st_intersection",
-    "st_isclosed",
-    "st_length",
+    st_area,
+    st_buffer,
+    st_centroid,
+    st_convexhull,
+    st_difference,
+    st_distance,
+    st_intersection,
+    st_isclosed,
+    st_length,
     # json ops
-    "json_extract",
-    "json_extract_array",
-    "json_extract_string_array",
-    "json_query",
-    "json_query_array",
-    "json_set",
-    "json_value",
-    "json_value_array",
-    "parse_json",
+    json_extract,
+    json_extract_array,
+    json_extract_string_array,
+    json_query,
+    json_query_array,
+    json_set,
+    json_value,
+    json_value_array,
+    parse_json,
+    to_json,
+    to_json_string,
     # search ops
-    "create_vector_index",
-    "vector_search",
+    create_vector_index,
+    vector_search,
     # sql ops
-    "sql_scalar",
+    sql_scalar,
     # struct ops
-    "struct",
-    # datetime ops
-    "unix_micros",
-    "unix_millis",
-    "unix_seconds",
+    struct,
 ]
+
+__all__ = [f.__name__ for f in _functions] + ["ai"]
+
+_module = sys.modules[__name__]
+for f in _functions:
+    _decorated_object = log_adapter.method_logger(f, custom_base_name="bigquery")
+    setattr(_module, f.__name__, _decorated_object)
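In short, `__all__` moves from a list of name strings to a list derived from
the function objects themselves, so each export can be wrapped in
`log_adapter.method_logger` and re-bound on the module. A stand-in sketch of
the pattern (the `print` and the placeholder `struct` are illustrative;
`method_logger` here mimics, but is not, the real BigFrames helper):

```python
import functools
import sys


def method_logger(func, custom_base_name: str):
    """Stand-in for bigframes.core.log_adapter.method_logger."""

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        print(f"{custom_base_name}.{func.__name__}")  # placeholder logging
        return func(*args, **kwargs)

    return wrapper


def struct(fields):
    ...  # placeholder for a real exported function


_functions = [struct]
__all__ = [f.__name__ for f in _functions]

# Re-bind each export on this module so callers get the wrapped version.
_module = sys.modules[__name__]
for f in _functions:
    setattr(_module, f.__name__, method_logger(f, custom_base_name="bigquery"))
```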
