test: re-enable Multimodal tests (#2235)

GarrettWu · web-flow · commit 4318d66bbfbf · 2025-11-06T13:54:01.000-08:00
* Revert "fix: Correct connection normalization in blob system tests (#2222)" This reverts commit a0e1e50. * fix
diff --git a/tests/system/conftest.py b/tests/system/conftest.py
@@ -70,23 +70,6 @@ def _hash_digest_file(hasher, filepath):
             hasher.update(chunk)
 
 
-@pytest.fixture(scope="session")
-def normalize_connection_id():
-    """Normalizes the connection ID by casefolding only the LOCATION component.
-
-    Connection format: PROJECT.LOCATION.CONNECTION_NAME
-    Only LOCATION is case-insensitive; PROJECT and CONNECTION_NAME must be lowercase.
-    """
-
-    def normalize(connection_id: str) -> str:
-        parts = connection_id.split(".")
-        if len(parts) == 3:
-            return f"{parts[0]}.{parts[1].casefold()}.{parts[2]}"
-        return connection_id  # Return unchanged if invalid format
-
-    return normalize
-
-
 @pytest.fixture(scope="session")
 def tokyo_location() -> str:
     return TOKYO_LOCATION
@@ -212,7 +195,8 @@ def bq_connection_name() -> str:
 
 @pytest.fixture(scope="session")
 def bq_connection(bigquery_client: bigquery.Client, bq_connection_name: str) -> str:
-    return f"{bigquery_client.project}.{bigquery_client.location}.{bq_connection_name}"
+    # TODO(b/458169181): LOCATION casefold is needed for the multimodal backend bug. Remove after the bug is fixed.
+    return f"{bigquery_client.project}.{bigquery_client.location.casefold()}.{bq_connection_name}"
 
 
 @pytest.fixture(scope="session", autouse=True)
diff --git a/tests/system/large/blob/test_function.py b/tests/system/large/blob/test_function.py
@@ -52,7 +52,6 @@ def images_output_uris(images_output_folder: str) -> list[str]:
     ]
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_blob_exif(
     bq_connection: str,
     session: bigframes.Session,
@@ -104,7 +103,6 @@ def test_blob_exif_verbose(
     assert content_series.dtype == dtypes.JSON_DTYPE
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_blob_image_blur_to_series(
     images_mm_df: bpd.DataFrame,
     bq_connection: str,
@@ -138,7 +136,6 @@ def test_blob_image_blur_to_series(
     assert not actual.blob.size().isna().any()
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_blob_image_blur_to_series_verbose(
     images_mm_df: bpd.DataFrame,
     bq_connection: str,
@@ -166,7 +163,6 @@ def test_blob_image_blur_to_series_verbose(
     assert not actual.blob.size().isna().any()
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_blob_image_blur_to_folder(
     images_mm_df: bpd.DataFrame,
     bq_connection: str,
@@ -199,7 +195,6 @@ def test_blob_image_blur_to_folder(
     assert not actual.blob.size().isna().any()
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_blob_image_blur_to_folder_verbose(
     images_mm_df: bpd.DataFrame,
     bq_connection: str,
@@ -259,7 +254,6 @@ def test_blob_image_blur_to_bq_verbose(images_mm_df: bpd.DataFrame, bq_connectio
     assert content_series.dtype == dtypes.BYTES_DTYPE
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_blob_image_resize_to_series(
     images_mm_df: bpd.DataFrame,
     bq_connection: str,
@@ -297,7 +291,6 @@ def test_blob_image_resize_to_series(
     assert not actual.blob.size().isna().any()
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_blob_image_resize_to_series_verbose(
     images_mm_df: bpd.DataFrame,
     bq_connection: str,
@@ -332,7 +325,6 @@ def test_blob_image_resize_to_series_verbose(
     assert not actual.blob.size().isna().any()
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_blob_image_resize_to_folder(
     images_mm_df: bpd.DataFrame,
     bq_connection: str,
@@ -366,7 +358,6 @@ def test_blob_image_resize_to_folder(
     assert not actual.blob.size().isna().any()
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_blob_image_resize_to_folder_verbose(
     images_mm_df: bpd.DataFrame,
     bq_connection: str,
@@ -429,7 +420,6 @@ def test_blob_image_resize_to_bq_verbose(
     assert content_series.dtype == dtypes.BYTES_DTYPE
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_blob_image_normalize_to_series(
     images_mm_df: bpd.DataFrame,
     bq_connection: str,
@@ -502,7 +492,6 @@ def test_blob_image_normalize_to_series_verbose(
     assert hasattr(content_series, "blob")
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_blob_image_normalize_to_folder(
     images_mm_df: bpd.DataFrame,
     bq_connection: str,
@@ -609,7 +598,6 @@ def test_blob_image_normalize_to_bq_verbose(
     assert content_series.dtype == dtypes.BYTES_DTYPE
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_blob_pdf_extract(
     pdf_mm_df: bpd.DataFrame,
     bq_connection: str,
@@ -645,7 +633,6 @@ def test_blob_pdf_extract(
         ), f"Item (verbose=False): Expected keyword '{keyword}' not found in extracted text. "
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_blob_pdf_extract_verbose(
     pdf_mm_df: bpd.DataFrame,
     bq_connection: str,
@@ -683,7 +670,6 @@ def test_blob_pdf_extract_verbose(
         ), f"Item (verbose=True): Expected keyword '{keyword}' not found in extracted text. "
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_blob_pdf_chunk(pdf_mm_df: bpd.DataFrame, bq_connection: str):
     actual = (
         pdf_mm_df["pdf"]
@@ -723,7 +709,6 @@ def test_blob_pdf_chunk(pdf_mm_df: bpd.DataFrame, bq_connection: str):
         ), f"Item (verbose=False): Expected keyword '{keyword}' not found in extracted text. "
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_blob_pdf_chunk_verbose(pdf_mm_df: bpd.DataFrame, bq_connection: str):
     actual = (
         pdf_mm_df["pdf"]
diff --git a/tests/system/small/bigquery/test_ai.py b/tests/system/small/bigquery/test_ai.py
@@ -273,10 +273,11 @@ def test_ai_if(session):
     assert result.dtype == dtypes.BOOL_DTYPE
 
 
-@pytest.mark.skip(reason="b/457416070")
-def test_ai_if_multi_model(session):
+def test_ai_if_multi_model(session, bq_connection):
     df = session.from_glob_path(
-        "gs://bigframes-dev-testing/a_multimodel/images/*", name="image"
+        "gs://bigframes-dev-testing/a_multimodel/images/*",
+        name="image",
+        connection=bq_connection,
     )
 
     result = bbq.ai.if_((df["image"], " contains an animal"))
@@ -294,10 +295,11 @@ def test_ai_classify(session):
     assert result.dtype == dtypes.STRING_DTYPE
 
 
-@pytest.mark.skip(reason="b/457416070")
-def test_ai_classify_multi_model(session):
+def test_ai_classify_multi_model(session, bq_connection):
     df = session.from_glob_path(
-        "gs://bigframes-dev-testing/a_multimodel/images/*", name="image"
+        "gs://bigframes-dev-testing/a_multimodel/images/*",
+        name="image",
+        connection=bq_connection,
     )
 
     result = bbq.ai.classify(df["image"], ["photo", "cartoon"])
diff --git a/tests/system/small/blob/test_io.py b/tests/system/small/blob/test_io.py
@@ -12,36 +12,27 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Callable
 from unittest import mock
 
 import IPython.display
 import pandas as pd
-import pytest
 
 import bigframes
 import bigframes.pandas as bpd
 
 
 def test_blob_create_from_uri_str(
-    bq_connection: str,
-    session: bigframes.Session,
-    images_uris,
-    normalize_connection_id: Callable[[str], str],
+    bq_connection: str, session: bigframes.Session, images_uris
 ):
     uri_series = bpd.Series(images_uris, session=session)
     blob_series = uri_series.str.to_blob(connection=bq_connection)
 
     pd_blob_df = blob_series.struct.explode().to_pandas()
-    pd_blob_df["authorizer"] = pd_blob_df["authorizer"].apply(normalize_connection_id)
     expected_pd_df = pd.DataFrame(
         {
             "uri": images_uris,
             "version": [None, None],
-            "authorizer": [
-                normalize_connection_id(bq_connection),
-                normalize_connection_id(bq_connection),
-            ],
+            "authorizer": [bq_connection.casefold(), bq_connection.casefold()],
             "details": [None, None],
         }
     )
@@ -52,11 +43,7 @@ def test_blob_create_from_uri_str(
 
 
 def test_blob_create_from_glob_path(
-    bq_connection: str,
-    session: bigframes.Session,
-    images_gcs_path,
-    images_uris,
-    normalize_connection_id: Callable[[str], str],
+    bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris
 ):
     blob_df = session.from_glob_path(
         images_gcs_path, connection=bq_connection, name="blob_col"
@@ -68,16 +55,12 @@ def test_blob_create_from_glob_path(
         .sort_values("uri")
         .reset_index(drop=True)
     )
-    pd_blob_df["authorizer"] = pd_blob_df["authorizer"].apply(normalize_connection_id)
 
     expected_df = pd.DataFrame(
         {
             "uri": images_uris,
             "version": [None, None],
-            "authorizer": [
-                normalize_connection_id(bq_connection),
-                normalize_connection_id(bq_connection),
-            ],
+            "authorizer": [bq_connection.casefold(), bq_connection.casefold()],
             "details": [None, None],
         }
     )
@@ -88,11 +71,7 @@ def test_blob_create_from_glob_path(
 
 
 def test_blob_create_read_gbq_object_table(
-    bq_connection: str,
-    session: bigframes.Session,
-    images_gcs_path,
-    images_uris,
-    normalize_connection_id: Callable[[str], str],
+    bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris
 ):
     obj_table = session._create_object_table(images_gcs_path, bq_connection)
 
@@ -104,15 +83,11 @@ def test_blob_create_read_gbq_object_table(
         .sort_values("uri")
         .reset_index(drop=True)
     )
-    pd_blob_df["authorizer"] = pd_blob_df["authorizer"].apply(normalize_connection_id)
     expected_df = pd.DataFrame(
         {
             "uri": images_uris,
             "version": [None, None],
-            "authorizer": [
-                normalize_connection_id(bq_connection),
-                normalize_connection_id(bq_connection),
-            ],
+            "authorizer": [bq_connection.casefold(), bq_connection.casefold()],
             "details": [None, None],
         }
     )
@@ -122,7 +97,6 @@ def test_blob_create_read_gbq_object_table(
     )
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_display_images(monkeypatch, images_mm_df: bpd.DataFrame):
     mock_display = mock.Mock()
     monkeypatch.setattr(IPython.display, "display", mock_display)
diff --git a/tests/system/small/blob/test_properties.py b/tests/system/small/blob/test_properties.py
@@ -12,12 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from __future__ import annotations
-
-from typing import Callable
-
 import pandas as pd
-import pytest
 
 import bigframes.dtypes as dtypes
 import bigframes.pandas as bpd
@@ -32,27 +27,17 @@ def test_blob_uri(images_uris: list[str], images_mm_df: bpd.DataFrame):
     )
 
 
-def test_blob_authorizer(
-    images_mm_df: bpd.DataFrame,
-    bq_connection: str,
-    normalize_connection_id: Callable[[str], str],
-):
+def test_blob_authorizer(images_mm_df: bpd.DataFrame, bq_connection: str):
     actual = images_mm_df["blob_col"].blob.authorizer().to_pandas()
-    actual = actual.apply(normalize_connection_id)
     expected = pd.Series(
-        [
-            normalize_connection_id(bq_connection),
-            normalize_connection_id(bq_connection),
-        ],
-        name="authorizer",
+        [bq_connection.casefold(), bq_connection.casefold()], name="authorizer"
     )
 
     pd.testing.assert_series_equal(
         actual, expected, check_dtype=False, check_index_type=False
     )
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_blob_version(images_mm_df: bpd.DataFrame):
     actual = images_mm_df["blob_col"].blob.version().to_pandas()
     expected = pd.Series(["1753907851152593", "1753907851111538"], name="version")
@@ -62,7 +47,6 @@ def test_blob_version(images_mm_df: bpd.DataFrame):
     )
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_blob_metadata(images_mm_df: bpd.DataFrame):
     actual = images_mm_df["blob_col"].blob.metadata().to_pandas()
     expected = pd.Series(
@@ -87,7 +71,6 @@ def test_blob_metadata(images_mm_df: bpd.DataFrame):
     pd.testing.assert_series_equal(actual, expected)
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_blob_content_type(images_mm_df: bpd.DataFrame):
     actual = images_mm_df["blob_col"].blob.content_type().to_pandas()
     expected = pd.Series(["image/jpeg", "image/jpeg"], name="content_type")
@@ -97,7 +80,6 @@ def test_blob_content_type(images_mm_df: bpd.DataFrame):
     )
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_blob_md5_hash(images_mm_df: bpd.DataFrame):
     actual = images_mm_df["blob_col"].blob.md5_hash().to_pandas()
     expected = pd.Series(
@@ -110,7 +92,6 @@ def test_blob_md5_hash(images_mm_df: bpd.DataFrame):
     )
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_blob_size(images_mm_df: bpd.DataFrame):
     actual = images_mm_df["blob_col"].blob.size().to_pandas()
     expected = pd.Series([338390, 43333], name="size")
@@ -120,7 +101,6 @@ def test_blob_size(images_mm_df: bpd.DataFrame):
     )
 
 
-@pytest.mark.skip(reason="b/457416070")
 def test_blob_updated(images_mm_df: bpd.DataFrame):
     actual = images_mm_df["blob_col"].blob.updated().to_pandas()
     expected = pd.Series(
diff --git a/tests/system/small/ml/test_multimodal_llm.py b/tests/system/small/ml/test_multimodal_llm.py
@@ -21,7 +21,6 @@
 from bigframes.testing import utils
 
 
-@pytest.mark.skip(reason="b/457416070")
 @pytest.mark.flaky(retries=2)
 def test_multimodal_embedding_generator_predict_default_params_success(
     images_mm_df, session, bq_connection