diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 70a379fe0e..2f08a695e9 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -70,6 +70,23 @@ def _hash_digest_file(hasher, filepath): hasher.update(chunk) +@pytest.fixture(scope="session") +def normalize_connection_id(): + """Normalizes the connection ID by casefolding only the LOCATION component. + + Connection format: PROJECT.LOCATION.CONNECTION_NAME + Only LOCATION is case-insensitive; PROJECT and CONNECTION_NAME must be lowercase. + """ + + def normalize(connection_id: str) -> str: + parts = connection_id.split(".") + if len(parts) == 3: + return f"{parts[0]}.{parts[1].casefold()}.{parts[2]}" + return connection_id # Return unchanged if invalid format + + return normalize + + @pytest.fixture(scope="session") def tokyo_location() -> str: return TOKYO_LOCATION diff --git a/tests/system/large/blob/test_function.py b/tests/system/large/blob/test_function.py index 7963fabd0b..9ba8126dc6 100644 --- a/tests/system/large/blob/test_function.py +++ b/tests/system/large/blob/test_function.py @@ -52,6 +52,7 @@ def images_output_uris(images_output_folder: str) -> list[str]: ] +@pytest.mark.skip(reason="b/457416070") def test_blob_exif( bq_connection: str, session: bigframes.Session, @@ -103,6 +104,7 @@ def test_blob_exif_verbose( assert content_series.dtype == dtypes.JSON_DTYPE +@pytest.mark.skip(reason="b/457416070") def test_blob_image_blur_to_series( images_mm_df: bpd.DataFrame, bq_connection: str, @@ -136,6 +138,7 @@ def test_blob_image_blur_to_series( assert not actual.blob.size().isna().any() +@pytest.mark.skip(reason="b/457416070") def test_blob_image_blur_to_series_verbose( images_mm_df: bpd.DataFrame, bq_connection: str, @@ -163,6 +166,7 @@ def test_blob_image_blur_to_series_verbose( assert not actual.blob.size().isna().any() +@pytest.mark.skip(reason="b/457416070") def test_blob_image_blur_to_folder( images_mm_df: bpd.DataFrame, bq_connection: str, @@ -195,6 +199,7 @@ def test_blob_image_blur_to_folder( assert not actual.blob.size().isna().any() +@pytest.mark.skip(reason="b/457416070") def test_blob_image_blur_to_folder_verbose( images_mm_df: bpd.DataFrame, bq_connection: str, @@ -254,6 +259,7 @@ def test_blob_image_blur_to_bq_verbose(images_mm_df: bpd.DataFrame, bq_connectio assert content_series.dtype == dtypes.BYTES_DTYPE +@pytest.mark.skip(reason="b/457416070") def test_blob_image_resize_to_series( images_mm_df: bpd.DataFrame, bq_connection: str, @@ -291,6 +297,7 @@ def test_blob_image_resize_to_series( assert not actual.blob.size().isna().any() +@pytest.mark.skip(reason="b/457416070") def test_blob_image_resize_to_series_verbose( images_mm_df: bpd.DataFrame, bq_connection: str, @@ -325,6 +332,7 @@ def test_blob_image_resize_to_series_verbose( assert not actual.blob.size().isna().any() +@pytest.mark.skip(reason="b/457416070") def test_blob_image_resize_to_folder( images_mm_df: bpd.DataFrame, bq_connection: str, @@ -358,6 +366,7 @@ def test_blob_image_resize_to_folder( assert not actual.blob.size().isna().any() +@pytest.mark.skip(reason="b/457416070") def test_blob_image_resize_to_folder_verbose( images_mm_df: bpd.DataFrame, bq_connection: str, @@ -420,6 +429,7 @@ def test_blob_image_resize_to_bq_verbose( assert content_series.dtype == dtypes.BYTES_DTYPE +@pytest.mark.skip(reason="b/457416070") def test_blob_image_normalize_to_series( images_mm_df: bpd.DataFrame, bq_connection: str, @@ -492,6 +502,7 @@ def test_blob_image_normalize_to_series_verbose( assert hasattr(content_series, "blob") +@pytest.mark.skip(reason="b/457416070") def test_blob_image_normalize_to_folder( images_mm_df: bpd.DataFrame, bq_connection: str, @@ -598,6 +609,7 @@ def test_blob_image_normalize_to_bq_verbose( assert content_series.dtype == dtypes.BYTES_DTYPE +@pytest.mark.skip(reason="b/457416070") def test_blob_pdf_extract( pdf_mm_df: bpd.DataFrame, bq_connection: str, @@ -633,6 +645,7 @@ def test_blob_pdf_extract( ), f"Item (verbose=False): Expected keyword '{keyword}' not found in extracted text. " +@pytest.mark.skip(reason="b/457416070") def test_blob_pdf_extract_verbose( pdf_mm_df: bpd.DataFrame, bq_connection: str, @@ -670,6 +683,7 @@ def test_blob_pdf_extract_verbose( ), f"Item (verbose=True): Expected keyword '{keyword}' not found in extracted text. " +@pytest.mark.skip(reason="b/457416070") def test_blob_pdf_chunk(pdf_mm_df: bpd.DataFrame, bq_connection: str): actual = ( pdf_mm_df["pdf"] @@ -709,6 +723,7 @@ def test_blob_pdf_chunk(pdf_mm_df: bpd.DataFrame, bq_connection: str): ), f"Item (verbose=False): Expected keyword '{keyword}' not found in extracted text. " +@pytest.mark.skip(reason="b/457416070") def test_blob_pdf_chunk_verbose(pdf_mm_df: bpd.DataFrame, bq_connection: str): actual = ( pdf_mm_df["pdf"] diff --git a/tests/system/small/bigquery/test_ai.py b/tests/system/small/bigquery/test_ai.py index 0c7c40031b..6df4a7a528 100644 --- a/tests/system/small/bigquery/test_ai.py +++ b/tests/system/small/bigquery/test_ai.py @@ -273,6 +273,7 @@ def test_ai_if(session): assert result.dtype == dtypes.BOOL_DTYPE +@pytest.mark.skip(reason="b/457416070") def test_ai_if_multi_model(session): df = session.from_glob_path( "gs://bigframes-dev-testing/a_multimodel/images/*", name="image" @@ -293,6 +294,7 @@ def test_ai_classify(session): assert result.dtype == dtypes.STRING_DTYPE +@pytest.mark.skip(reason="b/457416070") def test_ai_classify_multi_model(session): df = session.from_glob_path( "gs://bigframes-dev-testing/a_multimodel/images/*", name="image" diff --git a/tests/system/small/blob/test_io.py b/tests/system/small/blob/test_io.py index 5ada4fabb0..5da113a5e1 100644 --- a/tests/system/small/blob/test_io.py +++ b/tests/system/small/blob/test_io.py @@ -12,27 +12,36 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Callable from unittest import mock import IPython.display import pandas as pd +import pytest import bigframes import bigframes.pandas as bpd def test_blob_create_from_uri_str( - bq_connection: str, session: bigframes.Session, images_uris + bq_connection: str, + session: bigframes.Session, + images_uris, + normalize_connection_id: Callable[[str], str], ): uri_series = bpd.Series(images_uris, session=session) blob_series = uri_series.str.to_blob(connection=bq_connection) pd_blob_df = blob_series.struct.explode().to_pandas() + pd_blob_df["authorizer"] = pd_blob_df["authorizer"].apply(normalize_connection_id) expected_pd_df = pd.DataFrame( { "uri": images_uris, "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], + "authorizer": [ + normalize_connection_id(bq_connection), + normalize_connection_id(bq_connection), + ], "details": [None, None], } ) @@ -43,7 +52,11 @@ def test_blob_create_from_uri_str( def test_blob_create_from_glob_path( - bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris + bq_connection: str, + session: bigframes.Session, + images_gcs_path, + images_uris, + normalize_connection_id: Callable[[str], str], ): blob_df = session.from_glob_path( images_gcs_path, connection=bq_connection, name="blob_col" @@ -55,12 +68,16 @@ def test_blob_create_from_glob_path( .sort_values("uri") .reset_index(drop=True) ) + pd_blob_df["authorizer"] = pd_blob_df["authorizer"].apply(normalize_connection_id) expected_df = pd.DataFrame( { "uri": images_uris, "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], + "authorizer": [ + normalize_connection_id(bq_connection), + normalize_connection_id(bq_connection), + ], "details": [None, None], } ) @@ -71,7 +88,11 @@ def test_blob_create_from_glob_path( def test_blob_create_read_gbq_object_table( - bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris + bq_connection: str, + session: bigframes.Session, + images_gcs_path, + images_uris, + normalize_connection_id: Callable[[str], str], ): obj_table = session._create_object_table(images_gcs_path, bq_connection) @@ -83,11 +104,15 @@ def test_blob_create_read_gbq_object_table( .sort_values("uri") .reset_index(drop=True) ) + pd_blob_df["authorizer"] = pd_blob_df["authorizer"].apply(normalize_connection_id) expected_df = pd.DataFrame( { "uri": images_uris, "version": [None, None], - "authorizer": [bq_connection.casefold(), bq_connection.casefold()], + "authorizer": [ + normalize_connection_id(bq_connection), + normalize_connection_id(bq_connection), + ], "details": [None, None], } ) @@ -97,6 +122,7 @@ def test_blob_create_read_gbq_object_table( ) +@pytest.mark.skip(reason="b/457416070") def test_display_images(monkeypatch, images_mm_df: bpd.DataFrame): mock_display = mock.Mock() monkeypatch.setattr(IPython.display, "display", mock_display) diff --git a/tests/system/small/blob/test_properties.py b/tests/system/small/blob/test_properties.py index 47d4d2aa04..c411c01f13 100644 --- a/tests/system/small/blob/test_properties.py +++ b/tests/system/small/blob/test_properties.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations + +from typing import Callable + import pandas as pd +import pytest import bigframes.dtypes as dtypes import bigframes.pandas as bpd @@ -27,10 +32,19 @@ def test_blob_uri(images_uris: list[str], images_mm_df: bpd.DataFrame): ) -def test_blob_authorizer(images_mm_df: bpd.DataFrame, bq_connection: str): +def test_blob_authorizer( + images_mm_df: bpd.DataFrame, + bq_connection: str, + normalize_connection_id: Callable[[str], str], +): actual = images_mm_df["blob_col"].blob.authorizer().to_pandas() + actual = actual.apply(normalize_connection_id) expected = pd.Series( - [bq_connection.casefold(), bq_connection.casefold()], name="authorizer" + [ + normalize_connection_id(bq_connection), + normalize_connection_id(bq_connection), + ], + name="authorizer", ) pd.testing.assert_series_equal( @@ -38,6 +52,7 @@ def test_blob_authorizer(images_mm_df: bpd.DataFrame, bq_connection: str): ) +@pytest.mark.skip(reason="b/457416070") def test_blob_version(images_mm_df: bpd.DataFrame): actual = images_mm_df["blob_col"].blob.version().to_pandas() expected = pd.Series(["1753907851152593", "1753907851111538"], name="version") @@ -47,6 +62,7 @@ def test_blob_version(images_mm_df: bpd.DataFrame): ) +@pytest.mark.skip(reason="b/457416070") def test_blob_metadata(images_mm_df: bpd.DataFrame): actual = images_mm_df["blob_col"].blob.metadata().to_pandas() expected = pd.Series( @@ -71,6 +87,7 @@ def test_blob_metadata(images_mm_df: bpd.DataFrame): pd.testing.assert_series_equal(actual, expected) +@pytest.mark.skip(reason="b/457416070") def test_blob_content_type(images_mm_df: bpd.DataFrame): actual = images_mm_df["blob_col"].blob.content_type().to_pandas() expected = pd.Series(["image/jpeg", "image/jpeg"], name="content_type") @@ -80,6 +97,7 @@ def test_blob_content_type(images_mm_df: bpd.DataFrame): ) +@pytest.mark.skip(reason="b/457416070") def test_blob_md5_hash(images_mm_df: bpd.DataFrame): actual = images_mm_df["blob_col"].blob.md5_hash().to_pandas() expected = pd.Series( @@ -92,6 +110,7 @@ def test_blob_md5_hash(images_mm_df: bpd.DataFrame): ) +@pytest.mark.skip(reason="b/457416070") def test_blob_size(images_mm_df: bpd.DataFrame): actual = images_mm_df["blob_col"].blob.size().to_pandas() expected = pd.Series([338390, 43333], name="size") @@ -101,6 +120,7 @@ def test_blob_size(images_mm_df: bpd.DataFrame): ) +@pytest.mark.skip(reason="b/457416070") def test_blob_updated(images_mm_df: bpd.DataFrame): actual = images_mm_df["blob_col"].blob.updated().to_pandas() expected = pd.Series( diff --git a/tests/system/small/ml/test_multimodal_llm.py b/tests/system/small/ml/test_multimodal_llm.py index 48a69f522c..fe34f9c02b 100644 --- a/tests/system/small/ml/test_multimodal_llm.py +++ b/tests/system/small/ml/test_multimodal_llm.py @@ -21,6 +21,7 @@ from bigframes.testing import utils +@pytest.mark.skip(reason="b/457416070") @pytest.mark.flaky(retries=2) def test_multimodal_embedding_generator_predict_default_params_success( images_mm_df, session, bq_connection