Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions tests/system/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,23 @@ def _hash_digest_file(hasher, filepath):
hasher.update(chunk)


@pytest.fixture(scope="session")
def normalize_connection_id():
    """Provide a helper that casefolds only the LOCATION part of a connection ID.

    Connection format: PROJECT.LOCATION.CONNECTION_NAME

    Only the LOCATION component is casefolded; PROJECT and CONNECTION_NAME
    are returned exactly as given.

    Returns:
        A callable taking a connection ID string and returning the
        normalized string. IDs with fewer than three dot-separated
        components are returned unchanged.
    """

    def normalize(connection_id: str) -> str:
        # Split from the right so that a PROJECT component which itself
        # contains dots (e.g. a domain-scoped project such as
        # "example.com:my-project") is kept intact as the first element.
        parts = connection_id.rsplit(".", 2)
        if len(parts) != 3:
            return connection_id  # Return unchanged if invalid format
        project, location, connection_name = parts
        return f"{project}.{location.casefold()}.{connection_name}"

    return normalize


@pytest.fixture(scope="session")
def tokyo_location() -> str:
    """Expose the module-level ``TOKYO_LOCATION`` constant as a session fixture."""
    location: str = TOKYO_LOCATION
    return location
Expand Down
15 changes: 15 additions & 0 deletions tests/system/large/blob/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def images_output_uris(images_output_folder: str) -> list[str]:
]


@pytest.mark.skip(reason="b/457416070")
def test_blob_exif(
bq_connection: str,
session: bigframes.Session,
Expand Down Expand Up @@ -103,6 +104,7 @@ def test_blob_exif_verbose(
assert content_series.dtype == dtypes.JSON_DTYPE


@pytest.mark.skip(reason="b/457416070")
def test_blob_image_blur_to_series(
images_mm_df: bpd.DataFrame,
bq_connection: str,
Expand Down Expand Up @@ -136,6 +138,7 @@ def test_blob_image_blur_to_series(
assert not actual.blob.size().isna().any()


@pytest.mark.skip(reason="b/457416070")
def test_blob_image_blur_to_series_verbose(
images_mm_df: bpd.DataFrame,
bq_connection: str,
Expand Down Expand Up @@ -163,6 +166,7 @@ def test_blob_image_blur_to_series_verbose(
assert not actual.blob.size().isna().any()


@pytest.mark.skip(reason="b/457416070")
def test_blob_image_blur_to_folder(
images_mm_df: bpd.DataFrame,
bq_connection: str,
Expand Down Expand Up @@ -195,6 +199,7 @@ def test_blob_image_blur_to_folder(
assert not actual.blob.size().isna().any()


@pytest.mark.skip(reason="b/457416070")
def test_blob_image_blur_to_folder_verbose(
images_mm_df: bpd.DataFrame,
bq_connection: str,
Expand Down Expand Up @@ -254,6 +259,7 @@ def test_blob_image_blur_to_bq_verbose(images_mm_df: bpd.DataFrame, bq_connectio
assert content_series.dtype == dtypes.BYTES_DTYPE


@pytest.mark.skip(reason="b/457416070")
def test_blob_image_resize_to_series(
images_mm_df: bpd.DataFrame,
bq_connection: str,
Expand Down Expand Up @@ -291,6 +297,7 @@ def test_blob_image_resize_to_series(
assert not actual.blob.size().isna().any()


@pytest.mark.skip(reason="b/457416070")
def test_blob_image_resize_to_series_verbose(
images_mm_df: bpd.DataFrame,
bq_connection: str,
Expand Down Expand Up @@ -325,6 +332,7 @@ def test_blob_image_resize_to_series_verbose(
assert not actual.blob.size().isna().any()


@pytest.mark.skip(reason="b/457416070")
def test_blob_image_resize_to_folder(
images_mm_df: bpd.DataFrame,
bq_connection: str,
Expand Down Expand Up @@ -358,6 +366,7 @@ def test_blob_image_resize_to_folder(
assert not actual.blob.size().isna().any()


@pytest.mark.skip(reason="b/457416070")
def test_blob_image_resize_to_folder_verbose(
images_mm_df: bpd.DataFrame,
bq_connection: str,
Expand Down Expand Up @@ -420,6 +429,7 @@ def test_blob_image_resize_to_bq_verbose(
assert content_series.dtype == dtypes.BYTES_DTYPE


@pytest.mark.skip(reason="b/457416070")
def test_blob_image_normalize_to_series(
images_mm_df: bpd.DataFrame,
bq_connection: str,
Expand Down Expand Up @@ -492,6 +502,7 @@ def test_blob_image_normalize_to_series_verbose(
assert hasattr(content_series, "blob")


@pytest.mark.skip(reason="b/457416070")
def test_blob_image_normalize_to_folder(
images_mm_df: bpd.DataFrame,
bq_connection: str,
Expand Down Expand Up @@ -598,6 +609,7 @@ def test_blob_image_normalize_to_bq_verbose(
assert content_series.dtype == dtypes.BYTES_DTYPE


@pytest.mark.skip(reason="b/457416070")
def test_blob_pdf_extract(
pdf_mm_df: bpd.DataFrame,
bq_connection: str,
Expand Down Expand Up @@ -633,6 +645,7 @@ def test_blob_pdf_extract(
), f"Item (verbose=False): Expected keyword '{keyword}' not found in extracted text. "


@pytest.mark.skip(reason="b/457416070")
def test_blob_pdf_extract_verbose(
pdf_mm_df: bpd.DataFrame,
bq_connection: str,
Expand Down Expand Up @@ -670,6 +683,7 @@ def test_blob_pdf_extract_verbose(
), f"Item (verbose=True): Expected keyword '{keyword}' not found in extracted text. "


@pytest.mark.skip(reason="b/457416070")
def test_blob_pdf_chunk(pdf_mm_df: bpd.DataFrame, bq_connection: str):
actual = (
pdf_mm_df["pdf"]
Expand Down Expand Up @@ -709,6 +723,7 @@ def test_blob_pdf_chunk(pdf_mm_df: bpd.DataFrame, bq_connection: str):
), f"Item (verbose=False): Expected keyword '{keyword}' not found in extracted text. "


@pytest.mark.skip(reason="b/457416070")
def test_blob_pdf_chunk_verbose(pdf_mm_df: bpd.DataFrame, bq_connection: str):
actual = (
pdf_mm_df["pdf"]
Expand Down
2 changes: 2 additions & 0 deletions tests/system/small/bigquery/test_ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ def test_ai_if(session):
assert result.dtype == dtypes.BOOL_DTYPE


@pytest.mark.skip(reason="b/457416070")
def test_ai_if_multi_model(session):
df = session.from_glob_path(
"gs://bigframes-dev-testing/a_multimodel/images/*", name="image"
Expand All @@ -293,6 +294,7 @@ def test_ai_classify(session):
assert result.dtype == dtypes.STRING_DTYPE


@pytest.mark.skip(reason="b/457416070")
def test_ai_classify_multi_model(session):
df = session.from_glob_path(
"gs://bigframes-dev-testing/a_multimodel/images/*", name="image"
Expand Down
38 changes: 32 additions & 6 deletions tests/system/small/blob/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,36 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Callable
from unittest import mock

import IPython.display
import pandas as pd
import pytest

import bigframes
import bigframes.pandas as bpd


def test_blob_create_from_uri_str(
bq_connection: str, session: bigframes.Session, images_uris
bq_connection: str,
session: bigframes.Session,
images_uris,
normalize_connection_id: Callable[[str], str],
):
uri_series = bpd.Series(images_uris, session=session)
blob_series = uri_series.str.to_blob(connection=bq_connection)

pd_blob_df = blob_series.struct.explode().to_pandas()
pd_blob_df["authorizer"] = pd_blob_df["authorizer"].apply(normalize_connection_id)
expected_pd_df = pd.DataFrame(
{
"uri": images_uris,
"version": [None, None],
"authorizer": [bq_connection.casefold(), bq_connection.casefold()],
"authorizer": [
normalize_connection_id(bq_connection),
normalize_connection_id(bq_connection),
],
"details": [None, None],
}
)
Expand All @@ -43,7 +52,11 @@ def test_blob_create_from_uri_str(


def test_blob_create_from_glob_path(
bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris
bq_connection: str,
session: bigframes.Session,
images_gcs_path,
images_uris,
normalize_connection_id: Callable[[str], str],
):
blob_df = session.from_glob_path(
images_gcs_path, connection=bq_connection, name="blob_col"
Expand All @@ -55,12 +68,16 @@ def test_blob_create_from_glob_path(
.sort_values("uri")
.reset_index(drop=True)
)
pd_blob_df["authorizer"] = pd_blob_df["authorizer"].apply(normalize_connection_id)

expected_df = pd.DataFrame(
{
"uri": images_uris,
"version": [None, None],
"authorizer": [bq_connection.casefold(), bq_connection.casefold()],
"authorizer": [
normalize_connection_id(bq_connection),
normalize_connection_id(bq_connection),
],
"details": [None, None],
}
)
Expand All @@ -71,7 +88,11 @@ def test_blob_create_from_glob_path(


def test_blob_create_read_gbq_object_table(
bq_connection: str, session: bigframes.Session, images_gcs_path, images_uris
bq_connection: str,
session: bigframes.Session,
images_gcs_path,
images_uris,
normalize_connection_id: Callable[[str], str],
):
obj_table = session._create_object_table(images_gcs_path, bq_connection)

Expand All @@ -83,11 +104,15 @@ def test_blob_create_read_gbq_object_table(
.sort_values("uri")
.reset_index(drop=True)
)
pd_blob_df["authorizer"] = pd_blob_df["authorizer"].apply(normalize_connection_id)
expected_df = pd.DataFrame(
{
"uri": images_uris,
"version": [None, None],
"authorizer": [bq_connection.casefold(), bq_connection.casefold()],
"authorizer": [
normalize_connection_id(bq_connection),
normalize_connection_id(bq_connection),
],
"details": [None, None],
}
)
Expand All @@ -97,6 +122,7 @@ def test_blob_create_read_gbq_object_table(
)


@pytest.mark.skip(reason="b/457416070")
def test_display_images(monkeypatch, images_mm_df: bpd.DataFrame):
mock_display = mock.Mock()
monkeypatch.setattr(IPython.display, "display", mock_display)
Expand Down
24 changes: 22 additions & 2 deletions tests/system/small/blob/test_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from typing import Callable

import pandas as pd
import pytest

import bigframes.dtypes as dtypes
import bigframes.pandas as bpd
Expand All @@ -27,17 +32,27 @@ def test_blob_uri(images_uris: list[str], images_mm_df: bpd.DataFrame):
)


def test_blob_authorizer(images_mm_df: bpd.DataFrame, bq_connection: str):
def test_blob_authorizer(
images_mm_df: bpd.DataFrame,
bq_connection: str,
normalize_connection_id: Callable[[str], str],
):
actual = images_mm_df["blob_col"].blob.authorizer().to_pandas()
actual = actual.apply(normalize_connection_id)
expected = pd.Series(
[bq_connection.casefold(), bq_connection.casefold()], name="authorizer"
[
normalize_connection_id(bq_connection),
normalize_connection_id(bq_connection),
],
name="authorizer",
)

pd.testing.assert_series_equal(
actual, expected, check_dtype=False, check_index_type=False
)


@pytest.mark.skip(reason="b/457416070")
def test_blob_version(images_mm_df: bpd.DataFrame):
actual = images_mm_df["blob_col"].blob.version().to_pandas()
expected = pd.Series(["1753907851152593", "1753907851111538"], name="version")
Expand All @@ -47,6 +62,7 @@ def test_blob_version(images_mm_df: bpd.DataFrame):
)


@pytest.mark.skip(reason="b/457416070")
def test_blob_metadata(images_mm_df: bpd.DataFrame):
actual = images_mm_df["blob_col"].blob.metadata().to_pandas()
expected = pd.Series(
Expand All @@ -71,6 +87,7 @@ def test_blob_metadata(images_mm_df: bpd.DataFrame):
pd.testing.assert_series_equal(actual, expected)


@pytest.mark.skip(reason="b/457416070")
def test_blob_content_type(images_mm_df: bpd.DataFrame):
actual = images_mm_df["blob_col"].blob.content_type().to_pandas()
expected = pd.Series(["image/jpeg", "image/jpeg"], name="content_type")
Expand All @@ -80,6 +97,7 @@ def test_blob_content_type(images_mm_df: bpd.DataFrame):
)


@pytest.mark.skip(reason="b/457416070")
def test_blob_md5_hash(images_mm_df: bpd.DataFrame):
actual = images_mm_df["blob_col"].blob.md5_hash().to_pandas()
expected = pd.Series(
Expand All @@ -92,6 +110,7 @@ def test_blob_md5_hash(images_mm_df: bpd.DataFrame):
)


@pytest.mark.skip(reason="b/457416070")
def test_blob_size(images_mm_df: bpd.DataFrame):
actual = images_mm_df["blob_col"].blob.size().to_pandas()
expected = pd.Series([338390, 43333], name="size")
Expand All @@ -101,6 +120,7 @@ def test_blob_size(images_mm_df: bpd.DataFrame):
)


@pytest.mark.skip(reason="b/457416070")
def test_blob_updated(images_mm_df: bpd.DataFrame):
actual = images_mm_df["blob_col"].blob.updated().to_pandas()
expected = pd.Series(
Expand Down
1 change: 1 addition & 0 deletions tests/system/small/ml/test_multimodal_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from bigframes.testing import utils


@pytest.mark.skip(reason="b/457416070")
@pytest.mark.flaky(retries=2)
def test_multimodal_embedding_generator_predict_default_params_success(
images_mm_df, session, bq_connection
Expand Down