googleapis · shuoweil · Jul 14, 2025 · Jul 15, 2025 · Jul 15, 2025 · Jul 15, 2025
@@ -2488,6 +2488,11 @@ def join(
             )
             if result is not None:
                 return result
+
+            # For block identity joins with null indices, perform cross join
+            if block_identity_join and how == "left":
+                return join_with_single_row(self, other)
+
             raise bigframes.exceptions.NullIndexError(
                 "Cannot implicitly align objects. Set an explicit index using set_index."
             )

@@ -2288,8 +2288,13 @@ def _assign_scalar(self, label: str, value: Union[int, float, str]) -> DataFrame
     def _assign_series_join_on_index(
         self, label: str, series: bigframes.series.Series
     ) -> DataFrame:
+        # Only use block_identity_join for null indices
+        use_block_identity_join = (
+            self._block.index.nlevels == 0 and series._block.index.nlevels == 0
+        )
+
         block, (get_column_left, get_column_right) = self._block.join(
-            series._block, how="left"
+            series._block, how="left", block_identity_join=use_block_identity_join
         )
 
         column_ids = [

@@ -601,6 +601,27 @@ def scalars_df_2_index(
     return session.read_gbq(scalars_table_id_2, index_col="rowindex")
 
 
+@pytest.fixture(scope="session")
+def scalars_df_null_index_partial_ordering(
+    scalars_table_id: str, unordered_session: bigframes.Session
+) -> bigframes.dataframe.DataFrame:
+    """Null-index DataFrame over the scalars test table (partial ordering mode)."""
+    null_index = bigframes.enums.DefaultIndexKind.NULL
+    table_df = unordered_session.read_gbq(scalars_table_id, index_col=null_index)
+    return table_df.sort_values("rowindex")
+
+
+@pytest.fixture(scope="session")
+def scalars_series_null_index_partial_ordering(
+    scalars_table_id: str, unordered_session: bigframes.Session
+) -> bigframes.series.Series:
+    """Null-index ``int64_col`` Series over the scalars test table (partial ordering mode)."""
+    null_index = bigframes.enums.DefaultIndexKind.NULL
+    # Issues its own read_gbq call rather than reusing the DataFrame fixture.
+    table_df = unordered_session.read_gbq(scalars_table_id, index_col=null_index)
+    return table_df.sort_values("rowindex")["int64_col"]
+
+
 @pytest.fixture(scope="session")
 def scalars_pandas_df_default_index() -> pd.DataFrame:
     """pd.DataFrame pointing at test data."""
@@ -1529,3 +1550,12 @@ def audio_mm_df(
     return session.from_glob_path(
         audio_gcs_path, name="audio", connection=bq_connection
     )
+
+
+@pytest.fixture(scope="session")
+def audio_mm_df_partial_ordering(
+    audio_gcs_path, unordered_session: bigframes.Session, bq_connection: str
+) -> bpd.DataFrame:
+    """Result of ``from_glob_path`` on ``audio_gcs_path`` using the unordered session."""
+    glob_kwargs = {"name": "audio", "connection": bq_connection}
+    return unordered_session.from_glob_path(audio_gcs_path, **glob_kwargs)
@@ -454,3 +454,26 @@ def test_blob_transcribe(
         assert (
             keyword.lower() in actual_text.lower()
         ), f"Item (verbose={verbose}): Expected keyword '{keyword}' not found in transcribed text. "
+
+
+@pytest.mark.parametrize(
+    "model_name",
+    [
+        "gemini-2.0-flash-001",
+        "gemini-2.0-flash-lite-001",
+    ],
+)
+def test_audio_transcribe_partial_ordering_integration(
+    audio_mm_df_partial_ordering: bpd.DataFrame,
+    model_name: str,
+):
+    """Integration test for audio_transcribe with partial ordering mode."""
+    # Fixture already uses the unordered session; don't mutate global bpd.options.
+    df = audio_mm_df_partial_ordering.copy()
+    df["transcribed_text"] = df["audio"].blob.audio_transcribe(model_name=model_name)
+    result = df.to_pandas(ordered=False)
+
+    assert "transcribed_text" in result.columns
+    assert len(result) > 0
+    # Missing values surface as pd.NA/NaN, which are never `None`; use notna().
+    assert result["transcribed_text"].notna().iloc[0]
@@ -2949,6 +2949,42 @@ def test_df_join_series(scalars_dfs, how):
         assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
 
 
+def test_assign_series_with_null_index_should_add_column_correctly(
+    scalars_df_null_index_partial_ordering: bigframes.dataframe.DataFrame,
+    scalars_series_null_index_partial_ordering: bigframes.series.Series,
+):
+    """Check column assignment between null-index objects in partial ordering mode.
+
+    The first three rows of the DataFrame fixture receive the first three rows
+    of the Series fixture as ``new_col``. The values of ``new_col`` are then
+    sorted and compared against the expectation built below, which contains
+    each of three ``int64_col`` values three times (nine rows in total).
+    """
+    # Sorted new_col is expected to hold three values, each three times.
+    distinct_values = (-987654321, 314159, 123456789)
+    expected = pd.Series(
+        [value for value in distinct_values for _ in range(3)],
+        dtype="Int64",
+    )
+
+    frame = scalars_df_null_index_partial_ordering[["int64_col", "string_col"]]
+    frame = frame.head(3)
+    to_assign = scalars_series_null_index_partial_ordering.head(3)
+
+    # Assign the Series as a new column in the DataFrame.
+    frame["new_col"] = to_assign
+
+    # Materialize the selected columns to pandas to get the computed result.
+    materialized = frame[["int64_col", "new_col"]].to_pandas()
+    actual = materialized["new_col"].sort_values().reset_index(drop=True)
+
+    pd.testing.assert_series_equal(
+        actual,
+        expected,
+        check_names=False,
+    )
+
+
 @pytest.mark.parametrize(
     ("by", "ascending", "na_position"),
     [

@@ -14,8 +14,11 @@
 
 
 import pandas as pd
+import pandas.testing
 import pytest
 
+import bigframes.core
+import bigframes.core.blocks as blocks
 import bigframes.exceptions
 import bigframes.pandas as bpd
 
@@ -398,5 +401,39 @@ def test_null_index_transpose(scalars_df_null_index):
         _ = scalars_df_null_index.T
 
 
-def test_null_index_contains(scalars_df_null_index):
-    assert 3 not in scalars_df_null_index
+@pytest.mark.parametrize(
+    ("session_fixture",),
+    [
+        pytest.param("session"),
+        pytest.param("unordered_session"),
+    ],
+)
+def test_identity_join_with_null_index_should_return_cartesian_product(
+    request, session_fixture
+):
+    """Test Block.join with how="left" and block_identity_join=True on local blocks.
+
+    NOTE(review): despite "cartesian_product" in the test name, the expected frame
+    holds only the three pairs (1, 10), (2, 20), (3, 30) rather than all nine
+    combinations -- confirm which behavior is intended.
+    """
+    session = request.getfixturevalue(session_fixture)
+    left_data = pd.DataFrame({"a": [1, 2, 3]})
+    right_data = pd.DataFrame({"b": [10, 20, 30]})
+
+    left_block = blocks.Block.from_local(left_data, session=session)
+    right_block = blocks.Block.from_local(right_data, session=session)
+
+    expected_df = pd.DataFrame({"a": [1, 2, 3], "b": [10, 20, 30]})
+
+    # Only the joined block is inspected; the column mappings are not needed.
+    result_block, _ = left_block.join(
+        right_block, how="left", block_identity_join=True
+    )
+
+    result_df, _ = result_block.to_pandas()
+    pandas.testing.assert_frame_equal(
+        result_df.sort_values(by=["a", "b"]).reset_index(drop=True),
+        expected_df,
+        check_dtype=False,
+    )