feat: Add __contains__ to Index, Series, DataFrame (#1899)

TrevorBergeron · web-flow · commit 07222bfe2f6a · 2025-07-11T12:48:07.000-07:00
diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py
@@ -16,8 +16,9 @@
 
 from __future__ import annotations
 
+import functools
 import typing
-from typing import Hashable, Literal, Optional, overload, Sequence, Union
+from typing import cast, Hashable, Literal, Optional, overload, Sequence, Union
 
 import bigframes_vendored.constants as constants
 import bigframes_vendored.pandas.core.indexes.base as vendored_pandas_index
@@ -529,6 +530,29 @@ def isin(self, values) -> Index:
             )
         ).fillna(value=False)
 
+    def __contains__(self, key) -> bool:
+        hash(key)  # to throw for unhashable values
+        if self.nlevels == 0:
+            return False
+
+        if (not isinstance(key, tuple)) or (self.nlevels == 1):
+            key = (key,)
+
+        match_exprs = []
+        for key_part, index_col, dtype in zip(
+            key, self._block.index_columns, self._block.index.dtypes
+        ):
+            key_type = bigframes.dtypes.is_compatible(key_part, dtype)
+            if key_type is None:
+                return False
+            key_expr = ex.const(key_part, key_type)
+            match_expr = ops.eq_null_match_op.as_expr(ex.deref(index_col), key_expr)
+            match_exprs.append(match_expr)
+
+        match_expr_final = functools.reduce(ops.and_op.as_expr, match_exprs)
+        block, match_col = self._block.project_expr(match_expr_final)
+        return cast(bool, block.get_stat(match_col, agg_ops.AnyOp()))
+
     def _apply_unary_expr(
         self,
         op: ex.Expression,
diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
@@ -374,6 +374,9 @@ def __len__(self):
     def __iter__(self):
         return iter(self.columns)
 
+    def __contains__(self, key) -> bool:
+        return key in self.columns
+
     def astype(
         self,
         dtype: Union[
diff --git a/bigframes/series.py b/bigframes/series.py
@@ -257,6 +257,9 @@ def __iter__(self) -> typing.Iterator:
             map(lambda x: x.squeeze(axis=1), self._block.to_pandas_batches())
         )
 
+    def __contains__(self, key) -> bool:
+        return key in self.index
+
     def copy(self) -> Series:
         return Series(self._block)
 
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
@@ -4451,6 +4451,22 @@ def test_df___array__(scalars_df_index, scalars_pandas_df_index):
     )
 
 
+@pytest.mark.parametrize(
+    ("key",),
+    [
+        ("hello",),
+        (2,),
+        ("int64_col",),
+        (None,),
+    ],
+)
+def test_df_contains(scalars_df_index, scalars_pandas_df_index, key):
+    bf_result = key in scalars_df_index
+    pd_result = key in scalars_pandas_df_index
+
+    assert bf_result == pd_result
+
+
 def test_df_getattr_attribute_error_when_pandas_has(scalars_df_index):
     # swapaxes is implemented in pandas but not in bigframes
     with pytest.raises(AttributeError):
diff --git a/tests/system/small/test_index.py b/tests/system/small/test_index.py
@@ -398,6 +398,18 @@ def test_index_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep):
     )
 
 
+@pytest.mark.parametrize(
+    ("key",),
+    [("hello",), (2,), (123123321,), (2.0,), (False,), ((2,),), (pd.NA,)],
+)
+def test_index_contains(scalars_df_index, scalars_pandas_df_index, key):
+    col_name = "int64_col"
+    bf_result = key in scalars_df_index.set_index(col_name).index
+    pd_result = key in scalars_pandas_df_index.set_index(col_name).index
+
+    assert bf_result == pd_result
+
+
 def test_index_isin_list(scalars_df_index, scalars_pandas_df_index):
     col_name = "int64_col"
     bf_series = (
diff --git a/tests/system/small/test_multiindex.py b/tests/system/small/test_multiindex.py
@@ -1388,3 +1388,26 @@ def test_column_multi_index_w_na_stack(scalars_df_index, scalars_pandas_df_index
     # Pandas produces pd.NA, where bq dataframes produces NaN
     pd_result["c"] = pd_result["c"].replace(pandas.NA, np.nan)
     pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False)
+
+
+@pytest.mark.parametrize(
+    ("key",),
+    [
+        ("hello",),
+        (2,),
+        (123123321,),
+        (2.0,),
+        (pandas.NA,),
+        (False,),
+        ((2,),),
+        ((2, False),),
+        ((2.0, False),),
+        ((2, True),),
+    ],
+)
+def test_multi_index_contains(scalars_df_index, scalars_pandas_df_index, key):
+    col_name = ["int64_col", "bool_col"]
+    bf_result = key in scalars_df_index.set_index(col_name).index
+    pd_result = key in scalars_pandas_df_index.set_index(col_name).index
+
+    assert bf_result == pd_result
diff --git a/tests/system/small/test_null_index.py b/tests/system/small/test_null_index.py
@@ -396,3 +396,7 @@ def test_null_index_index_property(scalars_df_null_index):
 def test_null_index_transpose(scalars_df_null_index):
     with pytest.raises(bigframes.exceptions.NullIndexError):
         _ = scalars_df_null_index.T
+
+
+def test_null_index_contains(scalars_df_null_index):
+    assert 3 not in scalars_df_null_index
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
@@ -424,6 +424,22 @@ def test_series_get_column_default(scalars_dfs):
     assert result == "default_val"
 
 
+@pytest.mark.parametrize(
+    ("key",),
+    [
+        ("hello",),
+        (2,),
+        ("int64_col",),
+        (None,),
+    ],
+)
+def test_series_contains(scalars_df_index, scalars_pandas_df_index, key):
+    bf_result = key in scalars_df_index["int64_col"]
+    pd_result = key in scalars_pandas_df_index["int64_col"]
+
+    assert bf_result == pd_result
+
+
 def test_series_equals_identical(scalars_df_index, scalars_pandas_df_index):
     bf_result = scalars_df_index.int64_col.equals(scalars_df_index.int64_col)
     pd_result = scalars_pandas_df_index.int64_col.equals(

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -257,6 +257,9 @@ def __iter__(self) -> typing.Iterator:` |
| `257` | `257` | `            map(lambda x: x.squeeze(axis=1), self._block.to_pandas_batches())` |
| `258` | `258` | `        )` |
| `259` | `259` |  |
|  | `260` | `+    def __contains__(self, key) -> bool:` |
|  | `261` | `+        return key in self.index` |
|  | `262` | `+` |
| `260` | `263` | `    def copy(self) -> Series:` |
| `261` | `264` | `        return Series(self._block)` |
| `262` | `265` |  |