From 6bac8c3ea31473f2598a0f529f5bf5b62a1cbfc5 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Wed, 14 May 2025 17:53:13 +0000 Subject: [PATCH] perf: Faster local data comparison using identity --- bigframes/core/local_data.py | 4 ++-- tests/unit/test_local_data.py | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/bigframes/core/local_data.py b/bigframes/core/local_data.py index d23f3538dd..2e8c4aff44 100644 --- a/bigframes/core/local_data.py +++ b/bigframes/core/local_data.py @@ -54,8 +54,8 @@ def from_arrow(cls, table: pa.Table) -> LocalTableMetadata: @dataclasses.dataclass(frozen=True) class ManagedArrowTable: - data: pa.Table = dataclasses.field(hash=False) - schema: schemata.ArraySchema = dataclasses.field(hash=False) + data: pa.Table = dataclasses.field(hash=False, compare=False) + schema: schemata.ArraySchema = dataclasses.field(hash=False, compare=False) id: uuid.UUID = dataclasses.field(default_factory=uuid.uuid4) @functools.cached_property diff --git a/tests/unit/test_local_data.py b/tests/unit/test_local_data.py index bb7330aba4..71479e89d4 100644 --- a/tests/unit/test_local_data.py +++ b/tests/unit/test_local_data.py @@ -64,3 +64,16 @@ def test_local_data_well_formed_round_trip_sliced(): result.reset_index(drop=True), check_dtype=False, ) + + +def test_local_data_equal_self(): + local_entry = local_data.ManagedArrowTable.from_pandas(pd_data) + assert local_entry == local_entry + assert hash(local_entry) == hash(local_entry) + + +def test_local_data_not_equal_other(): + local_entry = local_data.ManagedArrowTable.from_pandas(pd_data) + local_entry2 = local_data.ManagedArrowTable.from_pandas(pd_data[::2]) + assert local_entry != local_entry2 + assert hash(local_entry) != hash(local_entry2)