Skip to content
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,12 +375,28 @@ def __eq__(self, other: Any) -> bool:
# but same order is not necessary. There is no distinction between
# ordered=False and ordered=None: CDT(., False) and CDT(., None)
# will be equal if they have the same categories.
if (
self.categories.dtype == other.categories.dtype
and self.categories.equals(other.categories)
):
left = self.categories
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a comment noting that the ordering of the checks is for performance?

right = other.categories
if not left.dtype == right.dtype:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this use `is_dtype_equal` instead?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That'd require a circular import (actually, I'd like to put `is_dtype_equal` "above" this file, but that's for another day).

return False

if len(left) != len(right):
return False

if self.categories.equals(other.categories):
# Check and see if they happen to be identical categories
return True

if left.dtype != object:
# Faster than calculating hash
indexer = left.get_indexer(right)
# Because left and right have the same length and are unique,
# `indexer` not having any -1s implies that there is a
# bijection between `left` and `right`.
return (indexer != -1).all()

# With object-dtype we need a comparison that identifies
# e.g. int(2) as distinct from float(2)
return hash(self) == hash(other)

def __repr__(self) -> str_type:
Expand Down