Skip to content

Commit e057427

Browse files
authored
PERF: ArrowExtensionArray.isna when zero nulls or all nulls (#51630)
1 parent cb2336e commit e057427

File tree

3 files changed

+24
-0
lines changed

3 files changed

+24
-0
lines changed

asv_bench/benchmarks/frame_methods.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,22 @@ def time_dropna_axis_mixed_dtypes(self, how, axis):
444444
self.df_mixed.dropna(how=how, axis=axis)
445445

446446

447+
class Isna:
    """Benchmark ``DataFrame.isna`` across several float dtypes.

    The frame built in ``setup`` mixes three kinds of columns so that the
    NA-free, all-NA and partially-NA cases are all timed:

    * columns 0-599: no missing values
    * columns 600-799: every value missing
    * columns 800-999: rows 4000-4999 missing, all other rows present
    """

    params = ["float64", "Float64", "float64[pyarrow]"]
    param_names = ["dtype"]

    def setup(self, dtype):
        """Create the 10000 x 1000 benchmark frame with the given ``dtype``."""
        data = np.random.randn(10000, 1000)
        # all-na columns
        data[:, 600:800] = np.nan
        # partial-na columns
        # Rows are indexed first: the array has only 1000 columns, so a
        # column slice of 4000:5000 would be clipped to an empty selection
        # and the assignment would silently do nothing.
        data[4000:5000, 800:1000] = np.nan
        self.df = DataFrame(data, dtype=dtype)

    def time_isna(self, dtype):
        """Time ``DataFrame.isna`` on the prepared frame."""
        self.df.isna()
461+
462+
447463
class Count:
448464
params = [0, 1]
449465
param_names = ["axis"]

doc/source/whatsnew/v2.1.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ Deprecations
100100

101101
Performance improvements
102102
~~~~~~~~~~~~~~~~~~~~~~~~
103+
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.isna` when array has zero nulls or is all nulls (:issue:`51630`)
103104
- Performance improvement in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` for extension array dtypes (:issue:`51549`)
104105
- Performance improvement in :meth:`DataFrame.clip` and :meth:`Series.clip` (:issue:`51472`)
105106
- Performance improvement in :func:`read_parquet` on string columns when using ``use_nullable_dtypes=True`` (:issue:`47345`)

pandas/core/arrays/arrow/array.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,13 @@ def isna(self) -> npt.NDArray[np.bool_]:
557557
558558
This should return a 1-D array the same length as 'self'.
559559
"""
560+
# GH51630: fast paths
561+
null_count = self._data.null_count
562+
if null_count == 0:
563+
return np.zeros(len(self), dtype=np.bool_)
564+
elif null_count == len(self):
565+
return np.ones(len(self), dtype=np.bool_)
566+
560567
return self._data.is_null().to_numpy()
561568

562569
def argsort(

0 commit comments

Comments
 (0)