Skip to content

Commit 643d62c

Browse files
REF: move __from_arrow__ to common base class for numeric masked arrays (#38411)
1 parent 639a9c2 commit 643d62c

File tree

3 files changed

+47
-69
lines changed

3 files changed

+47
-69
lines changed

pandas/core/arrays/floating.py

Lines changed: 3 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import numbers
2-
from typing import TYPE_CHECKING, List, Optional, Tuple, Type, Union
2+
from typing import List, Optional, Tuple, Type
33
import warnings
44

55
import numpy as np
@@ -27,13 +27,10 @@
2727
from pandas.core.tools.numeric import to_numeric
2828

2929
from .masked import BaseMaskedDtype
30-
from .numeric import NumericArray
30+
from .numeric import NumericArray, NumericDtype
3131

32-
if TYPE_CHECKING:
33-
import pyarrow
3432

35-
36-
class FloatingDtype(BaseMaskedDtype):
33+
class FloatingDtype(NumericDtype):
3734
"""
3835
An ExtensionDtype to hold a single size of floating dtype.
3936
@@ -72,34 +69,6 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
7269
return FLOAT_STR_TO_DTYPE[str(np_dtype)]
7370
return None
7471

75-
def __from_arrow__(
76-
self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"]
77-
) -> "FloatingArray":
78-
"""
79-
Construct FloatingArray from pyarrow Array/ChunkedArray.
80-
"""
81-
import pyarrow
82-
83-
from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask
84-
85-
pyarrow_type = pyarrow.from_numpy_dtype(self.type)
86-
if not array.type.equals(pyarrow_type):
87-
array = array.cast(pyarrow_type)
88-
89-
if isinstance(array, pyarrow.Array):
90-
chunks = [array]
91-
else:
92-
# pyarrow.ChunkedArray
93-
chunks = array.chunks
94-
95-
results = []
96-
for arr in chunks:
97-
data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype=self.type)
98-
float_arr = FloatingArray(data.copy(), ~mask, copy=False)
99-
results.append(float_arr)
100-
101-
return FloatingArray._concat_same_type(results)
102-
10372

10473
def coerce_to_array(
10574
values, dtype=None, mask=None, copy: bool = False

pandas/core/arrays/integer.py

Lines changed: 3 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import numbers
2-
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Type, Union
2+
from typing import Dict, List, Optional, Tuple, Type
33
import warnings
44

55
import numpy as np
@@ -27,13 +27,10 @@
2727
from pandas.core.tools.numeric import to_numeric
2828

2929
from .masked import BaseMaskedArray, BaseMaskedDtype
30-
from .numeric import NumericArray
30+
from .numeric import NumericArray, NumericDtype
3131

32-
if TYPE_CHECKING:
33-
import pyarrow
3432

35-
36-
class _IntegerDtype(BaseMaskedDtype):
33+
class _IntegerDtype(NumericDtype):
3734
"""
3835
An ExtensionDtype to hold a single size & kind of integer dtype.
3936
@@ -92,34 +89,6 @@ def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
9289
return FLOAT_STR_TO_DTYPE[str(np_dtype)]
9390
return None
9491

95-
def __from_arrow__(
96-
self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"]
97-
) -> "IntegerArray":
98-
"""
99-
Construct IntegerArray from pyarrow Array/ChunkedArray.
100-
"""
101-
import pyarrow
102-
103-
from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask
104-
105-
pyarrow_type = pyarrow.from_numpy_dtype(self.type)
106-
if not array.type.equals(pyarrow_type):
107-
array = array.cast(pyarrow_type)
108-
109-
if isinstance(array, pyarrow.Array):
110-
chunks = [array]
111-
else:
112-
# pyarrow.ChunkedArray
113-
chunks = array.chunks
114-
115-
results = []
116-
for arr in chunks:
117-
data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype=self.type)
118-
int_arr = IntegerArray(data.copy(), ~mask, copy=False)
119-
results.append(int_arr)
120-
121-
return IntegerArray._concat_same_type(results)
122-
12392

12493
def safe_cast(values, dtype, copy: bool):
12594
"""

pandas/core/arrays/numeric.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import datetime
2+
from typing import TYPE_CHECKING, Union
23

34
import numpy as np
45

@@ -13,7 +14,46 @@
1314
is_list_like,
1415
)
1516

16-
from .masked import BaseMaskedArray
17+
from .masked import BaseMaskedArray, BaseMaskedDtype
18+
19+
if TYPE_CHECKING:
20+
import pyarrow
21+
22+
23+
class NumericDtype(BaseMaskedDtype):
    """
    Common base class for the numeric masked extension dtypes.

    Holds the pyarrow conversion logic shared by its subclasses; the concrete
    array class to build is looked up through ``construct_array_type``.
    """

    def __from_arrow__(
        self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"]
    ) -> BaseMaskedArray:
        """
        Construct IntegerArray/FloatingArray from pyarrow Array/ChunkedArray.

        Parameters
        ----------
        array : pyarrow.Array or pyarrow.ChunkedArray
            Input data. It is cast to the pyarrow type corresponding to
            ``self.type`` first when its own type differs.

        Returns
        -------
        BaseMaskedArray
            An instance of ``self.construct_array_type()``. The result is
            empty when a ChunkedArray without any chunks is passed.
        """
        import pyarrow

        from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask

        array_class = self.construct_array_type()

        pyarrow_type = pyarrow.from_numpy_dtype(self.type)
        if not array.type.equals(pyarrow_type):
            array = array.cast(pyarrow_type)

        if isinstance(array, pyarrow.Array):
            chunks = [array]
        else:
            # pyarrow.ChunkedArray
            chunks = array.chunks

        results = []
        for arr in chunks:
            data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype=self.type)
            # The mask returned by the helper is inverted before it is handed
            # to the array class, and the data is copied so the result does
            # not share the converted buffer.
            num_arr = array_class(data.copy(), ~mask, copy=False)
            results.append(num_arr)

        if not results:
            # A ChunkedArray with zero chunks produces no pieces at all; build
            # the empty result directly rather than concatenating an empty
            # list (NOTE(review): _concat_same_type is expected to fail on an
            # empty list -- confirm against its implementation).
            return array_class(
                np.array([], dtype=self.type), np.array([], dtype=np.bool_)
            )
        if len(results) == 1:
            # avoid additional copy in _concat_same_type
            return results[0]
        return array_class._concat_same_type(results)
1757

1858

1959
class NumericArray(BaseMaskedArray):

0 commit comments

Comments
 (0)