Skip to content

Commit 3e770f1

Browse files
xfail request reformat and fill_value changes
As requested in review feedback, change tests to use request.node.add_marker in test_*.py test cases, rather than xfailing in base class tests. Also, prototyped an attempt to create properly-sized EAs for complex numbers so that the dim2 test case passes for complex (pandas-dev#54445). Signed-off-by: Michael Tiemann <[email protected]>
1 parent ab6cb98 commit 3e770f1

30 files changed

+171
-113
lines changed

pandas/core/arrays/_mixins.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -517,7 +517,7 @@ def _cast_quantile_result(self, res_values: np.ndarray) -> np.ndarray:
517517
# numpy-like methods
518518

519519
@classmethod
520-
def _empty(cls, shape: Shape, dtype: ExtensionDtype) -> Self:
520+
def _empty(cls, shape: Shape, dtype: ExtensionDtype, fill_value: object = None) -> Self:
521521
"""
522522
Analogous to np.empty(shape, dtype=dtype)
523523

pandas/core/arrays/arrow/array.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ def __init__(self, values: pa.Array | pa.ChunkedArray) -> None:
247247
self._dtype = ArrowDtype(self._pa_array.type)
248248

249249
@classmethod
250-
def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False):
250+
def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, fill_value: object | None = None, copy: bool = False):
251251
"""
252252
Construct a new ExtensionArray from a sequence of scalars.
253253
"""

pandas/core/arrays/base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ class ExtensionArray:
262262
# ------------------------------------------------------------------------
263263

264264
@classmethod
265-
def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False):
265+
def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, fill_value: object = None, copy: bool = False):
266266
"""
267267
Construct a new ExtensionArray from a sequence of scalars.
268268
@@ -2084,7 +2084,7 @@ def _rank(
20842084
)
20852085

20862086
@classmethod
2087-
def _empty(cls, shape: Shape, dtype: ExtensionDtype):
2087+
def _empty(cls, shape: Shape, dtype: ExtensionDtype, fill_value: object = None):
20882088
"""
20892089
Create an ExtensionArray with the given shape and dtype.
20902090
@@ -2096,7 +2096,7 @@ def _empty(cls, shape: Shape, dtype: ExtensionDtype):
20962096
# Implementer note: while ExtensionDtype.empty is the public way to
20972097
# call this method, it is still required to implement this `_empty`
20982098
# method as well (it is called internally in pandas)
2099-
obj = cls._from_sequence([], dtype=dtype)
2099+
obj = cls._from_sequence([], dtype=dtype, fill_value=fill_value)
21002100

21012101
taker = np.broadcast_to(np.intp(-1), shape)
21022102
result = obj.take(taker, allow_fill=True)

pandas/core/arrays/categorical.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -505,7 +505,7 @@ def _internal_fill_value(self) -> int:
505505

506506
@classmethod
507507
def _from_sequence(
508-
cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
508+
cls, scalars, *, dtype: Dtype | None = None, fill_value: object = None, copy: bool = False
509509
) -> Self:
510510
return cls(scalars, dtype=dtype, copy=copy)
511511

@@ -1800,7 +1800,7 @@ def value_counts(self, dropna: bool = True) -> Series:
18001800
# "ExtensionDtype"
18011801
@classmethod
18021802
def _empty( # type: ignore[override]
1803-
cls, shape: Shape, dtype: CategoricalDtype
1803+
cls, shape: Shape, dtype: CategoricalDtype, fill_value: object = None
18041804
) -> Self:
18051805
"""
18061806
Analogous to np.empty(shape, dtype=dtype)

pandas/core/arrays/datetimes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ def _simple_new( # type: ignore[override]
293293
return result
294294

295295
@classmethod
296-
def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
296+
def _from_sequence(cls, scalars, *, dtype=None, fill_value: object = None, copy: bool = False):
297297
return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy)
298298

299299
@classmethod

pandas/core/arrays/interval.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,7 @@ def _from_sequence(
382382
scalars,
383383
*,
384384
dtype: Dtype | None = None,
385+
fill_value: object = None,
385386
copy: bool = False,
386387
) -> Self:
387388
return cls(scalars, dtype=dtype, copy=copy)

pandas/core/arrays/masked.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,13 +145,13 @@ def __init__(
145145
self._mask = mask
146146

147147
@classmethod
148-
def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False) -> Self:
148+
def _from_sequence(cls, scalars, *, dtype=None, fill_value: object = None, copy: bool = False) -> Self:
149149
values, mask = cls._coerce_to_array(scalars, dtype=dtype, copy=copy)
150150
return cls(values, mask)
151151

152152
@classmethod
153153
@doc(ExtensionArray._empty)
154-
def _empty(cls, shape: Shape, dtype: ExtensionDtype):
154+
def _empty(cls, shape: Shape, dtype: ExtensionDtype, fill_value: object = None):
155155
values = np.empty(shape, dtype=dtype.type)
156156
values.fill(cls._internal_fill_value)
157157
mask = np.ones(shape, dtype=bool)

pandas/core/arrays/numpy_.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def __init__(
117117

118118
@classmethod
119119
def _from_sequence(
120-
cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
120+
cls, scalars, *, dtype: Dtype | None = None, fill_value: object = None, copy: bool = False
121121
) -> NumpyExtensionArray:
122122
if isinstance(dtype, NumpyEADtype):
123123
dtype = dtype._dtype

pandas/core/arrays/period.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ def _from_sequence(
276276
scalars,
277277
*,
278278
dtype: Dtype | None = None,
279+
fill_value: object | None = None,
279280
copy: bool = False,
280281
) -> Self:
281282
if dtype is not None:

pandas/core/arrays/sparse/array.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -583,7 +583,7 @@ def __setitem__(self, key, value) -> None:
583583
raise TypeError(msg)
584584

585585
@classmethod
586-
def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False):
586+
def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, fill_value: object | None = None, copy: bool = False):
587587
return cls(scalars, dtype=dtype)
588588

589589
@classmethod

pandas/core/arrays/string_.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,7 @@ def _validate(self):
340340
lib.convert_nans_to_NA(self._ndarray)
341341

342342
@classmethod
343-
def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False):
343+
def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, fill_value: object | None = None, copy: bool = False):
344344
if dtype and not (isinstance(dtype, str) and dtype == "string"):
345345
dtype = pandas_dtype(dtype)
346346
assert isinstance(dtype, StringDtype) and dtype.storage == "python"
@@ -377,7 +377,7 @@ def _from_sequence_of_strings(
377377
return cls._from_sequence(strings, dtype=dtype, copy=copy)
378378

379379
@classmethod
380-
def _empty(cls, shape, dtype) -> StringArray:
380+
def _empty(cls, shape, dtype, fill_value = None) -> StringArray:
381381
values = np.empty(shape, dtype=object)
382382
values[:] = libmissing.NA
383383
return cls(values).astype(dtype, copy=False)

pandas/core/arrays/string_arrow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ def __len__(self) -> int:
137137
return len(self._pa_array)
138138

139139
@classmethod
140-
def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False):
140+
def _from_sequence(cls, scalars, dtype: Dtype | None = None, fill_value: object | None = None, copy: bool = False):
141141
from pandas.core.arrays.masked import BaseMaskedArray
142142

143143
_chk_pyarrow_available()

pandas/core/arrays/timedeltas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ def _simple_new( # type: ignore[override]
229229
return result
230230

231231
@classmethod
232-
def _from_sequence(cls, data, *, dtype=None, copy: bool = False) -> Self:
232+
def _from_sequence(cls, data, *, dtype=None, fill_value: object = None, copy: bool = False) -> Self:
233233
if dtype:
234234
dtype = _validate_td64_dtype(dtype)
235235

pandas/core/internals/managers.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -945,8 +945,13 @@ def fast_xs(self, loc: int) -> SingleBlockManager:
945945
immutable_ea = isinstance(dtype, ExtensionDtype) and dtype._is_immutable
946946

947947
if isinstance(dtype, ExtensionDtype) and not immutable_ea:
948+
result = self.blocks[0].iget((slice(None), loc))
949+
if isinstance(result, np.ndarray):
950+
empty_fill_value = result[0]
951+
else:
952+
empty_fill_value = result._values_for_factorize()[0][0]
948953
cls = dtype.construct_array_type()
949-
result = cls._empty((n,), dtype=dtype)
954+
result = cls._empty((n,), dtype=dtype, fill_value=empty_fill_value)
950955
else:
951956
# error: Argument "dtype" to "empty" has incompatible type
952957
# "Union[Type[object], dtype[Any], ExtensionDtype, None]"; expected

pandas/tests/extension/base/dim2.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from pandas.core.dtypes.common import (
1010
is_bool_dtype,
1111
is_integer_dtype,
12+
is_complex_dtype,
1213
)
1314

1415
import pandas as pd
@@ -213,12 +214,6 @@ def test_reductions_2d_axis0(self, data, method, min_count):
213214

214215
kwargs = {}
215216
if method in ["std", "var"]:
216-
if data.dtype.kind == "c":
217-
pytest.skip(
218-
f"{data.dtype.name} extension array"
219-
"collapses to real-valued numbers"
220-
f"with operator {method}"
221-
)
222217
# pass ddof=0 so we get all-zero std instead of all-NA std
223218
kwargs["ddof"] = 0
224219
elif method in ["prod", "sum"]:
@@ -268,6 +263,9 @@ def get_reduction_result_dtype(dtype):
268263
data = data.astype("Float64")
269264
if method == "mean":
270265
tm.assert_extension_array_equal(result, data)
266+
elif is_complex_dtype(data) and method in ["std", "var"]:
267+
# std and var produce real-only results
268+
tm.assert_extension_array_equal(result, data - data, check_dtype=False)
271269
else:
272270
tm.assert_extension_array_equal(result, data - data)
273271

pandas/tests/extension/base/io.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,13 @@
1010

1111
class BaseParsingTests(BaseExtensionTests):
1212
@pytest.mark.parametrize("engine", ["c", "python"])
13-
def test_EA_types(self, engine, data):
14-
if data.dtype.kind == "c":
15-
pytest.skip(f"the dtype {data.dtype.name} is not supported for parsing")
13+
def test_EA_types(self, engine, data, request):
14+
if engine == "c" and data.dtype.kind == "c":
15+
request.node.add_marker(
16+
pytest.mark.xfail(
17+
reason=f"engine '{engine}' cannot parse the dtype {data.dtype.name}"
18+
)
19+
)
1620
df = pd.DataFrame({"with_dtype": pd.Series(data, dtype=str(data.dtype))})
1721
csv_output = df.to_csv(index=False, na_rep=np.nan)
1822
result = pd.read_csv(

pandas/tests/extension/base/missing.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -87,12 +87,7 @@ def test_fillna_limit_backfill(self, data_missing):
8787
expected = pd.Series(data_missing.take([1, 0, 1, 1, 1]))
8888
tm.assert_series_equal(result, expected)
8989

90-
def test_fillna_no_op_returns_copy(self, data):
91-
if data.dtype.kind == "c":
92-
pytest.skip(
93-
f"no cython implementation of backfill(ndarray[{data.dtype.name}_t],"
94-
f"ndarray[{data.dtype.name}_t], int64_t) in libs/algos.pxd"
95-
)
90+
def test_fillna_no_op_returns_copy(self, data, request):
9691
data = data[~data.isna()]
9792

9893
valid = data[0]

pandas/tests/extension/base/ops.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class BaseOpsUtil(BaseExtensionTests):
1818
divmod_exc: type[Exception] | None = TypeError
1919

2020
def _get_expected_exception(
21-
self, op_name: str, obj, other
21+
self, op_name: str, obj, other, request
2222
) -> type[Exception] | None:
2323
# Find the Exception, if any we expect to raise calling
2424
# obj.__op_name__(other)
@@ -53,8 +53,8 @@ def get_op_from_name(self, op_name: str):
5353
# case that still requires overriding _check_op or _combine, please let
5454
# us know at github.com/pandas-dev/pandas/issues
5555
@final
56-
def check_opname(self, ser: pd.Series, op_name: str, other):
57-
exc = self._get_expected_exception(op_name, ser, other)
56+
def check_opname(self, ser: pd.Series, op_name: str, other, request):
57+
exc = self._get_expected_exception(op_name, ser, other, request)
5858
op = self.get_op_from_name(op_name)
5959

6060
self._check_op(ser, op, other, op_name, exc)
@@ -90,12 +90,12 @@ def _check_op(
9090

9191
# see comment on check_opname
9292
@final
93-
def _check_divmod_op(self, ser: pd.Series, op, other):
93+
def _check_divmod_op(self, ser: pd.Series, op, other, request):
9494
# check that divmod behavior matches behavior of floordiv+mod
9595
if op is divmod:
96-
exc = self._get_expected_exception("__divmod__", ser, other)
96+
exc = self._get_expected_exception("__divmod__", ser, other, request)
9797
else:
98-
exc = self._get_expected_exception("__rdivmod__", ser, other)
98+
exc = self._get_expected_exception("__rdivmod__", ser, other, request)
9999
if exc is None:
100100
result_div, result_mod = op(ser, other)
101101
if op is divmod:
@@ -127,47 +127,47 @@ class BaseArithmeticOpsTests(BaseOpsUtil):
127127
series_array_exc: type[Exception] | None = TypeError
128128
divmod_exc: type[Exception] | None = TypeError
129129

130-
def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
130+
def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request):
131131
# series & scalar
132132
op_name = all_arithmetic_operators
133133
ser = pd.Series(data)
134-
self.check_opname(ser, op_name, ser.iloc[0])
134+
self.check_opname(ser, op_name, ser.iloc[0], request)
135135

136-
def test_arith_frame_with_scalar(self, data, all_arithmetic_operators):
136+
def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request):
137137
# frame & scalar
138138
op_name = all_arithmetic_operators
139139
df = pd.DataFrame({"A": data})
140-
self.check_opname(df, op_name, data[0])
140+
self.check_opname(df, op_name, data[0], request)
141141

142-
def test_arith_series_with_array(self, data, all_arithmetic_operators):
142+
def test_arith_series_with_array(self, data, all_arithmetic_operators, request):
143143
# ndarray & other series
144144
op_name = all_arithmetic_operators
145145
ser = pd.Series(data)
146-
self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser)))
146+
self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser)), request)
147147

148-
def test_divmod(self, data):
148+
def test_divmod(self, data, request):
149149
ser = pd.Series(data)
150-
self._check_divmod_op(ser, divmod, 1)
151-
self._check_divmod_op(1, ops.rdivmod, ser)
150+
self._check_divmod_op(ser, divmod, 1, request)
151+
self._check_divmod_op(1, ops.rdivmod, ser, request)
152152

153-
def test_divmod_series_array(self, data, data_for_twos):
153+
def test_divmod_series_array(self, data, data_for_twos, request):
154154
ser = pd.Series(data)
155-
self._check_divmod_op(ser, divmod, data)
155+
self._check_divmod_op(ser, divmod, data, request)
156156

157157
other = data_for_twos
158-
self._check_divmod_op(other, ops.rdivmod, ser)
158+
self._check_divmod_op(other, ops.rdivmod, ser, request)
159159

160160
other = pd.Series(other)
161-
self._check_divmod_op(other, ops.rdivmod, ser)
161+
self._check_divmod_op(other, ops.rdivmod, ser, request)
162162

163-
def test_add_series_with_extension_array(self, data):
163+
def test_add_series_with_extension_array(self, data, request):
164164
# Check adding an ExtensionArray to a Series of the same dtype matches
165165
# the behavior of adding the arrays directly and then wrapping in a
166166
# Series.
167167

168168
ser = pd.Series(data)
169169

170-
exc = self._get_expected_exception("__add__", ser, data)
170+
exc = self._get_expected_exception("__add__", ser, data, request)
171171
if exc is not None:
172172
with pytest.raises(exc):
173173
ser + data

pandas/tests/extension/decimal/array.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def dtype(self):
9797
return self._dtype
9898

9999
@classmethod
100-
def _from_sequence(cls, scalars, dtype=None, copy=False):
100+
def _from_sequence(cls, scalars, dtype=None, fill_value=None, copy=False):
101101
return cls(scalars)
102102

103103
@classmethod

pandas/tests/extension/decimal/test_decimal.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -120,12 +120,12 @@ def test_fillna_limit_backfill(self, data_missing):
120120
):
121121
super().test_fillna_limit_backfill(data_missing)
122122

123-
def test_fillna_no_op_returns_copy(self, data):
123+
def test_fillna_no_op_returns_copy(self, data, request):
124124
msg = "ExtensionArray.fillna 'method' keyword is deprecated"
125125
with tm.assert_produces_warning(
126126
FutureWarning, match=msg, check_stacklevel=False
127127
):
128-
super().test_fillna_no_op_returns_copy(data)
128+
super().test_fillna_no_op_returns_copy(data, request)
129129

130130
def test_fillna_series(self, data_missing):
131131
msg = "ExtensionArray.fillna added a 'copy' keyword"
@@ -311,11 +311,11 @@ class TestArithmeticOps(base.BaseArithmeticOpsTests):
311311
series_array_exc = None
312312

313313
def _get_expected_exception(
314-
self, op_name: str, obj, other
314+
self, op_name: str, obj, other, request
315315
) -> type[Exception] | None:
316316
return None
317317

318-
def test_arith_series_with_array(self, data, all_arithmetic_operators):
318+
def test_arith_series_with_array(self, data, all_arithmetic_operators, request):
319319
op_name = all_arithmetic_operators
320320
s = pd.Series(data)
321321

@@ -327,13 +327,13 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators):
327327

328328
# Decimal supports ops with int, but not float
329329
other = pd.Series([int(d * 100) for d in data])
330-
self.check_opname(s, op_name, other)
330+
self.check_opname(s, op_name, other, request)
331331

332332
if "mod" not in op_name:
333-
self.check_opname(s, op_name, s * 2)
333+
self.check_opname(s, op_name, s * 2, request)
334334

335-
self.check_opname(s, op_name, 0)
336-
self.check_opname(s, op_name, 5)
335+
self.check_opname(s, op_name, 0, request)
336+
self.check_opname(s, op_name, 5, request)
337337
context.traps[decimal.DivisionByZero] = divbyzerotrap
338338
context.traps[decimal.InvalidOperation] = invalidoptrap
339339

pandas/tests/extension/json/array.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def __init__(self, values, dtype=None, copy=False) -> None:
8383
# self._values = self.values = self.data
8484

8585
@classmethod
86-
def _from_sequence(cls, scalars, dtype=None, copy=False):
86+
def _from_sequence(cls, scalars, dtype=None, fill_value=None, copy=False):
8787
return cls(scalars)
8888

8989
@classmethod

0 commit comments

Comments
 (0)