Skip to content

Commit 097ff0c

Browse files
authored
CLN: assorted cleanups, ported tests (#39463)
1 parent 7b795b2 commit 097ff0c

File tree

7 files changed

+62
-50
lines changed

7 files changed

+62
-50
lines changed

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -259,39 +259,39 @@ cdef convert_to_timedelta64(object ts, str unit):
259259
ts = np.timedelta64(ts.value, "ns")
260260
elif is_datetime64_object(ts):
261261
# only accept a NaT here
262-
if ts.astype('int64') == NPY_NAT:
263-
return np.timedelta64(NPY_NAT)
262+
if ts.astype("int64") == NPY_NAT:
263+
return np.timedelta64(NPY_NAT, "ns")
264264
elif is_timedelta64_object(ts):
265265
ts = ensure_td64ns(ts)
266266
elif is_integer_object(ts):
267267
if ts == NPY_NAT:
268268
return np.timedelta64(NPY_NAT, "ns")
269269
else:
270-
if unit in ['Y', 'M', 'W']:
270+
if unit in ["Y", "M", "W"]:
271271
ts = np.timedelta64(ts, unit)
272272
else:
273273
ts = cast_from_unit(ts, unit)
274274
ts = np.timedelta64(ts, "ns")
275275
elif is_float_object(ts):
276-
if unit in ['Y', 'M', 'W']:
276+
if unit in ["Y", "M", "W"]:
277277
ts = np.timedelta64(int(ts), unit)
278278
else:
279279
ts = cast_from_unit(ts, unit)
280280
ts = np.timedelta64(ts, "ns")
281281
elif isinstance(ts, str):
282-
if len(ts) > 0 and ts[0] == 'P':
282+
if len(ts) > 0 and ts[0] == "P":
283283
ts = parse_iso_format_string(ts)
284284
else:
285285
ts = parse_timedelta_string(ts)
286286
ts = np.timedelta64(ts, "ns")
287287
elif is_tick_object(ts):
288-
ts = np.timedelta64(ts.nanos, 'ns')
288+
ts = np.timedelta64(ts.nanos, "ns")
289289

290290
if PyDelta_Check(ts):
291-
ts = np.timedelta64(delta_to_nanoseconds(ts), 'ns')
291+
ts = np.timedelta64(delta_to_nanoseconds(ts), "ns")
292292
elif not is_timedelta64_object(ts):
293293
raise ValueError(f"Invalid type for timedelta scalar: {type(ts)}")
294-
return ts.astype('timedelta64[ns]')
294+
return ts.astype("timedelta64[ns]")
295295

296296

297297
@cython.boundscheck(False)

pandas/core/internals/managers.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,7 @@
2424
from pandas.errors import PerformanceWarning
2525
from pandas.util._validators import validate_bool_kwarg
2626

27-
from pandas.core.dtypes.cast import (
28-
find_common_type,
29-
infer_dtype_from_scalar,
30-
maybe_promote,
31-
)
27+
from pandas.core.dtypes.cast import find_common_type, infer_dtype_from_scalar
3228
from pandas.core.dtypes.common import (
3329
DT64NS_DTYPE,
3430
is_dtype_equal,
@@ -1332,7 +1328,7 @@ def _slice_take_blocks_ax0(
13321328
return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))]
13331329
elif not allow_fill or self.ndim == 1:
13341330
if allow_fill and fill_value is None:
1335-
_, fill_value = maybe_promote(blk.dtype)
1331+
fill_value = blk.fill_value
13361332

13371333
if not allow_fill and only_slice:
13381334
# GH#33597 slice instead of take, so we get

pandas/io/pytables.py

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3875,7 +3875,7 @@ def _create_axes(
38753875

38763876
# add my values
38773877
vaxes = []
3878-
for i, (b, b_items) in enumerate(zip(blocks, blk_items)):
3878+
for i, (blk, b_items) in enumerate(zip(blocks, blk_items)):
38793879

38803880
# shape of the data column are the indexable axes
38813881
klass = DataCol
@@ -3907,13 +3907,13 @@ def _create_axes(
39073907
new_name = name or f"values_block_{i}"
39083908
data_converted = _maybe_convert_for_string_atom(
39093909
new_name,
3910-
b,
3910+
blk,
39113911
existing_col=existing_col,
39123912
min_itemsize=min_itemsize,
39133913
nan_rep=nan_rep,
39143914
encoding=self.encoding,
39153915
errors=self.errors,
3916-
block_columns=b_items,
3916+
columns=b_items,
39173917
)
39183918
adj_name = _maybe_adjust_name(new_name, self.version)
39193919

@@ -4886,13 +4886,15 @@ def _maybe_convert_for_string_atom(
48864886
nan_rep,
48874887
encoding,
48884888
errors,
4889-
block_columns: List[str],
4889+
columns: List[str],
48904890
):
4891-
if not block.is_object:
4892-
return block.values
4891+
bvalues = block.values
48934892

4894-
dtype_name = block.dtype.name
4895-
inferred_type = lib.infer_dtype(block.values, skipna=False)
4893+
if bvalues.dtype != object:
4894+
return bvalues
4895+
4896+
dtype_name = bvalues.dtype.name
4897+
inferred_type = lib.infer_dtype(bvalues, skipna=False)
48964898

48974899
if inferred_type == "date":
48984900
raise TypeError("[date] is not implemented as a table column")
@@ -4904,7 +4906,7 @@ def _maybe_convert_for_string_atom(
49044906
)
49054907

49064908
elif not (inferred_type == "string" or dtype_name == "object"):
4907-
return block.values
4909+
return bvalues
49084910

49094911
blocks: List[Block] = block.fillna(nan_rep, downcast=False)
49104912
# Note: because block is always object dtype, fillna goes
@@ -4923,13 +4925,11 @@ def _maybe_convert_for_string_atom(
49234925

49244926
# expected behaviour:
49254927
# search block for a non-string object column by column
4926-
for i in range(block.shape[0]):
4928+
for i in range(data.shape[0]):
49274929
col = block.iget(i)
49284930
inferred_type = lib.infer_dtype(col, skipna=False)
49294931
if inferred_type != "string":
4930-
error_column_label = (
4931-
block_columns[i] if len(block_columns) > i else f"No.{i}"
4932-
)
4932+
error_column_label = columns[i] if len(columns) > i else f"No.{i}"
49334933
raise TypeError(
49344934
f"Cannot serialize the column [{error_column_label}]\n"
49354935
f"because its data contents are not [string] but "
@@ -4938,7 +4938,6 @@ def _maybe_convert_for_string_atom(
49384938

49394939
# itemsize is the maximum length of a string (along any dimension)
49404940
data_converted = _convert_string_array(data, encoding, errors).reshape(data.shape)
4941-
assert data_converted.shape == block.shape, (data_converted.shape, block.shape)
49424941
itemsize = data_converted.itemsize
49434942

49444943
# specified min_itemsize?

pandas/tests/frame/test_repr_info.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,20 @@
2323

2424

2525
class TestDataFrameReprInfoEtc:
26+
def test_repr_bytes_61_lines(self):
27+
# GH#12857
28+
lets = list("ACDEFGHIJKLMNOP")
29+
slen = 50
30+
nseqs = 1000
31+
words = [[np.random.choice(lets) for x in range(slen)] for _ in range(nseqs)]
32+
df = DataFrame(words).astype("U1")
33+
assert (df.dtypes == object).all()
34+
35+
# smoke tests; at one point this raised with 61 but not 60
36+
repr(df)
37+
repr(df.iloc[:60, :])
38+
repr(df.iloc[:61, :])
39+
2640
def test_repr_unicode_level_names(self, frame_or_series):
2741
index = MultiIndex.from_tuples([(0, 0), (1, 1)], names=["\u0394", "i1"])
2842

pandas/tests/indexing/test_indexing.py

Lines changed: 16 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,10 @@ def test_setitem_ndarray_1d(self):
5555
with pytest.raises(ValueError, match=msg):
5656
df[2:5] = np.arange(1, 4) * 1j
5757

58-
@pytest.mark.parametrize("idxr", [tm.getitem, tm.loc, tm.iloc])
59-
def test_getitem_ndarray_3d(self, index, frame_or_series, idxr):
58+
def test_getitem_ndarray_3d(self, index, frame_or_series, indexer_sli):
6059
# GH 25567
6160
obj = gen_obj(frame_or_series, index)
62-
idxr = idxr(obj)
61+
idxr = indexer_sli(obj)
6362
nd3 = np.random.randint(5, size=(2, 2, 2))
6463

6564
msg = "|".join(
@@ -78,19 +77,18 @@ def test_getitem_ndarray_3d(self, index, frame_or_series, idxr):
7877
with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
7978
idxr[nd3]
8079

81-
@pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc])
82-
def test_setitem_ndarray_3d(self, index, frame_or_series, indexer):
80+
def test_setitem_ndarray_3d(self, index, frame_or_series, indexer_sli):
8381
# GH 25567
8482
obj = gen_obj(frame_or_series, index)
85-
idxr = indexer(obj)
83+
idxr = indexer_sli(obj)
8684
nd3 = np.random.randint(5, size=(2, 2, 2))
8785

88-
if indexer.__name__ == "iloc":
86+
if indexer_sli.__name__ == "iloc":
8987
err = ValueError
9088
msg = f"Cannot set values with ndim > {obj.ndim}"
9189
elif (
9290
isinstance(index, pd.IntervalIndex)
93-
and indexer.__name__ == "setitem"
91+
and indexer_sli.__name__ == "setitem"
9492
and obj.ndim == 1
9593
):
9694
err = AttributeError
@@ -948,8 +946,7 @@ def test_none_coercion_mixed_dtypes(self):
948946

949947

950948
class TestDatetimelikeCoercion:
951-
@pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc])
952-
def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer):
949+
def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer_sli):
953950
# dispatching _can_hold_element to underlying DatetimeArray
954951
tz = tz_naive_fixture
955952

@@ -961,7 +958,7 @@ def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer):
961958
newval = "2018-01-01"
962959
values._validate_setitem_value(newval)
963960

964-
indexer(ser)[0] = newval
961+
indexer_sli(ser)[0] = newval
965962

966963
if tz is None:
967964
# TODO(EA2D): we can make this no-copy in tz-naive case too
@@ -974,12 +971,11 @@ def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer):
974971
@pytest.mark.parametrize(
975972
"key", [[0, 1], slice(0, 2), np.array([True, True, False])]
976973
)
977-
@pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc])
978-
def test_setitem_dt64_string_values(self, tz_naive_fixture, indexer, key, box):
974+
def test_setitem_dt64_string_values(self, tz_naive_fixture, indexer_sli, key, box):
979975
# dispatching _can_hold_element to underlying DatetimeArray
980976
tz = tz_naive_fixture
981977

982-
if isinstance(key, slice) and indexer is tm.loc:
978+
if isinstance(key, slice) and indexer_sli is tm.loc:
983979
key = slice(0, 1)
984980

985981
dti = date_range("2016-01-01", periods=3, tz=tz)
@@ -990,7 +986,7 @@ def test_setitem_dt64_string_values(self, tz_naive_fixture, indexer, key, box):
990986
newvals = box(["2019-01-01", "2010-01-02"])
991987
values._validate_setitem_value(newvals)
992988

993-
indexer(ser)[key] = newvals
989+
indexer_sli(ser)[key] = newvals
994990

995991
if tz is None:
996992
# TODO(EA2D): we can make this no-copy in tz-naive case too
@@ -1000,26 +996,24 @@ def test_setitem_dt64_string_values(self, tz_naive_fixture, indexer, key, box):
1000996
assert ser._values is values
1001997

1002998
@pytest.mark.parametrize("scalar", ["3 Days", offsets.Hour(4)])
1003-
@pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc])
1004-
def test_setitem_td64_scalar(self, indexer, scalar):
999+
def test_setitem_td64_scalar(self, indexer_sli, scalar):
10051000
# dispatching _can_hold_element to underlying TimedeltaArray
10061001
tdi = timedelta_range("1 Day", periods=3)
10071002
ser = Series(tdi)
10081003

10091004
values = ser._values
10101005
values._validate_setitem_value(scalar)
10111006

1012-
indexer(ser)[0] = scalar
1007+
indexer_sli(ser)[0] = scalar
10131008
assert ser._values._data is values._data
10141009

10151010
@pytest.mark.parametrize("box", [list, np.array, pd.array])
10161011
@pytest.mark.parametrize(
10171012
"key", [[0, 1], slice(0, 2), np.array([True, True, False])]
10181013
)
1019-
@pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc])
1020-
def test_setitem_td64_string_values(self, indexer, key, box):
1014+
def test_setitem_td64_string_values(self, indexer_sli, key, box):
10211015
# dispatching _can_hold_element to underlying TimedeltaArray
1022-
if isinstance(key, slice) and indexer is tm.loc:
1016+
if isinstance(key, slice) and indexer_sli is tm.loc:
10231017
key = slice(0, 1)
10241018

10251019
tdi = timedelta_range("1 Day", periods=3)
@@ -1030,7 +1024,7 @@ def test_setitem_td64_string_values(self, indexer, key, box):
10301024
newvals = box(["10 Days", "44 hours"])
10311025
values._validate_setitem_value(newvals)
10321026

1033-
indexer(ser)[key] = newvals
1027+
indexer_sli(ser)[key] = newvals
10341028
assert ser._values._data is values._data
10351029

10361030

pandas/tests/series/indexing/test_setitem.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,10 @@ def test_setitem_dt64_into_int_series(self, dtype):
273273
tm.assert_series_equal(ser, expected)
274274
assert isinstance(ser[0], type(val))
275275

276+
ser = orig.copy()
277+
ser[:-1] = [val, val]
278+
tm.assert_series_equal(ser, expected)
279+
276280
ser = orig.copy()
277281
ser[:-1] = np.array([val, val])
278282
tm.assert_series_equal(ser, expected)

pandas/tests/series/methods/test_item.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
1+
"""
2+
Series.item method, mainly testing that we get python scalars as opposed to
3+
numpy scalars.
4+
"""
15
import pytest
26

37
from pandas import Series, Timedelta, Timestamp, date_range
48

59

610
class TestItem:
711
def test_item(self):
12+
# We are testing that we get python scalars as opposed to numpy scalars
813
ser = Series([1])
914
result = ser.item()
1015
assert result == 1

0 commit comments

Comments
 (0)