CLN: assorted cleanups, ported tests #39463

Merged: 6 commits merged on Jan 29, 2021
16 changes: 8 additions & 8 deletions pandas/_libs/tslibs/timedeltas.pyx
@@ -259,39 +259,39 @@ cdef convert_to_timedelta64(object ts, str unit):
ts = np.timedelta64(ts.value, "ns")
elif is_datetime64_object(ts):
# only accept a NaT here
if ts.astype('int64') == NPY_NAT:
return np.timedelta64(NPY_NAT)
if ts.astype("int64") == NPY_NAT:
return np.timedelta64(NPY_NAT, "ns")
elif is_timedelta64_object(ts):
ts = ensure_td64ns(ts)
elif is_integer_object(ts):
if ts == NPY_NAT:
return np.timedelta64(NPY_NAT, "ns")
else:
if unit in ['Y', 'M', 'W']:
if unit in ["Y", "M", "W"]:
ts = np.timedelta64(ts, unit)
else:
ts = cast_from_unit(ts, unit)
ts = np.timedelta64(ts, "ns")
elif is_float_object(ts):
if unit in ['Y', 'M', 'W']:
if unit in ["Y", "M", "W"]:
ts = np.timedelta64(int(ts), unit)
else:
ts = cast_from_unit(ts, unit)
ts = np.timedelta64(ts, "ns")
elif isinstance(ts, str):
if len(ts) > 0 and ts[0] == 'P':
if len(ts) > 0 and ts[0] == "P":
ts = parse_iso_format_string(ts)
else:
ts = parse_timedelta_string(ts)
ts = np.timedelta64(ts, "ns")
elif is_tick_object(ts):
ts = np.timedelta64(ts.nanos, 'ns')
ts = np.timedelta64(ts.nanos, "ns")

if PyDelta_Check(ts):
ts = np.timedelta64(delta_to_nanoseconds(ts), 'ns')
ts = np.timedelta64(delta_to_nanoseconds(ts), "ns")
elif not is_timedelta64_object(ts):
raise ValueError(f"Invalid type for timedelta scalar: {type(ts)}")
return ts.astype('timedelta64[ns]')
return ts.astype("timedelta64[ns]")


@cython.boundscheck(False)
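The NaT branch now returns a scalar that already carries the nanosecond unit, matching the astype("timedelta64[ns]") at the end of the function. A quick sketch of the difference in plain NumPy (illustrative only, not part of the patch):

import numpy as np

# Without a unit the NaT scalar has the generic timedelta64 dtype; with "ns"
# it is already at the nanosecond resolution convert_to_timedelta64 returns.
generic = np.timedelta64("NaT")
nanos = np.timedelta64("NaT", "ns")
print(generic.dtype)  # timedelta64
print(nanos.dtype)    # timedelta64[ns]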
8 changes: 2 additions & 6 deletions pandas/core/internals/managers.py
@@ -24,11 +24,7 @@
from pandas.errors import PerformanceWarning
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.cast import (
find_common_type,
infer_dtype_from_scalar,
maybe_promote,
)
from pandas.core.dtypes.cast import find_common_type, infer_dtype_from_scalar
from pandas.core.dtypes.common import (
DT64NS_DTYPE,
is_dtype_equal,
@@ -1332,7 +1328,7 @@ def _slice_take_blocks_ax0(
return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))]
elif not allow_fill or self.ndim == 1:
if allow_fill and fill_value is None:
_, fill_value = maybe_promote(blk.dtype)
fill_value = blk.fill_value

if not allow_fill and only_slice:
# GH#33597 slice instead of take, so we get
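In _slice_take_blocks_ax0 the fill value now comes from the block itself (blk.fill_value) rather than being re-derived from the dtype, which is why the maybe_promote import goes away. For context, a minimal sketch of what the removed helper does (pandas internal API; shown under the assumption of the default NaN fill):

import numpy as np
from pandas.core.dtypes.cast import maybe_promote

# maybe_promote returns the promoted dtype together with the NA value to use;
# an int64 dtype with a NaN fill promotes to float64.
dtype, fill_value = maybe_promote(np.dtype("int64"))
print(dtype, fill_value)  # float64 nan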
27 changes: 13 additions & 14 deletions pandas/io/pytables.py
@@ -3875,7 +3875,7 @@ def _create_axes(

# add my values
vaxes = []
for i, (b, b_items) in enumerate(zip(blocks, blk_items)):
for i, (blk, b_items) in enumerate(zip(blocks, blk_items)):

# shape of the data column are the indexable axes
klass = DataCol
@@ -3907,13 +3907,13 @@
new_name = name or f"values_block_{i}"
data_converted = _maybe_convert_for_string_atom(
new_name,
b,
blk,
existing_col=existing_col,
min_itemsize=min_itemsize,
nan_rep=nan_rep,
encoding=self.encoding,
errors=self.errors,
block_columns=b_items,
columns=b_items,
)
adj_name = _maybe_adjust_name(new_name, self.version)

@@ -4886,13 +4886,15 @@ def _maybe_convert_for_string_atom(
nan_rep,
encoding,
errors,
block_columns: List[str],
columns: List[str],
):
if not block.is_object:
return block.values
bvalues = block.values

dtype_name = block.dtype.name
inferred_type = lib.infer_dtype(block.values, skipna=False)
if bvalues.dtype != object:
return bvalues

dtype_name = bvalues.dtype.name
inferred_type = lib.infer_dtype(bvalues, skipna=False)

if inferred_type == "date":
raise TypeError("[date] is not implemented as a table column")
@@ -4904,7 +4906,7 @@
)

elif not (inferred_type == "string" or dtype_name == "object"):
return block.values
return bvalues

blocks: List[Block] = block.fillna(nan_rep, downcast=False)
# Note: because block is always object dtype, fillna goes
@@ -4923,13 +4925,11 @@

# expected behaviour:
# search block for a non-string object column by column
for i in range(block.shape[0]):
for i in range(data.shape[0]):
col = block.iget(i)
inferred_type = lib.infer_dtype(col, skipna=False)
if inferred_type != "string":
error_column_label = (
block_columns[i] if len(block_columns) > i else f"No.{i}"
)
error_column_label = columns[i] if len(columns) > i else f"No.{i}"
raise TypeError(
f"Cannot serialize the column [{error_column_label}]\n"
f"because its data contents are not [string] but "
@@ -4938,7 +4938,6 @@

# itemsize is the maximum length of a string (along any dimension)
data_converted = _convert_string_array(data, encoding, errors).reshape(data.shape)
assert data_converted.shape == block.shape, (data_converted.shape, block.shape)
itemsize = data_converted.itemsize

# specified min_itemsize?
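_maybe_convert_for_string_atom now operates on the block's ndarray (bvalues) directly and keeps the same rule: only object columns whose inferred type is "string" can be written as a string atom, and any other content in an object block raises the TypeError above. The inference it relies on is also exposed publicly; a small sketch of how it classifies columns (illustrative, not taken from the patch):

import numpy as np
import pandas as pd

# Pure-string object data infers as "string" and can be serialized;
# mixed contents infer differently and are rejected.
print(pd.api.types.infer_dtype(np.array(["a", "b"], dtype=object), skipna=False))  # string
print(pd.api.types.infer_dtype(np.array(["a", 1], dtype=object), skipna=False))    # mixed-integer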
14 changes: 14 additions & 0 deletions pandas/tests/frame/test_repr_info.py
@@ -23,6 +23,20 @@


class TestDataFrameReprInfoEtc:
def test_repr_bytes_61_lines(self):
# GH#12857
lets = list("ACDEFGHIJKLMNOP")
slen = 50
nseqs = 1000
words = [[np.random.choice(lets) for x in range(slen)] for _ in range(nseqs)]
df = DataFrame(words).astype("U1")
assert (df.dtypes == object).all()

# smoke tests; at one point this raised with 61 but not 60
repr(df)
repr(df.iloc[:60, :])
repr(df.iloc[:61, :])

def test_repr_unicode_level_names(self, frame_or_series):
index = MultiIndex.from_tuples([(0, 0), (1, 1)], names=["\u0394", "i1"])

38 changes: 16 additions & 22 deletions pandas/tests/indexing/test_indexing.py
@@ -55,11 +55,10 @@ def test_setitem_ndarray_1d(self):
with pytest.raises(ValueError, match=msg):
df[2:5] = np.arange(1, 4) * 1j

@pytest.mark.parametrize("idxr", [tm.getitem, tm.loc, tm.iloc])
def test_getitem_ndarray_3d(self, index, frame_or_series, idxr):
def test_getitem_ndarray_3d(self, index, frame_or_series, indexer_sli):
# GH 25567
obj = gen_obj(frame_or_series, index)
idxr = idxr(obj)
idxr = indexer_sli(obj)
nd3 = np.random.randint(5, size=(2, 2, 2))

msg = "|".join(
@@ -78,19 +77,18 @@ def test_getitem_ndarray_3d(self, index, frame_or_series, idxr):
with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
idxr[nd3]

@pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc])
def test_setitem_ndarray_3d(self, index, frame_or_series, indexer):
def test_setitem_ndarray_3d(self, index, frame_or_series, indexer_sli):
# GH 25567
obj = gen_obj(frame_or_series, index)
idxr = indexer(obj)
idxr = indexer_sli(obj)
nd3 = np.random.randint(5, size=(2, 2, 2))

if indexer.__name__ == "iloc":
if indexer_sli.__name__ == "iloc":
err = ValueError
msg = f"Cannot set values with ndim > {obj.ndim}"
elif (
isinstance(index, pd.IntervalIndex)
and indexer.__name__ == "setitem"
and indexer_sli.__name__ == "setitem"
and obj.ndim == 1
):
err = AttributeError
@@ -948,8 +946,7 @@ def test_none_coercion_mixed_dtypes(self):


class TestDatetimelikeCoercion:
@pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc])
def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer):
def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer_sli):
# dispatching _can_hold_element to underling DatetimeArray
tz = tz_naive_fixture

@@ -961,7 +958,7 @@ def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer):
newval = "2018-01-01"
values._validate_setitem_value(newval)

indexer(ser)[0] = newval
indexer_sli(ser)[0] = newval

if tz is None:
# TODO(EA2D): we can make this no-copy in tz-naive case too
@@ -974,12 +971,11 @@ def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer):
@pytest.mark.parametrize(
"key", [[0, 1], slice(0, 2), np.array([True, True, False])]
)
@pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc])
def test_setitem_dt64_string_values(self, tz_naive_fixture, indexer, key, box):
def test_setitem_dt64_string_values(self, tz_naive_fixture, indexer_sli, key, box):
# dispatching _can_hold_element to underling DatetimeArray
tz = tz_naive_fixture

if isinstance(key, slice) and indexer is tm.loc:
if isinstance(key, slice) and indexer_sli is tm.loc:
key = slice(0, 1)

dti = date_range("2016-01-01", periods=3, tz=tz)
@@ -990,7 +986,7 @@ def test_setitem_dt64_string_values(self, tz_naive_fixture, indexer, key, box):
newvals = box(["2019-01-01", "2010-01-02"])
values._validate_setitem_value(newvals)

indexer(ser)[key] = newvals
indexer_sli(ser)[key] = newvals

if tz is None:
# TODO(EA2D): we can make this no-copy in tz-naive case too
@@ -1000,26 +996,24 @@ def test_setitem_dt64_string_values(self, tz_naive_fixture, indexer, key, box):
assert ser._values is values

@pytest.mark.parametrize("scalar", ["3 Days", offsets.Hour(4)])
@pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc])
def test_setitem_td64_scalar(self, indexer, scalar):
def test_setitem_td64_scalar(self, indexer_sli, scalar):
# dispatching _can_hold_element to underling TimedeltaArray
tdi = timedelta_range("1 Day", periods=3)
ser = Series(tdi)

values = ser._values
values._validate_setitem_value(scalar)

indexer(ser)[0] = scalar
indexer_sli(ser)[0] = scalar
assert ser._values._data is values._data

@pytest.mark.parametrize("box", [list, np.array, pd.array])
@pytest.mark.parametrize(
"key", [[0, 1], slice(0, 2), np.array([True, True, False])]
)
@pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc])
def test_setitem_td64_string_values(self, indexer, key, box):
def test_setitem_td64_string_values(self, indexer_sli, key, box):
# dispatching _can_hold_element to underling TimedeltaArray
if isinstance(key, slice) and indexer is tm.loc:
if isinstance(key, slice) and indexer_sli is tm.loc:
key = slice(0, 1)

tdi = timedelta_range("1 Day", periods=3)
@@ -1030,7 +1024,7 @@ def test_setitem_td64_string_values(self, indexer, key, box):
newvals = box(["10 Days", "44 hours"])
values._validate_setitem_value(newvals)

indexer(ser)[key] = newvals
indexer_sli(ser)[key] = newvals
assert ser._values._data is values._data


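These tests drop the per-test @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc]) lists in favor of the shared indexer_sli fixture. Roughly, the fixture is equivalent to the sketch below (reconstructed from how the tests use it, not copied from pandas' conftest):

import pytest
import pandas._testing as tm

# Runs each test once per indexing method: plain setitem/getitem, .loc and .iloc.
@pytest.fixture(params=[tm.setitem, tm.loc, tm.iloc])
def indexer_sli(request):
    return request.param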
4 changes: 4 additions & 0 deletions pandas/tests/series/indexing/test_setitem.py
@@ -273,6 +273,10 @@ def test_setitem_dt64_into_int_series(self, dtype):
tm.assert_series_equal(ser, expected)
assert isinstance(ser[0], type(val))

ser = orig.copy()
ser[:-1] = [val, val]
tm.assert_series_equal(ser, expected)

ser = orig.copy()
ser[:-1] = np.array([val, val])
tm.assert_series_equal(ser, expected)
5 changes: 5 additions & 0 deletions pandas/tests/series/methods/test_item.py
@@ -1,10 +1,15 @@
"""
Series.item method, mainly testing that we get python scalars as opposed to
numpy scalars.
"""
import pytest

from pandas import Series, Timedelta, Timestamp, date_range


class TestItem:
def test_item(self):
# We are testing that we get python scalars as opposed to numpy scalars
ser = Series([1])
result = ser.item()
assert result == 1
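The module docstring states the intent: Series.item should return Python scalars rather than NumPy scalars. A small sketch of the behavior under test (assuming only that pandas and NumPy are importable):

import numpy as np
from pandas import Series

# .item() unwraps the single element as a plain Python int, not an np.int64.
result = Series([1], dtype=np.int64).item()
print(type(result))  # <class 'int'>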