diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
index f3bf45f681b1f..f0150cf6a2d81 100644
--- a/pandas/_libs/tslibs/timedeltas.pyx
+++ b/pandas/_libs/tslibs/timedeltas.pyx
@@ -259,39 +259,39 @@ cdef convert_to_timedelta64(object ts, str unit):
         ts = np.timedelta64(ts.value, "ns")
     elif is_datetime64_object(ts):
         # only accept a NaT here
-        if ts.astype('int64') == NPY_NAT:
-            return np.timedelta64(NPY_NAT)
+        if ts.astype("int64") == NPY_NAT:
+            return np.timedelta64(NPY_NAT, "ns")
     elif is_timedelta64_object(ts):
         ts = ensure_td64ns(ts)
     elif is_integer_object(ts):
         if ts == NPY_NAT:
             return np.timedelta64(NPY_NAT, "ns")
         else:
-            if unit in ['Y', 'M', 'W']:
+            if unit in ["Y", "M", "W"]:
                 ts = np.timedelta64(ts, unit)
             else:
                 ts = cast_from_unit(ts, unit)
                 ts = np.timedelta64(ts, "ns")
     elif is_float_object(ts):
-        if unit in ['Y', 'M', 'W']:
+        if unit in ["Y", "M", "W"]:
             ts = np.timedelta64(int(ts), unit)
         else:
             ts = cast_from_unit(ts, unit)
             ts = np.timedelta64(ts, "ns")
     elif isinstance(ts, str):
-        if len(ts) > 0 and ts[0] == 'P':
+        if len(ts) > 0 and ts[0] == "P":
             ts = parse_iso_format_string(ts)
         else:
             ts = parse_timedelta_string(ts)
         ts = np.timedelta64(ts, "ns")
     elif is_tick_object(ts):
-        ts = np.timedelta64(ts.nanos, 'ns')
+        ts = np.timedelta64(ts.nanos, "ns")

     if PyDelta_Check(ts):
-        ts = np.timedelta64(delta_to_nanoseconds(ts), 'ns')
+        ts = np.timedelta64(delta_to_nanoseconds(ts), "ns")
     elif not is_timedelta64_object(ts):
         raise ValueError(f"Invalid type for timedelta scalar: {type(ts)}")
-    return ts.astype('timedelta64[ns]')
+    return ts.astype("timedelta64[ns]")


 @cython.boundscheck(False)
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 5ed2ca469dd8a..f1cf1aa9a72cb 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -24,11 +24,7 @@
 from pandas.errors import PerformanceWarning
 from pandas.util._validators import validate_bool_kwarg

-from pandas.core.dtypes.cast import (
-    find_common_type,
-    infer_dtype_from_scalar,
-    maybe_promote,
-)
+from pandas.core.dtypes.cast import find_common_type, infer_dtype_from_scalar
 from pandas.core.dtypes.common import (
     DT64NS_DTYPE,
     is_dtype_equal,
@@ -1332,7 +1328,7 @@ def _slice_take_blocks_ax0(
                 return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))]
             elif not allow_fill or self.ndim == 1:
                 if allow_fill and fill_value is None:
-                    _, fill_value = maybe_promote(blk.dtype)
+                    fill_value = blk.fill_value

                 if not allow_fill and only_slice:
                     # GH#33597 slice instead of take, so we get
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 2eb7d1c9353cf..8917be1f558b2 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -3875,7 +3875,7 @@ def _create_axes(

         # add my values
         vaxes = []
-        for i, (b, b_items) in enumerate(zip(blocks, blk_items)):
+        for i, (blk, b_items) in enumerate(zip(blocks, blk_items)):

             # shape of the data column are the indexable axes
             klass = DataCol
@@ -3907,13 +3907,13 @@ def _create_axes(

             new_name = name or f"values_block_{i}"
             data_converted = _maybe_convert_for_string_atom(
                 new_name,
-                b,
+                blk,
                 existing_col=existing_col,
                 min_itemsize=min_itemsize,
                 nan_rep=nan_rep,
                 encoding=self.encoding,
                 errors=self.errors,
-                block_columns=b_items,
+                columns=b_items,
             )
             adj_name = _maybe_adjust_name(new_name, self.version)

@@ -4886,13 +4886,15 @@ def _maybe_convert_for_string_atom(
     nan_rep,
     encoding,
     errors,
-    block_columns: List[str],
+    columns: List[str],
 ):
-    if not block.is_object:
-        return block.values
+    bvalues = block.values

-    dtype_name = block.dtype.name
-    inferred_type = lib.infer_dtype(block.values, skipna=False)
+    if bvalues.dtype != object:
+        return bvalues
+
+    dtype_name = bvalues.dtype.name
+    inferred_type = lib.infer_dtype(bvalues, skipna=False)

     if inferred_type == "date":
         raise TypeError("[date] is not implemented as a table column")
@@ -4904,7 +4906,7 @@ def _maybe_convert_for_string_atom(
         )

     elif not (inferred_type == "string" or dtype_name == "object"):
-        return block.values
+        return bvalues

     blocks: List[Block] = block.fillna(nan_rep, downcast=False)
     # Note: because block is always object dtype, fillna goes
@@ -4923,13 +4925,11 @@ def _maybe_convert_for_string_atom(

         # expected behaviour:
         # search block for a non-string object column by column
-        for i in range(block.shape[0]):
+        for i in range(data.shape[0]):
             col = block.iget(i)
             inferred_type = lib.infer_dtype(col, skipna=False)
             if inferred_type != "string":
-                error_column_label = (
-                    block_columns[i] if len(block_columns) > i else f"No.{i}"
-                )
+                error_column_label = columns[i] if len(columns) > i else f"No.{i}"
                 raise TypeError(
                     f"Cannot serialize the column [{error_column_label}]\n"
                     f"because its data contents are not [string] but "
@@ -4938,7 +4938,6 @@ def _maybe_convert_for_string_atom(

     # itemsize is the maximum length of a string (along any dimension)
     data_converted = _convert_string_array(data, encoding, errors).reshape(data.shape)
-    assert data_converted.shape == block.shape, (data_converted.shape, block.shape)
     itemsize = data_converted.itemsize

     # specified min_itemsize?
diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py
index a7b3333e7c690..22b50310cedd6 100644
--- a/pandas/tests/frame/test_repr_info.py
+++ b/pandas/tests/frame/test_repr_info.py
@@ -23,6 +23,20 @@


 class TestDataFrameReprInfoEtc:
+    def test_repr_bytes_61_lines(self):
+        # GH#12857
+        lets = list("ACDEFGHIJKLMNOP")
+        slen = 50
+        nseqs = 1000
+        words = [[np.random.choice(lets) for x in range(slen)] for _ in range(nseqs)]
+        df = DataFrame(words).astype("U1")
+        assert (df.dtypes == object).all()
+
+        # smoke tests; at one point this raised with 61 but not 60
+        repr(df)
+        repr(df.iloc[:60, :])
+        repr(df.iloc[:61, :])
+
     def test_repr_unicode_level_names(self, frame_or_series):
         index = MultiIndex.from_tuples([(0, 0), (1, 1)], names=["\u0394", "i1"])
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
index f67341ab176d7..d490a23317fef 100644
--- a/pandas/tests/indexing/test_indexing.py
+++ b/pandas/tests/indexing/test_indexing.py
@@ -55,11 +55,10 @@ def test_setitem_ndarray_1d(self):
         with pytest.raises(ValueError, match=msg):
             df[2:5] = np.arange(1, 4) * 1j

-    @pytest.mark.parametrize("idxr", [tm.getitem, tm.loc, tm.iloc])
-    def test_getitem_ndarray_3d(self, index, frame_or_series, idxr):
+    def test_getitem_ndarray_3d(self, index, frame_or_series, indexer_sli):
         # GH 25567
         obj = gen_obj(frame_or_series, index)
-        idxr = idxr(obj)
+        idxr = indexer_sli(obj)
         nd3 = np.random.randint(5, size=(2, 2, 2))

         msg = "|".join(
@@ -78,19 +77,18 @@ def test_getitem_ndarray_3d(self, index, frame_or_series, idxr):
         with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False):
             idxr[nd3]

-    @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc])
-    def test_setitem_ndarray_3d(self, index, frame_or_series, indexer):
+    def test_setitem_ndarray_3d(self, index, frame_or_series, indexer_sli):
         # GH 25567
         obj = gen_obj(frame_or_series, index)
-        idxr = indexer(obj)
+        idxr = indexer_sli(obj)
         nd3 = np.random.randint(5, size=(2, 2, 2))

-        if indexer.__name__ == "iloc":
+        if indexer_sli.__name__ == "iloc":
             err = ValueError
             msg = f"Cannot set values with ndim > {obj.ndim}"
         elif (
             isinstance(index, pd.IntervalIndex)
-            and indexer.__name__ == "setitem"
+            and indexer_sli.__name__ == "setitem"
             and obj.ndim == 1
         ):
             err = AttributeError
@@ -948,8 +946,7 @@ def test_none_coercion_mixed_dtypes(self):


 class TestDatetimelikeCoercion:
-    @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc])
-    def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer):
+    def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer_sli):
         # dispatching _can_hold_element to underling DatetimeArray
         tz = tz_naive_fixture

@@ -961,7 +958,7 @@ def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer):
         newval = "2018-01-01"
         values._validate_setitem_value(newval)

-        indexer(ser)[0] = newval
+        indexer_sli(ser)[0] = newval

         if tz is None:
             # TODO(EA2D): we can make this no-copy in tz-naive case too
@@ -974,12 +971,11 @@ def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer):
     @pytest.mark.parametrize(
         "key", [[0, 1], slice(0, 2), np.array([True, True, False])]
     )
-    @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc])
-    def test_setitem_dt64_string_values(self, tz_naive_fixture, indexer, key, box):
+    def test_setitem_dt64_string_values(self, tz_naive_fixture, indexer_sli, key, box):
         # dispatching _can_hold_element to underling DatetimeArray
         tz = tz_naive_fixture

-        if isinstance(key, slice) and indexer is tm.loc:
+        if isinstance(key, slice) and indexer_sli is tm.loc:
             key = slice(0, 1)

         dti = date_range("2016-01-01", periods=3, tz=tz)
@@ -990,7 +986,7 @@ def test_setitem_dt64_string_values(self, tz_naive_fixture, indexer, key, box):
         newvals = box(["2019-01-01", "2010-01-02"])
         values._validate_setitem_value(newvals)

-        indexer(ser)[key] = newvals
+        indexer_sli(ser)[key] = newvals

         if tz is None:
             # TODO(EA2D): we can make this no-copy in tz-naive case too
@@ -1000,8 +996,7 @@ def test_setitem_dt64_string_values(self, tz_naive_fixture, indexer, key, box):
         assert ser._values is values

     @pytest.mark.parametrize("scalar", ["3 Days", offsets.Hour(4)])
-    @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc])
-    def test_setitem_td64_scalar(self, indexer, scalar):
+    def test_setitem_td64_scalar(self, indexer_sli, scalar):
         # dispatching _can_hold_element to underling TimedeltaArray
         tdi = timedelta_range("1 Day", periods=3)
         ser = Series(tdi)
@@ -1009,17 +1004,16 @@ def test_setitem_td64_scalar(self, indexer, scalar):
         values = ser._values
         values._validate_setitem_value(scalar)

-        indexer(ser)[0] = scalar
+        indexer_sli(ser)[0] = scalar
         assert ser._values._data is values._data

     @pytest.mark.parametrize("box", [list, np.array, pd.array])
     @pytest.mark.parametrize(
         "key", [[0, 1], slice(0, 2), np.array([True, True, False])]
     )
-    @pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc])
-    def test_setitem_td64_string_values(self, indexer, key, box):
+    def test_setitem_td64_string_values(self, indexer_sli, key, box):
         # dispatching _can_hold_element to underling TimedeltaArray
-        if isinstance(key, slice) and indexer is tm.loc:
+        if isinstance(key, slice) and indexer_sli is tm.loc:
             key = slice(0, 1)

         tdi = timedelta_range("1 Day", periods=3)
@@ -1030,7 +1024,7 @@ def test_setitem_td64_string_values(self, indexer, key, box):
         newvals = box(["10 Days", "44 hours"])
         values._validate_setitem_value(newvals)

-        indexer(ser)[key] = newvals
+        indexer_sli(ser)[key] = newvals

         assert ser._values._data is values._data

diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py
index fabc2dc8bbd1c..36cd6b0327ccd 100644
--- a/pandas/tests/series/indexing/test_setitem.py
+++ b/pandas/tests/series/indexing/test_setitem.py
@@ -273,6 +273,10 @@ def test_setitem_dt64_into_int_series(self, dtype):
         tm.assert_series_equal(ser, expected)
         assert isinstance(ser[0], type(val))

+        ser = orig.copy()
+        ser[:-1] = [val, val]
+        tm.assert_series_equal(ser, expected)
+
         ser = orig.copy()
         ser[:-1] = np.array([val, val])
         tm.assert_series_equal(ser, expected)
diff --git a/pandas/tests/series/methods/test_item.py b/pandas/tests/series/methods/test_item.py
index a7ddc0c22dcf4..90e8f6d39c5cc 100644
--- a/pandas/tests/series/methods/test_item.py
+++ b/pandas/tests/series/methods/test_item.py
@@ -1,3 +1,7 @@
+"""
+Series.item method, mainly testing that we get python scalars as opposed to
+numpy scalars.
+"""
 import pytest

 from pandas import Series, Timedelta, Timestamp, date_range
@@ -5,6 +9,7 @@

 class TestItem:
     def test_item(self):
+        # We are testing that we get python scalars as opposed to numpy scalars
         ser = Series([1])
         result = ser.item()
         assert result == 1
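
Note on the test changes in pandas/tests/indexing/test_indexing.py: the per-test
@pytest.mark.parametrize("indexer", [tm.setitem, tm.loc, tm.iloc]) decorators are
dropped in favour of a shared indexer_sli fixture. That fixture is defined in a
conftest that is not part of this diff; a minimal sketch of what it presumably
looks like, assuming it parametrizes over the same three indexing entry points
(the name and docstring below are illustrative, not taken from this diff):

    # conftest.py (sketch)
    import pytest

    import pandas._testing as tm


    @pytest.fixture(params=[tm.setitem, tm.loc, tm.iloc])
    def indexer_sli(request):
        # Run each test once per indexing entry point:
        # plain __setitem__, .loc and .iloc ("sli" = setitem / loc / iloc).
        return request.param

Using a shared fixture keeps the three-way parametrization in one place instead
of repeating the decorator on every test that exercises all indexing paths.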