From 2fbcab711fecda140293b251b4dc1d1ff5f89b84 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 15 Dec 2020 09:50:10 -0800 Subject: [PATCH 1/3] REF: require listlike in maybe_cast_to_datetime --- pandas/core/construction.py | 25 +++++++++++++++++++------ pandas/core/dtypes/cast.py | 14 ++++---------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index a8ca457cdf2a7..79f5b5359a9a6 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -451,6 +451,7 @@ def sanitize_array( # GH#846 if isinstance(data, np.ndarray): + data = np.atleast_1d(data) if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype): # possibility of nan -> garbage @@ -476,6 +477,7 @@ def sanitize_array( return subarr elif isinstance(data, (list, tuple, abc.Set, abc.ValuesView)) and len(data) > 0: + # TODO: deque, array.array if isinstance(data, set): # Raise only for unordered sets, e.g., not for dict_keys raise TypeError("Set type is unordered") @@ -494,6 +496,9 @@ def sanitize_array( elif lib.is_scalar(data) and index is not None and dtype is not None: subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype) else: + if not is_list_like(data): + # TODO: sure we want to do this here? + data = [data] subarr = _try_cast(data, dtype, copy, raise_cast_failure) # scalar like, GH @@ -508,11 +513,7 @@ def sanitize_array( # the result that we want elif subarr.ndim == 1: - if index is not None: - - # a 1-element ndarray - if len(subarr) != len(index) and len(subarr) == 1: - subarr = subarr.repeat(len(index)) + subarr = _maybe_repeat(subarr, index) elif subarr.ndim > 1: if isinstance(data, np.ndarray): @@ -531,6 +532,7 @@ def sanitize_array( if not np.all(isna(data)): data = np.array(data, dtype=dtype, copy=False) subarr = np.array(data, dtype=object, copy=copy) + subarr = _maybe_repeat(subarr, index) is_object_or_str_dtype = is_object_dtype(dtype) or is_string_dtype(dtype) if is_object_dtype(subarr.dtype) and not is_object_or_str_dtype: @@ -541,13 +543,24 @@ def sanitize_array( return subarr +def _maybe_repeat(arr: ArrayLike, index: Optional[Index]) -> ArrayLike: + """ + If we have a length-1 array and an index describing how long we expect + the result to be, repeat the array. + """ + if index is not None: + if 1 == len(arr) != len(index): + arr = arr.repeat(len(index)) + return arr + + def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bool): """ Convert input to numpy ndarray and optionally cast to a given dtype. Parameters ---------- - arr : ndarray, scalar, list, tuple, iterator (catchall) + arr : ndarray, list, tuple, iterator (catchall) Excludes: ExtensionArray, Series, Index. dtype : np.dtype, ExtensionDtype or None copy : bool diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 63445d0e1598d..afe37555ae719 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1341,6 +1341,9 @@ def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]): from pandas.core.tools.datetimes import to_datetime from pandas.core.tools.timedeltas import to_timedelta + if not is_list_like(value): + raise TypeError("value must be listlike") + if dtype is not None: is_datetime64 = is_datetime64_dtype(dtype) is_datetime64tz = is_datetime64tz_dtype(dtype) @@ -1369,13 +1372,6 @@ def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]): raise TypeError( f"cannot convert datetimelike to dtype [{dtype}]" ) - elif is_datetime64tz: - - # our NaT doesn't support tz's - # this will coerce to DatetimeIndex with - # a matching dtype below - if is_scalar(value) and isna(value): - value = [value] elif is_timedelta64 and not is_dtype_equal(dtype, TD64NS_DTYPE): @@ -1388,9 +1384,7 @@ def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]): else: raise TypeError(f"cannot convert timedeltalike to dtype [{dtype}]") - if is_scalar(value): - value = maybe_unbox_datetimelike(value, dtype) - elif not is_sparse(value): + if not is_sparse(value): value = np.array(value, copy=False) # have a scalar array-like (e.g. NaT) From 30dea45bdb64e8509f2dba72e475960e625fc391 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 15 Dec 2020 20:18:48 -0800 Subject: [PATCH 2/3] troubleshoot 32bit build failures --- pandas/core/construction.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 79f5b5359a9a6..64f0f7821dfe4 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -451,6 +451,9 @@ def sanitize_array( # GH#846 if isinstance(data, np.ndarray): + if data.ndim == 0 and data.dtype.kind in ["u", "i"]: + # FIXME: kludge while troubleshooting 32 bit build failure + data = np.atleast_1d(data).astype(np.int64) data = np.atleast_1d(data) if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype): From 8f696de2187b158919f71548fee182a37eb141d2 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 16 Dec 2020 17:35:00 -0800 Subject: [PATCH 3/3] troubleshoot 32bit builds --- pandas/core/construction.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 64f0f7821dfe4..aea857856d040 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -451,11 +451,6 @@ def sanitize_array( # GH#846 if isinstance(data, np.ndarray): - if data.ndim == 0 and data.dtype.kind in ["u", "i"]: - # FIXME: kludge while troubleshooting 32 bit build failure - data = np.atleast_1d(data).astype(np.int64) - data = np.atleast_1d(data) - if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype): # possibility of nan -> garbage try: @@ -466,7 +461,11 @@ def sanitize_array( else: subarr = np.array(data, copy=False) else: - # we will try to copy be-definition here + if data.ndim == 0: + # TODO: np.atleast_1d? doing that breaks some tests on 32bit + data = [data] + + # we will try to copy by-definition here subarr = _try_cast(data, dtype, copy, raise_cast_failure) elif isinstance(data, ABCExtensionArray):