From 1f7c446beb94d702e0499b2c430657b4f0aa5f1a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 17 Jun 2019 11:24:08 +0200 Subject: [PATCH 1/4] CLN: clean-up sanitize_array series construction --- pandas/core/internals/construction.py | 60 +++++++++------------------ 1 file changed, 20 insertions(+), 40 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index f564ac13dc41d..28c686f7c4bff 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -549,59 +549,39 @@ def sanitize_array(data, index, dtype=None, copy=False, else: data = data.copy() + # extract ndarray or ExtensionArray, ensure we have no PandasArray data = extract_array(data, extract_numpy=True) # GH#846 if isinstance(data, np.ndarray): - if dtype is not None: - subarr = np.array(data, copy=False) - + if (dtype is not None + and is_float_dtype(data.dtype) and is_integer_dtype(dtype)): # possibility of nan -> garbage - if is_float_dtype(data.dtype) and is_integer_dtype(dtype): - try: - subarr = _try_cast(data, True, dtype, copy, - True) - except ValueError: - if copy: - subarr = data.copy() - else: - subarr = _try_cast(data, True, dtype, copy, raise_cast_failure) - elif isinstance(data, Index): - # don't coerce Index types - # e.g. indexes can have different conversions (so don't fast path - # them) - # GH#6140 - subarr = sanitize_index(data, index, copy=copy) + try: + subarr = _try_cast(data, dtype, copy, True) + except ValueError: + if copy: + subarr = data.copy() + else: + subarr = np.array(data, copy=False) else: - # we will try to copy be-definition here - subarr = _try_cast(data, True, dtype, copy, raise_cast_failure) + subarr = _try_cast(data, dtype, copy, raise_cast_failure) elif isinstance(data, ExtensionArray): - if isinstance(data, ABCPandasArray): - # We don't want to let people put our PandasArray wrapper - # (the output of Series/Index.array), into a Series. So - # we explicitly unwrap it here. - subarr = data.to_numpy() - else: - subarr = data - - # everything else in this block must also handle ndarray's, - # becuase we've unwrapped PandasArray into an ndarray. - + # it is already ensured above this is not a PandasArray + subarr = data if dtype is not None: - subarr = data.astype(dtype) - + subarr = subarr.astype(dtype) if copy: - subarr = data.copy() + subarr = subarr.copy() return subarr elif isinstance(data, (list, tuple)) and len(data) > 0: if dtype is not None: try: - subarr = _try_cast(data, False, dtype, copy, - raise_cast_failure) + subarr = _try_cast(data, dtype, copy, raise_cast_failure) except Exception: if raise_cast_failure: # pragma: no cover raise @@ -616,9 +596,9 @@ def sanitize_array(data, index, dtype=None, copy=False, elif isinstance(data, range): # GH#16804 arr = np.arange(data.start, data.stop, data.step, dtype='int64') - subarr = _try_cast(arr, False, dtype, copy, raise_cast_failure) + subarr = _try_cast(arr, dtype, copy, raise_cast_failure) else: - subarr = _try_cast(data, False, dtype, copy, raise_cast_failure) + subarr = _try_cast(data, dtype, copy, raise_cast_failure) # scalar like, GH if getattr(subarr, 'ndim', 0) == 0: @@ -677,10 +657,10 @@ def sanitize_array(data, index, dtype=None, copy=False, return subarr -def _try_cast(arr, take_fast_path, dtype, copy, raise_cast_failure): +def _try_cast(arr, dtype, copy, raise_cast_failure): # perf shortcut as this is the most common case - if take_fast_path: + if isinstance(arr, np.ndarray): if maybe_castable(arr) and not copy and dtype is None: return arr From 904990ae65bc86e4b8744de09704e24da6539d1c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 21 Jun 2019 10:36:12 +0200 Subject: [PATCH 2/4] lint --- pandas/core/internals/construction.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 28c686f7c4bff..347b6566e2b49 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -21,8 +21,8 @@ is_extension_array_dtype, is_extension_type, is_float_dtype, is_integer_dtype, is_iterator, is_list_like, is_object_dtype, pandas_dtype) from pandas.core.dtypes.generic import ( - ABCDataFrame, ABCDatetimeIndex, ABCIndexClass, ABCPandasArray, - ABCPeriodIndex, ABCSeries, ABCTimedeltaIndex) + ABCDataFrame, ABCDatetimeIndex, ABCIndexClass, ABCPeriodIndex, + ABCSeries, ABCTimedeltaIndex) from pandas.core.dtypes.missing import isna from pandas.core import algorithms, common as com From 3428cbca6718775f8e3038ac64553f0108b60471 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 21 Jun 2019 17:15:52 +0200 Subject: [PATCH 3/4] update --- pandas/core/internals/construction.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 347b6566e2b49..a3984cba21f86 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -573,8 +573,8 @@ def sanitize_array(data, index, dtype=None, copy=False, # it is already ensured above this is not a PandasArray subarr = data if dtype is not None: - subarr = subarr.astype(dtype) - if copy: + subarr = subarr.astype(dtype, copy=copy) + elif copy: subarr = subarr.copy() return subarr @@ -658,7 +658,19 @@ def sanitize_array(data, index, dtype=None, copy=False, def _try_cast(arr, dtype, copy, raise_cast_failure): - + """ + Convert input to numpy ndarray and optionally cast to a given dtype. + + Parameters + ---------- + arr : array-like + dtype : np.dtype, ExtensionDtype or None + copy : bool + If False, don't copy the data if not needed. + raise_cast_failure : bool + If True, and if a dtype is specified, raise errors during casting. + Otherwise an object array is returned. + """ # perf shortcut as this is the most common case if isinstance(arr, np.ndarray): if maybe_castable(arr) and not copy and dtype is None: From d076d79dfd2484ae3aa1e4a067f7f831a84bcfba Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 22 Jun 2019 15:22:01 +0200 Subject: [PATCH 4/4] isort --- pandas/core/internals/construction.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index a3984cba21f86..bab7fc45d0d42 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -21,8 +21,8 @@ is_extension_array_dtype, is_extension_type, is_float_dtype, is_integer_dtype, is_iterator, is_list_like, is_object_dtype, pandas_dtype) from pandas.core.dtypes.generic import ( - ABCDataFrame, ABCDatetimeIndex, ABCIndexClass, ABCPeriodIndex, - ABCSeries, ABCTimedeltaIndex) + ABCDataFrame, ABCDatetimeIndex, ABCIndexClass, ABCPeriodIndex, ABCSeries, + ABCTimedeltaIndex) from pandas.core.dtypes.missing import isna from pandas.core import algorithms, common as com