From 676a4e5cef1cf37704ef702699db1fd6c89028ea Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Mon, 20 Mar 2017 19:32:02 -0300 Subject: [PATCH 01/18] Test --- RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index a181412be2719..efd075dabcba9 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,6 +1,6 @@ Release Notes ============= -The list of changes to pandas between each release can be found +The list of changes to Pandas between each release can be found [here](http://pandas.pydata.org/pandas-docs/stable/whatsnew.html). For full details, see the commit logs at http://github.com/pandas-dev/pandas. From bbdea4ba9e72f3a1db0a0678631232929a1ccba2 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Sat, 22 Apr 2017 16:19:38 -0300 Subject: [PATCH 02/18] Adding failing tests --- pandas/tests/indexes/test_numeric.py | 17 +++++++++++++++++ pandas/tests/series/test_constructors.py | 11 +++++++++++ 2 files changed, 28 insertions(+) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 3d06f1672ae32..6107c11622eb2 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -678,6 +678,23 @@ def test_constructor_corner(self): with tm.assert_raises_regex(TypeError, 'casting'): Int64Index(arr_with_floats) + def test_constructor_overflow_coercion_signed_to_unsigned(self): + # GH 15832 + for t in ['uint8', 'uint16', 'uint32', 'uint64']: + with pytest.raises(OverflowError): + Index([-1], dtype=t) + + try: + Index([-1], dtype=int) + except Exception: + self.fail("Index constructor did not behave correctly, raising an " + "exception when it should not.") + + def test_constructor_overflow_coercion_float_to_int(self): + # GH 15832 + with pytest.raises(OverflowError): + Index([1, 2, 3.5], dtype=int) + def test_coerce_list(self): # coerce things arr = Index([1, 2, 3, 4]) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index d591aa4f567a9..2e316c248673b 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -888,3 +888,14 @@ def test_constructor_generic_timestamp_deprecated(self): msg = "cannot convert datetimelike" with tm.assert_raises_regex(TypeError, msg): Series([], dtype='M8[ps]') + + def test_constructor_overflow_coercion_signed_to_unsigned(self): + # GH 15832 + for t in ['uint8', 'uint16', 'uint32', 'uint64']: + with pytest.raises(OverflowError): + Series([-1], dtype=t) + + def test_constructor_overflow_coercion_float_to_int(self): + # GH 15832 + with pytest.raises(OverflowError): + Series([1, 2, 3.5], dtype=int) From d2e26aca84438560a32d76d357475786cdd8e358 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Tue, 16 May 2017 10:42:30 -0300 Subject: [PATCH 03/18] Code passing new tests from issue GH 15832 (but breaking 2 other tests - help needed) --- pandas/core/indexes/base.py | 7 ++++++- pandas/core/series.py | 13 +++++++++++++ pandas/tests/indexes/test_numeric.py | 6 ------ pandas/tests/series/test_constructors.py | 22 +++++++++------------- 4 files changed, 28 insertions(+), 20 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 9b29f1b04ff73..3899ae0e12b8c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -212,6 +212,10 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, if is_integer_dtype(dtype): inferred = lib.infer_dtype(data) if inferred == 'integer': + if (np.asarray(data) < 0).any(): + raise OverflowError("Trying to coerce " + "negative values to " + "negative integers") data = np.array(data, copy=copy, dtype=dtype) elif inferred in ['floating', 'mixed-integer-float']: if isnull(data).any(): @@ -224,7 +228,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, return cls._try_convert_to_int_index( data, copy, name) except ValueError: - pass + raise OverflowError("Trying to coerce float " + "values to integers") # Return an actual float index. from .numeric import Float64Index diff --git a/pandas/core/series.py b/pandas/core/series.py index 6ec163bbaa73d..7241e4c439521 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -28,6 +28,7 @@ is_iterator, is_dict_like, is_scalar, + is_unsigned_integer_dtype, _is_unorderable_exception, _ensure_platform_int, pandas_dtype) @@ -2919,6 +2920,18 @@ def _try_cast(arr, take_fast_path): subarr = maybe_cast_to_datetime(arr, dtype) if not is_extension_type(subarr): subarr = np.array(subarr, dtype=dtype, copy=copy) + + # Raises if coercion from unsigned to signed with neg data + if is_unsigned_integer_dtype(dtype) and (np.asarray(data) < 0)\ + .any(): + raise OverflowError("Trying to coerce negative values to " + "negative integers") + + # Raises if coercion from float to integer + if is_integer_dtype(dtype) and is_float_dtype(np.asarray(data)): + raise OverflowError("Trying to coerce float values to " + "integers") + except (ValueError, TypeError): if is_categorical_dtype(dtype): subarr = Categorical(arr) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 6107c11622eb2..20ee2b83f5c7a 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -684,12 +684,6 @@ def test_constructor_overflow_coercion_signed_to_unsigned(self): with pytest.raises(OverflowError): Index([-1], dtype=t) - try: - Index([-1], dtype=int) - except Exception: - self.fail("Index constructor did not behave correctly, raising an " - "exception when it should not.") - def test_constructor_overflow_coercion_float_to_int(self): # GH 15832 with pytest.raises(OverflowError): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 2e316c248673b..629823b91dacb 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1,30 +1,26 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 -import pytest - from datetime import datetime, timedelta -from numpy import nan import numpy as np import numpy.ma as ma import pandas as pd - -from pandas.core.dtypes.common import ( - is_categorical_dtype, - is_datetime64tz_dtype) +import pytest +from numpy import nan from pandas import (Index, Series, isnull, date_range, NaT, period_range, MultiIndex, IntervalIndex) -from pandas.core.indexes.datetimes import Timestamp, DatetimeIndex +from pandas import compat +from pandas.compat import lrange, range, zip, OrderedDict, long +import pandas.util.testing as tm from pandas._libs import lib from pandas._libs.tslib import iNaT - -from pandas.compat import lrange, range, zip, OrderedDict, long -from pandas import compat +from pandas.core.dtypes.common import ( + is_categorical_dtype, + is_datetime64tz_dtype) +from pandas.core.indexes.datetimes import Timestamp, DatetimeIndex from pandas.util.testing import assert_series_equal -import pandas.util.testing as tm - from .common import TestData From 3c868a4934e0711e63c1207afa232f5f42b38b62 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Wed, 17 May 2017 15:33:01 -0300 Subject: [PATCH 04/18] Fixing 1 of 2 new issues: now only raises if trying to coerce and dtype is unsigned integer --- pandas/core/indexes/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3899ae0e12b8c..8a817d0e5cd85 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -212,7 +212,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, if is_integer_dtype(dtype): inferred = lib.infer_dtype(data) if inferred == 'integer': - if (np.asarray(data) < 0).any(): + if is_unsigned_integer_dtype(dtype) and\ + (np.asarray(data) < 0).any(): raise OverflowError("Trying to coerce " "negative values to " "negative integers") From 20ac5c697e91329456887ff54966b5a69910c71d Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Wed, 17 May 2017 18:13:47 -0300 Subject: [PATCH 05/18] Fixing 2nd issue: now it raises only if the coercion from float to integer loses precision --- pandas/core/series.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 7241e4c439521..df3f6345b59a8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2927,10 +2927,13 @@ def _try_cast(arr, take_fast_path): raise OverflowError("Trying to coerce negative values to " "negative integers") - # Raises if coercion from float to integer + # Raises if coercion from float to integer loses precision if is_integer_dtype(dtype) and is_float_dtype(np.asarray(data)): - raise OverflowError("Trying to coerce float values to " - "integers") + if not np.array_equal(np.asarray(data), + np.asarray(data).astype(int). + astype(float)): + raise OverflowError("Trying to coerce float values to " + "integers") except (ValueError, TypeError): if is_categorical_dtype(dtype): From 14ed83beffd7896b1c27f67215dfd525755e3c84 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Thu, 18 May 2017 23:34:11 -0300 Subject: [PATCH 06/18] Fixing broken tests --- pandas/core/series.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index df3f6345b59a8..3642722452e71 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2929,9 +2929,10 @@ def _try_cast(arr, take_fast_path): # Raises if coercion from float to integer loses precision if is_integer_dtype(dtype) and is_float_dtype(np.asarray(data)): - if not np.array_equal(np.asarray(data), - np.asarray(data).astype(int). - astype(float)): + #if not np.array_equal(np.asarray(data), + # np.asarray(data).astype(int). + # astype(float)): + if ((np.asarray(data) % np.asarray(data).astype(int)) > 0).any(): raise OverflowError("Trying to coerce float values to " "integers") From 3d0e76ff222d289485b913b57928a3f9f68b7c18 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Fri, 19 May 2017 18:08:48 -0300 Subject: [PATCH 07/18] Adding maybe_cast_to_integer --- pandas/core/dtypes/cast.py | 26 ++++++++++++++++++++++++++ pandas/core/indexes/base.py | 7 ++----- pandas/core/series.py | 19 +++---------------- 3 files changed, 31 insertions(+), 21 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 19d3792f73de7..a39c705dbf5f9 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -16,6 +16,7 @@ is_timedelta64_dtype, is_dtype_equal, is_float_dtype, is_complex_dtype, is_integer_dtype, + is_unsigned_integer_dtype, is_datetime_or_timedelta_dtype, is_bool_dtype, is_scalar, _string_dtypes, @@ -1026,3 +1027,28 @@ def find_common_type(types): return np.object return np.find_common_type(types, []) + + +def maybe_cast_to_integer(arr, dtype): + """ + Find a common data type among the given dtypes. + + Parameters + ---------- + arr : array + dtype : dtype + + Returns + ------- + integer or unsigned integer array (or raise if the dtype is incompatible) + + """ + + if is_unsigned_integer_dtype(dtype) and (np.asarray(arr) < 0).any(): + raise OverflowError("Trying to coerce negative values to negative " + "integers") + elif is_integer_dtype(dtype) and is_float_dtype(np.asarray(arr)): + if ((np.asarray(arr) % np.asarray(arr).astype(int)) > 0).any(): + raise OverflowError("Trying to coerce float values to integers") + else: + return arr diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 8a817d0e5cd85..287414a7ac36a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -52,6 +52,7 @@ from pandas.core.ops import _comp_method_OBJECT_ARRAY from pandas.core.strings import StringAccessorMixin from pandas.core.config import get_option +from pandas.core.dtypes.cast import maybe_cast_to_integer # simplify @@ -212,11 +213,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, if is_integer_dtype(dtype): inferred = lib.infer_dtype(data) if inferred == 'integer': - if is_unsigned_integer_dtype(dtype) and\ - (np.asarray(data) < 0).any(): - raise OverflowError("Trying to coerce " - "negative values to " - "negative integers") + data = maybe_cast_to_integer(data, dtype=dtype) data = np.array(data, copy=copy, dtype=dtype) elif inferred in ['floating', 'mixed-integer-float']: if isnull(data).any(): diff --git a/pandas/core/series.py b/pandas/core/series.py index 3642722452e71..a2c76113689d4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -36,7 +36,8 @@ from pandas.core.dtypes.cast import ( maybe_upcast, infer_dtype_from_scalar, maybe_convert_platform, - maybe_cast_to_datetime, maybe_castable) + maybe_cast_to_datetime, maybe_castable, + maybe_cast_to_integer) from pandas.core.dtypes.missing import isnull, notnull from pandas.core.common import (is_bool_indexer, @@ -2917,25 +2918,11 @@ def _try_cast(arr, take_fast_path): return arr try: + subarr = maybe_cast_to_integer(arr, dtype) subarr = maybe_cast_to_datetime(arr, dtype) if not is_extension_type(subarr): subarr = np.array(subarr, dtype=dtype, copy=copy) - # Raises if coercion from unsigned to signed with neg data - if is_unsigned_integer_dtype(dtype) and (np.asarray(data) < 0)\ - .any(): - raise OverflowError("Trying to coerce negative values to " - "negative integers") - - # Raises if coercion from float to integer loses precision - if is_integer_dtype(dtype) and is_float_dtype(np.asarray(data)): - #if not np.array_equal(np.asarray(data), - # np.asarray(data).astype(int). - # astype(float)): - if ((np.asarray(data) % np.asarray(data).astype(int)) > 0).any(): - raise OverflowError("Trying to coerce float values to " - "integers") - except (ValueError, TypeError): if is_categorical_dtype(dtype): subarr = Categorical(arr) From 1726408e94f2cf9cc1a5fb4eeb7a3a51a33fe89e Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Fri, 19 May 2017 21:41:25 -0300 Subject: [PATCH 08/18] Fixing pytables test --- pandas/tests/io/test_pytables.py | 38 +++++++++++++++++--------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index c9d2da67b8ee3..f3cf8985000d5 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -2080,24 +2080,26 @@ def test_table_values_dtypes_roundtrip(self): assert df1.dtypes[0] == 'float32' # check with mixed dtypes - df1 = DataFrame(dict([(c, Series(np.random.randn(5), dtype=c)) - for c in ['float32', 'float64', 'int32', - 'int64', 'int16', 'int8']])) - df1['string'] = 'foo' - df1['float322'] = 1. - df1['float322'] = df1['float322'].astype('float32') - df1['bool'] = df1['float32'] > 0 - df1['time1'] = Timestamp('20130101') - df1['time2'] = Timestamp('20130102') - - store.append('df_mixed_dtypes1', df1) - result = store.select('df_mixed_dtypes1').get_dtype_counts() - expected = Series({'float32': 2, 'float64': 1, 'int32': 1, - 'bool': 1, 'int16': 1, 'int8': 1, - 'int64': 1, 'object': 1, 'datetime64[ns]': 2}) - result = result.sort_index() - result = expected.sort_index() - tm.assert_series_equal(result, expected) + with pytest.raises(OverflowError): + df1 = DataFrame(dict([(c, Series(np.random.randn(5), dtype=c)) + for c in ['float32', 'float64', 'int32', + 'int64', 'int16', 'int8']])) + df1['string'] = 'foo' + df1['float322'] = 1. + df1['float322'] = df1['float322'].astype('float32') + df1['bool'] = df1['float32'] > 0 + df1['time1'] = Timestamp('20130101') + df1['time2'] = Timestamp('20130102') + + store.append('df_mixed_dtypes1', df1) + result = store.select('df_mixed_dtypes1').get_dtype_counts() + expected = Series({'float32': 2, 'float64': 1, 'int32': 1, + 'bool': 1, 'int16': 1, 'int8': 1, + 'int64': 1, 'object': 1, + 'datetime64[ns]': 2}) + result = result.sort_index() + result = expected.sort_index() + tm.assert_series_equal(result, expected) def test_table_mixed_dtypes(self): From 1f8e9b7a3eea14368ab6de7e87326bb132bb7470 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Fri, 19 May 2017 23:51:44 -0300 Subject: [PATCH 09/18] Fixing small issue on base index --- pandas/core/indexes/base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 287414a7ac36a..9b99c19428454 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -226,8 +226,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, return cls._try_convert_to_int_index( data, copy, name) except ValueError: - raise OverflowError("Trying to coerce float " - "values to integers") + pass # Return an actual float index. from .numeric import Float64Index From 83cfc5d9b65707996fad9ed15877fd5386c5beb9 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Sat, 20 May 2017 00:44:00 -0300 Subject: [PATCH 10/18] Fixing last issue on base index --- pandas/core/indexes/base.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 9b99c19428454..9f0ed0e1fd598 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -220,6 +220,12 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, raise ValueError('cannot convert float ' 'NaN to integer') + if is_integer_dtype(dtype) and \ + ((np.asarray(data) % np.asarray(data). + astype(int)) > 0).any(): + raise OverflowError("Trying to coerce float " + "values to integers") + # If we are actually all equal to integers, # then coerce to integer. try: From 417188aa059c1cce3ba179d3526da749ca0ccf2f Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Sat, 20 May 2017 01:15:00 -0300 Subject: [PATCH 11/18] Fixing another issue on base index --- pandas/core/indexes/base.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 9f0ed0e1fd598..ea01da75630cf 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -220,11 +220,13 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, raise ValueError('cannot convert float ' 'NaN to integer') - if is_integer_dtype(dtype) and \ - ((np.asarray(data) % np.asarray(data). - astype(int)) > 0).any(): - raise OverflowError("Trying to coerce float " - "values to integers") + if is_integer_dtype(dtype) and not \ + (np.asarray(data).astype(int) == 0).all(): + if ((np.asarray(data) % np.asarray(data). + astype(int)) > 0).any(): + raise OverflowError("Trying to coerce " + "float values to " + "integers") # If we are actually all equal to integers, # then coerce to integer. From 939ae11c34ec971856f17fd90e4d99f44b54cfe6 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Sat, 20 May 2017 01:49:06 -0300 Subject: [PATCH 12/18] Adjusting coercion tests in indexing --- pandas/tests/indexing/test_coercion.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 25cc810299678..118f9085eaa49 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -285,8 +285,9 @@ def test_setitem_index_int64(self): self._assert_setitem_index_conversion(obj, 5, exp_index, np.int64) # int + float -> float - exp_index = pd.Index([0, 1, 2, 3, 1.1]) - self._assert_setitem_index_conversion(obj, 1.1, exp_index, np.float64) + with pytest.raises(OverflowError): + exp_index = pd.Index([0, 1, 2, 3, 1.1]) + self._assert_setitem_index_conversion(obj, 1.1, exp_index, np.float64) # int + object -> object exp_index = pd.Index([0, 1, 2, 3, 'x']) @@ -373,8 +374,9 @@ def test_insert_index_int64(self): self._assert_insert_conversion(obj, 1, exp, np.int64) # int + float -> float - exp = pd.Index([1, 1.1, 2, 3, 4]) - self._assert_insert_conversion(obj, 1.1, exp, np.float64) + with pytest.raises(OverflowError): + exp = pd.Index([1, 1.1, 2, 3, 4]) + self._assert_insert_conversion(obj, 1.1, exp, np.float64) # int + bool -> int exp = pd.Index([1, 0, 2, 3, 4]) @@ -592,6 +594,7 @@ def _where_int64_common(self, klass): self._assert_where_conversion(obj, cond, values, exp, np.int64) # int + float -> float + #with pytest.raises(OverflowError): exp = klass([1, 1.1, 3, 1.1]) self._assert_where_conversion(obj, cond, 1.1, exp, np.float64) @@ -622,7 +625,8 @@ def test_where_series_int64(self): self._where_int64_common(pd.Series) def test_where_index_int64(self): - self._where_int64_common(pd.Index) + with pytest.raises(OverflowError): + self._where_int64_common(pd.Index) def _where_float64_common(self, klass): obj = klass([1.1, 2.2, 3.3, 4.4]) From 86e9d5e86a9e70418b8e55be3f636a0f99e80dd1 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Sat, 20 May 2017 10:51:42 -0300 Subject: [PATCH 13/18] Fixing linting problems --- pandas/core/series.py | 1 - pandas/tests/indexing/test_coercion.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index a2c76113689d4..ea50170b66d15 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -28,7 +28,6 @@ is_iterator, is_dict_like, is_scalar, - is_unsigned_integer_dtype, _is_unorderable_exception, _ensure_platform_int, pandas_dtype) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 118f9085eaa49..c4f02d0ba8207 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -287,7 +287,8 @@ def test_setitem_index_int64(self): # int + float -> float with pytest.raises(OverflowError): exp_index = pd.Index([0, 1, 2, 3, 1.1]) - self._assert_setitem_index_conversion(obj, 1.1, exp_index, np.float64) + self._assert_setitem_index_conversion(obj, 1.1, exp_index, + np.float64) # int + object -> object exp_index = pd.Index([0, 1, 2, 3, 'x']) @@ -594,7 +595,6 @@ def _where_int64_common(self, klass): self._assert_where_conversion(obj, cond, values, exp, np.int64) # int + float -> float - #with pytest.raises(OverflowError): exp = klass([1, 1.1, 3, 1.1]) self._assert_where_conversion(obj, cond, 1.1, exp, np.float64) From 359086d8b88602b1d4013631637dcb23d32ded39 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Sat, 20 May 2017 17:08:27 -0300 Subject: [PATCH 14/18] Adding all @jreback comments --- pandas/core/dtypes/cast.py | 29 ++++++++++++------ pandas/core/indexes/base.py | 14 +++------ pandas/core/series.py | 4 ++- pandas/tests/indexes/test_numeric.py | 10 +++--- pandas/tests/indexing/test_coercion.py | 14 +++------ pandas/tests/io/test_pytables.py | 39 ++++++++++++------------ pandas/tests/series/test_constructors.py | 22 ++++++------- 7 files changed, 67 insertions(+), 65 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index a39c705dbf5f9..796fe42745f24 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1031,24 +1031,33 @@ def find_common_type(types): def maybe_cast_to_integer(arr, dtype): """ - Find a common data type among the given dtypes. + Takes an integer dtype and returns the casted version, raising for an + incompatible dtype. Parameters ---------- - arr : array - dtype : dtype + arr : ndarray + dtype : np.dtype Returns ------- - integer or unsigned integer array (or raise if the dtype is incompatible) + integer or unsigned integer array + + Raises + ------ + OverflowError + * If ``dtype`` is incompatible + ValueError + * If coercion from float to integer loses precision """ - if is_unsigned_integer_dtype(dtype) and (np.asarray(arr) < 0).any(): + if is_unsigned_integer_dtype(dtype) and (arr < 0).any(): raise OverflowError("Trying to coerce negative values to negative " "integers") - elif is_integer_dtype(dtype) and is_float_dtype(np.asarray(arr)): - if ((np.asarray(arr) % np.asarray(arr).astype(int)) > 0).any(): - raise OverflowError("Trying to coerce float values to integers") - else: - return arr + elif is_integer_dtype(dtype) and (is_float_dtype(arr) or + is_object_dtype(arr)): + if not (arr == arr.astype(dtype)).all(): + raise ValueError("Trying to coerce float values to integers") + + return arr.astype(dtype, copy=False) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ea01da75630cf..56037572fca58 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -15,6 +15,7 @@ from pandas.core.dtypes.generic import ABCSeries, ABCMultiIndex, ABCPeriodIndex from pandas.core.dtypes.missing import isnull, array_equivalent +from pandas.core.dtypes.cast import maybe_cast_to_integer from pandas.core.dtypes.common import ( _ensure_int64, _ensure_object, @@ -52,7 +53,6 @@ from pandas.core.ops import _comp_method_OBJECT_ARRAY from pandas.core.strings import StringAccessorMixin from pandas.core.config import get_option -from pandas.core.dtypes.cast import maybe_cast_to_integer # simplify @@ -219,14 +219,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, if isnull(data).any(): raise ValueError('cannot convert float ' 'NaN to integer') - - if is_integer_dtype(dtype) and not \ - (np.asarray(data).astype(int) == 0).all(): - if ((np.asarray(data) % np.asarray(data). - astype(int)) > 0).any(): - raise OverflowError("Trying to coerce " - "float values to " - "integers") + if inferred == 'mixed-integer-float': + maybe_cast_to_integer(data, dtype) # If we are actually all equal to integers, # then coerce to integer. @@ -258,6 +252,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, msg = str(e) if 'cannot convert float' in msg: raise + if 'Trying to coerce float values to integer' in msg: + raise # maybe coerce to a sub-class from pandas.core.indexes.period import ( diff --git a/pandas/core/series.py b/pandas/core/series.py index ea50170b66d15..6a243126b0c93 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2917,7 +2917,9 @@ def _try_cast(arr, take_fast_path): return arr try: - subarr = maybe_cast_to_integer(arr, dtype) + if is_float_dtype(dtype) or is_integer_dtype(dtype): + subarr = maybe_cast_to_integer(np.asarray(arr), dtype) + subarr = maybe_cast_to_datetime(arr, dtype) if not is_extension_type(subarr): subarr = np.array(subarr, dtype=dtype, copy=copy) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 20ee2b83f5c7a..4166201d0867d 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -304,6 +304,11 @@ def test_astype(self): i = Float64Index([0, 1.1, np.NAN]) pytest.raises(ValueError, lambda: i.astype(dtype)) + # GH 15832 + for t in ['uint8', 'uint16', 'uint32', 'uint64']: + with pytest.raises(ValueError): + Index([1, 2, 3.5], dtype=t) + def test_equals_numeric(self): i = Float64Index([1.0, 2.0]) @@ -684,11 +689,6 @@ def test_constructor_overflow_coercion_signed_to_unsigned(self): with pytest.raises(OverflowError): Index([-1], dtype=t) - def test_constructor_overflow_coercion_float_to_int(self): - # GH 15832 - with pytest.raises(OverflowError): - Index([1, 2, 3.5], dtype=int) - def test_coerce_list(self): # coerce things arr = Index([1, 2, 3, 4]) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index c4f02d0ba8207..25cc810299678 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -285,10 +285,8 @@ def test_setitem_index_int64(self): self._assert_setitem_index_conversion(obj, 5, exp_index, np.int64) # int + float -> float - with pytest.raises(OverflowError): - exp_index = pd.Index([0, 1, 2, 3, 1.1]) - self._assert_setitem_index_conversion(obj, 1.1, exp_index, - np.float64) + exp_index = pd.Index([0, 1, 2, 3, 1.1]) + self._assert_setitem_index_conversion(obj, 1.1, exp_index, np.float64) # int + object -> object exp_index = pd.Index([0, 1, 2, 3, 'x']) @@ -375,9 +373,8 @@ def test_insert_index_int64(self): self._assert_insert_conversion(obj, 1, exp, np.int64) # int + float -> float - with pytest.raises(OverflowError): - exp = pd.Index([1, 1.1, 2, 3, 4]) - self._assert_insert_conversion(obj, 1.1, exp, np.float64) + exp = pd.Index([1, 1.1, 2, 3, 4]) + self._assert_insert_conversion(obj, 1.1, exp, np.float64) # int + bool -> int exp = pd.Index([1, 0, 2, 3, 4]) @@ -625,8 +622,7 @@ def test_where_series_int64(self): self._where_int64_common(pd.Series) def test_where_index_int64(self): - with pytest.raises(OverflowError): - self._where_int64_common(pd.Index) + self._where_int64_common(pd.Index) def _where_float64_common(self, klass): obj = klass([1.1, 2.2, 3.3, 4.4]) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index f3cf8985000d5..84e2e4790347a 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -2080,26 +2080,25 @@ def test_table_values_dtypes_roundtrip(self): assert df1.dtypes[0] == 'float32' # check with mixed dtypes - with pytest.raises(OverflowError): - df1 = DataFrame(dict([(c, Series(np.random.randn(5), dtype=c)) - for c in ['float32', 'float64', 'int32', - 'int64', 'int16', 'int8']])) - df1['string'] = 'foo' - df1['float322'] = 1. - df1['float322'] = df1['float322'].astype('float32') - df1['bool'] = df1['float32'] > 0 - df1['time1'] = Timestamp('20130101') - df1['time2'] = Timestamp('20130102') - - store.append('df_mixed_dtypes1', df1) - result = store.select('df_mixed_dtypes1').get_dtype_counts() - expected = Series({'float32': 2, 'float64': 1, 'int32': 1, - 'bool': 1, 'int16': 1, 'int8': 1, - 'int64': 1, 'object': 1, - 'datetime64[ns]': 2}) - result = result.sort_index() - result = expected.sort_index() - tm.assert_series_equal(result, expected) + df1 = DataFrame(dict([(c, Series(np.random.randn(5), dtype=c)) + for c in ['float32', 'float64', 'int32', + 'int64', 'int16', 'int8']])) + df1['string'] = 'foo' + df1['float322'] = 1. + df1['float322'] = df1['float322'].astype('float32') + df1['bool'] = df1['float32'] > 0 + df1['time1'] = Timestamp('20130101') + df1['time2'] = Timestamp('20130102') + + store.append('df_mixed_dtypes1', df1) + result = store.select('df_mixed_dtypes1').get_dtype_counts() + expected = Series({'float32': 2, 'float64': 1, 'int32': 1, + 'bool': 1, 'int16': 1, 'int8': 1, + 'int64': 1, 'object': 1, + 'datetime64[ns]': 2}) + result = result.sort_index() + result = expected.sort_index() + tm.assert_series_equal(result, expected) def test_table_mixed_dtypes(self): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 629823b91dacb..6081396d4f1fe 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -299,9 +299,20 @@ def test_constructor_pass_nan_nat(self): def test_constructor_cast(self): pytest.raises(ValueError, Series, ['a', 'b', 'c'], dtype=float) + # GH 15832 + for t in ['uint8', 'uint16', 'uint32', 'uint64']: + with pytest.raises(OverflowError): + Series([-1], dtype=t) + + # GH 15832 + for t in ['uint8', 'uint16', 'uint32', 'uint64']: + with pytest.raises(ValueError): + Series([1, 2, 3.5], dtype=t) + def test_constructor_dtype_nocast(self): # 1572 s = Series([1, 2, 3]) + s = Series([1, 2, 3]) s2 = Series(s, dtype=np.int64) @@ -884,14 +895,3 @@ def test_constructor_generic_timestamp_deprecated(self): msg = "cannot convert datetimelike" with tm.assert_raises_regex(TypeError, msg): Series([], dtype='M8[ps]') - - def test_constructor_overflow_coercion_signed_to_unsigned(self): - # GH 15832 - for t in ['uint8', 'uint16', 'uint32', 'uint64']: - with pytest.raises(OverflowError): - Series([-1], dtype=t) - - def test_constructor_overflow_coercion_float_to_int(self): - # GH 15832 - with pytest.raises(OverflowError): - Series([1, 2, 3.5], dtype=int) From 50950f579af03e9927fba144fb71c7ae1f8d8843 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Sun, 21 May 2017 22:13:07 -0300 Subject: [PATCH 15/18] Adding tests for all ints and adjust pytables test --- pandas/tests/indexes/test_numeric.py | 9 ++++++++- pandas/tests/io/test_pytables.py | 2 +- pandas/tests/series/test_constructors.py | 9 ++++++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 4166201d0867d..836411db999bf 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -305,10 +305,17 @@ def test_astype(self): pytest.raises(ValueError, lambda: i.astype(dtype)) # GH 15832 - for t in ['uint8', 'uint16', 'uint32', 'uint64']: + for t in ['uint8', 'uint16', 'uint32', 'uint64', 'int32', 'int64', + 'int16', 'int8']: with pytest.raises(ValueError): Index([1, 2, 3.5], dtype=t) + try: + for t in ['float16', 'float32']: + Index([1, 2, 3.5], dtype=t) + except ValueError: + pytest.fail("GH 15832 should not raise for float type") + def test_equals_numeric(self): i = Float64Index([1.0, 2.0]) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 84e2e4790347a..71123bafc975c 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -2080,7 +2080,7 @@ def test_table_values_dtypes_roundtrip(self): assert df1.dtypes[0] == 'float32' # check with mixed dtypes - df1 = DataFrame(dict([(c, Series(np.random.randn(5), dtype=c)) + df1 = DataFrame(dict([(c, Series(np.random.randn(5).astype(c))) for c in ['float32', 'float64', 'int32', 'int64', 'int16', 'int8']])) df1['string'] = 'foo' diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 6081396d4f1fe..d18e6f45cbe84 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -305,10 +305,17 @@ def test_constructor_cast(self): Series([-1], dtype=t) # GH 15832 - for t in ['uint8', 'uint16', 'uint32', 'uint64']: + for t in ['uint8', 'uint16', 'uint32', 'uint64', 'int32', 'int64', + 'int16', 'int8']: with pytest.raises(ValueError): Series([1, 2, 3.5], dtype=t) + try: + for t in ['float16', 'float32']: + Series([1, 2, 3.5], dtype=t) + except ValueError: + pytest.fail("GH 15832 should not raise for float type") + def test_constructor_dtype_nocast(self): # 1572 s = Series([1, 2, 3]) From 012fb575c2751abf156992dae505a23ea612aa8b Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Mon, 29 May 2017 03:12:02 -0400 Subject: [PATCH 16/18] Implementing final adjustments from @jreback and @gfyoung --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/dtypes/cast.py | 17 +++++++++++- pandas/core/indexes/base.py | 6 ++-- pandas/tests/indexes/test_numeric.py | 10 +++---- pandas/tests/series/test_constructors.py | 35 +++++++++++++----------- 5 files changed, 44 insertions(+), 26 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f392687a0a3fd..93d5a4164209d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -41,7 +41,7 @@ Backwards incompatible API changes Other API Changes ^^^^^^^^^^^^^^^^^ - +- Series and Index constructors now raises when data is incompatible with dtype (:issue:`15832`) .. _whatsnew_0210.deprecations: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 796fe42745f24..47ead1fc4d94f 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1050,10 +1050,25 @@ def maybe_cast_to_integer(arr, dtype): ValueError * If coercion from float to integer loses precision + Examples + -------- + If you try to coerce negative values to unsigned integers, it raises: + + >>> Series([-1], dtype='uint64') + Traceback (most recent call last): + ... + OverflowError: Trying to coerce negative values to unsigned integers + + Also, if you try to coerce float values to integers, it raises: + >>> Series([1, 2, 3.5], dtype='int64') + Traceback (most recent call last): + ... + ValueError: Trying to coerce float values to integers + """ if is_unsigned_integer_dtype(dtype) and (arr < 0).any(): - raise OverflowError("Trying to coerce negative values to negative " + raise OverflowError("Trying to coerce negative values to unsigned " "integers") elif is_integer_dtype(dtype) and (is_float_dtype(arr) or is_object_dtype(arr)): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 56037572fca58..9ae9a20a6e6b6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -250,9 +250,9 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, except (TypeError, ValueError) as e: msg = str(e) - if 'cannot convert float' in msg: - raise - if 'Trying to coerce float values to integer' in msg: + if 'cannot convert float' in msg or 'Trying to coerce ' \ + 'float values to ' \ + 'integer' in msg: raise # maybe coerce to a sub-class diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 836411db999bf..29c0159c1522f 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -304,17 +304,17 @@ def test_astype(self): i = Float64Index([0, 1.1, np.NAN]) pytest.raises(ValueError, lambda: i.astype(dtype)) + def test_type_coercion(self): + # GH 15832 for t in ['uint8', 'uint16', 'uint32', 'uint64', 'int32', 'int64', 'int16', 'int8']: with pytest.raises(ValueError): Index([1, 2, 3.5], dtype=t) - try: - for t in ['float16', 'float32']: - Index([1, 2, 3.5], dtype=t) - except ValueError: - pytest.fail("GH 15832 should not raise for float type") + for t in ['float16', 'float32']: + i = Index([1, 2, 3.5], dtype=t) + assert i.equals(Index([1, 2, 3.5])) def test_equals_numeric(self): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index d18e6f45cbe84..a4cf755fe7220 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -297,30 +297,33 @@ def test_constructor_pass_nan_nat(self): tm.assert_series_equal(Series(np.array([np.nan, pd.NaT])), exp) def test_constructor_cast(self): - pytest.raises(ValueError, Series, ['a', 'b', 'c'], dtype=float) + msg = "could not convert string to float" + with tm.assert_raises_regex(ValueError, msg): + Series(['a', 'b', 'c'], dtype=float) + @pytest.mark.parametrize("unsigned_integers", ['uint8', 'uint16', 'uint32', + 'uint64']) + def test_constructor_unsigned_dtype_overflow(self, unsigned_integers): # GH 15832 - for t in ['uint8', 'uint16', 'uint32', 'uint64']: - with pytest.raises(OverflowError): - Series([-1], dtype=t) - + with pytest.raises(OverflowError): + Series([-1], dtype=unsigned_integers) + + @pytest.mark.parametrize("integers", ['uint8', 'uint16', 'uint32', + 'uint64', 'int32', 'int64', 'int16', + 'int8']) + @pytest.mark.parametrize("floats", ['float16', 'float32']) + def test_constructor_coerce_float_fail(self, integers, floats): # GH 15832 - for t in ['uint8', 'uint16', 'uint32', 'uint64', 'int32', 'int64', - 'int16', 'int8']: - with pytest.raises(ValueError): - Series([1, 2, 3.5], dtype=t) + with pytest.raises(ValueError): + Series([1, 2, 3.5], dtype=integers) - try: - for t in ['float16', 'float32']: - Series([1, 2, 3.5], dtype=t) - except ValueError: - pytest.fail("GH 15832 should not raise for float type") + s = Series([1, 2, 3.5], dtype=floats) + expected = Series([1, 2, 3.5]).astype(floats) + assert_series_equal(s, expected) def test_constructor_dtype_nocast(self): # 1572 s = Series([1, 2, 3]) - s = Series([1, 2, 3]) - s2 = Series(s, dtype=np.int64) s2[1] = 5 From b1e66325665a95f83c065d1bb83ca128822e2a58 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Mon, 29 May 2017 04:11:50 -0400 Subject: [PATCH 17/18] Adding final comments from @gfyoung --- pandas/tests/indexes/test_numeric.py | 27 +++++++++++++----------- pandas/tests/series/test_constructors.py | 6 ++++-- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 29c0159c1522f..75a49e47451b5 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -304,17 +304,19 @@ def test_astype(self): i = Float64Index([0, 1.1, np.NAN]) pytest.raises(ValueError, lambda: i.astype(dtype)) - def test_type_coercion(self): + @pytest.mark.parametrize("integers", ['uint8', 'uint16', 'uint32', + 'uint64', 'int32', 'int64', 'int16', + 'int8']) + @pytest.mark.parametrize("floats", ['float16', 'float32']) + def test_type_coercion(self, integers, floats): # GH 15832 - for t in ['uint8', 'uint16', 'uint32', 'uint64', 'int32', 'int64', - 'int16', 'int8']: - with pytest.raises(ValueError): - Index([1, 2, 3.5], dtype=t) + msg = 'Trying to coerce float values to integers' + with tm.assert_raises_regex(ValueError, msg): + Index([1, 2, 3.5], dtype=integers) - for t in ['float16', 'float32']: - i = Index([1, 2, 3.5], dtype=t) - assert i.equals(Index([1, 2, 3.5])) + i = Index([1, 2, 3.5], dtype=floats) + assert i.equals(Index([1, 2, 3.5])) def test_equals_numeric(self): @@ -690,11 +692,12 @@ def test_constructor_corner(self): with tm.assert_raises_regex(TypeError, 'casting'): Int64Index(arr_with_floats) - def test_constructor_overflow_coercion_signed_to_unsigned(self): + @pytest.mark.parametrize("uints", ['uint8', 'uint16', 'uint32', 'uint64']) + def test_constructor_overflow_coercion_signed_to_unsigned(self, uints): # GH 15832 - for t in ['uint8', 'uint16', 'uint32', 'uint64']: - with pytest.raises(OverflowError): - Index([-1], dtype=t) + msg = 'Trying to coerce negative values to unsigned integers' + with tm.assert_raises_regex(OverflowError, msg): + Index([-1], dtype=uints) def test_coerce_list(self): # coerce things diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index a4cf755fe7220..691cb60a102f4 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -305,7 +305,8 @@ def test_constructor_cast(self): 'uint64']) def test_constructor_unsigned_dtype_overflow(self, unsigned_integers): # GH 15832 - with pytest.raises(OverflowError): + msg = 'Trying to coerce negative values to unsigned integers' + with tm.assert_raises_regex(OverflowError, msg): Series([-1], dtype=unsigned_integers) @pytest.mark.parametrize("integers", ['uint8', 'uint16', 'uint32', @@ -314,7 +315,8 @@ def test_constructor_unsigned_dtype_overflow(self, unsigned_integers): @pytest.mark.parametrize("floats", ['float16', 'float32']) def test_constructor_coerce_float_fail(self, integers, floats): # GH 15832 - with pytest.raises(ValueError): + msg = 'Trying to coerce float values to integers' + with tm.assert_raises_regex(ValueError, msg): Series([1, 2, 3.5], dtype=integers) s = Series([1, 2, 3.5], dtype=floats) From b78f4cc8433de0f9244ddd61b2665c81fe6f2744 Mon Sep 17 00:00:00 2001 From: Carlos Souza Date: Mon, 29 May 2017 22:09:53 -0400 Subject: [PATCH 18/18] Adding final comments from @jreback --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/dtypes/cast.py | 20 ++++++++++++-------- pandas/core/indexes/base.py | 13 ++++++------- pandas/core/series.py | 4 ++-- pandas/tests/indexes/test_numeric.py | 12 ++++++------ 5 files changed, 27 insertions(+), 24 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5acb0a54d41c4..7b8312d25641d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -54,7 +54,7 @@ Backwards incompatible API changes Other API Changes ^^^^^^^^^^^^^^^^^ -- Series and Index constructors now raises when data is incompatible with dtype (:issue:`15832`) +- Series and Index constructors now raises when data is incompatible with a passed dtype= kwarg (:issue:`15832`) - Moved definition of ``MergeError`` to the ``pandas.errors`` module. diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 67e378aa35ac7..56235a99d5f02 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1029,15 +1029,18 @@ def find_common_type(types): return np.find_common_type(types, []) -def maybe_cast_to_integer(arr, dtype): +def maybe_cast_to_integer_array(arr, dtype, copy=False): """ - Takes an integer dtype and returns the casted version, raising for an - incompatible dtype. + Takes any dtype and returns the casted version, raising for when data is + incompatible with integer/unsigned integer dtypes. + + .. versionadded:: 0.21.0 Parameters ---------- arr : ndarray dtype : np.dtype + copy: boolean, default False Returns ------- @@ -1066,13 +1069,14 @@ def maybe_cast_to_integer(arr, dtype): ValueError: Trying to coerce float values to integers """ + casted = arr.astype(dtype, copy=copy) + if np.array(arr == casted).all(): + return casted if is_unsigned_integer_dtype(dtype) and (arr < 0).any(): raise OverflowError("Trying to coerce negative values to unsigned " "integers") - elif is_integer_dtype(dtype) and (is_float_dtype(arr) or - is_object_dtype(arr)): - if not (arr == arr.astype(dtype)).all(): - raise ValueError("Trying to coerce float values to integers") - return arr.astype(dtype, copy=False) + if is_integer_dtype(dtype) and (is_float_dtype(arr) or + is_object_dtype(arr)): + raise ValueError("Trying to coerce float values to integers") diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f880d52a2f743..0c69508146e1a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -15,7 +15,7 @@ from pandas.core.dtypes.generic import ABCSeries, ABCMultiIndex, ABCPeriodIndex from pandas.core.dtypes.missing import isnull, array_equivalent -from pandas.core.dtypes.cast import maybe_cast_to_integer +from pandas.core.dtypes.cast import maybe_cast_to_integer_array from pandas.core.dtypes.common import ( _ensure_int64, _ensure_object, @@ -213,14 +213,14 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, if is_integer_dtype(dtype): inferred = lib.infer_dtype(data) if inferred == 'integer': - data = maybe_cast_to_integer(data, dtype=dtype) - data = np.array(data, copy=copy, dtype=dtype) + data = maybe_cast_to_integer_array(data, dtype, + copy=copy) elif inferred in ['floating', 'mixed-integer-float']: if isnull(data).any(): raise ValueError('cannot convert float ' 'NaN to integer') if inferred == 'mixed-integer-float': - maybe_cast_to_integer(data, dtype) + maybe_cast_to_integer_array(data, dtype) # If we are actually all equal to integers, # then coerce to integer. @@ -250,9 +250,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, except (TypeError, ValueError) as e: msg = str(e) - if 'cannot convert float' in msg or 'Trying to coerce ' \ - 'float values to ' \ - 'integer' in msg: + if ('cannot convert float' in msg or + 'Trying to coerce float values to integer') in msg: raise # maybe coerce to a sub-class diff --git a/pandas/core/series.py b/pandas/core/series.py index b529de06c3ef4..9d16619c47f3a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -36,7 +36,7 @@ maybe_upcast, infer_dtype_from_scalar, maybe_convert_platform, maybe_cast_to_datetime, maybe_castable, - maybe_cast_to_integer) + maybe_cast_to_integer_array) from pandas.core.dtypes.missing import isnull, notnull from pandas.core.common import (is_bool_indexer, @@ -2943,7 +2943,7 @@ def _try_cast(arr, take_fast_path): try: if is_float_dtype(dtype) or is_integer_dtype(dtype): - subarr = maybe_cast_to_integer(np.asarray(arr), dtype) + subarr = maybe_cast_to_integer_array(np.asarray(arr), dtype) subarr = maybe_cast_to_datetime(arr, dtype) if not is_extension_type(subarr): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 75a49e47451b5..16db27cd7e2d2 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -304,19 +304,19 @@ def test_astype(self): i = Float64Index([0, 1.1, np.NAN]) pytest.raises(ValueError, lambda: i.astype(dtype)) - @pytest.mark.parametrize("integers", ['uint8', 'uint16', 'uint32', + @pytest.mark.parametrize("int_dtype", ['uint8', 'uint16', 'uint32', 'uint64', 'int32', 'int64', 'int16', 'int8']) - @pytest.mark.parametrize("floats", ['float16', 'float32']) - def test_type_coercion(self, integers, floats): + @pytest.mark.parametrize("float_dtype", ['float16', 'float32']) + def test_type_coercion(self, int_dtype, float_dtype): # GH 15832 msg = 'Trying to coerce float values to integers' with tm.assert_raises_regex(ValueError, msg): - Index([1, 2, 3.5], dtype=integers) + Index([1, 2, 3.5], dtype=int_dtype) - i = Index([1, 2, 3.5], dtype=floats) - assert i.equals(Index([1, 2, 3.5])) + i = Index([1, 2, 3.5], dtype=float_dtype) + tm.assert_index_equal(i, Index([1, 2, 3.5])) def test_equals_numeric(self):