From 2fd8b7280cec07528d8e87558aeb61db890348ee Mon Sep 17 00:00:00 2001 From: Donny Zimmanck Date: Fri, 2 Oct 2020 16:08:56 -0700 Subject: [PATCH 1/9] Modify maybe_convert_objects() function to support an option to NOT convert IntEnums to ints. This is controlled with a new argument called 'convert_intenum' which defaults to False. --- pandas/_libs/lib.pyx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 61a9634b00211..ef4770c887ac6 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2079,7 +2079,8 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, def maybe_convert_objects(ndarray[object] objects, bint try_float=False, bint safe=False, bint convert_datetime=False, bint convert_timedelta=False, - bint convert_to_nullable_integer=False): + bint convert_to_nullable_integer=False, + bint convert_intenum=False): """ Type inference function-- convert object array to proper dtype @@ -2181,6 +2182,9 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=False, seen.object_ = True break elif util.is_integer_object(val): + if getattr(val, 'name', None) is not None and not convert_intenum: + seen.object_ = True + break seen.int_ = True floats[i] = val complexes[i] = val From cec0c78f0df19fe0ec3540eb4be929fa784da075 Mon Sep 17 00:00:00 2001 From: Donny Zimmanck Date: Fri, 2 Oct 2020 16:10:16 -0700 Subject: [PATCH 2/9] Update the docstring for is_integer_object() with a note that it counts IntEnums as integers. --- pandas/_libs/tslibs/util.pxd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index e280609bb17a7..deeb59c65fab6 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -67,7 +67,7 @@ cdef inline bint is_integer_object(object obj) nogil: Notes ----- - This counts np.timedelta64 objects as integers. 
+ This counts np.timedelta64 and IntEnums objects as integers. """ return (not PyBool_Check(obj) and PyArray_IsIntegerScalar(obj) and not is_timedelta64_object(obj)) From e0589daef451f75dee8a9873406df9cf3ad461e4 Mon Sep 17 00:00:00 2001 From: Donny Zimmanck Date: Fri, 2 Oct 2020 16:28:48 -0700 Subject: [PATCH 3/9] Add test. --- pandas/tests/dtypes/test_inference.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index e40a12f7bc8d1..3f1cef243bc6c 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -7,6 +7,7 @@ from collections import namedtuple from datetime import date, datetime, time, timedelta from decimal import Decimal +from enum import IntEnum from fractions import Fraction from io import StringIO from numbers import Number @@ -589,6 +590,24 @@ def test_maybe_convert_objects_bool_nan(self): out = lib.maybe_convert_objects(ind.values, safe=1) tm.assert_numpy_array_equal(out, exp) + def test_maybe_convert_objects_intenum(self): + class Colors(IntEnum): + red = 1 + blue = 2 + + ind = pd.Index([Colors.red, Colors.blue], dtype=object) + exp = np.array([Colors.red, Colors.blue], dtype=object) + out = lib.maybe_convert_objects(ind.values) + + # by default, we should not convert IntEnums to ints + tm.assert_numpy_array_equal(out, exp) + + exp = np.array([1, 2], dtype=int) + out = lib.maybe_convert_objects(ind.values, convert_intenum=True) + + # still coverts to int if convert_intenum set to True + tm.assert_numpy_array_equal(out, exp) + def test_mixed_dtypes_remain_object_array(self): # GH14956 array = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1], dtype=object) From 6f528b7ba126bc16dbfb2b018f645ac60895168c Mon Sep 17 00:00:00 2001 From: Donny Zimmanck Date: Mon, 5 Oct 2020 09:18:44 -0700 Subject: [PATCH 4/9] Change 'exp' to 'expected' and 'out' to 'result' to make test more readable. 
--- pandas/tests/dtypes/test_inference.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 3f1cef243bc6c..f2fa955c6c7ef 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -596,17 +596,17 @@ class Colors(IntEnum): blue = 2 ind = pd.Index([Colors.red, Colors.blue], dtype=object) - exp = np.array([Colors.red, Colors.blue], dtype=object) - out = lib.maybe_convert_objects(ind.values) + expected = np.array([Colors.red, Colors.blue], dtype=object) + result = lib.maybe_convert_objects(ind.values) # by default, we should not convert IntEnums to ints - tm.assert_numpy_array_equal(out, exp) + tm.assert_numpy_array_equal(result, expected) - exp = np.array([1, 2], dtype=int) - out = lib.maybe_convert_objects(ind.values, convert_intenum=True) + expected = np.array([1, 2], dtype=int) + result = lib.maybe_convert_objects(ind.values, convert_intenum=True) # still coverts to int if convert_intenum set to True - tm.assert_numpy_array_equal(out, exp) + tm.assert_numpy_array_equal(result, expected) def test_mixed_dtypes_remain_object_array(self): # GH14956 From 426251b9af24dea31a281c26ee0d0aa311855c1e Mon Sep 17 00:00:00 2001 From: Donny Zimmanck Date: Mon, 5 Oct 2020 09:20:11 -0700 Subject: [PATCH 5/9] Change expected result in the test_maybe_convert_objects_intenum test to be explicitly cast to int64 rather than int, which is 32-bit in some environments. 
--- pandas/tests/dtypes/test_inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index f2fa955c6c7ef..02b0c22095a4a 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -602,7 +602,7 @@ class Colors(IntEnum): # by default, we should not convert IntEnums to ints tm.assert_numpy_array_equal(result, expected) - expected = np.array([1, 2], dtype=int) + expected = np.array([1, 2], dtype=int64) result = lib.maybe_convert_objects(ind.values, convert_intenum=True) # still coverts to int if convert_intenum set to True From 66c1e1a2d168803484315ba473d8a2203af0d145 Mon Sep 17 00:00:00 2001 From: Donny Zimmanck Date: Mon, 5 Oct 2020 09:30:28 -0700 Subject: [PATCH 6/9] Typo: Forgot 'np' in 'np.int64' --- pandas/tests/dtypes/test_inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 02b0c22095a4a..8432e2f5c592d 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -602,7 +602,7 @@ class Colors(IntEnum): # by default, we should not convert IntEnums to ints tm.assert_numpy_array_equal(result, expected) - expected = np.array([1, 2], dtype=int64) + expected = np.array([1, 2], dtype=np.int64) result = lib.maybe_convert_objects(ind.values, convert_intenum=True) # still coverts to int if convert_intenum set to True From c1389cdc84b1f810b923dab4b83fec3e982190a2 Mon Sep 17 00:00:00 2001 From: Donny Zimmanck Date: Tue, 6 Oct 2020 14:15:16 -0700 Subject: [PATCH 7/9] Empty commit to re-run CI tests. From 9c1a1d88bf3bb209308baf546ce6e377e44ef9de Mon Sep 17 00:00:00 2001 From: Donny Zimmanck Date: Mon, 16 Nov 2020 08:53:31 -0800 Subject: [PATCH 8/9] Fix inconsistency in namespace use in test_maybe_convert_objects_intenum() test by replacing 'pd.Index' with 'Index'. 
--- pandas/tests/dtypes/test_inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 6890b1af4d8f0..ef5ef8d7a7d1c 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -595,7 +595,7 @@ class Colors(IntEnum): red = 1 blue = 2 - ind = pd.Index([Colors.red, Colors.blue], dtype=object) + ind = Index([Colors.red, Colors.blue], dtype=object) expected = np.array([Colors.red, Colors.blue], dtype=object) result = lib.maybe_convert_objects(ind.values) From f66e76c69eea72a314ee63ee1289fb914dbca260 Mon Sep 17 00:00:00 2001 From: Donny Zimmanck Date: Mon, 16 Nov 2020 15:58:30 -0800 Subject: [PATCH 9/9] Trigger CI run.