diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index dbeb8bda3e454..b92eb0e651276 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -870,7 +870,12 @@ cdef class PyObjectHashTable(HashTable): for i in range(n): val = values[i] hash(val) - if not checknull(val): + + # `val is None` below is exception to prevent mangling of None and + # other NA values; note however that other NA values (ex: pd.NaT + # and np.nan) will still get mangled, so may not be a permanent + # solution; see GH 20866 + if not checknull(val) or val is None: k = kh_get_pymap(self.table, val) if k == self.table.n_buckets: kh_put_pymap(self.table, val, &ret) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 8a8a6f7de70d7..46bd879c2db87 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -491,6 +491,14 @@ def test_tuple_with_strings(self, arg, expected): result = pd.unique(arg) tm.assert_numpy_array_equal(result, expected) + def test_obj_none_preservation(self): + # GH 20866 + arr = np.array(['foo', None], dtype=object) + result = pd.unique(arr) + expected = np.array(['foo', None], dtype=object) + + tm.assert_numpy_array_equal(result, expected, strict_nan=True) + class TestIsin(object):