@@ -160,12 +160,6 @@ def f():
160160
161161 self .assertRaises (ValueError , f )
162162
163- def f ():
164- with tm .assert_produces_warning (FutureWarning ):
165- Categorical ([1 , 2 ], [1 , 2 , np .nan , np .nan ])
166-
167- self .assertRaises (ValueError , f )
168-
169163 # The default should be unordered
170164 c1 = Categorical (["a" , "b" , "c" , "a" ])
171165 self .assertFalse (c1 .ordered )
@@ -222,29 +216,21 @@ def f():
222216 cat = pd .Categorical ([np .nan , 1. , 2. , 3. ])
223217 self .assertTrue (is_float_dtype (cat .categories ))
224218
225- # Deprecating NaNs in categoires (GH #10748)
226- # preserve int as far as possible by converting to object if NaN is in
227- # categories
228- with tm .assert_produces_warning (FutureWarning ):
229- cat = pd .Categorical ([np .nan , 1 , 2 , 3 ],
230- categories = [np .nan , 1 , 2 , 3 ])
231- self .assertTrue (is_object_dtype (cat .categories ))
232-
233219 # This doesn't work -> this would probably need some kind of "remember
234220 # the original type" feature to try to cast the array interface result
235221 # to...
236222
237223 # vals = np.asarray(cat[cat.notnull()])
238224 # self.assertTrue(is_integer_dtype(vals))
239- with tm . assert_produces_warning ( FutureWarning ):
240- cat = pd . Categorical ([ np . nan , "a" , "b" , "c" ],
241- categories = [ np . nan , "a" , "b" , "c" ])
242- self . assertTrue ( is_object_dtype ( cat . categories ))
243- # but don't do it for floats
244- with tm . assert_produces_warning ( FutureWarning ):
245- cat = pd . Categorical ([ np . nan , 1. , 2. , 3. ],
246- categories = [ np . nan , 1. , 2. , 3. ] )
247- self .assertTrue ( is_float_dtype ( cat . categories ) )
225+
226+ # Cannot have NaN in categories
227+ def f ( null_value ):
228+ pd . Categorical ([ null_value , "a" , "b" , "c" ],
229+ categories = [ null_value , "a" , "b" , "c" ])
230+
231+ self . assertRaises ( ValueError , f , np . nan )
232+ self . assertRaises ( ValueError , f , pd . NaT )
233+ self .assertRaises ( ValueError , f , None )
248234
249235 # corner cases
250236 cat = pd .Categorical ([1 ])
@@ -418,6 +404,12 @@ def f():
418404
419405 self .assertRaises (ValueError , f )
420406
407+ # NaN categories included
408+ def f ():
409+ Categorical .from_codes ([0 , 1 , 2 ], ["a" , "b" , np .nan ])
410+
411+ self .assertRaises (ValueError , f )
412+
421413 # too negative
422414 def f ():
423415 Categorical .from_codes ([- 2 , 1 , 2 ], ["a" , "b" , "c" ])
@@ -649,30 +641,6 @@ def test_describe(self):
649641 name = 'categories' ))
650642 tm .assert_frame_equal (desc , expected )
651643
652- # NA as a category
653- with tm .assert_produces_warning (FutureWarning ):
654- cat = pd .Categorical (["a" , "c" , "c" , np .nan ],
655- categories = ["b" , "a" , "c" , np .nan ])
656- result = cat .describe ()
657-
658- expected = DataFrame ([[0 , 0 ], [1 , 0.25 ], [2 , 0.5 ], [1 , 0.25 ]],
659- columns = ['counts' , 'freqs' ],
660- index = pd .CategoricalIndex (['b' , 'a' , 'c' , np .nan ],
661- name = 'categories' ))
662- tm .assert_frame_equal (result , expected , check_categorical = False )
663-
664- # NA as an unused category
665- with tm .assert_produces_warning (FutureWarning ):
666- cat = pd .Categorical (["a" , "c" , "c" ],
667- categories = ["b" , "a" , "c" , np .nan ])
668- result = cat .describe ()
669-
670- exp_idx = pd .CategoricalIndex (
671- ['b' , 'a' , 'c' , np .nan ], name = 'categories' )
672- expected = DataFrame ([[0 , 0 ], [1 , 1 / 3. ], [2 , 2 / 3. ], [0 , 0 ]],
673- columns = ['counts' , 'freqs' ], index = exp_idx )
674- tm .assert_frame_equal (result , expected , check_categorical = False )
675-
676644 def test_print (self ):
677645 expected = ["[a, b, b, a, a, c, c, c]" ,
678646 "Categories (3, object): [a < b < c]" ]
@@ -1119,90 +1087,18 @@ def test_nan_handling(self):
11191087 self .assert_numpy_array_equal (c ._codes ,
11201088 np .array ([0 , - 1 , - 1 , 0 ], dtype = np .int8 ))
11211089
1122- # If categories have nan included, the code should point to that
1123- # instead
1124- with tm .assert_produces_warning (FutureWarning ):
1125- c = Categorical (["a" , "b" , np .nan , "a" ],
1126- categories = ["a" , "b" , np .nan ])
1127- self .assert_index_equal (c .categories , Index (["a" , "b" , np .nan ]))
1128- self .assert_numpy_array_equal (c ._codes ,
1129- np .array ([0 , 1 , 2 , 0 ], dtype = np .int8 ))
1130- c [1 ] = np .nan
1131- self .assert_index_equal (c .categories , Index (["a" , "b" , np .nan ]))
1132- self .assert_numpy_array_equal (c ._codes ,
1133- np .array ([0 , 2 , 2 , 0 ], dtype = np .int8 ))
1134-
1135- # Changing categories should also make the replaced category np.nan
1136- c = Categorical (["a" , "b" , "c" , "a" ])
1137- with tm .assert_produces_warning (FutureWarning ):
1138- c .categories = ["a" , "b" , np .nan ] # noqa
1139-
1140- self .assert_index_equal (c .categories , Index (["a" , "b" , np .nan ]))
1141- self .assert_numpy_array_equal (c ._codes ,
1142- np .array ([0 , 1 , 2 , 0 ], dtype = np .int8 ))
1143-
11441090 # NaN among the values is never added to the categories; it is
11451091 # represented by code -1
11461092 c = Categorical (["a" , "b" , np .nan , "a" ])
11471093 self .assert_index_equal (c .categories , Index (["a" , "b" ]))
11481094 self .assert_numpy_array_equal (c ._codes ,
11491095 np .array ([0 , 1 , - 1 , 0 ], dtype = np .int8 ))
1150- with tm .assert_produces_warning (FutureWarning ):
1151- c .set_categories (["a" , "b" , np .nan ], rename = True , inplace = True )
1152-
1153- self .assert_index_equal (c .categories , Index (["a" , "b" , np .nan ]))
1154- self .assert_numpy_array_equal (c ._codes ,
1155- np .array ([0 , 1 , - 1 , 0 ], dtype = np .int8 ))
1156- c [1 ] = np .nan
1157- self .assert_index_equal (c .categories , Index (["a" , "b" , np .nan ]))
1158- self .assert_numpy_array_equal (c ._codes ,
1159- np .array ([0 , 2 , - 1 , 0 ], dtype = np .int8 ))
1160-
1161- # Remove null categories (GH 10156)
1162- cases = [([1.0 , 2.0 , np .nan ], [1.0 , 2.0 ]),
1163- (['a' , 'b' , None ], ['a' , 'b' ]),
1164- ([pd .Timestamp ('2012-05-01' ), pd .NaT ],
1165- [pd .Timestamp ('2012-05-01' )])]
1166-
1167- null_values = [np .nan , None , pd .NaT ]
1168-
1169- for with_null , without in cases :
1170- with tm .assert_produces_warning (FutureWarning ):
1171- base = Categorical ([], with_null )
1172- expected = Categorical ([], without )
1173-
1174- for nullval in null_values :
1175- result = base .remove_categories (nullval )
1176- self .assert_categorical_equal (result , expected )
1177-
1178- # Different null values are indistinguishable
1179- for i , j in [(0 , 1 ), (0 , 2 ), (1 , 2 )]:
1180- nulls = [null_values [i ], null_values [j ]]
1181-
1182- def f ():
1183- with tm .assert_produces_warning (FutureWarning ):
1184- Categorical ([], categories = nulls )
1185-
1186- self .assertRaises (ValueError , f )
11871096
11881097 def test_isnull (self ):
11891098 exp = np .array ([False , False , True ])
11901099 c = Categorical (["a" , "b" , np .nan ])
11911100 res = c .isnull ()
1192- self .assert_numpy_array_equal (res , exp )
1193-
1194- with tm .assert_produces_warning (FutureWarning ):
1195- c = Categorical (["a" , "b" , np .nan ], categories = ["a" , "b" , np .nan ])
1196- res = c .isnull ()
1197- self .assert_numpy_array_equal (res , exp )
11981101
1199- # test both nan in categories and as -1
1200- exp = np .array ([True , False , True ])
1201- c = Categorical (["a" , "b" , np .nan ])
1202- with tm .assert_produces_warning (FutureWarning ):
1203- c .set_categories (["a" , "b" , np .nan ], rename = True , inplace = True )
1204- c [0 ] = np .nan
1205- res = c .isnull ()
12061102 self .assert_numpy_array_equal (res , exp )
12071103
12081104 def test_codes_immutable (self ):
@@ -1487,45 +1383,10 @@ def test_slicing_directly(self):
14871383
14881384 def test_set_item_nan (self ):
14891385 cat = pd .Categorical ([1 , 2 , 3 ])
1490- exp = pd .Categorical ([1 , np .nan , 3 ], categories = [1 , 2 , 3 ])
14911386 cat [1 ] = np .nan
1492- tm .assert_categorical_equal (cat , exp )
14931387
1494- # if nan in categories, the proper code should be set!
1495- cat = pd .Categorical ([1 , 2 , 3 , np .nan ], categories = [1 , 2 , 3 ])
1496- with tm .assert_produces_warning (FutureWarning ):
1497- cat .set_categories ([1 , 2 , 3 , np .nan ], rename = True , inplace = True )
1498- cat [1 ] = np .nan
1499- exp = np .array ([0 , 3 , 2 , - 1 ], dtype = np .int8 )
1500- self .assert_numpy_array_equal (cat .codes , exp )
1501-
1502- cat = pd .Categorical ([1 , 2 , 3 , np .nan ], categories = [1 , 2 , 3 ])
1503- with tm .assert_produces_warning (FutureWarning ):
1504- cat .set_categories ([1 , 2 , 3 , np .nan ], rename = True , inplace = True )
1505- cat [1 :3 ] = np .nan
1506- exp = np .array ([0 , 3 , 3 , - 1 ], dtype = np .int8 )
1507- self .assert_numpy_array_equal (cat .codes , exp )
1508-
1509- cat = pd .Categorical ([1 , 2 , 3 , np .nan ], categories = [1 , 2 , 3 ])
1510- with tm .assert_produces_warning (FutureWarning ):
1511- cat .set_categories ([1 , 2 , 3 , np .nan ], rename = True , inplace = True )
1512- cat [1 :3 ] = [np .nan , 1 ]
1513- exp = np .array ([0 , 3 , 0 , - 1 ], dtype = np .int8 )
1514- self .assert_numpy_array_equal (cat .codes , exp )
1515-
1516- cat = pd .Categorical ([1 , 2 , 3 , np .nan ], categories = [1 , 2 , 3 ])
1517- with tm .assert_produces_warning (FutureWarning ):
1518- cat .set_categories ([1 , 2 , 3 , np .nan ], rename = True , inplace = True )
1519- cat [1 :3 ] = [np .nan , np .nan ]
1520- exp = np .array ([0 , 3 , 3 , - 1 ], dtype = np .int8 )
1521- self .assert_numpy_array_equal (cat .codes , exp )
1522-
1523- cat = pd .Categorical ([1 , 2 , np .nan , 3 ], categories = [1 , 2 , 3 ])
1524- with tm .assert_produces_warning (FutureWarning ):
1525- cat .set_categories ([1 , 2 , 3 , np .nan ], rename = True , inplace = True )
1526- cat [pd .isnull (cat )] = np .nan
1527- exp = np .array ([0 , 1 , 3 , 2 ], dtype = np .int8 )
1528- self .assert_numpy_array_equal (cat .codes , exp )
1388+ exp = pd .Categorical ([1 , np .nan , 3 ], categories = [1 , 2 , 3 ])
1389+ tm .assert_categorical_equal (cat , exp )
15291390
15301391 def test_shift (self ):
15311392 # GH 9416
@@ -2026,33 +1887,12 @@ def test_sideeffects_free(self):
20261887
20271888 def test_nan_handling (self ):
20281889
2029- # Nans are represented as -1 in labels
1890+ # NaNs are represented as -1 in labels
20301891 s = Series (Categorical (["a" , "b" , np .nan , "a" ]))
20311892 self .assert_index_equal (s .cat .categories , Index (["a" , "b" ]))
20321893 self .assert_numpy_array_equal (s .values .codes ,
20331894 np .array ([0 , 1 , - 1 , 0 ], dtype = np .int8 ))
20341895
2035- # If categories have nan included, the label should point to that
2036- # instead
2037- with tm .assert_produces_warning (FutureWarning ):
2038- s2 = Series (Categorical (["a" , "b" , np .nan , "a" ],
2039- categories = ["a" , "b" , np .nan ]))
2040-
2041- exp_cat = Index (["a" , "b" , np .nan ])
2042- self .assert_index_equal (s2 .cat .categories , exp_cat )
2043- self .assert_numpy_array_equal (s2 .values .codes ,
2044- np .array ([0 , 1 , 2 , 0 ], dtype = np .int8 ))
2045-
2046- # Changing categories should also make the replaced category np.nan
2047- s3 = Series (Categorical (["a" , "b" , "c" , "a" ]))
2048- with tm .assert_produces_warning (FutureWarning , check_stacklevel = False ):
2049- s3 .cat .categories = ["a" , "b" , np .nan ]
2050-
2051- exp_cat = Index (["a" , "b" , np .nan ])
2052- self .assert_index_equal (s3 .cat .categories , exp_cat )
2053- self .assert_numpy_array_equal (s3 .values .codes ,
2054- np .array ([0 , 1 , 2 , 0 ], dtype = np .int8 ))
2055-
20561896 def test_cat_accessor (self ):
20571897 s = Series (Categorical (["a" , "b" , np .nan , "a" ]))
20581898 self .assert_index_equal (s .cat .categories , Index (["a" , "b" ]))
0 commit comments