@@ -235,6 +235,21 @@ class Categorical(PandasObject):
235
235
def __init__ (self , values , categories = None , ordered = None , dtype = None ,
236
236
fastpath = False ):
237
237
238
+ # Ways of specifying the dtype (in priority order)
239
+ # 1. dtype is a CategoricalDtype
240
+ # a.) with known categories, use dtype.categories
241
+ # b.) else with Categorical values, use values.dtype
242
+ # c.) else, infer from values
243
+ # d.) specifying dtype=CategoricalDtype and categories is an error
244
+ # 2. dtype is a string 'category'
245
+ # a.) use categories, ordered
246
+ # b.) use values.dtype
247
+ # c.) infer from values
248
+ # 3. dtype is None
249
+ # a.) use categories, ordered
250
+ # b.) use values.dtype
251
+ # c.) infer from values
252
+
238
253
if dtype is not None :
239
254
if isinstance (dtype , compat .string_types ):
240
255
if dtype == 'category' :
@@ -248,20 +263,24 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
248
263
categories = dtype .categories
249
264
ordered = dtype .ordered
250
265
251
- if ordered is None :
252
- ordered = False
266
+ elif is_categorical (values ):
267
+ dtype = values .dtype ._from_categorical_dtype (values .dtype ,
268
+ categories , ordered )
269
+ else :
270
+ dtype = CategoricalDtype (categories , ordered )
271
+
272
+ # At this point, dtype is always a CategoricalDtype
273
+ # if dtype.categories is None, we are inferring
253
274
254
275
if fastpath :
255
- if dtype is None :
256
- dtype = CategoricalDtype (categories , ordered )
257
276
self ._codes = coerce_indexer_dtype (values , categories )
258
277
self ._dtype = dtype
259
278
return
260
279
261
280
# sanitize input
262
281
if is_categorical_dtype (values ):
263
282
264
- # we are either a Series, CategoricalIndex
283
+ # we are either a Series or a CategoricalIndex
265
284
if isinstance (values , (ABCSeries , ABCCategoricalIndex )):
266
285
values = values ._values
267
286
@@ -272,6 +291,7 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
272
291
values = values .get_values ()
273
292
274
293
elif isinstance (values , (ABCIndexClass , ABCSeries )):
294
+ # we'll do inference later
275
295
pass
276
296
277
297
else :
@@ -289,12 +309,12 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
289
309
# "object" dtype to prevent this. In the end objects will be
290
310
# cast to int/... in the category assignment step.
291
311
if len (values ) == 0 or isna (values ).any ():
292
- dtype = 'object'
312
+ sanitize_dtype = 'object'
293
313
else :
294
- dtype = None
295
- values = _sanitize_array (values , None , dtype = dtype )
314
+ sanitize_dtype = None
315
+ values = _sanitize_array (values , None , dtype = sanitize_dtype )
296
316
297
- if categories is None :
317
+ if dtype . categories is None :
298
318
try :
299
319
codes , categories = factorize (values , sort = True )
300
320
except TypeError :
@@ -311,7 +331,8 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
311
331
raise NotImplementedError ("> 1 ndim Categorical are not "
312
332
"supported at this time" )
313
333
314
- if dtype is None or isinstance (dtype , str ):
334
+ if dtype .categories is None :
335
+ # we're inferring from values
315
336
dtype = CategoricalDtype (categories , ordered )
316
337
317
338
else :
@@ -322,11 +343,6 @@ def __init__(self, values, categories=None, ordered=None, dtype=None,
322
343
# - the new one, where each value is also in the categories array
323
344
# (or np.nan)
324
345
325
- # make sure that we always have the same type here, no matter what
326
- # we get passed in
327
- if dtype is None or isinstance (dtype , str ):
328
- dtype = CategoricalDtype (categories , ordered )
329
-
330
346
codes = _get_codes_for_values (values , dtype .categories )
331
347
332
348
# TODO: check for old style usage. These warnings should be removed
0 commit comments