Skip to content

Potential regression in master re empty Extension Indexes #23933

@max-sixty

Description

@max-sixty

Appending a column to the index of a DataFrame that has an empty PeriodIndex (`df.set_index('a', append=True)`) raises a ValueError, and a NotImplementedError is then raised while handling it:

In [2]: 
df = pd.DataFrame(dict(a=1), index=pd.PeriodIndex(start='2000', freq='B', periods=0, name='date'))
df
Out[2]:
Empty DataFrame
Columns: [a]
Index: []


[ins] In [10]: df.set_index('a', append=True)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~/workspace/pandas/pandas/core/arrays/categorical.py in __init__(self, values, categories, ordered, dtype, fastpath)
    389             try:
--> 390                 codes, categories = factorize(values, sort=True)
    391             except TypeError:

~/workspace/pandas/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    176                     kwargs[new_arg_name] = new_arg_value
--> 177             return func(*args, **kwargs)
    178         return wrapper

~/workspace/pandas/pandas/core/algorithms.py in factorize(values, sort, order, na_sentinel, size_hint)
    631
--> 632     uniques = _reconstruct_data(uniques, dtype, original)
    633

~/workspace/pandas/pandas/core/algorithms.py in _reconstruct_data(values, dtype, original)
    147     if is_extension_array_dtype(dtype):
--> 148         values = dtype.construct_array_type()._from_sequence(values)
    149     elif is_datetime64tz_dtype(dtype) or is_period_dtype(dtype):

~/workspace/pandas/pandas/core/arrays/period.py in _from_sequence(cls, scalars, dtype, copy)
    201
--> 202         freq = freq or libperiod.extract_freq(periods)
    203         ordinals = libperiod.extract_ordinals(periods, freq)

~/workspace/pandas/pandas/_libs/tslibs/period.pyx in pandas._libs.tslibs.period.extract_freq()
   1485
-> 1486     raise ValueError('freq not specified and cannot be inferred')
   1487

ValueError: freq not specified and cannot be inferred

During handling of the above exception, another exception occurred:

NotImplementedError                       Traceback (most recent call last)
<ipython-input-10-7d64bc1a5646> in <module>
----> 1 df.set_index('a', append=True)

~/workspace/pandas/pandas/core/frame.py in set_index(self, keys, drop, append, inplace, verify_integrity)
   4051                     to_remove.append(col)
   4052
-> 4053         index = ensure_index_from_sequences(arrays, names)
   4054
   4055         if verify_integrity and not index.is_unique:

~/workspace/pandas/pandas/core/indexes/base.py in ensure_index_from_sequences(sequences, names)
   5058         return Index(sequences[0], name=names)
   5059     else:
-> 5060         return MultiIndex.from_arrays(sequences, names=names)
   5061
   5062

~/workspace/pandas/pandas/core/indexes/multi.py in from_arrays(cls, arrays, sortorder, names)
   1332         from pandas.core.arrays.categorical import _factorize_from_iterables
   1333
-> 1334         labels, levels = _factorize_from_iterables(arrays)
   1335         if names is None:
   1336             names = [getattr(arr, "name", None) for arr in arrays]

~/workspace/pandas/pandas/core/arrays/categorical.py in _factorize_from_iterables(iterables)
   2601         # For consistency, it should return a list of 2 lists.
   2602         return [[], []]
-> 2603     return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))

~/workspace/pandas/pandas/core/arrays/categorical.py in <listcomp>(.0)
   2601         # For consistency, it should return a list of 2 lists.
   2602         return [[], []]
-> 2603     return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))

~/workspace/pandas/pandas/core/arrays/categorical.py in _factorize_from_iterable(values)
   2573         # but only the resulting categories, the order of which is independent
   2574         # from ordered. Set ordered to False as default. See GH #15457
-> 2575         cat = Categorical(values, ordered=False)
   2576         categories = cat.categories
   2577         codes = cat.codes

~/workspace/pandas/pandas/core/arrays/categorical.py in __init__(self, values, categories, ordered, dtype, fastpath)
    400
    401                 # FIXME
--> 402                 raise NotImplementedError("> 1 ndim Categorical are not "
    403                                           "supported at this time")
    404

NotImplementedError: > 1 ndim Categorical are not supported at this time

Expected Output

Problem description

This works fine on 0.23.4:

[ins] In [1]: import pandas as pd

[ins] In [2]: df = pd.DataFrame(dict(a=1), index=pd.PeriodIndex(start='2000', freq='B', periods=0, name='date'))
         ...: df
Out[2]:
Empty DataFrame
Columns: [a]
Index: []

[ins] In [3]: df.set_index('a', append=True)
Out[3]:
Empty DataFrame
Columns: []
Index: []

Output of pd.show_versions()

INSTALLED VERSIONS

commit: b7294dd
python: 3.7.1.final.0
python-bits: 64
OS: Darwin
OS-release: 18.0.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
LOCALE: en_US.UTF-8

pandas: 0.19.0+4049.gb7294dd3e
pytest: 3.9.2
pip: 18.1
setuptools: 40.6.2
Cython: 0.28.5
numpy: 1.15.2
scipy: None
pyarrow: None
xarray: 0.10.9
IPython: 7.1.1
sphinx: None
patsy: 0.5.0
dateutil: 2.7.5
pytz: 2018.5
blosc: None
bottleneck: 1.2.1
tables: None
numexpr: 2.6.8
feather: None
matplotlib: 3.0.0
openpyxl: None
xlrd: 1.1.0
xlwt: 1.3.0
xlsxwriter: None
lxml: None
bs4: 4.6.0
html5lib: 1.0.1
sqlalchemy: 1.2.12
pymysql: None
psycopg2: None
jinja2: 2.8.1
s3fs: None
fastparquet: None
pandas_gbq: 0.6.1+2.gd98c621
pandas_datareader: None
gcsfs: None

Metadata

Metadata

Assignees

No one assigned

    Labels

    Blocker (Blocking issue or pull request for an upcoming release); Period (Period data type)

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions