-
-
Notifications
You must be signed in to change notification settings - Fork 19k
Description
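Calling `set_index(..., append=True)` to add an index level to a DataFrame whose index is an empty PeriodIndex raises a ValueError, followed by a NotImplementedError while that error is being handled: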
In [2]:
df = pd.DataFrame(dict(a=1), index=pd.PeriodIndex(start='2000', freq='B', periods=0, name='date'))
df
Out[2]:
Empty DataFrame
Columns: [a]
Index: []
[ins] In [10]: df.set_index('a', append=True)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~/workspace/pandas/pandas/core/arrays/categorical.py in __init__(self, values, categories, ordered, dtype, fastpath)
389 try:
--> 390 codes, categories = factorize(values, sort=True)
391 except TypeError:
~/workspace/pandas/pandas/util/_decorators.py in wrapper(*args, **kwargs)
176 kwargs[new_arg_name] = new_arg_value
--> 177 return func(*args, **kwargs)
178 return wrapper
~/workspace/pandas/pandas/core/algorithms.py in factorize(values, sort, order, na_sentinel, size_hint)
631
--> 632 uniques = _reconstruct_data(uniques, dtype, original)
633
~/workspace/pandas/pandas/core/algorithms.py in _reconstruct_data(values, dtype, original)
147 if is_extension_array_dtype(dtype):
--> 148 values = dtype.construct_array_type()._from_sequence(values)
149 elif is_datetime64tz_dtype(dtype) or is_period_dtype(dtype):
~/workspace/pandas/pandas/core/arrays/period.py in _from_sequence(cls, scalars, dtype, copy)
201
--> 202 freq = freq or libperiod.extract_freq(periods)
203 ordinals = libperiod.extract_ordinals(periods, freq)
~/workspace/pandas/pandas/_libs/tslibs/period.pyx in pandas._libs.tslibs.period.extract_freq()
1485
-> 1486 raise ValueError('freq not specified and cannot be inferred')
1487
ValueError: freq not specified and cannot be inferred
During handling of the above exception, another exception occurred:
NotImplementedError Traceback (most recent call last)
<ipython-input-10-7d64bc1a5646> in <module>
----> 1 df.set_index('a', append=True)
~/workspace/pandas/pandas/core/frame.py in set_index(self, keys, drop, append, inplace, verify_integrity)
4051 to_remove.append(col)
4052
-> 4053 index = ensure_index_from_sequences(arrays, names)
4054
4055 if verify_integrity and not index.is_unique:
~/workspace/pandas/pandas/core/indexes/base.py in ensure_index_from_sequences(sequences, names)
5058 return Index(sequences[0], name=names)
5059 else:
-> 5060 return MultiIndex.from_arrays(sequences, names=names)
5061
5062
~/workspace/pandas/pandas/core/indexes/multi.py in from_arrays(cls, arrays, sortorder, names)
1332 from pandas.core.arrays.categorical import _factorize_from_iterables
1333
-> 1334 labels, levels = _factorize_from_iterables(arrays)
1335 if names is None:
1336 names = [getattr(arr, "name", None) for arr in arrays]
~/workspace/pandas/pandas/core/arrays/categorical.py in _factorize_from_iterables(iterables)
2601 # For consistency, it should return a list of 2 lists.
2602 return [[], []]
-> 2603 return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))
~/workspace/pandas/pandas/core/arrays/categorical.py in <listcomp>(.0)
2601 # For consistency, it should return a list of 2 lists.
2602 return [[], []]
-> 2603 return map(list, lzip(*[_factorize_from_iterable(it) for it in iterables]))
~/workspace/pandas/pandas/core/arrays/categorical.py in _factorize_from_iterable(values)
2573 # but only the resulting categories, the order of which is independent
2574 # from ordered. Set ordered to False as default. See GH #15457
-> 2575 cat = Categorical(values, ordered=False)
2576 categories = cat.categories
2577 codes = cat.codes
~/workspace/pandas/pandas/core/arrays/categorical.py in __init__(self, values, categories, ordered, dtype, fastpath)
400
401 # FIXME
--> 402 raise NotImplementedError("> 1 ndim Categorical are not "
403 "supported at this time")
404
NotImplementedError: > 1 ndim Categorical are not supported at this time
Expected Output
Problem description
This is a regression: the same call works fine on 0.23.4 and returns an empty DataFrame, which is the expected output:
[ins] In [1]: import pandas as pd
[ins] In [2]: df = pd.DataFrame(dict(a=1), index=pd.PeriodIndex(start='2000', freq='B', periods=0, name='date'))
...: df
Out[2]:
Empty DataFrame
Columns: [a]
Index: []
[ins] In [3]: df.set_index('a', append=True)
Out[3]:
Empty DataFrame
Columns: []
Index: []
Output of pd.show_versions()
INSTALLED VERSIONS
commit: b7294dd
python: 3.7.1.final.0
python-bits: 64
OS: Darwin
OS-release: 18.0.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
LOCALE: en_US.UTF-8
pandas: 0.19.0+4049.gb7294dd3e
pytest: 3.9.2
pip: 18.1
setuptools: 40.6.2
Cython: 0.28.5
numpy: 1.15.2
scipy: None
pyarrow: None
xarray: 0.10.9
IPython: 7.1.1
sphinx: None
patsy: 0.5.0
dateutil: 2.7.5
pytz: 2018.5
blosc: None
bottleneck: 1.2.1
tables: None
numexpr: 2.6.8
feather: None
matplotlib: 3.0.0
openpyxl: None
xlrd: 1.1.0
xlwt: 1.3.0
xlsxwriter: None
lxml: None
bs4: 4.6.0
html5lib: 1.0.1
sqlalchemy: 1.2.12
pymysql: None
psycopg2: None
jinja2: 2.8.1
s3fs: None
fastparquet: None
pandas_gbq: 0.6.1+2.gd98c621
pandas_datareader: None
gcsfs: None