Skip to content

Commit fc88ab9

Browse files
committed
Merge pull request #9491 from behzadnouri/idx-insert-nan
BUG: unstack with nulls & Timedelta/DateTime index
2 parents 76195fb + 4d02e13 commit fc88ab9

File tree

4 files changed: 26 additions (+26) and 22 deletions (-22).

doc/source/whatsnew/v0.16.0.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -205,6 +205,7 @@ Bug Fixes
205205
- Bug in left ``join`` on multi-index with ``sort=True`` or null values (:issue:`9210`).
206206
- Bug in ``MultiIndex`` where inserting new keys would fail (:issue:`9250`).
207207
- Bug in ``groupby`` when key space exceeds ``int64`` bounds (:issue:`9096`).
208+
- Bug in ``unstack`` with ``TimedeltaIndex`` or ``DatetimeIndex`` and nulls (:issue:`9491`).
208209

209210

210211
- Fixed character encoding bug in ``read_stata`` and ``StataReader`` when loading data from a URL (:issue:`9231`).

pandas/core/reshape.py

Lines changed: 9 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
import pandas.core.common as com
1818
import pandas.algos as algos
1919

20-
from pandas.core.index import MultiIndex
20+
from pandas.core.index import MultiIndex, _get_na_value
2121

2222

2323
class _Unstacker(object):
@@ -194,8 +194,11 @@ def get_new_values(self):
194194

195195
def get_new_columns(self):
196196
if self.value_columns is None:
197-
return _make_new_index(self.removed_level, None) \
198-
if self.lift != 0 else self.removed_level
197+
if self.lift == 0:
198+
return self.removed_level
199+
200+
lev = self.removed_level
201+
return lev.insert(0, _get_na_value(lev.dtype.type))
199202

200203
stride = len(self.removed_level) + self.lift
201204
width = len(self.value_columns)
@@ -222,31 +225,16 @@ def get_new_index(self):
222225
# construct the new index
223226
if len(self.new_index_levels) == 1:
224227
lev, lab = self.new_index_levels[0], result_labels[0]
225-
return _make_new_index(lev, lab) \
226-
if (lab == -1).any() else lev.take(lab)
228+
if (lab == -1).any():
229+
lev = lev.insert(len(lev), _get_na_value(lev.dtype.type))
230+
return lev.take(lab)
227231

228232
return MultiIndex(levels=self.new_index_levels,
229233
labels=result_labels,
230234
names=self.new_index_names,
231235
verify_integrity=False)
232236

233237

234-
def _make_new_index(lev, lab):
235-
from pandas.core.index import Index, _get_na_value
236-
237-
nan = _get_na_value(lev.dtype.type)
238-
vals = lev.values.astype('object')
239-
vals = np.insert(vals, 0, nan) if lab is None else \
240-
np.insert(vals, len(vals), nan).take(lab)
241-
242-
try:
243-
vals = vals.astype(lev.dtype, subok=False, copy=False)
244-
except ValueError:
245-
return Index(vals, **lev._get_attributes_dict())
246-
247-
return lev._shallow_copy(vals)
248-
249-
250238
def _unstack_multiple(data, clocs):
251239
from pandas.core.groupby import decons_obs_group_ids
252240

pandas/tools/tests/test_pivot.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,21 @@ def test_pivot_index_with_nan(self):
184184
tm.assert_frame_equal(result, expected)
185185
tm.assert_frame_equal(df.pivot('b', 'a', 'c'), expected.T)
186186

187+
# GH9491
188+
df = DataFrame({'a':pd.date_range('2014-02-01', periods=6, freq='D'),
189+
'c':100 + np.arange(6)})
190+
df['b'] = df['a'] - pd.Timestamp('2014-02-02')
191+
df.loc[1, 'a'] = df.loc[3, 'a'] = nan
192+
df.loc[1, 'b'] = df.loc[4, 'b'] = nan
193+
194+
pv = df.pivot('a', 'b', 'c')
195+
self.assertEqual(pv.notnull().values.sum(), len(df))
196+
197+
for _, row in df.iterrows():
198+
self.assertEqual(pv.loc[row['a'], row['b']], row['c'])
199+
200+
tm.assert_frame_equal(df.pivot('b', 'a', 'c'), pv.T)
201+
187202
def test_pivot_with_tz(self):
188203
# GH 5878
189204
df = DataFrame({'dt1': [datetime.datetime(2013, 1, 1, 9, 0),

pandas/tseries/tdi.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -828,7 +828,7 @@ def insert(self, loc, item):
828828
pass
829829

830830
freq = None
831-
if isinstance(item, Timedelta):
831+
if isinstance(item, (Timedelta, tslib.NaTType)):
832832

833833
# check freq can be preserved on edge cases
834834
if self.freq is not None:

0 commit comments

Comments
 (0)