diff --git a/pandas/core/index.py b/pandas/core/index.py
index 0c6e490f3eb50..38a97af572b1c 100644
--- a/pandas/core/index.py
+++ b/pandas/core/index.py
@@ -428,12 +428,10 @@ def take(self, indexer, axis=0):
         taken = self.view(np.ndarray).take(indexer)
         return self._constructor(taken, name=self.name)
 
-    def format(self, name=False, formatter=None, na_rep='NaN'):
+    def format(self, name=False, formatter=None, **kwargs):
         """
         Render a string representation of the Index
         """
-        from pandas.core.format import format_array
-
         header = []
         if name:
             header.append(com.pprint_thing(self.name,
@@ -443,11 +441,13 @@ def format(self, name=False, formatter=None, na_rep='NaN'):
         if formatter is not None:
             return header + list(self.map(formatter))
 
-        if self.is_all_dates:
-            return header + _date_formatter(self)
+        return self._format_with_header(header, **kwargs)
 
+    def _format_with_header(self, header, na_rep='NaN', **kwargs):
         values = self.values
 
+        from pandas.core.format import format_array
+
         if values.dtype == np.object_:
             values = lib.maybe_convert_objects(values, safe=1)
 
@@ -466,17 +466,18 @@ def format(self, name=False, formatter=None, na_rep='NaN'):
             result = _trim_front(format_array(values, None, justify='left'))
         return header + result
 
-    def to_native_types(self, slicer=None, na_rep='', float_format=None):
+    def to_native_types(self, slicer=None, **kwargs):
+        """ slice and dice then format """
         values = self
         if slicer is not None:
             values = values[slicer]
-
-        if self.is_all_dates:
-            return _date_formatter(values)
-        else:
-            mask = isnull(values)
-            values = np.array(values,dtype=object)
-            values[mask] = na_rep
+        return values._format_native_types(**kwargs)
+
+    def _format_native_types(self, na_rep='', **kwargs):
+        """ actually format my specific types """
+        mask = isnull(self)
+        values = np.array(self,dtype=object,copy=True)
+        values[mask] = na_rep
         return values.tolist()
 
     def equals(self, other):
@@ -1320,6 +1321,11 @@ def inferred_type(self):
     def _constructor(self):
         return Int64Index
 
+    @property
+    def asi8(self):
+        # do not cache or you'll create a memory leak
+        return self.values.view('i8')
+
     @property
     def is_all_dates(self):
         """
@@ -1489,11 +1495,8 @@ def __repr__(self):
     def __len__(self):
         return len(self.labels[0])
 
-    def to_native_types(self, slicer=None, na_rep='', float_format=None):
-        ix = self
-        if slicer:
-            ix = self[slicer]
-        return ix.tolist()
+    def _format_native_types(self, **kwargs):
+        return self.tolist()
 
     @property
     def _constructor(self):
@@ -1651,13 +1654,13 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
             # we have some NA
             mask = lab==-1
             if mask.any():
-                formatted = np.array(formatted)
+                formatted = np.array(formatted,dtype=object)
                 formatted[mask] = na_rep
                 formatted = formatted.tolist()
 
             else:
                 # weird all NA case
-                formatted = [com.pprint_thing(x, escape_chars=('\t', '\r', '\n'))
+                formatted = [com.pprint_thing(na_rep if isnull(x) else x, escape_chars=('\t', '\r', '\n'))
                              for x in com.take_1d(lev.values, lab)]
 
             stringified_levels.append(formatted)
@@ -1669,6 +1672,7 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
                 level.append(com.pprint_thing(name,
                                               escape_chars=('\t', '\r', '\n'))
                              if name is not None else '')
+                level.extend(np.array(lev, dtype=object))
 
             result_levels.append(level)
 
@@ -2598,23 +2602,6 @@ def _wrap_joined_index(self, joined, other):
 
 # For utility purposes
 
-def _date_formatter(obj, na_rep=u'NaT'):
-    data = list(obj)
-
-    # tz formatter or time formatter
-    zero_time = time(0, 0)
-    for d in data:
-        if d.time() != zero_time or d.tzinfo is not None:
-            return [u'%s' % x for x in data ]
-
-    values = np.array(data,dtype=object)
-    mask = isnull(obj.values)
-    values[mask] = na_rep
-
-    imask = -mask
-    values[imask] = np.array([ u'%d-%.2d-%.2d' % (dt.year, dt.month, dt.day) for dt in values[imask] ])
-    return values.tolist()
-
 def _sparsify(label_list, start=0):
     pivoted = zip(*label_list)
     k = len(label_list)
diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
index 3473e5fffb34d..0ae8934c898b0 100644
--- a/pandas/tests/test_format.py
+++ b/pandas/tests/test_format.py
@@ -666,6 +666,22 @@ def test_index_with_nan(self):
         expected = u' id1 id3 value\nid2 \nNaN 1a3 78d 123\nNaN 9h4 79d 64'
         self.assert_(result == expected)
 
+        # partial nan in mi
+        df2 = df.copy()
+        df2.ix[:,'id2'] = np.nan
+        y = df2.set_index(['id2','id3'])
+        result = y.to_string()
+        expected = u' id1 value\nid2 id3 \nNaN 78d 1a3 123\n 79d 9h4 64'
+        self.assert_(result == expected)
+
+        df = DataFrame({'id1': {0: np.nan, 1: '9h4'}, 'id2': {0: np.nan, 1: 'd67'},
+                        'id3': {0: np.nan, 1: '79d'}, 'value': {0: 123, 1: 64}})
+
+        y = df.set_index(['id1','id2','id3'])
+        result = y.to_string()
+        expected = u' value\nid1 id2 id3 \nNaN NaN NaN 123\n9h4 d67 79d 64'
+        self.assert_(result == expected)
+
     def test_to_string(self):
         from pandas import read_table
         import re
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index a7dd96fd2ce2b..ced4b23b7e4fa 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -21,7 +21,7 @@
 import pandas.core.format as fmt
 import pandas.core.datetools as datetools
 from pandas.core.api import (DataFrame, Index, Series, notnull, isnull,
-                             MultiIndex, DatetimeIndex, Timestamp)
+                             MultiIndex, DatetimeIndex, Timestamp, Period)
 from pandas.io.parsers import read_csv
 
 from pandas.util.testing import (assert_almost_equal,
@@ -4587,7 +4587,7 @@ def stuple_to_tuple(x):
             cols=MultiIndex.from_tuples(map(stuple_to_tuple,recons.columns))
             recons.columns = cols
 
-        type_map = dict(i='i',f='f',s='O',u='O',dt='O')
+        type_map = dict(i='i',f='f',s='O',u='O',dt='O',p='O')
         if r_dtype:
             if r_dtype == 'u': # unicode
                 r_dtype='O'
@@ -4599,6 +4599,11 @@ def stuple_to_tuple(x):
                 recons.index = np.array(map(Timestamp,recons.index),
                                         dtype=r_dtype )
                 df.index = np.array(map(Timestamp,df.index),dtype=r_dtype )
+            elif r_dtype == 'p':
+                r_dtype='O'
+                recons.index = np.array(map(Timestamp,recons.index.to_datetime()),
+                                        dtype=r_dtype )
+                df.index = np.array(map(Timestamp,df.index.to_datetime()),dtype=r_dtype )
             else:
                 r_dtype= type_map.get(r_dtype)
                 recons.index = np.array(recons.index,dtype=r_dtype )
@@ -4608,12 +4613,17 @@ def stuple_to_tuple(x):
                 c_dtype='O'
                 recons.columns = np.array(map(_to_uni,recons.columns),
                                           dtype=c_dtype )
-                df.Columns = np.array(map(_to_uni,df.columns),dtype=c_dtype )
+                df.columns = np.array(map(_to_uni,df.columns),dtype=c_dtype )
             elif c_dtype == 'dt':
                 c_dtype='O'
                 recons.columns = np.array(map(Timestamp,recons.columns),
                                           dtype=c_dtype )
-                df.Columns = np.array(map(Timestamp,df.columns),dtype=c_dtype )
+                df.columns = np.array(map(Timestamp,df.columns),dtype=c_dtype )
+            elif c_dtype == 'p':
+                c_dtype='O'
+                recons.columns = np.array(map(Timestamp,recons.columns.to_datetime()),
+                                          dtype=c_dtype )
+                df.columns = np.array(map(Timestamp,df.columns.to_datetime()),dtype=c_dtype )
             else:
                 c_dtype= type_map.get(c_dtype)
                 recons.columns = np.array(recons.columns,dtype=c_dtype )
@@ -4631,8 +4641,8 @@ def stuple_to_tuple(x):
                 _do_test(mkdf(nrows, ncols,r_idx_type='dt',
                               c_idx_type='s'),path, 'dt','s')
 
-        for r_idx_type in ['i', 'f','s','u']:
-            for c_idx_type in ['i', 'f','s','u','dt']:
+        for r_idx_type in ['i','s','u','p']:
+            for c_idx_type in ['i', 's','u','dt','p']:
                 for ncols in [1,2,128]:
                     base = int((chunksize// ncols or 1) or 1)
                     for nrows in [2,10,N-1,N,N+1,N+2,2*N-2,2*N-1,2*N,2*N+1,2*N+2,
diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py
index 0e552ab0e610f..d230f3d5c3c29 100644
--- a/pandas/tseries/index.py
+++ b/pandas/tseries/index.py
@@ -568,6 +568,26 @@ def __contains__(self, key):
         except (KeyError, TypeError):
             return False
 
+    def _format_with_header(self, header, **kwargs):
+        return header + self._format_native_types(**kwargs)
+
+    def _format_native_types(self, na_rep=u'NaT', **kwargs):
+        data = list(self)
+
+        # tz formatter or time formatter
+        zero_time = time(0, 0)
+        for d in data:
+            if d.time() != zero_time or d.tzinfo is not None:
+                return [u'%s' % x for x in data ]
+
+        values = np.array(data,dtype=object)
+        mask = isnull(self.values)
+        values[mask] = na_rep
+
+        imask = -mask
+        values[imask] = np.array([ u'%d-%.2d-%.2d' % (dt.year, dt.month, dt.day) for dt in values[imask] ])
+        return values.tolist()
+
     def isin(self, values):
         """
         Compute boolean array of whether each index value is found in the
@@ -627,11 +647,6 @@ def astype(self, dtype):
         else: # pragma: no cover
             raise ValueError('Cannot cast DatetimeIndex to dtype %s' % dtype)
 
-    @property
-    def asi8(self):
-        # do not cache or you'll create a memory leak
-        return self.values.view('i8')
-
     def _get_time_micros(self):
         utc = _utc()
         values = self.asi8
diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py
index 947a2ffac6039..1e9aad7cf2d7b 100644
--- a/pandas/tseries/period.py
+++ b/pandas/tseries/period.py
@@ -12,6 +12,7 @@
 import pandas.tseries.frequencies as _freq_mod
 
 import pandas.core.common as com
+from pandas.core.common import isnull
 from pandas.lib import Timestamp
 import pandas.lib as lib
@@ -792,6 +793,15 @@ def _mpl_repr(self):
         # how to represent ourselves to matplotlib
         return self._get_object_array()
 
+    def equals(self, other):
+        """
+        Determines if two Index objects contain the same elements.
+        """
+        if self is other:
+            return True
+
+        return np.array_equal(self.asi8, other.asi8)
+
     def tolist(self):
         """
         Return a list of Period objects
@@ -1029,16 +1039,18 @@ def __getitem__(self, key):
 
         return PeriodIndex(result, name=self.name, freq=self.freq)
 
-    def format(self, name=False, formatter=None):
-        """
-        Render a string representation of the Index
-        """
-        header = []
+    def _format_with_header(self, header, **kwargs):
+        return header + self._format_native_types(**kwargs)
 
-        if name:
-            header.append(str(self.name) if self.name is not None else '')
+    def _format_native_types(self, na_rep=u'NaT', **kwargs):
 
-        return header + ['%s' % Period(x, freq=self.freq) for x in self]
+        values = np.array(list(self),dtype=object)
+        mask = isnull(self.values)
+        values[mask] = na_rep
+
+        imask = -mask
+        values[imask] = np.array([ u'%s' % dt for dt in values[imask] ])
+        return values.tolist()
 
     def __array_finalize__(self, obj):
         if self.ndim == 0: # pragma: no cover
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 59f01ba7ea074..bc2aa7628bf28 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -406,7 +406,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None,
         label will repeated at the corresponding level, you can specify just
         the first few, the rest will use the default ndupe_l of 1.
         len(ndupe_l) <= nlevels.
-    idx_type - "i"/"f"/"s"/"u"/"dt".
+    idx_type - "i"/"f"/"s"/"u"/"dt"/"p".
         If idx_type is not None, `idx_nlevels` must be 1.
         "i"/"f" creates an integer/float index,
         "s"/"u" creates a string/unicode index
@@ -422,7 +422,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None,
     assert (names is None or names is False
             or names is True or len(names) is nlevels)
     assert idx_type is None or \
-        (idx_type in ('i', 'f', 's', 'u', 'dt') and nlevels == 1)
+        (idx_type in ('i', 'f', 's', 'u', 'dt', 'p') and nlevels == 1)
 
     if names is True:
         # build default names
@@ -437,7 +437,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None,
 
     # specific 1D index type requested?
     idx_func = dict(i=makeIntIndex, f=makeFloatIndex, s=makeStringIndex,
-                    u=makeUnicodeIndex, dt=makeDateIndex).get(idx_type)
+                    u=makeUnicodeIndex, dt=makeDateIndex, p=makePeriodIndex).get(idx_type)
     if idx_func:
         idx = idx_func(nentries)
         # but we need to fill in the name
@@ -446,7 +446,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None,
         return idx
     elif idx_type is not None:
         raise ValueError('"%s" is not a legal value for `idx_type`, use '
-                         '"i"/"f"/"s"/"u"/"dt".' % idx_type)
+                         '"i"/"f"/"s"/"u"/"dt"/"p".' % idx_type)
 
     if len(ndupe_l) < nlevels:
         ndupe_l.extend([1] * (nlevels - len(ndupe_l)))
@@ -540,9 +540,9 @@ def makeCustomDataframe(nrows, ncols, c_idx_names=True, r_idx_names=True,
     assert c_idx_nlevels > 0
     assert r_idx_nlevels > 0
     assert r_idx_type is None or \
-        (r_idx_type in ('i', 'f', 's', 'u', 'dt') and r_idx_nlevels == 1)
+        (r_idx_type in ('i', 'f', 's', 'u', 'dt', 'p') and r_idx_nlevels == 1)
     assert c_idx_type is None or \
-        (c_idx_type in ('i', 'f', 's', 'u', 'dt') and c_idx_nlevels == 1)
+        (c_idx_type in ('i', 'f', 's', 'u', 'dt', 'p') and c_idx_nlevels == 1)
 
     columns = makeCustomIndex(ncols, nlevels=c_idx_nlevels, prefix='C',
                               names=c_idx_names, ndupe_l=c_ndupe_l,
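
Usage sketch (not part of the patch): the refactor above routes Index.format() and Index.to_native_types() through the new per-class hooks _format_with_header / _format_native_types, which is what lets DataFrame.to_csv and to_string handle a PeriodIndex and render missing datetimes as 'NaT'. A minimal example against this branch; the output file name is an assumption used only for illustration:

    import pandas as pd

    # PeriodIndex row labels are now written via PeriodIndex._format_native_types
    idx = pd.period_range('2000-01', periods=3, freq='M')
    df = pd.DataFrame({'value': [1, 2, 3]}, index=idx)
    df.to_csv('period_index.csv')   # hypothetical output path

    # Index.format() defers to _format_with_header; extra kwargs such as na_rep
    # are forwarded to the subclass-specific _format_native_types
    print(idx.format(name=True))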