Skip to content

CLN: refactor core/index and tseries/index,period to have their format, to_native_types methods consistent #3193

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 28, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 23 additions & 36 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,12 +428,10 @@ def take(self, indexer, axis=0):
taken = self.view(np.ndarray).take(indexer)
return self._constructor(taken, name=self.name)

def format(self, name=False, formatter=None, na_rep='NaN'):
def format(self, name=False, formatter=None, **kwargs):
"""
Render a string representation of the Index
"""
from pandas.core.format import format_array

header = []
if name:
header.append(com.pprint_thing(self.name,
Expand All @@ -443,11 +441,13 @@ def format(self, name=False, formatter=None, na_rep='NaN'):
if formatter is not None:
return header + list(self.map(formatter))

if self.is_all_dates:
return header + _date_formatter(self)
return self._format_with_header(header, **kwargs)

def _format_with_header(self, header, na_rep='NaN', **kwargs):
values = self.values

from pandas.core.format import format_array

if values.dtype == np.object_:
values = lib.maybe_convert_objects(values, safe=1)

Expand All @@ -466,17 +466,18 @@ def format(self, name=False, formatter=None, na_rep='NaN'):
result = _trim_front(format_array(values, None, justify='left'))
return header + result

def to_native_types(self, slicer=None, na_rep='', float_format=None):
def to_native_types(self, slicer=None, **kwargs):
""" slice and dice then format """
values = self
if slicer is not None:
values = values[slicer]
if self.is_all_dates:
return _date_formatter(values)
else:
mask = isnull(values)
values = np.array(values,dtype=object)
values[mask] = na_rep
return values._format_native_types(**kwargs)

def _format_native_types(self, na_rep='', **kwargs):
    """
    Return the elements as a plain Python list with nulls replaced.

    Parameters
    ----------
    na_rep : object, default ''
        Value written in place of every element that ``isnull`` flags.
    **kwargs
        Accepted so callers can forward formatting options; unused here.

    Returns
    -------
    list
        Elements of an object-dtype copy of ``self``; ``self`` itself is
        not modified.
    """
    # work on an object-dtype copy so na_rep can be stored whatever its type
    native = np.array(self, dtype=object, copy=True)
    native[isnull(self)] = na_rep
    return native.tolist()

def equals(self, other):
Expand Down Expand Up @@ -1320,6 +1321,11 @@ def inferred_type(self):
def _constructor(self):
return Int64Index

@property
def asi8(self):
    """Return ``self.values`` reinterpreted as an 'i8' (int64) array."""
    # do not cache or you'll create a memory leak
    raw = self.values
    return raw.view('i8')

@property
def is_all_dates(self):
"""
Expand Down Expand Up @@ -1489,11 +1495,8 @@ def __repr__(self):
def __len__(self):
return len(self.labels[0])

def to_native_types(self, slicer=None, na_rep='', float_format=None):
ix = self
if slicer:
ix = self[slicer]
return ix.tolist()
def _format_native_types(self, **kwargs):
    """
    Return the elements as a list by delegating to ``self.tolist()``.

    Any keyword arguments are accepted and ignored, so this can be called
    with the same options as the other ``_format_native_types`` variants.
    """
    native = self.tolist()
    return native

@property
def _constructor(self):
Expand Down Expand Up @@ -1651,13 +1654,13 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
# we have some NA
mask = lab==-1
if mask.any():
formatted = np.array(formatted)
formatted = np.array(formatted,dtype=object)
formatted[mask] = na_rep
formatted = formatted.tolist()

else:
# weird all NA case
formatted = [com.pprint_thing(x, escape_chars=('\t', '\r', '\n'))
formatted = [com.pprint_thing(na_rep if isnull(x) else x, escape_chars=('\t', '\r', '\n'))
for x in com.take_1d(lev.values, lab)]
stringified_levels.append(formatted)

Expand All @@ -1669,6 +1672,7 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
level.append(com.pprint_thing(name, escape_chars=('\t', '\r', '\n'))
if name is not None else '')


level.extend(np.array(lev, dtype=object))
result_levels.append(level)

Expand Down Expand Up @@ -2598,23 +2602,6 @@ def _wrap_joined_index(self, joined, other):

# For utility purposes

def _date_formatter(obj, na_rep=u'NaT'):
data = list(obj)

# tz formatter or time formatter
zero_time = time(0, 0)
for d in data:
if d.time() != zero_time or d.tzinfo is not None:
return [u'%s' % x for x in data ]

values = np.array(data,dtype=object)
mask = isnull(obj.values)
values[mask] = na_rep

imask = -mask
values[imask] = np.array([ u'%d-%.2d-%.2d' % (dt.year, dt.month, dt.day) for dt in values[imask] ])
return values.tolist()

def _sparsify(label_list, start=0):
pivoted = zip(*label_list)
k = len(label_list)
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,22 @@ def test_index_with_nan(self):
expected = u' id1 id3 value\nid2 \nNaN 1a3 78d 123\nNaN 9h4 79d 64'
self.assert_(result == expected)

# partial nan in mi
df2 = df.copy()
df2.ix[:,'id2'] = np.nan
y = df2.set_index(['id2','id3'])
result = y.to_string()
expected = u' id1 value\nid2 id3 \nNaN 78d 1a3 123\n 79d 9h4 64'
self.assert_(result == expected)

df = DataFrame({'id1': {0: np.nan, 1: '9h4'}, 'id2': {0: np.nan, 1: 'd67'},
'id3': {0: np.nan, 1: '79d'}, 'value': {0: 123, 1: 64}})

y = df.set_index(['id1','id2','id3'])
result = y.to_string()
expected = u' value\nid1 id2 id3 \nNaN NaN NaN 123\n9h4 d67 79d 64'
self.assert_(result == expected)

def test_to_string(self):
from pandas import read_table
import re
Expand Down
22 changes: 16 additions & 6 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import pandas.core.format as fmt
import pandas.core.datetools as datetools
from pandas.core.api import (DataFrame, Index, Series, notnull, isnull,
MultiIndex, DatetimeIndex, Timestamp)
MultiIndex, DatetimeIndex, Timestamp, Period)
from pandas.io.parsers import read_csv

from pandas.util.testing import (assert_almost_equal,
Expand Down Expand Up @@ -4587,7 +4587,7 @@ def stuple_to_tuple(x):
cols=MultiIndex.from_tuples(map(stuple_to_tuple,recons.columns))
recons.columns = cols

type_map = dict(i='i',f='f',s='O',u='O',dt='O')
type_map = dict(i='i',f='f',s='O',u='O',dt='O',p='O')
if r_dtype:
if r_dtype == 'u': # unicode
r_dtype='O'
Expand All @@ -4599,6 +4599,11 @@ def stuple_to_tuple(x):
recons.index = np.array(map(Timestamp,recons.index),
dtype=r_dtype )
df.index = np.array(map(Timestamp,df.index),dtype=r_dtype )
elif r_dtype == 'p':
r_dtype='O'
recons.index = np.array(map(Timestamp,recons.index.to_datetime()),
dtype=r_dtype )
df.index = np.array(map(Timestamp,df.index.to_datetime()),dtype=r_dtype )
else:
r_dtype= type_map.get(r_dtype)
recons.index = np.array(recons.index,dtype=r_dtype )
Expand All @@ -4608,12 +4613,17 @@ def stuple_to_tuple(x):
c_dtype='O'
recons.columns = np.array(map(_to_uni,recons.columns),
dtype=c_dtype )
df.Columns = np.array(map(_to_uni,df.columns),dtype=c_dtype )
df.columns = np.array(map(_to_uni,df.columns),dtype=c_dtype )
elif c_dtype == 'dt':
c_dtype='O'
recons.columns = np.array(map(Timestamp,recons.columns),
dtype=c_dtype )
df.Columns = np.array(map(Timestamp,df.columns),dtype=c_dtype )
df.columns = np.array(map(Timestamp,df.columns),dtype=c_dtype )
elif c_dtype == 'p':
c_dtype='O'
recons.columns = np.array(map(Timestamp,recons.columns.to_datetime()),
dtype=c_dtype )
df.columns = np.array(map(Timestamp,df.columns.to_datetime()),dtype=c_dtype )
else:
c_dtype= type_map.get(c_dtype)
recons.columns = np.array(recons.columns,dtype=c_dtype )
Expand All @@ -4631,8 +4641,8 @@ def stuple_to_tuple(x):
_do_test(mkdf(nrows, ncols,r_idx_type='dt',
c_idx_type='s'),path, 'dt','s')

for r_idx_type in ['i', 'f','s','u']:
for c_idx_type in ['i', 'f','s','u','dt']:
for r_idx_type in ['i','s','u','p']:
for c_idx_type in ['i', 's','u','dt','p']:
for ncols in [1,2,128]:
base = int((chunksize// ncols or 1) or 1)
for nrows in [2,10,N-1,N,N+1,N+2,2*N-2,2*N-1,2*N,2*N+1,2*N+2,
Expand Down
25 changes: 20 additions & 5 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,26 @@ def __contains__(self, key):
except (KeyError, TypeError):
return False

def _format_with_header(self, header, **kwargs):
    """
    Return ``header`` followed by the formatted elements.

    ``kwargs`` are forwarded unchanged to ``self._format_native_types``.
    A new list is built; ``header`` itself is left untouched.
    """
    formatted = self._format_native_types(**kwargs)
    return header + formatted

def _format_native_types(self, na_rep=u'NaT', **kwargs):
    """
    Render each element of this datetime-like index as a unicode string.

    Parameters
    ----------
    na_rep : unicode, default u'NaT'
        Text written for null entries on the date-only path.
    **kwargs
        Accepted so callers can forward formatting options; unused here.

    Returns
    -------
    list of unicode
        If any element has a non-midnight time or carries tzinfo, every
        element is rendered with ``u'%s'``. Otherwise non-null elements
        are rendered as ``YYYY-MM-DD`` and null ones as ``na_rep``.
    """
    data = list(self)

    # tz formatter or time formatter
    zero_time = time(0, 0)
    if any(d.time() != zero_time or d.tzinfo is not None for d in data):
        # NOTE(review): na_rep is not applied on this path; nulls come out
        # however ``u'%s'`` renders them -- confirm this is intended.
        return [u'%s' % x for x in data]

    values = np.array(data, dtype=object)
    mask = isnull(self.values)
    values[mask] = na_rep

    # Invert with ``~``: unary minus on a boolean array raises TypeError on
    # current NumPy, while ``~`` is accepted by old and new versions alike.
    imask = ~mask
    values[imask] = np.array([u'%d-%.2d-%.2d' % (dt.year, dt.month, dt.day)
                              for dt in values[imask]])
    return values.tolist()

def isin(self, values):
"""
Compute boolean array of whether each index value is found in the
Expand Down Expand Up @@ -627,11 +647,6 @@ def astype(self, dtype):
else: # pragma: no cover
raise ValueError('Cannot cast DatetimeIndex to dtype %s' % dtype)

@property
def asi8(self):
# do not cache or you'll create a memory leak
return self.values.view('i8')

def _get_time_micros(self):
utc = _utc()
values = self.asi8
Expand Down
28 changes: 20 additions & 8 deletions pandas/tseries/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import pandas.tseries.frequencies as _freq_mod

import pandas.core.common as com
from pandas.core.common import isnull

from pandas.lib import Timestamp
import pandas.lib as lib
Expand Down Expand Up @@ -792,6 +793,15 @@ def _mpl_repr(self):
# how to represent ourselves to matplotlib
return self._get_object_array()

def equals(self, other):
    """
    Determines if two Index objects contain the same elements.

    Returns True immediately when ``other`` is the very same object;
    otherwise compares the ``asi8`` arrays of both with ``np.array_equal``.
    """
    same_object = self is other
    return True if same_object else np.array_equal(self.asi8, other.asi8)

def tolist(self):
"""
Return a list of Period objects
Expand Down Expand Up @@ -1029,16 +1039,18 @@ def __getitem__(self, key):

return PeriodIndex(result, name=self.name, freq=self.freq)

def format(self, name=False, formatter=None):
"""
Render a string representation of the Index
"""
header = []
def _format_with_header(self, header, **kwargs):
    """
    Return ``header`` followed by the formatted elements.

    ``kwargs`` are forwarded unchanged to ``self._format_native_types``.
    A new list is built; ``header`` itself is left untouched.
    """
    formatted = self._format_native_types(**kwargs)
    return header + formatted

if name:
header.append(str(self.name) if self.name is not None else '')
def _format_native_types(self, na_rep=u'NaT', **kwargs):

return header + ['%s' % Period(x, freq=self.freq) for x in self]
values = np.array(list(self),dtype=object)
mask = isnull(self.values)
values[mask] = na_rep

imask = -mask
values[imask] = np.array([ u'%s' % dt for dt in values[imask] ])
return values.tolist()

def __array_finalize__(self, obj):
if self.ndim == 0: # pragma: no cover
Expand Down
12 changes: 6 additions & 6 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None,
label will repeated at the corresponding level, you can specify just
the first few, the rest will use the default ndupe_l of 1.
len(ndupe_l) <= nlevels.
idx_type - "i"/"f"/"s"/"u"/"dt".
idx_type - "i"/"f"/"s"/"u"/"dt"/"p".
If idx_type is not None, `idx_nlevels` must be 1.
"i"/"f" creates an integer/float index,
"s"/"u" creates a string/unicode index
Expand All @@ -422,7 +422,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None,
assert (names is None or names is False
or names is True or len(names) is nlevels)
assert idx_type is None or \
(idx_type in ('i', 'f', 's', 'u', 'dt') and nlevels == 1)
(idx_type in ('i', 'f', 's', 'u', 'dt', 'p') and nlevels == 1)

if names is True:
# build default names
Expand All @@ -437,7 +437,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None,

# specific 1D index type requested?
idx_func = dict(i=makeIntIndex, f=makeFloatIndex, s=makeStringIndex,
u=makeUnicodeIndex, dt=makeDateIndex).get(idx_type)
u=makeUnicodeIndex, dt=makeDateIndex, p=makePeriodIndex).get(idx_type)
if idx_func:
idx = idx_func(nentries)
# but we need to fill in the name
Expand All @@ -446,7 +446,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None,
return idx
elif idx_type is not None:
raise ValueError('"%s" is not a legal value for `idx_type`, use '
'"i"/"f"/"s"/"u"/"dt".' % idx_type)
'"i"/"f"/"s"/"u"/"dt/"p".' % idx_type)

if len(ndupe_l) < nlevels:
ndupe_l.extend([1] * (nlevels - len(ndupe_l)))
Expand Down Expand Up @@ -540,9 +540,9 @@ def makeCustomDataframe(nrows, ncols, c_idx_names=True, r_idx_names=True,
assert c_idx_nlevels > 0
assert r_idx_nlevels > 0
assert r_idx_type is None or \
(r_idx_type in ('i', 'f', 's', 'u', 'dt') and r_idx_nlevels == 1)
(r_idx_type in ('i', 'f', 's', 'u', 'dt', 'p') and r_idx_nlevels == 1)
assert c_idx_type is None or \
(c_idx_type in ('i', 'f', 's', 'u', 'dt') and c_idx_nlevels == 1)
(c_idx_type in ('i', 'f', 's', 'u', 'dt', 'p') and c_idx_nlevels == 1)

columns = makeCustomIndex(ncols, nlevels=c_idx_nlevels, prefix='C',
names=c_idx_names, ndupe_l=c_ndupe_l,
Expand Down