From 4479c82793c4db0069de9343ac55f665eca8a1c8 Mon Sep 17 00:00:00 2001 From: TomAugspurger Date: Wed, 14 May 2014 12:27:09 -0500 Subject: [PATCH] BUG: dont use Counter and make test unambiguous --- pandas/core/generic.py | 22 +++++++++------------- pandas/tests/test_generic.py | 15 ++++++++------- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9172d174a1354..04ab4fb14d512 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3573,22 +3573,18 @@ def describe_numeric_1d(series, percentiles): [series.max()]) def describe_categorical_1d(data): - if data.dtype == object: - names = ['count', 'unique'] - objcounts = data.value_counts() - result = [data.count(), len(objcounts)] - if result[1] > 0: + names = ['count', 'unique'] + objcounts = data.value_counts() + result = [data.count(), len(objcounts)] + if result[1] > 0: + top, freq = objcounts.index[0], objcounts.iloc[0] + + if data.dtype == object: names += ['top', 'freq'] - top, freq = objcounts.index[0], objcounts.iloc[0] result += [top, freq] - elif issubclass(data.dtype.type, np.datetime64): - names = ['count', 'unique'] - asint = data.dropna().values.view('i8') - objcounts = compat.Counter(asint) - result = [data.count(), len(objcounts)] - if result[1] > 0: - top, freq = objcounts.most_common(1)[0] + elif issubclass(data.dtype.type, np.datetime64): + asint = data.dropna().values.view('i8') names += ['first', 'last', 'top', 'freq'] result += [lib.Timestamp(asint.min()), lib.Timestamp(asint.max()), diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 57ec9d0eb8981..148eecbd8dbaf 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -988,30 +988,31 @@ def test_describe_objects(self): assert_frame_equal(result, expected) df = DataFrame({"C1": pd.date_range('2010-01-01', periods=4, freq='D')}) + df.loc[4] = pd.Timestamp('2010-01-04') result = df.describe() - expected = DataFrame({"C1": [4, 4, pd.Timestamp('2010-01-01'), + expected = DataFrame({"C1": [5, 4, pd.Timestamp('2010-01-01'), pd.Timestamp('2010-01-04'), - pd.Timestamp('2010-01-01'), 1]}, + pd.Timestamp('2010-01-04'), 2]}, index=['count', 'unique', 'first', 'last', 'top', 'freq']) assert_frame_equal(result, expected) # mix time and str - df['C2'] = ['a', 'a', 'b', 'c'] + df['C2'] = ['a', 'a', 'b', 'c', 'a'] result = df.describe() # when mix of dateimte / obj the index gets reordered. - expected['C2'] = [4, 3, np.nan, np.nan, 'a', 2] + expected['C2'] = [5, 3, np.nan, np.nan, 'a', 3] assert_frame_equal(result, expected) # just str - expected = DataFrame({'C2': [4, 3, 'a', 2]}, + expected = DataFrame({'C2': [5, 3, 'a', 4]}, index=['count', 'unique', 'top', 'freq']) result = df[['C2']].describe() # mix of time, str, numeric - df['C3'] = [2, 4, 6, 8] + df['C3'] = [2, 4, 6, 8, 2] result = df.describe() - expected = DataFrame({"C3": [4., 5., 2.5819889, 2., 3.5, 5., 6.5, 8.]}, + expected = DataFrame({"C3": [5., 4.4, 2.607681, 2., 2., 4., 6., 8.]}, index=['count', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']) assert_frame_equal(result, expected)