@@ -1022,6 +1022,30 @@ def _union_index(self, other):
10221022
10231023 return union_index
10241024
def describe(self):
    """
    Summarize the numeric columns with a fixed set of descriptive
    statistics, excluding NaN values.

    For every column reported by ``_get_numeric_columns`` the result holds
    the count, mean, standard deviation, minimum, 10th percentile, median
    (labelled '50%'), 90th percentile and maximum.

    Returns
    -------
    DataFrame
        One row per numeric column, one column per statistic.
    """
    numeric_cols = self._get_numeric_columns()
    numeric = self.reindex(columns=numeric_cols)

    # (label, per-column values) pairs, listed in output column order.
    stats = [
        ('count', numeric.count()),
        ('mean', numeric.mean()),
        ('std', numeric.std()),
        ('min', numeric.min()),
        ('10%', numeric.scoreatpercentile(10)),
        ('50%', numeric.median()),
        ('90%', numeric.scoreatpercentile(90)),
        ('max', numeric.max()),
    ]
    labels = [label for label, _ in stats]

    # Start from a zero-filled frame so the row/column layout is fixed up
    # front, then overwrite each statistic column in turn.
    summary = self._constructor(np.zeros((len(numeric_cols), len(labels))),
                                index=numeric_cols, columns=labels)
    for label, values in stats:
        summary[label] = values

    return summary
1048+
10251049 def dropEmptyRows (self , specificColumns = None ):
10261050 """
10271051 Return DataFrame with rows omitted containing ALL NaN values
@@ -2097,6 +2121,38 @@ def mean(self, axis=0):
20972121
20982122 return summed / count
20992123
def scoreatpercentile(self, per=50, axis=0):
    """
    Return a Series holding the score at the `per` percentile over the
    requested axis, ignoring null entries.

    Parameters
    ----------
    per : scalar, default 50
        Percentile to compute; passed through unchanged to
        ``scipy.stats.scoreatpercentile``.
    axis : {0, 1}, default 0
        0 computes one value per column (result indexed by the columns),
        1 computes one value per row (result indexed by the index).

    Returns
    -------
    Series
        NaN is reported for any column / row with no non-null values.

    Raises
    ------
    Exception
        If `axis` is neither 0 nor 1.
    """
    # Function-scope import: scipy is only loaded when this method runs.
    from scipy.stats import scoreatpercentile as _score

    def _percentile(values):
        # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed.
        if values.dtype != np.float64:
            values = values.astype(float)
        valid = values[notnull(values)]
        if valid.size == 0:
            # Nothing to rank: report missing rather than relying on how
            # the installed scipy treats an empty input.
            return np.nan
        return _score(valid, per)

    if axis == 0:
        scores = [_percentile(self[col].values) for col in self.columns]
        return Series(scores, index=self.columns)
    elif axis == 1:
        scores = [_percentile(self.xs(key).values) for key in self.index]
        return Series(scores, index=self.index)
    else:
        raise Exception('Must have 0<= axis <= 1')
2155+
21002156 def median (self , axis = 0 ):
21012157 """
21022158 Return array or Series of medians over requested axis.
0 commit comments