@@ -2791,26 +2791,77 @@ def hist(self, by=None, bins=10, **kwds):
27912791
27922792 def kde (self , bw_method = None , ind = None , ** kwds ):
27932793 """
2794- Kernel Density Estimate plot
2794+ Generate Kernel Density Estimate plot using Gaussian kernels.
2795+
2796+ In statistics, kernel density estimation (KDE) is a non-parametric way
2797+ to estimate the probability density function (PDF) of a random
2798+ variable. This function uses Gaussian kernels and includes automatic
2799+ bandwidth determination.
27952800
27962801 Parameters
27972802 ----------
2798- bw_method: str, scalar or callable, optional
2799- The method used to calculate the estimator bandwidth. This can be
2803+ bw_method : str, scalar or callable, optional
2804+ The method used to calculate the estimator bandwidth. This can be
28002805 'scott', 'silverman', a scalar constant or a callable.
28012806 If None (default), 'scott' is used.
28022807 See :class:`scipy.stats.gaussian_kde` for more information.
28032808 ind : NumPy array or integer, optional
2804- Evaluation points. If None (default), 1000 equally spaced points
2805- are used. If `ind` is a NumPy array, the kde is evaluated at the
2806- points passed. If `ind` is an integer, `ind` number of equally
2807- spaced points are used.
2808- `**kwds` : optional
2809- Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`.
2809+ Evaluation points for the estimated PDF. If None (default),
2810+ 1000 equally spaced points are used. If `ind` is a NumPy array, the
2811+ kde is evaluated at the points passed. If `ind` is an integer,
2812+ `ind` number of equally spaced points are used.
2813+ **kwds : optional
2814+ Additional keyword arguments are documented in
2815+ :meth:`pandas.DataFrame.plot`.
28102816
28112817 Returns
28122818 -------
28132819 axes : matplotlib.AxesSubplot or np.array of them
2820+
2821+ See Also
2822+ --------
2823+ scipy.stats.gaussian_kde : Representation of a kernel-density
2824+ estimate using Gaussian kernels. This is the function used
2825+ internally to estimate the PDF.
2826+ :meth:`pandas.Series.plot.kde` : Generate a KDE plot for a Series.
2827+
2828+ Examples
2829+ --------
2830+ Given several Series of points randomly sampled from unknown
2831+ distributions, estimate their distribution using KDE with automatic
2832+ bandwidth determination and plot the results, evaluating them at
2833+ 1000 equally spaced points (default):
2834+
2835+ .. plot::
2836+ :context: close-figs
2837+
2838+ >>> df = pd.DataFrame({
2839+ ... 'x': [1, 2, 2.5, 3, 3.5, 4, 5],
2840+ ... 'y': [4, 4, 4.5, 5, 5.5, 6, 6],
2841+ ... })
2842+ >>> ax = df.plot.kde()
2843+
2844+ A scalar bandwidth can be specified. Using a small bandwidth value can
2845+ lead to overfitting, while using a large bandwidth value can result
2846+ in underfitting:
2847+
2848+ .. plot::
2849+ :context: close-figs
2850+
2851+ >>> ax = df.plot.kde(bw_method=0.3)
2852+
2853+ .. plot::
2854+ :context: close-figs
2855+
2856+ >>> ax = df.plot.kde(bw_method=3)
2857+
2858+ Finally, the `ind` parameter determines the evaluation points for the
2859+ plot of the estimated PDF:
2860+
2861+ .. plot::
2862+ :context: close-figs
2863+
2864+ >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
28142865 """
28152866 return self (kind = 'kde' , bw_method = bw_method , ind = ind , ** kwds )
28162867
0 commit comments