Skip to content

Commit 569c67f

Browse files
committed
Add ddof for var, std
1 parent 6b9a81a commit 569c67f

File tree

2 files changed

+127
-2
lines changed

2 files changed

+127
-2
lines changed

xarray/core/_reductions.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,7 @@ def std(
735735
self: DatasetReduce,
736736
dim: Union[None, Hashable, Sequence[Hashable]] = None,
737737
skipna: bool = True,
738+
ddof: int = 0,
738739
keep_attrs: bool = None,
739740
**kwargs,
740741
) -> T_Dataset:
@@ -751,6 +752,9 @@ def std(
751752
skips missing values for float dtypes; other dtypes either do not
752753
have a sentinel missing value (int) or skipna=True has not been
753754
implemented (object, datetime64 or timedelta64).
755+
ddof : int, default: 0
756+
"Delta Degrees of Freedom": the divisor used in the calculation is ``N - ddof``,
757+
where ``N`` represents the number of elements.
754758
keep_attrs : bool, optional
755759
If True, ``attrs`` will be copied from the original
756760
object to the new one. If False (default), the new object will be
@@ -803,6 +807,16 @@ def std(
803807
Data variables:
804808
da (labels) float64 nan 0.0 1.0
805809
810+
Specify ``ddof=1`` to use the ``N - 1`` divisor, which gives an unbiased estimate of the variance.
811+
812+
>>> ds.groupby("labels").std(skipna=True, ddof=1)
813+
<xarray.Dataset>
814+
Dimensions: (labels: 3)
815+
Coordinates:
816+
* labels (labels) object 'a' 'b' 'c'
817+
Data variables:
818+
da (labels) float64 nan 0.0 1.414
819+
806820
See Also
807821
--------
808822
numpy.std
@@ -814,6 +828,7 @@ def std(
814828
duck_array_ops.std,
815829
dim=dim,
816830
skipna=skipna,
831+
ddof=ddof,
817832
numeric_only=True,
818833
keep_attrs=keep_attrs,
819834
**kwargs,
@@ -823,6 +838,7 @@ def var(
823838
self: DatasetReduce,
824839
dim: Union[None, Hashable, Sequence[Hashable]] = None,
825840
skipna: bool = True,
841+
ddof: int = 0,
826842
keep_attrs: bool = None,
827843
**kwargs,
828844
) -> T_Dataset:
@@ -839,6 +855,9 @@ def var(
839855
skips missing values for float dtypes; other dtypes either do not
840856
have a sentinel missing value (int) or skipna=True has not been
841857
implemented (object, datetime64 or timedelta64).
858+
ddof : int, default: 0
859+
"Delta Degrees of Freedom": the divisor used in the calculation is ``N - ddof``,
860+
where ``N`` represents the number of elements.
842861
keep_attrs : bool, optional
843862
If True, ``attrs`` will be copied from the original
844863
object to the new one. If False (default), the new object will be
@@ -891,6 +910,16 @@ def var(
891910
Data variables:
892911
da (labels) float64 nan 0.0 1.0
893912
913+
Specify ``ddof=1`` to use the ``N - 1`` divisor, which gives an unbiased estimate of the variance.
914+
915+
>>> ds.groupby("labels").var(skipna=True, ddof=1)
916+
<xarray.Dataset>
917+
Dimensions: (labels: 3)
918+
Coordinates:
919+
* labels (labels) object 'a' 'b' 'c'
920+
Data variables:
921+
da (labels) float64 nan 0.0 2.0
922+
894923
See Also
895924
--------
896925
numpy.var
@@ -902,6 +931,7 @@ def var(
902931
duck_array_ops.var,
903932
dim=dim,
904933
skipna=skipna,
934+
ddof=ddof,
905935
numeric_only=True,
906936
keep_attrs=keep_attrs,
907937
**kwargs,
@@ -1692,6 +1722,7 @@ def std(
16921722
self: DatasetReduce,
16931723
dim: Union[None, Hashable, Sequence[Hashable]] = None,
16941724
skipna: bool = True,
1725+
ddof: int = 0,
16951726
keep_attrs: bool = None,
16961727
**kwargs,
16971728
) -> T_Dataset:
@@ -1708,6 +1739,9 @@ def std(
17081739
skips missing values for float dtypes; other dtypes either do not
17091740
have a sentinel missing value (int) or skipna=True has not been
17101741
implemented (object, datetime64 or timedelta64).
1742+
ddof : int, default: 0
1743+
"Delta Degrees of Freedom": the divisor used in the calculation is ``N - ddof``,
1744+
where ``N`` represents the number of elements.
17111745
keep_attrs : bool, optional
17121746
If True, ``attrs`` will be copied from the original
17131747
object to the new one. If False (default), the new object will be
@@ -1760,6 +1794,16 @@ def std(
17601794
Data variables:
17611795
da (time) float64 0.0 0.8165 nan
17621796
1797+
Specify ``ddof=1`` to use the ``N - 1`` divisor, which gives an unbiased estimate of the variance.
1798+
1799+
>>> ds.resample(time="3M").std(skipna=True, ddof=1)
1800+
<xarray.Dataset>
1801+
Dimensions: (time: 3)
1802+
Coordinates:
1803+
* time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
1804+
Data variables:
1805+
da (time) float64 nan 1.0 nan
1806+
17631807
See Also
17641808
--------
17651809
numpy.std
@@ -1771,6 +1815,7 @@ def std(
17711815
duck_array_ops.std,
17721816
dim=dim,
17731817
skipna=skipna,
1818+
ddof=ddof,
17741819
numeric_only=True,
17751820
keep_attrs=keep_attrs,
17761821
**kwargs,
@@ -1780,6 +1825,7 @@ def var(
17801825
self: DatasetReduce,
17811826
dim: Union[None, Hashable, Sequence[Hashable]] = None,
17821827
skipna: bool = True,
1828+
ddof: int = 0,
17831829
keep_attrs: bool = None,
17841830
**kwargs,
17851831
) -> T_Dataset:
@@ -1796,6 +1842,9 @@ def var(
17961842
skips missing values for float dtypes; other dtypes either do not
17971843
have a sentinel missing value (int) or skipna=True has not been
17981844
implemented (object, datetime64 or timedelta64).
1845+
ddof : int, default: 0
1846+
"Delta Degrees of Freedom": the divisor used in the calculation is ``N - ddof``,
1847+
where ``N`` represents the number of elements.
17991848
keep_attrs : bool, optional
18001849
If True, ``attrs`` will be copied from the original
18011850
object to the new one. If False (default), the new object will be
@@ -1848,6 +1897,16 @@ def var(
18481897
Data variables:
18491898
da (time) float64 0.0 0.6667 nan
18501899
1900+
Specify ``ddof=1`` to use the ``N - 1`` divisor, which gives an unbiased estimate of the variance.
1901+
1902+
>>> ds.resample(time="3M").var(skipna=True, ddof=1)
1903+
<xarray.Dataset>
1904+
Dimensions: (time: 3)
1905+
Coordinates:
1906+
* time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
1907+
Data variables:
1908+
da (time) float64 nan 1.0 nan
1909+
18511910
See Also
18521911
--------
18531912
numpy.var
@@ -1859,6 +1918,7 @@ def var(
18591918
duck_array_ops.var,
18601919
dim=dim,
18611920
skipna=skipna,
1921+
ddof=ddof,
18621922
numeric_only=True,
18631923
keep_attrs=keep_attrs,
18641924
**kwargs,
@@ -2587,6 +2647,7 @@ def std(
25872647
self: DataArrayReduce,
25882648
dim: Union[None, Hashable, Sequence[Hashable]] = None,
25892649
skipna: bool = True,
2650+
ddof: int = 0,
25902651
keep_attrs: bool = None,
25912652
**kwargs,
25922653
) -> T_DataArray:
@@ -2603,6 +2664,9 @@ def std(
26032664
skips missing values for float dtypes; other dtypes either do not
26042665
have a sentinel missing value (int) or skipna=True has not been
26052666
implemented (object, datetime64 or timedelta64).
2667+
ddof : int, default: 0
2668+
"Delta Degrees of Freedom": the divisor used in the calculation is ``N - ddof``,
2669+
where ``N`` represents the number of elements.
26062670
keep_attrs : bool, optional
26072671
If True, ``attrs`` will be copied from the original
26082672
object to the new one. If False (default), the new object will be
@@ -2648,6 +2712,14 @@ def std(
26482712
Coordinates:
26492713
* labels (labels) object 'a' 'b' 'c'
26502714
2715+
Specify ``ddof=1`` to use the ``N - 1`` divisor, which gives an unbiased estimate of the variance.
2716+
2717+
>>> da.groupby("labels").std(skipna=True, ddof=1)
2718+
<xarray.DataArray (labels: 3)>
2719+
array([ nan, 0. , 1.41421356])
2720+
Coordinates:
2721+
* labels (labels) object 'a' 'b' 'c'
2722+
26512723
See Also
26522724
--------
26532725
numpy.std
@@ -2659,6 +2731,7 @@ def std(
26592731
duck_array_ops.std,
26602732
dim=dim,
26612733
skipna=skipna,
2734+
ddof=ddof,
26622735
keep_attrs=keep_attrs,
26632736
**kwargs,
26642737
)
@@ -2667,6 +2740,7 @@ def var(
26672740
self: DataArrayReduce,
26682741
dim: Union[None, Hashable, Sequence[Hashable]] = None,
26692742
skipna: bool = True,
2743+
ddof: int = 0,
26702744
keep_attrs: bool = None,
26712745
**kwargs,
26722746
) -> T_DataArray:
@@ -2683,6 +2757,9 @@ def var(
26832757
skips missing values for float dtypes; other dtypes either do not
26842758
have a sentinel missing value (int) or skipna=True has not been
26852759
implemented (object, datetime64 or timedelta64).
2760+
ddof : int, default: 0
2761+
"Delta Degrees of Freedom": the divisor used in the calculation is ``N - ddof``,
2762+
where ``N`` represents the number of elements.
26862763
keep_attrs : bool, optional
26872764
If True, ``attrs`` will be copied from the original
26882765
object to the new one. If False (default), the new object will be
@@ -2728,6 +2805,14 @@ def var(
27282805
Coordinates:
27292806
* labels (labels) object 'a' 'b' 'c'
27302807
2808+
Specify ``ddof=1`` to use the ``N - 1`` divisor, which gives an unbiased estimate of the variance.
2809+
2810+
>>> da.groupby("labels").var(skipna=True, ddof=1)
2811+
<xarray.DataArray (labels: 3)>
2812+
array([nan, 0., 2.])
2813+
Coordinates:
2814+
* labels (labels) object 'a' 'b' 'c'
2815+
27312816
See Also
27322817
--------
27332818
numpy.var
@@ -2739,6 +2824,7 @@ def var(
27392824
duck_array_ops.var,
27402825
dim=dim,
27412826
skipna=skipna,
2827+
ddof=ddof,
27422828
keep_attrs=keep_attrs,
27432829
**kwargs,
27442830
)
@@ -3458,6 +3544,7 @@ def std(
34583544
self: DataArrayReduce,
34593545
dim: Union[None, Hashable, Sequence[Hashable]] = None,
34603546
skipna: bool = True,
3547+
ddof: int = 0,
34613548
keep_attrs: bool = None,
34623549
**kwargs,
34633550
) -> T_DataArray:
@@ -3474,6 +3561,9 @@ def std(
34743561
skips missing values for float dtypes; other dtypes either do not
34753562
have a sentinel missing value (int) or skipna=True has not been
34763563
implemented (object, datetime64 or timedelta64).
3564+
ddof : int, default: 0
3565+
"Delta Degrees of Freedom": the divisor used in the calculation is ``N - ddof``,
3566+
where ``N`` represents the number of elements.
34773567
keep_attrs : bool, optional
34783568
If True, ``attrs`` will be copied from the original
34793569
object to the new one. If False (default), the new object will be
@@ -3519,6 +3609,14 @@ def std(
35193609
Coordinates:
35203610
* time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
35213611
3612+
Specify ``ddof=1`` to use the ``N - 1`` divisor, which gives an unbiased estimate of the variance.
3613+
3614+
>>> da.resample(time="3M").std(skipna=True, ddof=1)
3615+
<xarray.DataArray (time: 3)>
3616+
array([nan, 1., nan])
3617+
Coordinates:
3618+
* time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
3619+
35223620
See Also
35233621
--------
35243622
numpy.std
@@ -3530,6 +3628,7 @@ def std(
35303628
duck_array_ops.std,
35313629
dim=dim,
35323630
skipna=skipna,
3631+
ddof=ddof,
35333632
keep_attrs=keep_attrs,
35343633
**kwargs,
35353634
)
@@ -3538,6 +3637,7 @@ def var(
35383637
self: DataArrayReduce,
35393638
dim: Union[None, Hashable, Sequence[Hashable]] = None,
35403639
skipna: bool = True,
3640+
ddof: int = 0,
35413641
keep_attrs: bool = None,
35423642
**kwargs,
35433643
) -> T_DataArray:
@@ -3554,6 +3654,9 @@ def var(
35543654
skips missing values for float dtypes; other dtypes either do not
35553655
have a sentinel missing value (int) or skipna=True has not been
35563656
implemented (object, datetime64 or timedelta64).
3657+
ddof : int, default: 0
3658+
"Delta Degrees of Freedom": the divisor used in the calculation is ``N - ddof``,
3659+
where ``N`` represents the number of elements.
35573660
keep_attrs : bool, optional
35583661
If True, ``attrs`` will be copied from the original
35593662
object to the new one. If False (default), the new object will be
@@ -3599,6 +3702,14 @@ def var(
35993702
Coordinates:
36003703
* time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
36013704
3705+
Specify ``ddof=1`` to use the ``N - 1`` divisor, which gives an unbiased estimate of the variance.
3706+
3707+
>>> da.resample(time="3M").var(skipna=True, ddof=1)
3708+
<xarray.DataArray (time: 3)>
3709+
array([nan, 1., nan])
3710+
Coordinates:
3711+
* time (time) datetime64[ns] 2001-01-31 2001-04-30 2001-07-31
3712+
36023713
See Also
36033714
--------
36043715
numpy.var
@@ -3610,6 +3721,7 @@ def var(
36103721
duck_array_ops.var,
36113722
dim=dim,
36123723
skipna=skipna,
3724+
ddof=ddof,
36133725
keep_attrs=keep_attrs,
36143726
**kwargs,
36153727
)

xarray/util/generate_reductions.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,10 @@ def {method}(
9797
array's dtype. Changed in version 0.17.0: if specified on an integer
9898
array and skipna=True, the result will be a float array."""
9999

100+
_DDOF_DOCSTRING = """ddof : int, default: 0
101+
"Delta Degrees of Freedom": the divisor used in the calculation is ``N - ddof``,
102+
where ``N`` represents the number of elements."""
103+
100104
_KEEP_ATTRS_DOCSTRING = """keep_attrs : bool, optional
101105
If True, ``attrs`` will be copied from the original
102106
object to the new one. If False (default), the new object will be
@@ -132,6 +136,15 @@ def {method}(
132136
133137
>>> {calculation}(skipna=True, min_count=2)""",
134138
)
139+
ddof = extra_kwarg(
140+
docs=_DDOF_DOCSTRING,
141+
kwarg="ddof: int = 0,",
142+
call="ddof=ddof,",
143+
example="""\n
144+
Specify ``ddof=1`` to use the ``N - 1`` divisor, which gives an unbiased estimate of the variance.
145+
146+
>>> {calculation}(skipna=True, ddof=1)""",
147+
)
135148

136149

137150
class Method:
@@ -275,8 +288,8 @@ def generate_code(self, method):
275288
Method("mean", extra_kwargs=(skip_na,), numeric_only=True),
276289
Method("prod", extra_kwargs=(skip_na, min_count), numeric_only=True),
277290
Method("sum", extra_kwargs=(skip_na, min_count), numeric_only=True),
278-
Method("std", extra_kwargs=(skip_na,), numeric_only=True),
279-
Method("var", extra_kwargs=(skip_na,), numeric_only=True),
291+
Method("std", extra_kwargs=(skip_na, ddof), numeric_only=True),
292+
Method("var", extra_kwargs=(skip_na, ddof), numeric_only=True),
280293
Method("median", extra_kwargs=(skip_na,), numeric_only=True),
281294
)
282295

0 commit comments

Comments
 (0)