Skip to content

Commit 8fee1b7

Browse files
committed
Merge remote-tracking branch 'upstream/master'
2 parents cb2ab5e + 27de044 commit 8fee1b7

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+849
-396
lines changed

.travis.yml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,6 @@ matrix:
5858
services:
5959
- mysql
6060
- postgresql
61-
62-
- env:
63-
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1"
64-
services:
65-
- mysql
66-
- postgresql
6761
allow_failures:
6862
- arch: arm64
6963
env:

asv_bench/benchmarks/rolling.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,18 @@ class EWMMethods:
9191
def setup(self, constructor, window, dtype, method):
9292
N = 10 ** 5
9393
arr = (100 * np.random.random(N)).astype(dtype)
94+
times = pd.date_range("1900", periods=N, freq="23s")
9495
self.ewm = getattr(pd, constructor)(arr).ewm(halflife=window)
96+
self.ewm_times = getattr(pd, constructor)(arr).ewm(
97+
halflife="1 Day", times=times
98+
)
9599

96100
def time_ewm(self, constructor, window, dtype, method):
97101
getattr(self.ewm, method)()
98102

103+
def time_ewm_times(self, constructor, window, dtype, method):
    """Benchmark ``mean`` on the time-aware EWM object built in ``setup``.

    ``setup`` stores two objects: ``self.ewm`` (created with
    ``halflife=window``) and ``self.ewm_times`` (created with
    ``halflife="1 Day"`` and ``times=times``). This benchmark targets the
    second one.

    The ``constructor``, ``window``, ``dtype`` and ``method`` parameters are
    unused here; they are accepted so the signature matches ``setup`` and
    ``time_ewm`` in the same class.
    """
    # Must use ``ewm_times``: calling ``self.ewm.mean()`` would only repeat
    # the plain EWM measurement and never exercise the ``times`` code path.
    self.ewm_times.mean()
105+
99106

100107
class VariableWindowMethods(Methods):
101108
params = (
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import numpy as np
2+
from pytz import UTC
3+
4+
from pandas._libs.tslibs.tzconversion import tz_convert, tz_localize_to_utc
5+
6+
from .tslib import _sizes, _tzs
7+
8+
9+
class TimeTZConvert:
10+
params = (
11+
_sizes,
12+
[x for x in _tzs if x is not None],
13+
)
14+
param_names = ["size", "tz"]
15+
16+
def setup(self, size, tz):
17+
arr = np.random.randint(0, 10, size=size, dtype="i8")
18+
self.i8data = arr
19+
20+
def time_tz_convert_from_utc(self, size, tz):
21+
# effectively:
22+
# dti = DatetimeIndex(self.i8data, tz=tz)
23+
# dti.tz_localize(None)
24+
tz_convert(self.i8data, UTC, tz)
25+
26+
def time_tz_localize_to_utc(self, size, tz):
27+
# effectively:
28+
# dti = DatetimeIndex(self.i8data)
29+
# dti.tz_localize(tz, ambiguous="NaT", nonexistent="NaT")
30+
tz_localize_to_utc(self.i8data, tz, ambiguous="NaT", nonexistent="NaT")

ci/azure/posix.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ jobs:
3030
LC_ALL: "zh_CN.utf8"
3131
EXTRA_APT: "language-pack-zh-hans"
3232

33+
py36_slow:
34+
ENV_FILE: ci/deps/azure-36-slow.yaml
35+
CONDA_PY: "36"
36+
PATTERN: "slow"
37+
3338
py36_locale:
3439
ENV_FILE: ci/deps/azure-36-locale.yaml
3540
CONDA_PY: "36"
File renamed without changes.

ci/setup_env.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,5 +166,4 @@ if [[ -n ${SQL:0} ]]; then
166166
else
167167
echo "not using dbs on non-linux Travis builds or Azure Pipelines"
168168
fi
169-
170169
echo "done"

doc/source/user_guide/computation.rst

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,6 +1095,25 @@ and **alpha** to the EW functions:
10951095
one half.
10961096
* **Alpha** specifies the smoothing factor directly.
10971097

1098+
.. versionadded:: 1.1.0
1099+
1100+
You can also pass ``halflife`` as a timedelta-convertible value (for example ``'4 days'``): the amount of
1101+
time it takes for an observation to decay to half its value. Use this form together with a sequence
1102+
of ``times``.
1103+
1104+
.. ipython:: python
1105+
1106+
df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})
1107+
df
1108+
times = ['2020-01-01', '2020-01-03', '2020-01-10', '2020-01-15', '2020-01-17']
1109+
df.ewm(halflife='4 days', times=pd.DatetimeIndex(times)).mean()
1110+
1111+
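The following formula is used to compute the exponentially weighted mean with an input vector of times, where :math:`\lambda` is the ``halflife`` and :math:`t_i` is the time of observation :math:`i`: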
1112+
1113+
.. math::
1114+
1115+
y_t = \frac{\sum_{i=0}^t 0.5^\frac{t_{t} - t_{i}}{\lambda} x_{i}}{\sum_{i=0}^t 0.5^\frac{t_{t} - t_{i}}{\lambda}},
1116+
10981117
Here is an example for a univariate time series:
10991118

11001119
.. ipython:: python

doc/source/whatsnew/v1.1.0.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,7 @@ Other enhancements
329329
- :meth:`DataFrame.to_excel` can now also write OpenOffice spreadsheet (.ods) files (:issue:`27222`)
330330
- :meth:`~Series.explode` now accepts ``ignore_index`` to reset the index, similarly to :meth:`pd.concat` or :meth:`DataFrame.sort_values` (:issue:`34932`).
331331
- :meth:`read_csv` now accepts string values like "0", "0.0", "1", "1.0" as convertible to the nullable boolean dtype (:issue:`34859`)
332+
- :class:`pandas.core.window.ExponentialMovingWindow` now supports a ``times`` argument that allows ``mean`` to be calculated with observations spaced by the timestamps in ``times`` (:issue:`34839`)
332333

333334
.. ---------------------------------------------------------------------------
334335
@@ -788,6 +789,7 @@ Deprecations
788789
- :meth:`Categorical.to_dense` is deprecated and will be removed in a future version, use ``np.asarray(cat)`` instead (:issue:`32639`)
789790
- The ``fastpath`` keyword in the ``SingleBlockManager`` constructor is deprecated and will be removed in a future version (:issue:`33092`)
790791
- Providing ``suffixes`` as a ``set`` in :func:`pandas.merge` is deprecated. Provide a tuple instead (:issue:`33740`, :issue:`34741`).
792+
- Indexing a series with a multi-dimensional indexer like ``[:, None]`` to return an ndarray now raises a ``FutureWarning``. Convert to a NumPy array before indexing instead (:issue:`27837`)
791793
- :meth:`Index.is_mixed` is deprecated and will be removed in a future version, check ``index.inferred_type`` directly instead (:issue:`32922`)
792794

793795
- Passing any arguments but the first one to :func:`read_html` as
@@ -976,6 +978,7 @@ Indexing
976978
- Bug in :meth:`DataFrame.loc` with dictionary of values changes columns with dtype of ``int`` to ``float`` (:issue:`34573`)
977979
- Bug in :meth:`Series.loc` when used with a :class:`MultiIndex` would raise an IndexingError when accessing a None value (:issue:`34318`)
978980
- Bug in :meth:`DataFrame.reset_index` and :meth:`Series.reset_index` would not preserve data types on an empty :class:`DataFrame` or :class:`Series` with a :class:`MultiIndex` (:issue:`19602`)
981+
- Bug in :class:`Series` and :class:`DataFrame` indexing with a ``time`` key on a :class:`DatetimeIndex` with ``NaT`` entries (:issue:`35114`)
979982

980983
Missing
981984
^^^^^^^
@@ -1121,6 +1124,7 @@ Sparse
11211124
- Bug where :class:`DataFrame` containing :class:`SparseArray` filled with ``NaN`` when indexed by a list-like (:issue:`27781`, :issue:`29563`)
11221125
- The repr of :class:`SparseDtype` now includes the repr of its ``fill_value`` attribute. Previously it used ``fill_value``'s string representation (:issue:`34352`)
11231126
- Bug where empty :class:`DataFrame` could not be cast to :class:`SparseDtype` (:issue:`33113`)
1127+
- Bug in :class:`arrays.SparseArray` where the incorrect type was returned when indexing a sparse :class:`DataFrame` with an iterable (:issue:`34526`, :issue:`34540`)
11241128

11251129
ExtensionArray
11261130
^^^^^^^^^^^^^^

environment.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,14 @@ dependencies:
2020
- flake8<3.8.0 # temporary pin, GH#34150
2121
- flake8-comprehensions>=3.1.0 # used by flake8, linting of unnecessary comprehensions
2222
- flake8-rst>=0.6.0,<=0.7.0 # linting of code blocks in rst files
23-
- isort # check that imports are in the right order
23+
- isort=4.3.21 # check that imports are in the right order
2424
- mypy=0.730
2525
- pycodestyle # used by flake8
2626

2727
# documentation
2828
- gitpython # obtain contributors from git for whatsnew
2929
- gitdb2=2.0.6 # GH-32060
30-
- sphinx
30+
- sphinx<=3.1.1
3131

3232
# documentation (jupyter notebooks)
3333
- nbconvert>=5.4.1

pandas/_libs/tslib.pyx

Lines changed: 42 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ PyDateTime_IMPORT
1515

1616

1717
cimport numpy as cnp
18-
from numpy cimport float64_t, int64_t, ndarray, uint8_t
18+
from numpy cimport float64_t, int64_t, ndarray, uint8_t, intp_t
1919
import numpy as np
2020
cnp.import_array()
2121

@@ -46,7 +46,6 @@ from pandas._libs.tslibs.timezones cimport (
4646
get_dst_info,
4747
is_utc,
4848
is_tzlocal,
49-
utc_pytz as UTC,
5049
)
5150
from pandas._libs.tslibs.conversion cimport (
5251
_TSObject,
@@ -67,8 +66,8 @@ from pandas._libs.tslibs.timestamps cimport create_timestamp_from_ts, _Timestamp
6766
from pandas._libs.tslibs.timestamps import Timestamp
6867

6968
from pandas._libs.tslibs.tzconversion cimport (
70-
tz_convert_single,
7169
tz_convert_utc_to_tzlocal,
70+
tz_localize_to_utc_single,
7271
)
7372

7473
# Note: this is the only non-tslibs intra-pandas dependency here
@@ -157,13 +156,15 @@ def ints_to_pydatetime(
157156
Py_ssize_t i, n = len(arr)
158157
ndarray[int64_t] trans
159158
int64_t[:] deltas
160-
Py_ssize_t pos
159+
intp_t[:] pos
161160
npy_datetimestruct dts
162161
object dt, new_tz
163162
str typ
164-
int64_t value, delta, local_value
163+
int64_t value, local_value, delta = NPY_NAT # dummy for delta
165164
ndarray[object] result = np.empty(n, dtype=object)
166165
object (*func_create)(int64_t, npy_datetimestruct, tzinfo, object, bint)
166+
bint use_utc = False, use_tzlocal = False, use_fixed = False
167+
bint use_pytz = False
167168

168169
if box == "date":
169170
assert (tz is None), "tz should be None when converting to date"
@@ -184,66 +185,45 @@ def ints_to_pydatetime(
184185
)
185186

186187
if is_utc(tz) or tz is None:
187-
for i in range(n):
188-
value = arr[i]
189-
if value == NPY_NAT:
190-
result[i] = <object>NaT
191-
else:
192-
dt64_to_dtstruct(value, &dts)
193-
result[i] = func_create(value, dts, tz, freq, fold)
188+
use_utc = True
194189
elif is_tzlocal(tz):
195-
for i in range(n):
196-
value = arr[i]
197-
if value == NPY_NAT:
198-
result[i] = <object>NaT
199-
else:
200-
# Python datetime objects do not support nanosecond
201-
# resolution (yet, PEP 564). Need to compute new value
202-
# using the i8 representation.
203-
local_value = tz_convert_utc_to_tzlocal(value, tz)
204-
dt64_to_dtstruct(local_value, &dts)
205-
result[i] = func_create(value, dts, tz, freq, fold)
190+
use_tzlocal = True
206191
else:
207192
trans, deltas, typ = get_dst_info(tz)
208-
209-
if typ not in ['pytz', 'dateutil']:
193+
if typ not in ["pytz", "dateutil"]:
210194
# static/fixed; in this case we know that len(delta) == 1
195+
use_fixed = True
211196
delta = deltas[0]
212-
for i in range(n):
213-
value = arr[i]
214-
if value == NPY_NAT:
215-
result[i] = <object>NaT
216-
else:
217-
# Adjust datetime64 timestamp, recompute datetimestruct
218-
dt64_to_dtstruct(value + delta, &dts)
219-
result[i] = func_create(value, dts, tz, freq, fold)
197+
else:
198+
pos = trans.searchsorted(arr, side="right") - 1
199+
use_pytz = typ == "pytz"
220200

221-
elif typ == 'dateutil':
222-
# no zone-name change for dateutil tzs - dst etc
223-
# represented in single object.
224-
for i in range(n):
225-
value = arr[i]
226-
if value == NPY_NAT:
227-
result[i] = <object>NaT
228-
else:
229-
# Adjust datetime64 timestamp, recompute datetimestruct
230-
pos = trans.searchsorted(value, side='right') - 1
231-
dt64_to_dtstruct(value + deltas[pos], &dts)
232-
result[i] = func_create(value, dts, tz, freq, fold)
201+
for i in range(n):
202+
new_tz = tz
203+
value = arr[i]
204+
205+
if value == NPY_NAT:
206+
result[i] = <object>NaT
233207
else:
234-
# pytz
235-
for i in range(n):
236-
value = arr[i]
237-
if value == NPY_NAT:
238-
result[i] = <object>NaT
239-
else:
240-
# Adjust datetime64 timestamp, recompute datetimestruct
241-
pos = trans.searchsorted(value, side='right') - 1
242-
# find right representation of dst etc in pytz timezone
243-
new_tz = tz._tzinfos[tz._transition_info[pos]]
208+
if use_utc:
209+
local_value = value
210+
elif use_tzlocal:
211+
local_value = tz_convert_utc_to_tzlocal(value, tz)
212+
elif use_fixed:
213+
local_value = value + delta
214+
elif not use_pytz:
215+
# i.e. dateutil
216+
# no zone-name change for dateutil tzs - dst etc
217+
# represented in single object.
218+
local_value = value + deltas[pos[i]]
219+
else:
220+
# pytz
221+
# find right representation of dst etc in pytz timezone
222+
new_tz = tz._tzinfos[tz._transition_info[pos[i]]]
223+
local_value = value + deltas[pos[i]]
244224

245-
dt64_to_dtstruct(value + deltas[pos], &dts)
246-
result[i] = func_create(value, dts, new_tz, freq, fold)
225+
dt64_to_dtstruct(local_value, &dts)
226+
result[i] = func_create(value, dts, new_tz, freq, fold)
247227

248228
return result
249229

@@ -269,7 +249,7 @@ def _test_parse_iso8601(ts: str):
269249
check_dts_bounds(&obj.dts)
270250
if out_local == 1:
271251
obj.tzinfo = pytz.FixedOffset(out_tzoffset)
272-
obj.value = tz_convert_single(obj.value, obj.tzinfo, UTC)
252+
obj.value = tz_localize_to_utc_single(obj.value, obj.tzinfo)
273253
return Timestamp(obj.value, tz=obj.tzinfo)
274254
else:
275255
return Timestamp(obj.value)
@@ -363,8 +343,8 @@ def format_array_from_datetime(
363343

364344
def array_with_unit_to_datetime(
365345
ndarray values,
366-
object unit,
367-
str errors='coerce'
346+
str unit,
347+
str errors="coerce"
368348
):
369349
"""
370350
Convert the ndarray to datetime according to the time unit.
@@ -384,7 +364,7 @@ def array_with_unit_to_datetime(
384364
----------
385365
values : ndarray of object
386366
Date-like objects to convert.
387-
unit : object
367+
unit : str
388368
Time unit to use during conversion.
389369
errors : str, default 'raise'
390370
Error behavior when parsing.
@@ -727,7 +707,7 @@ cpdef array_to_datetime(
727707
# dateutil.tz.tzoffset objects
728708
out_tzoffset_vals.add(out_tzoffset * 60.)
729709
tz = pytz.FixedOffset(out_tzoffset)
730-
value = tz_convert_single(value, tz, UTC)
710+
value = tz_localize_to_utc_single(value, tz)
731711
out_local = 0
732712
out_tzoffset = 0
733713
else:

0 commit comments

Comments
 (0)