Skip to content

Commit a8ded73

Browse files
committed
implement int64 rounding function round_nsint64
The old `round_ns` is replaced by `round_nsint64`; `round_nsint64` is based on integer arithmetic, while `round_ns` was based on floating-point numbers. The rounding mode is explicitly defined by the `RoundTo` enum class: - RoundTo.MINUS_INFTY rounds to -∞ (floor) - RoundTo.PLUS_INFTY rounds to +∞ (ceil) - RoundTo.NEAREST_HALF_MINUS_INFTY rounds to the nearest multiple and breaks ties toward -∞ - RoundTo.NEAREST_HALF_PLUS_INFTY rounds to the nearest multiple and breaks ties toward +∞ - RoundTo.NEAREST_HALF_EVEN rounds to the nearest multiple and breaks ties toward the even multiple
1 parent fb25c21 commit a8ded73

File tree

2 files changed

+73
-42
lines changed

2 files changed

+73
-42
lines changed

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 67 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ cimport ccalendar
2222
from conversion import tz_localize_to_utc, normalize_i8_timestamps
2323
from conversion cimport (tz_convert_single, _TSObject,
2424
convert_to_tsobject, convert_datetime_to_tsobject)
25+
import enum
2526
from fields import get_start_end_field, get_date_name_field
2627
from nattype import NaT
2728
from nattype cimport NPY_NAT
@@ -57,50 +58,80 @@ cdef inline object create_timestamp_from_ts(int64_t value,
5758
return ts_base
5859

5960

60-
def round_ns(values, rounder, freq):
61+
@enum.unique
62+
class RoundTo(enum.Enum):
63+
MINUS_INFTY = 0
64+
PLUS_INFTY = 1
65+
NEAREST_HALF_EVEN = 2
66+
NEAREST_HALF_PLUS_INFTY = 3
67+
NEAREST_HALF_MINUS_INFTY = 4
68+
69+
70+
cdef inline _npdivmod(x1, x2):
71+
"""implement divmod for numpy < 1.13"""
72+
return np.floor_divide(x1, x2), np.remainder(x1, x2)
73+
74+
75+
try:
76+
from numpy import divmod as npdivmod
77+
except ImportError:
78+
npdivmod = _npdivmod
79+
80+
81+
cdef inline _floor_int64(v, u):
82+
return v - np.remainder(v, u)
83+
84+
cdef inline _ceil_int64(v, u):
85+
return v + np.remainder(-v, u)
86+
87+
cdef inline _rounddown_int64(v, u):
88+
return _ceil_int64(v - u//2, u)
89+
90+
cdef inline _roundup_int64(v, u):
91+
return _floor_int64(v + u//2, u)
92+
93+
94+
def round_nsint64(values, mode: RoundTo, freq):
6195
"""
62-
Applies rounding function at given frequency
96+
Applies rounding mode at given frequency
6397
6498
Parameters
6599
----------
66100
values : :obj:`ndarray`
67-
rounder : function, eg. 'ceil', 'floor', 'round'
101+
mode : instance of `RoundTo` enumeration
68102
freq : str, obj
69103
70104
Returns
71105
-------
72106
:obj:`ndarray`
73107
"""
108+
109+
if not isinstance(mode, RoundTo):
110+
raise ValueError('mode should be a RoundTo member')
111+
74112
unit = to_offset(freq).nanos
75113

76-
# GH21262 If the Timestamp is multiple of the freq str
77-
# don't apply any rounding
78-
mask = values % unit == 0
79-
if mask.all():
80-
return values
81-
r = values.copy()
82-
83-
if unit < 1000:
84-
# for nano rounding, work with the last 6 digits separately
85-
# due to float precision
86-
buff = 1000000
87-
r[~mask] = (buff * (values[~mask] // buff) +
88-
unit * (rounder((values[~mask] % buff) *
89-
(1 / float(unit)))).astype('i8'))
90-
else:
91-
if unit % 1000 != 0:
92-
msg = 'Precision will be lost using frequency: {}'
93-
warnings.warn(msg.format(freq))
94-
# GH19206
95-
# to deal with round-off when unit is large
96-
if unit >= 1e9:
97-
divisor = 10 ** int(np.log10(unit / 1e7))
98-
else:
99-
divisor = 10
100-
r[~mask] = (unit * rounder((values[~mask] *
101-
(divisor / float(unit))) / divisor)
102-
.astype('i8'))
103-
return r
114+
if mode is RoundTo.MINUS_INFTY:
115+
return _floor_int64(values, unit)
116+
elif mode is RoundTo.PLUS_INFTY:
117+
return _ceil_int64(values, unit)
118+
elif mode is RoundTo.NEAREST_HALF_MINUS_INFTY:
119+
return _rounddown_int64(values, unit)
120+
elif mode is RoundTo.NEAREST_HALF_PLUS_INFTY:
121+
return _roundup_int64(values, unit)
122+
elif mode is RoundTo.NEAREST_HALF_EVEN:
123+
# for odd unit there is no need of a tie break
124+
if unit % 2:
125+
return _rounddown_int64(values, unit)
126+
d, r = npdivmod(values, unit)
127+
mask = np.logical_or(
128+
r > (unit // 2),
129+
np.logical_and(r == (unit // 2), d % 2)
130+
)
131+
d[mask] += 1
132+
return d * unit
133+
134+
raise NotImplementedError(mode)
104135

105136

106137
# This is PITA. Because we inherit from datetime, which has very specific
@@ -656,7 +687,7 @@ class Timestamp(_Timestamp):
656687

657688
return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq)
658689

659-
def _round(self, freq, rounder, ambiguous='raise'):
690+
def _round(self, freq, mode, ambiguous='raise'):
660691
if self.tz is not None:
661692
value = self.tz_localize(None).value
662693
else:
@@ -665,7 +696,7 @@ class Timestamp(_Timestamp):
665696
value = np.array([value], dtype=np.int64)
666697

667698
# Will only ever contain 1 element for timestamp
668-
r = round_ns(value, rounder, freq)[0]
699+
r = round_nsint64(value, mode, freq)[0]
669700
result = Timestamp(r, unit='ns')
670701
if self.tz is not None:
671702
result = result.tz_localize(self.tz, ambiguous=ambiguous)
@@ -694,7 +725,7 @@ class Timestamp(_Timestamp):
694725
------
695726
ValueError if the freq cannot be converted
696727
"""
697-
return self._round(freq, np.round, ambiguous)
728+
return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous)
698729

699730
def floor(self, freq, ambiguous='raise'):
700731
"""
@@ -715,7 +746,7 @@ class Timestamp(_Timestamp):
715746
------
716747
ValueError if the freq cannot be converted
717748
"""
718-
return self._round(freq, np.floor, ambiguous)
749+
return self._round(freq, RoundTo.MINUS_INFTY, ambiguous)
719750

720751
def ceil(self, freq, ambiguous='raise'):
721752
"""
@@ -736,7 +767,7 @@ class Timestamp(_Timestamp):
736767
------
737768
ValueError if the freq cannot be converted
738769
"""
739-
return self._round(freq, np.ceil, ambiguous)
770+
return self._round(freq, RoundTo.PLUS_INFTY, ambiguous)
740771

741772
@property
742773
def tz(self):

pandas/core/indexes/datetimelike.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import numpy as np
1212

1313
from pandas._libs import lib, iNaT, NaT
14-
from pandas._libs.tslibs.timestamps import round_ns
14+
from pandas._libs.tslibs.timestamps import round_nsint64, RoundTo
1515

1616
from pandas.core.dtypes.common import (
1717
ensure_int64,
@@ -180,10 +180,10 @@ class TimelikeOps(object):
180180
"""
181181
)
182182

183-
def _round(self, freq, rounder, ambiguous):
183+
def _round(self, freq, mode, ambiguous):
184184
# round the local times
185185
values = _ensure_datetimelike_to_i8(self)
186-
result = round_ns(values, rounder, freq)
186+
result = round_nsint64(values, mode, freq)
187187
result = self._maybe_mask_results(result, fill_value=NaT)
188188

189189
attribs = self._get_attributes_dict()
@@ -197,15 +197,15 @@ def _round(self, freq, rounder, ambiguous):
197197

198198
@Appender((_round_doc + _round_example).format(op="round"))
199199
def round(self, freq, ambiguous='raise'):
200-
return self._round(freq, np.round, ambiguous)
200+
return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous)
201201

202202
@Appender((_round_doc + _floor_example).format(op="floor"))
203203
def floor(self, freq, ambiguous='raise'):
204-
return self._round(freq, np.floor, ambiguous)
204+
return self._round(freq, RoundTo.MINUS_INFTY, ambiguous)
205205

206206
@Appender((_round_doc + _ceil_example).format(op="ceil"))
207207
def ceil(self, freq, ambiguous='raise'):
208-
return self._round(freq, np.ceil, ambiguous)
208+
return self._round(freq, RoundTo.PLUS_INFTY, ambiguous)
209209

210210

211211
class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin):

0 commit comments

Comments
 (0)