diff --git a/doc/api.rst b/doc/api.rst index 927c0aa072c..89fee10506d 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -555,6 +555,13 @@ Custom Indexes CFTimeIndex +Creating custom indexes +----------------------- +.. autosummary:: + :toctree: generated/ + + cftime_range + Plotting ======== diff --git a/doc/time-series.rst b/doc/time-series.rst index a7ce9226d4d..d99c3218d18 100644 --- a/doc/time-series.rst +++ b/doc/time-series.rst @@ -258,7 +258,16 @@ coordinate with a no-leap calendar within a context manager setting the calendar, its times will be decoded into ``cftime.datetime`` objects, regardless of whether or not they can be represented using ``np.datetime64[ns]`` objects. - + +xarray also includes a :py:func:`cftime_range` function, which enables creating a +``CFTimeIndex`` with regularly-spaced dates. For instance, we can create the +same dates and DataArray we created above using: + +.. ipython:: python + + dates = xr.cftime_range(start='0001', periods=24, freq='MS', calendar='noleap') + da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], name='foo') + For data indexed by a ``CFTimeIndex`` xarray currently supports: - `Partial datetime string indexing`_ using strictly `ISO 8601-format`_ partial diff --git a/doc/whats-new.rst b/doc/whats-new.rst index dd9eb9e48fe..3d0630d3a0a 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -49,6 +49,9 @@ Enhancements now displayed as `a b ... y z` rather than `a b c d ...`. (:issue:`1186`) By `Seth P <https://github.com/seth-p>`_. +- A new CFTimeIndex-enabled :py:func:`cftime_range` function for use in + generating dates from standard or non-standard calendars. By `Spencer Clark + <https://github.com/spencerkclark>`_. - When interpolating over a ``datetime64`` axis, you can now provide a datetime string instead of a ``datetime64`` object. E.g. 
``da.interp(time='1991-02-01')`` (:issue:`2284`) diff --git a/xarray/__init__.py b/xarray/__init__.py index 7cc7811b783..e2d24e6c294 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -26,6 +26,7 @@ from .conventions import decode_cf, SerializationWarning +from .coding.cftime_offsets import cftime_range from .coding.cftimeindex import CFTimeIndex from .util.print_versions import show_versions diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py new file mode 100644 index 00000000000..3fbb44f4ed3 --- /dev/null +++ b/xarray/coding/cftime_offsets.py @@ -0,0 +1,736 @@ +"""Time offset classes for use with cftime.datetime objects""" +# The offset classes and mechanisms for generating time ranges defined in +# this module were copied/adapted from those defined in pandas. See in +# particular the objects and methods defined in pandas.tseries.offsets +# and pandas.core.indexes.datetimes. + +# For reference, here is a copy of the pandas copyright notice: + +# (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team +# All rights reserved. + +# Copyright (c) 2008-2011 AQR Capital Management, LLC +# All rights reserved. + +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: + +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. + +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. + +# * Neither the name of the copyright holder nor the names of any +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+ +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import re + +from datetime import timedelta +from functools import partial + +import numpy as np + +from .cftimeindex import _parse_iso8601_with_reso, CFTimeIndex +from .times import format_cftime_datetime +from ..core.pycompat import basestring + + +def get_date_type(calendar): + """Return the cftime date type for a given calendar name.""" + try: + import cftime + except ImportError: + raise ImportError( + 'cftime is required for dates with non-standard calendars') + else: + calendars = { + 'noleap': cftime.DatetimeNoLeap, + '360_day': cftime.Datetime360Day, + '365_day': cftime.DatetimeNoLeap, + '366_day': cftime.DatetimeAllLeap, + 'gregorian': cftime.DatetimeGregorian, + 'proleptic_gregorian': cftime.DatetimeProlepticGregorian, + 'julian': cftime.DatetimeJulian, + 'all_leap': cftime.DatetimeAllLeap, + 'standard': cftime.DatetimeProlepticGregorian + } + return calendars[calendar] + + +class BaseCFTimeOffset(object): + _freq = None + + def __init__(self, n=1): + if not isinstance(n, int): + raise TypeError( + "The provided multiple 'n' must be an integer. 
" + "Instead a value of type {!r} was provided.".format(type(n))) + self.n = n + + def rule_code(self): + return self._freq + + def __eq__(self, other): + return self.n == other.n and self.rule_code() == other.rule_code() + + def __ne__(self, other): + return not self == other + + def __add__(self, other): + return self.__apply__(other) + + def __sub__(self, other): + import cftime + + if isinstance(other, cftime.datetime): + raise TypeError('Cannot subtract a cftime.datetime ' + 'from a time offset.') + elif type(other) == type(self): + return type(self)(self.n - other.n) + else: + return NotImplemented + + def __mul__(self, other): + return type(self)(n=other * self.n) + + def __neg__(self): + return self * -1 + + def __rmul__(self, other): + return self.__mul__(other) + + def __radd__(self, other): + return self.__add__(other) + + def __rsub__(self, other): + if isinstance(other, BaseCFTimeOffset) and type(self) != type(other): + raise TypeError('Cannot subtract cftime offsets of differing ' + 'types') + return -self + other + + def __apply__(self): + return NotImplemented + + def onOffset(self, date): + """Check if the given date is in the set of possible dates created + using a length-one version of this offset class.""" + test_date = (self + date) - self + return date == test_date + + def rollforward(self, date): + if self.onOffset(date): + return date + else: + return date + type(self)() + + def rollback(self, date): + if self.onOffset(date): + return date + else: + return date - type(self)() + + def __str__(self): + return '<{}: n={}>'.format(type(self).__name__, self.n) + + def __repr__(self): + return str(self) + + +def _days_in_month(date): + """The number of days in the month of the given date""" + if date.month == 12: + reference = type(date)(date.year + 1, 1, 1) + else: + reference = type(date)(date.year, date.month + 1, 1) + return (reference - timedelta(days=1)).day + + +def _adjust_n_months(other_day, n, reference_day): + """Adjust the number of 
times a monthly offset is applied based + on the day of a given date, and the reference day provided. + """ + if n > 0 and other_day < reference_day: + n = n - 1 + elif n <= 0 and other_day > reference_day: + n = n + 1 + return n + + +def _adjust_n_years(other, n, month, reference_day): + """Adjust the number of times an annual offset is applied based on + another date, and the reference day provided""" + if n > 0: + if other.month < month or (other.month == month and + other.day < reference_day): + n -= 1 + else: + if other.month > month or (other.month == month and + other.day > reference_day): + n += 1 + return n + + +def _shift_months(date, months, day_option='start'): + """Shift the date to a month start or end a given number of months away. + """ + delta_year = (date.month + months) // 12 + month = (date.month + months) % 12 + + if month == 0: + month = 12 + delta_year = delta_year - 1 + year = date.year + delta_year + + if day_option == 'start': + day = 1 + elif day_option == 'end': + reference = type(date)(year, month, 1) + day = _days_in_month(reference) + else: + raise ValueError(day_option) + return date.replace(year=year, month=month, day=day) + + +class MonthBegin(BaseCFTimeOffset): + _freq = 'MS' + + def __apply__(self, other): + n = _adjust_n_months(other.day, self.n, 1) + return _shift_months(other, n, 'start') + + def onOffset(self, date): + """Check if the given date is in the set of possible dates created + using a length-one version of this offset class.""" + return date.day == 1 + + +class MonthEnd(BaseCFTimeOffset): + _freq = 'M' + + def __apply__(self, other): + n = _adjust_n_months(other.day, self.n, _days_in_month(other)) + return _shift_months(other, n, 'end') + + def onOffset(self, date): + """Check if the given date is in the set of possible dates created + using a length-one version of this offset class.""" + return date.day == _days_in_month(date) + + +_MONTH_ABBREVIATIONS = { + 1: 'JAN', + 2: 'FEB', + 3: 'MAR', + 4: 'APR', + 5: 'MAY', 
+ 6: 'JUN', + 7: 'JUL', + 8: 'AUG', + 9: 'SEP', + 10: 'OCT', + 11: 'NOV', + 12: 'DEC' +} + + +class YearOffset(BaseCFTimeOffset): + _freq = None + _day_option = None + _default_month = None + + def __init__(self, n=1, month=None): + BaseCFTimeOffset.__init__(self, n) + if month is None: + self.month = self._default_month + else: + self.month = month + if not isinstance(self.month, int): + raise TypeError("'self.month' must be an integer value between 1 " + "and 12. Instead, it was set to a value of " + "{!r}".format(self.month)) + elif not (1 <= self.month <= 12): + raise ValueError("'self.month' must be an integer value between 1 " + "and 12. Instead, it was set to a value of " + "{!r}".format(self.month)) + + def __apply__(self, other): + if self._day_option == 'start': + reference_day = 1 + elif self._day_option == 'end': + reference_day = _days_in_month(other) + else: + raise ValueError(self._day_option) + years = _adjust_n_years(other, self.n, self.month, reference_day) + months = years * 12 + (self.month - other.month) + return _shift_months(other, months, self._day_option) + + def __sub__(self, other): + import cftime + + if isinstance(other, cftime.datetime): + raise TypeError('Cannot subtract cftime.datetime from offset.') + elif type(other) == type(self) and other.month == self.month: + return type(self)(self.n - other.n, month=self.month) + else: + return NotImplemented + + def __mul__(self, other): + return type(self)(n=other * self.n, month=self.month) + + def rule_code(self): + return '{}-{}'.format(self._freq, _MONTH_ABBREVIATIONS[self.month]) + + def __str__(self): + return '<{}: n={}, month={}>'.format( + type(self).__name__, self.n, self.month) + + +class YearBegin(YearOffset): + _freq = 'AS' + _day_option = 'start' + _default_month = 1 + + def onOffset(self, date): + """Check if the given date is in the set of possible dates created + using a length-one version of this offset class.""" + return date.day == 1 and date.month == self.month + + def 
rollforward(self, date): + """Roll date forward to nearest start of year""" + if self.onOffset(date): + return date + else: + return date + YearBegin(month=self.month) + + def rollback(self, date): + """Roll date backward to nearest start of year""" + if self.onOffset(date): + return date + else: + return date - YearBegin(month=self.month) + + +class YearEnd(YearOffset): + _freq = 'A' + _day_option = 'end' + _default_month = 12 + + def onOffset(self, date): + """Check if the given date is in the set of possible dates created + using a length-one version of this offset class.""" + return date.day == _days_in_month(date) and date.month == self.month + + def rollforward(self, date): + """Roll date forward to nearest end of year""" + if self.onOffset(date): + return date + else: + return date + YearEnd(month=self.month) + + def rollback(self, date): + """Roll date backward to nearest end of year""" + if self.onOffset(date): + return date + else: + return date - YearEnd(month=self.month) + + +class Day(BaseCFTimeOffset): + _freq = 'D' + + def __apply__(self, other): + return other + timedelta(days=self.n) + + +class Hour(BaseCFTimeOffset): + _freq = 'H' + + def __apply__(self, other): + return other + timedelta(hours=self.n) + + +class Minute(BaseCFTimeOffset): + _freq = 'T' + + def __apply__(self, other): + return other + timedelta(minutes=self.n) + + +class Second(BaseCFTimeOffset): + _freq = 'S' + + def __apply__(self, other): + return other + timedelta(seconds=self.n) + + +_FREQUENCIES = { + 'A': YearEnd, + 'AS': YearBegin, + 'Y': YearEnd, + 'YS': YearBegin, + 'M': MonthEnd, + 'MS': MonthBegin, + 'D': Day, + 'H': Hour, + 'T': Minute, + 'min': Minute, + 'S': Second, + 'AS-JAN': partial(YearBegin, month=1), + 'AS-FEB': partial(YearBegin, month=2), + 'AS-MAR': partial(YearBegin, month=3), + 'AS-APR': partial(YearBegin, month=4), + 'AS-MAY': partial(YearBegin, month=5), + 'AS-JUN': partial(YearBegin, month=6), + 'AS-JUL': partial(YearBegin, month=7), + 'AS-AUG': 
partial(YearBegin, month=8), + 'AS-SEP': partial(YearBegin, month=9), + 'AS-OCT': partial(YearBegin, month=10), + 'AS-NOV': partial(YearBegin, month=11), + 'AS-DEC': partial(YearBegin, month=12), + 'A-JAN': partial(YearEnd, month=1), + 'A-FEB': partial(YearEnd, month=2), + 'A-MAR': partial(YearEnd, month=3), + 'A-APR': partial(YearEnd, month=4), + 'A-MAY': partial(YearEnd, month=5), + 'A-JUN': partial(YearEnd, month=6), + 'A-JUL': partial(YearEnd, month=7), + 'A-AUG': partial(YearEnd, month=8), + 'A-SEP': partial(YearEnd, month=9), + 'A-OCT': partial(YearEnd, month=10), + 'A-NOV': partial(YearEnd, month=11), + 'A-DEC': partial(YearEnd, month=12) +} + + +_FREQUENCY_CONDITION = '|'.join(_FREQUENCIES.keys()) +_PATTERN = '^((?P<multiple>\d+)|())(?P<freq>({0}))$'.format( + _FREQUENCY_CONDITION) + + +def to_offset(freq): + """Convert a frequency string to the appropriate subclass of + BaseCFTimeOffset.""" + if isinstance(freq, BaseCFTimeOffset): + return freq + else: + try: + freq_data = re.match(_PATTERN, freq).groupdict() + except AttributeError: + raise ValueError('Invalid frequency string provided') + + freq = freq_data['freq'] + multiples = freq_data['multiple'] + if multiples is None: + multiples = 1 + else: + multiples = int(multiples) + + return _FREQUENCIES[freq](n=multiples) + + +def to_cftime_datetime(date_str_or_date, calendar=None): + import cftime + + if isinstance(date_str_or_date, basestring): + if calendar is None: + raise ValueError( + 'If converting a string to a cftime.datetime object, ' + 'a calendar type must be provided') + date, _ = _parse_iso8601_with_reso(get_date_type(calendar), + date_str_or_date) + return date + elif isinstance(date_str_or_date, cftime.datetime): + return date_str_or_date + else: + raise TypeError("date_str_or_date must be a string or a " + 'subclass of cftime.datetime. 
Instead got ' + '{!r}.'.format(date_str_or_date)) + + +def normalize_date(date): + """Round datetime down to midnight.""" + return date.replace(hour=0, minute=0, second=0, microsecond=0) + + +def _maybe_normalize_date(date, normalize): + """Round datetime down to midnight if normalize is True.""" + if normalize: + return normalize_date(date) + else: + return date + + +def _generate_linear_range(start, end, periods): + """Generate an equally-spaced sequence of cftime.datetime objects between + and including two dates (whose length equals the number of periods).""" + import cftime + + total_seconds = (end - start).total_seconds() + values = np.linspace(0., total_seconds, periods, endpoint=True) + units = 'seconds since {}'.format(format_cftime_datetime(start)) + calendar = start.calendar + return cftime.num2date(values, units=units, calendar=calendar, + only_use_cftime_datetimes=True) + + +def _generate_range(start, end, periods, offset): + """Generate a regular range of cftime.datetime objects with a + given time offset. + + Adapted from pandas.tseries.offsets.generate_range. 
+ + Parameters + ---------- + start : cftime.datetime, or None + Start of range + end : cftime.datetime, or None + End of range + periods : int, or None + Number of elements in the sequence + offset : BaseCFTimeOffset + An offset class designed for working with cftime.datetime objects + + Returns + ------- + A generator object + """ + if start: + start = offset.rollforward(start) + + if end: + end = offset.rollback(end) + + if periods is None and end < start: + end = None + periods = 0 + + if end is None: + end = start + (periods - 1) * offset + + if start is None: + start = end - (periods - 1) * offset + + current = start + if offset.n >= 0: + while current <= end: + yield current + + next_date = current + offset + if next_date <= current: + raise ValueError('Offset {offset} did not increment date' + .format(offset=offset)) + current = next_date + else: + while current >= end: + yield current + + next_date = current + offset + if next_date >= current: + raise ValueError('Offset {offset} did not decrement date' + .format(offset=offset)) + current = next_date + + +def _count_not_none(*args): + """Compute the number of non-None arguments.""" + return sum([arg is not None for arg in args]) + + +def cftime_range(start=None, end=None, periods=None, freq='D', + tz=None, normalize=False, name=None, closed=None, + calendar='standard'): + """Return a fixed frequency CFTimeIndex. + + Parameters + ---------- + start : str or cftime.datetime, optional + Left bound for generating dates. + end : str or cftime.datetime, optional + Right bound for generating dates. + periods : integer, optional + Number of periods to generate. + freq : str, default 'D', BaseCFTimeOffset, or None + Frequency strings can have multiples, e.g. '5H'. + normalize : bool, default False + Normalize start/end dates to midnight before generating date range. 
+ name : str, default None + Name of the resulting index + closed : {None, 'left', 'right'}, optional + Make the interval closed with respect to the given frequency to the + 'left', 'right', or both sides (None, the default). + calendar : str + Calendar type for the datetimes (default 'standard'). + + Returns + ------- + CFTimeIndex + + Notes + ----- + + This function is an analog of ``pandas.date_range`` for use in generating + sequences of ``cftime.datetime`` objects. It supports most of the + features of ``pandas.date_range`` (e.g. specifying how the index is + ``closed`` on either side, or whether or not to ``normalize`` the start and + end bounds); however, there are some notable exceptions: + + - You cannot specify a ``tz`` (time zone) argument. + - Start or end dates specified as partial-datetime strings must use the + `ISO-8601 format <https://en.wikipedia.org/wiki/ISO_8601>`_. + - It supports many, but not all, frequencies supported by + ``pandas.date_range``. For example it does not currently support any of + the business-related, semi-monthly, or sub-second frequencies. + - Compound sub-monthly frequencies are not supported, e.g. '1H1min', as + these can easily be written in terms of the finest common resolution, + e.g. '61min'. + + Valid simple frequency strings for use with ``cftime``-calendars include + any multiples of the following. 
+ + +--------+-----------------------+ + | Alias | Description | + +========+=======================+ + | A, Y | Year-end frequency | + +--------+-----------------------+ + | AS, YS | Year-start frequency | + +--------+-----------------------+ + | M | Month-end frequency | + +--------+-----------------------+ + | MS | Month-start frequency | + +--------+-----------------------+ + | D | Day frequency | + +--------+-----------------------+ + | H | Hour frequency | + +--------+-----------------------+ + | T, min | Minute frequency | + +--------+-----------------------+ + | S | Second frequency | + +--------+-----------------------+ + + Any multiples of the following anchored offsets are also supported. + + +----------+-------------------------------------------------------------------+ + | Alias | Description | + +==========+===================================================================+ + | A(S)-JAN | Annual frequency, anchored at the end (or beginning) of January | + +----------+-------------------------------------------------------------------+ + | A(S)-FEB | Annual frequency, anchored at the end (or beginning) of February | + +----------+-------------------------------------------------------------------+ + | A(S)-MAR | Annual frequency, anchored at the end (or beginning) of March | + +----------+-------------------------------------------------------------------+ + | A(S)-APR | Annual frequency, anchored at the end (or beginning) of April | + +----------+-------------------------------------------------------------------+ + | A(S)-MAY | Annual frequency, anchored at the end (or beginning) of May | + +----------+-------------------------------------------------------------------+ + | A(S)-JUN | Annual frequency, anchored at the end (or beginning) of June | + +----------+-------------------------------------------------------------------+ + | A(S)-JUL | Annual frequency, anchored at the end (or beginning) of July | + 
+----------+-------------------------------------------------------------------+ + | A(S)-AUG | Annual frequency, anchored at the end (or beginning) of August | + +----------+-------------------------------------------------------------------+ + | A(S)-SEP | Annual frequency, anchored at the end (or beginning) of September | + +----------+-------------------------------------------------------------------+ + | A(S)-OCT | Annual frequency, anchored at the end (or beginning) of October | + +----------+-------------------------------------------------------------------+ + | A(S)-NOV | Annual frequency, anchored at the end (or beginning) of November | + +----------+-------------------------------------------------------------------+ + | A(S)-DEC | Annual frequency, anchored at the end (or beginning) of December | + +----------+-------------------------------------------------------------------+ + + Finally, the following calendar aliases are supported. + + +--------------------------------+---------------------------------------+ + | Alias | Date type | + +================================+=======================================+ + | standard, proleptic_gregorian | ``cftime.DatetimeProlepticGregorian`` | + +--------------------------------+---------------------------------------+ + | gregorian | ``cftime.DatetimeGregorian`` | + +--------------------------------+---------------------------------------+ + | noleap, 365_day | ``cftime.DatetimeNoLeap`` | + +--------------------------------+---------------------------------------+ + | all_leap, 366_day | ``cftime.DatetimeAllLeap`` | + +--------------------------------+---------------------------------------+ + | 360_day | ``cftime.Datetime360Day`` | + +--------------------------------+---------------------------------------+ + | julian | ``cftime.DatetimeJulian`` | + +--------------------------------+---------------------------------------+ + + Examples + -------- + + This function returns a ``CFTimeIndex``, populated with 
``cftime.datetime`` + objects associated with the specified calendar type, e.g. + + >>> xr.cftime_range(start='2000', periods=6, freq='2MS', calendar='noleap') + CFTimeIndex([2000-01-01 00:00:00, 2000-03-01 00:00:00, 2000-05-01 00:00:00, + 2000-07-01 00:00:00, 2000-09-01 00:00:00, 2000-11-01 00:00:00], + dtype='object') + + As in the standard pandas function, three of the ``start``, ``end``, + ``periods``, or ``freq`` arguments must be specified at a given time, with + the other set to ``None``. See the `pandas documentation + <https://pandas.pydata.org/pandas-docs/stable/generated/pandas.date_range.html#pandas.date_range>`_ + for more examples of the behavior of ``date_range`` with each of the + parameters. + + See Also + -------- + pandas.date_range + """ # noqa: E501 + # Adapted from pandas.core.indexes.datetimes._generate_range. + if _count_not_none(start, end, periods, freq) != 3: + raise ValueError( + "Of the arguments 'start', 'end', 'periods', and 'freq', three " + "must be specified at a time.") + + if start is not None: + start = to_cftime_datetime(start, calendar) + start = _maybe_normalize_date(start, normalize) + if end is not None: + end = to_cftime_datetime(end, calendar) + end = _maybe_normalize_date(end, normalize) + + if freq is None: + dates = _generate_linear_range(start, end, periods) + else: + offset = to_offset(freq) + dates = np.array(list(_generate_range(start, end, periods, offset))) + + left_closed = False + right_closed = False + + if closed is None: + left_closed = True + right_closed = True + elif closed == 'left': + left_closed = True + elif closed == 'right': + right_closed = True + else: + raise ValueError("Closed must be either 'left', 'right' or None") + + if (not left_closed and len(dates) and + start is not None and dates[0] == start): + dates = dates[1:] + if (not right_closed and len(dates) and + end is not None and dates[-1] == end): + dates = dates[:-1] + + return CFTimeIndex(dates, name=name) diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index eb8cae2f398..ea2bcbc5858 100644 --- 
a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -1,3 +1,44 @@ +"""DatetimeIndex analog for cftime.datetime objects""" +# The pandas.Index subclass defined here was copied and adapted for +# use with cftime.datetime objects based on the source code defining +# pandas.DatetimeIndex. + +# For reference, here is a copy of the pandas copyright notice: + +# (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team +# All rights reserved. + +# Copyright (c) 2008-2011 AQR Capital Management, LLC +# All rights reserved. + +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: + +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. + +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. + +# * Neither the name of the copyright holder nor the names of any +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. + +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + from __future__ import absolute_import import re from datetime import timedelta @@ -116,28 +157,43 @@ def f(self): def get_date_type(self): - return type(self._data[0]) + if self.data: + return type(self._data[0]) + else: + return None def assert_all_valid_date_type(data): import cftime - sample = data[0] - date_type = type(sample) - if not isinstance(sample, cftime.datetime): - raise TypeError( - 'CFTimeIndex requires cftime.datetime ' - 'objects. Got object of {}.'.format(date_type)) - if not all(isinstance(value, date_type) for value in data): - raise TypeError( - 'CFTimeIndex requires using datetime ' - 'objects of all the same type. Got\n{}.'.format(data)) + if data.size: + sample = data[0] + date_type = type(sample) + if not isinstance(sample, cftime.datetime): + raise TypeError( + 'CFTimeIndex requires cftime.datetime ' + 'objects. Got object of {}.'.format(date_type)) + if not all(isinstance(value, date_type) for value in data): + raise TypeError( + 'CFTimeIndex requires using datetime ' + 'objects of all the same type. Got\n{}.'.format(data)) class CFTimeIndex(pd.Index): """Custom Index for working with CF calendars and dates All elements of a CFTimeIndex must be cftime.datetime objects. 
+ + Parameters + ---------- + data : array or CFTimeIndex + Sequence of cftime.datetime objects to use in index + name : str, default None + Name of the resulting index + + See Also + -------- + cftime_range """ year = _field_accessor('year', 'The year of the datetime') month = _field_accessor('month', 'The month of the datetime') @@ -149,10 +205,14 @@ class CFTimeIndex(pd.Index): 'The microseconds of the datetime') date_type = property(get_date_type) - def __new__(cls, data): + def __new__(cls, data, name=None): + if name is None and hasattr(data, 'name'): + name = data.name + result = object.__new__(cls) - assert_all_valid_date_type(data) - result._data = np.array(data) + result._data = np.array(data, dtype='O') + assert_all_valid_date_type(result._data) + result.name = name return result def _partial_date_slice(self, resolution, parsed): diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py new file mode 100644 index 00000000000..6d7990689ed --- /dev/null +++ b/xarray/tests/test_cftime_offsets.py @@ -0,0 +1,801 @@ +import pytest + +from itertools import product + +import numpy as np + +from xarray.coding.cftime_offsets import ( + BaseCFTimeOffset, YearBegin, YearEnd, MonthBegin, MonthEnd, + Day, Hour, Minute, Second, _days_in_month, + to_offset, get_date_type, _MONTH_ABBREVIATIONS, to_cftime_datetime, + cftime_range) +from xarray import CFTimeIndex + +cftime = pytest.importorskip('cftime') + + +_CFTIME_CALENDARS = ['365_day', '360_day', 'julian', 'all_leap', + '366_day', 'gregorian', 'proleptic_gregorian', 'standard'] + + +def _id_func(param): + """Called on each parameter passed to pytest.mark.parametrize""" + return str(param) + + +@pytest.fixture(params=_CFTIME_CALENDARS) +def calendar(request): + return request.param + + +@pytest.mark.parametrize( + ('offset', 'expected_n'), + [(BaseCFTimeOffset(), 1), + (YearBegin(), 1), + (YearEnd(), 1), + (BaseCFTimeOffset(n=2), 2), + (YearBegin(n=2), 2), + (YearEnd(n=2), 2)], + 
ids=_id_func +) +def test_cftime_offset_constructor_valid_n(offset, expected_n): + assert offset.n == expected_n + + +@pytest.mark.parametrize( + ('offset', 'invalid_n'), + [(BaseCFTimeOffset, 1.5), + (YearBegin, 1.5), + (YearEnd, 1.5)], + ids=_id_func +) +def test_cftime_offset_constructor_invalid_n(offset, invalid_n): + with pytest.raises(TypeError): + offset(n=invalid_n) + + +@pytest.mark.parametrize( + ('offset', 'expected_month'), + [(YearBegin(), 1), + (YearEnd(), 12), + (YearBegin(month=5), 5), + (YearEnd(month=5), 5)], + ids=_id_func +) +def test_year_offset_constructor_valid_month(offset, expected_month): + assert offset.month == expected_month + + +@pytest.mark.parametrize( + ('offset', 'invalid_month', 'exception'), + [(YearBegin, 0, ValueError), + (YearEnd, 0, ValueError), + (YearBegin, 13, ValueError,), + (YearEnd, 13, ValueError), + (YearBegin, 1.5, TypeError), + (YearEnd, 1.5, TypeError)], + ids=_id_func +) +def test_year_offset_constructor_invalid_month( + offset, invalid_month, exception): + with pytest.raises(exception): + offset(month=invalid_month) + + +@pytest.mark.parametrize( + ('offset', 'expected'), + [(BaseCFTimeOffset(), None), + (MonthBegin(), 'MS'), + (YearBegin(), 'AS-JAN')], + ids=_id_func +) +def test_rule_code(offset, expected): + assert offset.rule_code() == expected + + +@pytest.mark.parametrize( + ('offset', 'expected'), + [(BaseCFTimeOffset(), '<BaseCFTimeOffset: n=1>'), + (YearBegin(), '<YearBegin: n=1, month=1>')], + ids=_id_func +) +def test_str_and_repr(offset, expected): + assert str(offset) == expected + assert repr(offset) == expected + + +@pytest.mark.parametrize( + 'offset', + [BaseCFTimeOffset(), MonthBegin(), YearBegin()], + ids=_id_func +) +def test_to_offset_offset_input(offset): + assert to_offset(offset) == offset + + +@pytest.mark.parametrize( + ('freq', 'expected'), + [('M', MonthEnd()), + ('2M', MonthEnd(n=2)), + ('MS', MonthBegin()), + ('2MS', MonthBegin(n=2)), + ('D', Day()), + ('2D', Day(n=2)), + ('H', Hour()), + ('2H', Hour(n=2)), + ('T', Minute()), + 
('2T', Minute(n=2)), + ('min', Minute()), + ('2min', Minute(n=2)), + ('S', Second()), + ('2S', Second(n=2))], + ids=_id_func +) +def test_to_offset_sub_annual(freq, expected): + assert to_offset(freq) == expected + + +_ANNUAL_OFFSET_TYPES = { + 'A': YearEnd, + 'AS': YearBegin +} + + +@pytest.mark.parametrize(('month_int', 'month_label'), + list(_MONTH_ABBREVIATIONS.items()) + [('', '')]) +@pytest.mark.parametrize('multiple', [None, 2]) +@pytest.mark.parametrize('offset_str', ['AS', 'A']) +def test_to_offset_annual(month_label, month_int, multiple, offset_str): + freq = offset_str + offset_type = _ANNUAL_OFFSET_TYPES[offset_str] + if month_label: + freq = '-'.join([freq, month_label]) + if multiple: + freq = '{}'.format(multiple) + freq + result = to_offset(freq) + + if multiple and month_int: + expected = offset_type(n=multiple, month=month_int) + elif multiple: + expected = offset_type(n=multiple) + elif month_int: + expected = offset_type(month=month_int) + else: + expected = offset_type() + assert result == expected + + +@pytest.mark.parametrize('freq', ['Z', '7min2', 'AM', 'M-', 'AS-', '1H1min']) +def test_invalid_to_offset_str(freq): + with pytest.raises(ValueError): + to_offset(freq) + + +@pytest.mark.parametrize( + ('argument', 'expected_date_args'), + [('2000-01-01', (2000, 1, 1)), + ((2000, 1, 1), (2000, 1, 1))], + ids=_id_func +) +def test_to_cftime_datetime(calendar, argument, expected_date_args): + date_type = get_date_type(calendar) + expected = date_type(*expected_date_args) + if isinstance(argument, tuple): + argument = date_type(*argument) + result = to_cftime_datetime(argument, calendar=calendar) + assert result == expected + + +def test_to_cftime_datetime_error_no_calendar(): + with pytest.raises(ValueError): + to_cftime_datetime('2000') + + +def test_to_cftime_datetime_error_type_error(): + with pytest.raises(TypeError): + to_cftime_datetime(1) + + +_EQ_TESTS_A = [ + BaseCFTimeOffset(), YearBegin(), YearEnd(), YearBegin(month=2), + 
YearEnd(month=2), MonthBegin(), MonthEnd(), Day(), Hour(), Minute(), + Second() +] +_EQ_TESTS_B = [ + BaseCFTimeOffset(n=2), YearBegin(n=2), YearEnd(n=2), + YearBegin(n=2, month=2), YearEnd(n=2, month=2), MonthBegin(n=2), + MonthEnd(n=2), Day(n=2), Hour(n=2), Minute(n=2), Second(n=2) +] + + +@pytest.mark.parametrize( + ('a', 'b'), product(_EQ_TESTS_A, _EQ_TESTS_B), ids=_id_func +) +def test_neq(a, b): + assert a != b + + +_EQ_TESTS_B_COPY = [ + BaseCFTimeOffset(n=2), YearBegin(n=2), YearEnd(n=2), + YearBegin(n=2, month=2), YearEnd(n=2, month=2), MonthBegin(n=2), + MonthEnd(n=2), Day(n=2), Hour(n=2), Minute(n=2), Second(n=2) +] + + +@pytest.mark.parametrize( + ('a', 'b'), zip(_EQ_TESTS_B, _EQ_TESTS_B_COPY), ids=_id_func +) +def test_eq(a, b): + assert a == b + + +_MUL_TESTS = [ + (BaseCFTimeOffset(), BaseCFTimeOffset(n=3)), + (YearEnd(), YearEnd(n=3)), + (YearBegin(), YearBegin(n=3)), + (MonthEnd(), MonthEnd(n=3)), + (MonthBegin(), MonthBegin(n=3)), + (Day(), Day(n=3)), + (Hour(), Hour(n=3)), + (Minute(), Minute(n=3)), + (Second(), Second(n=3)) +] + + +@pytest.mark.parametrize(('offset', 'expected'), _MUL_TESTS, ids=_id_func) +def test_mul(offset, expected): + assert offset * 3 == expected + + +@pytest.mark.parametrize(('offset', 'expected'), _MUL_TESTS, ids=_id_func) +def test_rmul(offset, expected): + assert 3 * offset == expected + + +@pytest.mark.parametrize( + ('offset', 'expected'), + [(BaseCFTimeOffset(), BaseCFTimeOffset(n=-1)), + (YearEnd(), YearEnd(n=-1)), + (YearBegin(), YearBegin(n=-1)), + (MonthEnd(), MonthEnd(n=-1)), + (MonthBegin(), MonthBegin(n=-1)), + (Day(), Day(n=-1)), + (Hour(), Hour(n=-1)), + (Minute(), Minute(n=-1)), + (Second(), Second(n=-1))], + ids=_id_func) +def test_neg(offset, expected): + assert -offset == expected + + +_ADD_TESTS = [ + (Day(n=2), (1, 1, 3)), + (Hour(n=2), (1, 1, 1, 2)), + (Minute(n=2), (1, 1, 1, 0, 2)), + (Second(n=2), (1, 1, 1, 0, 0, 2)) +] + + +@pytest.mark.parametrize( + ('offset', 'expected_date_args'), + 
_ADD_TESTS, + ids=_id_func +) +def test_add_sub_monthly(offset, expected_date_args, calendar): + date_type = get_date_type(calendar) + initial = date_type(1, 1, 1) + expected = date_type(*expected_date_args) + result = offset + initial + assert result == expected + + +@pytest.mark.parametrize( + ('offset', 'expected_date_args'), + _ADD_TESTS, + ids=_id_func +) +def test_radd_sub_monthly(offset, expected_date_args, calendar): + date_type = get_date_type(calendar) + initial = date_type(1, 1, 1) + expected = date_type(*expected_date_args) + result = initial + offset + assert result == expected + + +@pytest.mark.parametrize( + ('offset', 'expected_date_args'), + [(Day(n=2), (1, 1, 1)), + (Hour(n=2), (1, 1, 2, 22)), + (Minute(n=2), (1, 1, 2, 23, 58)), + (Second(n=2), (1, 1, 2, 23, 59, 58))], + ids=_id_func +) +def test_rsub_sub_monthly(offset, expected_date_args, calendar): + date_type = get_date_type(calendar) + initial = date_type(1, 1, 3) + expected = date_type(*expected_date_args) + result = initial - offset + assert result == expected + + +@pytest.mark.parametrize('offset', _EQ_TESTS_A, ids=_id_func) +def test_sub_error(offset, calendar): + date_type = get_date_type(calendar) + initial = date_type(1, 1, 1) + with pytest.raises(TypeError): + offset - initial + + +@pytest.mark.parametrize( + ('a', 'b'), + zip(_EQ_TESTS_A, _EQ_TESTS_B), + ids=_id_func +) +def test_minus_offset(a, b): + result = b - a + expected = a + assert result == expected + + +@pytest.mark.parametrize( + ('a', 'b'), + list(zip(np.roll(_EQ_TESTS_A, 1), _EQ_TESTS_B)) + + [(YearEnd(month=1), YearEnd(month=2))], + ids=_id_func +) +def test_minus_offset_error(a, b): + with pytest.raises(TypeError): + b - a + + +def test_days_in_month_non_december(calendar): + date_type = get_date_type(calendar) + reference = date_type(1, 4, 1) + assert _days_in_month(reference) == 30 + + +def test_days_in_month_december(calendar): + if calendar == '360_day': + expected = 30 + else: + expected = 31 + date_type = 
get_date_type(calendar) + reference = date_type(1, 12, 5) + assert _days_in_month(reference) == expected + + +@pytest.mark.parametrize( + ('initial_date_args', 'offset', 'expected_date_args'), + [((1, 1, 1), MonthBegin(), (1, 2, 1)), + ((1, 1, 1), MonthBegin(n=2), (1, 3, 1)), + ((1, 1, 7), MonthBegin(), (1, 2, 1)), + ((1, 1, 7), MonthBegin(n=2), (1, 3, 1)), + ((1, 3, 1), MonthBegin(n=-1), (1, 2, 1)), + ((1, 3, 1), MonthBegin(n=-2), (1, 1, 1)), + ((1, 3, 3), MonthBegin(n=-1), (1, 3, 1)), + ((1, 3, 3), MonthBegin(n=-2), (1, 2, 1)), + ((1, 2, 1), MonthBegin(n=14), (2, 4, 1)), + ((2, 4, 1), MonthBegin(n=-14), (1, 2, 1)), + ((1, 1, 1, 5, 5, 5, 5), MonthBegin(), (1, 2, 1, 5, 5, 5, 5)), + ((1, 1, 3, 5, 5, 5, 5), MonthBegin(), (1, 2, 1, 5, 5, 5, 5)), + ((1, 1, 3, 5, 5, 5, 5), MonthBegin(n=-1), (1, 1, 1, 5, 5, 5, 5))], + ids=_id_func +) +def test_add_month_begin( + calendar, initial_date_args, offset, expected_date_args): + date_type = get_date_type(calendar) + initial = date_type(*initial_date_args) + result = initial + offset + expected = date_type(*expected_date_args) + assert result == expected + + +@pytest.mark.parametrize( + ('initial_date_args', 'offset', 'expected_year_month', + 'expected_sub_day'), + [((1, 1, 1), MonthEnd(), (1, 1), ()), + ((1, 1, 1), MonthEnd(n=2), (1, 2), ()), + ((1, 3, 1), MonthEnd(n=-1), (1, 2), ()), + ((1, 3, 1), MonthEnd(n=-2), (1, 1), ()), + ((1, 2, 1), MonthEnd(n=14), (2, 3), ()), + ((2, 4, 1), MonthEnd(n=-14), (1, 2), ()), + ((1, 1, 1, 5, 5, 5, 5), MonthEnd(), (1, 1), (5, 5, 5, 5)), + ((1, 2, 1, 5, 5, 5, 5), MonthEnd(n=-1), (1, 1), (5, 5, 5, 5))], + ids=_id_func +) +def test_add_month_end( + calendar, initial_date_args, offset, expected_year_month, + expected_sub_day +): + date_type = get_date_type(calendar) + initial = date_type(*initial_date_args) + result = initial + offset + reference_args = expected_year_month + (1,) + reference = date_type(*reference_args) + + # Here the days at the end of each month varies based on the calendar used 
+ expected_date_args = (expected_year_month + + (_days_in_month(reference),) + expected_sub_day) + expected = date_type(*expected_date_args) + assert result == expected + + +@pytest.mark.parametrize( + ('initial_year_month', 'initial_sub_day', 'offset', 'expected_year_month', + 'expected_sub_day'), + [((1, 1), (), MonthEnd(), (1, 2), ()), + ((1, 1), (), MonthEnd(n=2), (1, 3), ()), + ((1, 3), (), MonthEnd(n=-1), (1, 2), ()), + ((1, 3), (), MonthEnd(n=-2), (1, 1), ()), + ((1, 2), (), MonthEnd(n=14), (2, 4), ()), + ((2, 4), (), MonthEnd(n=-14), (1, 2), ()), + ((1, 1), (5, 5, 5, 5), MonthEnd(), (1, 2), (5, 5, 5, 5)), + ((1, 2), (5, 5, 5, 5), MonthEnd(n=-1), (1, 1), (5, 5, 5, 5))], + ids=_id_func +) +def test_add_month_end_onOffset( + calendar, initial_year_month, initial_sub_day, offset, expected_year_month, + expected_sub_day +): + date_type = get_date_type(calendar) + reference_args = initial_year_month + (1,) + reference = date_type(*reference_args) + initial_date_args = (initial_year_month + (_days_in_month(reference),) + + initial_sub_day) + initial = date_type(*initial_date_args) + result = initial + offset + reference_args = expected_year_month + (1,) + reference = date_type(*reference_args) + + # Here the days at the end of each month varies based on the calendar used + expected_date_args = (expected_year_month + + (_days_in_month(reference),) + expected_sub_day) + expected = date_type(*expected_date_args) + assert result == expected + + +@pytest.mark.parametrize( + ('initial_date_args', 'offset', 'expected_date_args'), + [((1, 1, 1), YearBegin(), (2, 1, 1)), + ((1, 1, 1), YearBegin(n=2), (3, 1, 1)), + ((1, 1, 1), YearBegin(month=2), (1, 2, 1)), + ((1, 1, 7), YearBegin(n=2), (3, 1, 1)), + ((2, 2, 1), YearBegin(n=-1), (2, 1, 1)), + ((1, 1, 2), YearBegin(n=-1), (1, 1, 1)), + ((1, 1, 1, 5, 5, 5, 5), YearBegin(), (2, 1, 1, 5, 5, 5, 5)), + ((2, 1, 1, 5, 5, 5, 5), YearBegin(n=-1), (1, 1, 1, 5, 5, 5, 5))], + ids=_id_func +) +def test_add_year_begin(calendar, 
initial_date_args, offset, + expected_date_args): + date_type = get_date_type(calendar) + initial = date_type(*initial_date_args) + result = initial + offset + expected = date_type(*expected_date_args) + assert result == expected + + +@pytest.mark.parametrize( + ('initial_date_args', 'offset', 'expected_year_month', + 'expected_sub_day'), + [((1, 1, 1), YearEnd(), (1, 12), ()), + ((1, 1, 1), YearEnd(n=2), (2, 12), ()), + ((1, 1, 1), YearEnd(month=1), (1, 1), ()), + ((2, 3, 1), YearEnd(n=-1), (1, 12), ()), + ((1, 3, 1), YearEnd(n=-1, month=2), (1, 2), ()), + ((1, 1, 1, 5, 5, 5, 5), YearEnd(), (1, 12), (5, 5, 5, 5)), + ((1, 1, 1, 5, 5, 5, 5), YearEnd(n=2), (2, 12), (5, 5, 5, 5))], + ids=_id_func +) +def test_add_year_end( + calendar, initial_date_args, offset, expected_year_month, + expected_sub_day +): + date_type = get_date_type(calendar) + initial = date_type(*initial_date_args) + result = initial + offset + reference_args = expected_year_month + (1,) + reference = date_type(*reference_args) + + # Here the days at the end of each month varies based on the calendar used + expected_date_args = (expected_year_month + + (_days_in_month(reference),) + expected_sub_day) + expected = date_type(*expected_date_args) + assert result == expected + + +@pytest.mark.parametrize( + ('initial_year_month', 'initial_sub_day', 'offset', 'expected_year_month', + 'expected_sub_day'), + [((1, 12), (), YearEnd(), (2, 12), ()), + ((1, 12), (), YearEnd(n=2), (3, 12), ()), + ((2, 12), (), YearEnd(n=-1), (1, 12), ()), + ((3, 12), (), YearEnd(n=-2), (1, 12), ()), + ((1, 1), (), YearEnd(month=2), (1, 2), ()), + ((1, 12), (5, 5, 5, 5), YearEnd(), (2, 12), (5, 5, 5, 5)), + ((2, 12), (5, 5, 5, 5), YearEnd(n=-1), (1, 12), (5, 5, 5, 5))], + ids=_id_func +) +def test_add_year_end_onOffset( + calendar, initial_year_month, initial_sub_day, offset, expected_year_month, + expected_sub_day +): + date_type = get_date_type(calendar) + reference_args = initial_year_month + (1,) + reference = 
date_type(*reference_args) + initial_date_args = (initial_year_month + (_days_in_month(reference),) + + initial_sub_day) + initial = date_type(*initial_date_args) + result = initial + offset + reference_args = expected_year_month + (1,) + reference = date_type(*reference_args) + + # Here the days at the end of each month varies based on the calendar used + expected_date_args = (expected_year_month + + (_days_in_month(reference),) + expected_sub_day) + expected = date_type(*expected_date_args) + assert result == expected + + +# Note for all sub-monthly offsets, pandas always returns True for onOffset +@pytest.mark.parametrize( + ('date_args', 'offset', 'expected'), + [((1, 1, 1), MonthBegin(), True), + ((1, 1, 1, 1), MonthBegin(), True), + ((1, 1, 5), MonthBegin(), False), + ((1, 1, 5), MonthEnd(), False), + ((1, 1, 1), YearBegin(), True), + ((1, 1, 1, 1), YearBegin(), True), + ((1, 1, 5), YearBegin(), False), + ((1, 12, 1), YearEnd(), False), + ((1, 1, 1), Day(), True), + ((1, 1, 1, 1), Day(), True), + ((1, 1, 1), Hour(), True), + ((1, 1, 1), Minute(), True), + ((1, 1, 1), Second(), True)], + ids=_id_func +) +def test_onOffset(calendar, date_args, offset, expected): + date_type = get_date_type(calendar) + date = date_type(*date_args) + result = offset.onOffset(date) + assert result == expected + + +@pytest.mark.parametrize( + ('year_month_args', 'sub_day_args', 'offset'), + [((1, 1), (), MonthEnd()), + ((1, 1), (1,), MonthEnd()), + ((1, 12), (), YearEnd()), + ((1, 1), (), YearEnd(month=1))], + ids=_id_func +) +def test_onOffset_month_or_year_end( + calendar, year_month_args, sub_day_args, offset): + date_type = get_date_type(calendar) + reference_args = year_month_args + (1,) + reference = date_type(*reference_args) + date_args = year_month_args + (_days_in_month(reference),) + sub_day_args + date = date_type(*date_args) + result = offset.onOffset(date) + assert result + + +@pytest.mark.parametrize( + ('offset', 'initial_date_args', 'partial_expected_date_args'), + 
[(YearBegin(), (1, 3, 1), (2, 1)), + (YearBegin(), (1, 1, 1), (1, 1)), + (YearBegin(n=2), (1, 3, 1), (2, 1)), + (YearBegin(n=2, month=2), (1, 3, 1), (2, 2)), + (YearEnd(), (1, 3, 1), (1, 12)), + (YearEnd(n=2), (1, 3, 1), (1, 12)), + (YearEnd(n=2, month=2), (1, 3, 1), (2, 2)), + (YearEnd(n=2, month=4), (1, 4, 30), (1, 4)), + (MonthBegin(), (1, 3, 2), (1, 4)), + (MonthBegin(), (1, 3, 1), (1, 3)), + (MonthBegin(n=2), (1, 3, 2), (1, 4)), + (MonthEnd(), (1, 3, 2), (1, 3)), + (MonthEnd(), (1, 4, 30), (1, 4)), + (MonthEnd(n=2), (1, 3, 2), (1, 3)), + (Day(), (1, 3, 2, 1), (1, 3, 2, 1)), + (Hour(), (1, 3, 2, 1, 1), (1, 3, 2, 1, 1)), + (Minute(), (1, 3, 2, 1, 1, 1), (1, 3, 2, 1, 1, 1)), + (Second(), (1, 3, 2, 1, 1, 1, 1), (1, 3, 2, 1, 1, 1, 1))], + ids=_id_func +) +def test_rollforward(calendar, offset, initial_date_args, + partial_expected_date_args): + date_type = get_date_type(calendar) + initial = date_type(*initial_date_args) + if isinstance(offset, (MonthBegin, YearBegin)): + expected_date_args = partial_expected_date_args + (1,) + elif isinstance(offset, (MonthEnd, YearEnd)): + reference_args = partial_expected_date_args + (1,) + reference = date_type(*reference_args) + expected_date_args = (partial_expected_date_args + + (_days_in_month(reference),)) + else: + expected_date_args = partial_expected_date_args + expected = date_type(*expected_date_args) + result = offset.rollforward(initial) + assert result == expected + + +@pytest.mark.parametrize( + ('offset', 'initial_date_args', 'partial_expected_date_args'), + [(YearBegin(), (1, 3, 1), (1, 1)), + (YearBegin(n=2), (1, 3, 1), (1, 1)), + (YearBegin(n=2, month=2), (1, 3, 1), (1, 2)), + (YearBegin(), (1, 1, 1), (1, 1)), + (YearBegin(n=2, month=2), (1, 2, 1), (1, 2)), + (YearEnd(), (2, 3, 1), (1, 12)), + (YearEnd(n=2), (2, 3, 1), (1, 12)), + (YearEnd(n=2, month=2), (2, 3, 1), (2, 2)), + (YearEnd(month=4), (1, 4, 30), (1, 4)), + (MonthBegin(), (1, 3, 2), (1, 3)), + (MonthBegin(n=2), (1, 3, 2), (1, 3)), + (MonthBegin(), 
(1, 3, 1), (1, 3)), + (MonthEnd(), (1, 3, 2), (1, 2)), + (MonthEnd(n=2), (1, 3, 2), (1, 2)), + (MonthEnd(), (1, 4, 30), (1, 4)), + (Day(), (1, 3, 2, 1), (1, 3, 2, 1)), + (Hour(), (1, 3, 2, 1, 1), (1, 3, 2, 1, 1)), + (Minute(), (1, 3, 2, 1, 1, 1), (1, 3, 2, 1, 1, 1)), + (Second(), (1, 3, 2, 1, 1, 1, 1), (1, 3, 2, 1, 1, 1, 1))], + ids=_id_func +) +def test_rollback(calendar, offset, initial_date_args, + partial_expected_date_args): + date_type = get_date_type(calendar) + initial = date_type(*initial_date_args) + if isinstance(offset, (MonthBegin, YearBegin)): + expected_date_args = partial_expected_date_args + (1,) + elif isinstance(offset, (MonthEnd, YearEnd)): + reference_args = partial_expected_date_args + (1,) + reference = date_type(*reference_args) + expected_date_args = (partial_expected_date_args + + (_days_in_month(reference),)) + else: + expected_date_args = partial_expected_date_args + expected = date_type(*expected_date_args) + result = offset.rollback(initial) + assert result == expected + + +_CFTIME_RANGE_TESTS = [ + ('0001-01-01', '0001-01-04', None, 'D', None, False, + [(1, 1, 1), (1, 1, 2), (1, 1, 3), (1, 1, 4)]), + ('0001-01-01', '0001-01-04', None, 'D', 'left', False, + [(1, 1, 1), (1, 1, 2), (1, 1, 3)]), + ('0001-01-01', '0001-01-04', None, 'D', 'right', False, + [(1, 1, 2), (1, 1, 3), (1, 1, 4)]), + ('0001-01-01T01:00:00', '0001-01-04', None, 'D', None, False, + [(1, 1, 1, 1), (1, 1, 2, 1), (1, 1, 3, 1)]), + ('0001-01-01T01:00:00', '0001-01-04', None, 'D', None, True, + [(1, 1, 1), (1, 1, 2), (1, 1, 3), (1, 1, 4)]), + ('0001-01-01', None, 4, 'D', None, False, + [(1, 1, 1), (1, 1, 2), (1, 1, 3), (1, 1, 4)]), + (None, '0001-01-04', 4, 'D', None, False, + [(1, 1, 1), (1, 1, 2), (1, 1, 3), (1, 1, 4)]), + ((1, 1, 1), '0001-01-04', None, 'D', None, False, + [(1, 1, 1), (1, 1, 2), (1, 1, 3), (1, 1, 4)]), + ((1, 1, 1), (1, 1, 4), None, 'D', None, False, + [(1, 1, 1), (1, 1, 2), (1, 1, 3), (1, 1, 4)]), + ('0001-01-30', '0011-02-01', None, '3AS-JUN', None, 
False, + [(1, 6, 1), (4, 6, 1), (7, 6, 1), (10, 6, 1)]), + ('0001-01-04', '0001-01-01', None, 'D', None, False, + []), + ('0010', None, 4, YearBegin(n=-2), None, False, + [(10, 1, 1), (8, 1, 1), (6, 1, 1), (4, 1, 1)]), + ('0001-01-01', '0001-01-04', 4, None, None, False, + [(1, 1, 1), (1, 1, 2), (1, 1, 3), (1, 1, 4)]) +] + + +@pytest.mark.parametrize( + ('start', 'end', 'periods', 'freq', 'closed', 'normalize', + 'expected_date_args'), + _CFTIME_RANGE_TESTS, ids=_id_func +) +def test_cftime_range( + start, end, periods, freq, closed, normalize, calendar, + expected_date_args): + date_type = get_date_type(calendar) + expected_dates = [date_type(*args) for args in expected_date_args] + + if isinstance(start, tuple): + start = date_type(*start) + if isinstance(end, tuple): + end = date_type(*end) + + result = cftime_range( + start=start, end=end, periods=periods, freq=freq, closed=closed, + normalize=normalize, calendar=calendar) + resulting_dates = result.values + + assert isinstance(result, CFTimeIndex) + + if freq is not None: + np.testing.assert_equal(resulting_dates, expected_dates) + else: + # If we create a linear range of dates using cftime.num2date + # we will not get exact round number dates. This is because + # datetime arithmetic in cftime is accurate approximately to + # 1 millisecond (see https://unidata.github.io/cftime/api.html). 
+ deltas = resulting_dates - expected_dates + deltas = np.array([delta.total_seconds() for delta in deltas]) + assert np.max(np.abs(deltas)) < 0.001 + + +def test_cftime_range_name(): + result = cftime_range(start='2000', periods=4, name='foo') + assert result.name == 'foo' + + result = cftime_range(start='2000', periods=4) + assert result.name is None + + +@pytest.mark.parametrize( + ('start', 'end', 'periods', 'freq', 'closed'), + [(None, None, 5, 'A', None), + ('2000', None, None, 'A', None), + (None, '2000', None, 'A', None), + ('2000', '2001', None, None, None), + (None, None, None, None, None), + ('2000', '2001', None, 'A', 'up'), + ('2000', '2001', 5, 'A', None)] +) +def test_invalid_cftime_range_inputs(start, end, periods, freq, closed): + with pytest.raises(ValueError): + cftime_range(start, end, periods, freq, closed=closed) + + +_CALENDAR_SPECIFIC_MONTH_END_TESTS = [ + ('2M', 'noleap', + [(2, 28), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]), + ('2M', 'all_leap', + [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]), + ('2M', '360_day', + [(2, 30), (4, 30), (6, 30), (8, 30), (10, 30), (12, 30)]), + ('2M', 'standard', + [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]), + ('2M', 'gregorian', + [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]), + ('2M', 'julian', + [(2, 29), (4, 30), (6, 30), (8, 31), (10, 31), (12, 31)]) +] + + +@pytest.mark.parametrize( + ('freq', 'calendar', 'expected_month_day'), + _CALENDAR_SPECIFIC_MONTH_END_TESTS, ids=_id_func +) +def test_calendar_specific_month_end(freq, calendar, expected_month_day): + year = 2000 # Use a leap-year to highlight calendar differences + result = cftime_range( + start='2000-02', end='2001', freq=freq, calendar=calendar).values + date_type = get_date_type(calendar) + expected = [date_type(year, *args) for args in expected_month_day] + np.testing.assert_equal(result, expected) + + +@pytest.mark.parametrize( + ('calendar', 'start', 'end', 'expected_number_of_days'), + 
[('noleap', '2000', '2001', 365), + ('all_leap', '2000', '2001', 366), + ('360_day', '2000', '2001', 360), + ('standard', '2000', '2001', 366), + ('gregorian', '2000', '2001', 366), + ('julian', '2000', '2001', 366), + ('noleap', '2001', '2002', 365), + ('all_leap', '2001', '2002', 366), + ('360_day', '2001', '2002', 360), + ('standard', '2001', '2002', 365), + ('gregorian', '2001', '2002', 365), + ('julian', '2001', '2002', 365)] +) +def test_calendar_year_length( + calendar, start, end, expected_number_of_days): + result = cftime_range(start, end, freq='D', closed='left', + calendar=calendar) + assert len(result) == expected_number_of_days diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 6f102b60b9d..f72c6904f0e 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -2,6 +2,7 @@ import pytest +import numpy as np import pandas as pd import xarray as xr @@ -121,22 +122,42 @@ def dec_days(date_type): return 31 +@pytest.fixture +def index_with_name(date_type): + dates = [date_type(1, 1, 1), date_type(1, 2, 1), + date_type(2, 1, 1), date_type(2, 2, 1)] + return CFTimeIndex(dates, name='foo') + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +@pytest.mark.parametrize( + ('name', 'expected_name'), + [('bar', 'bar'), + (None, 'foo')]) +def test_constructor_with_name(index_with_name, name, expected_name): + result = CFTimeIndex(index_with_name, name=name).name + assert result == expected_name + + @pytest.mark.skipif(not has_cftime, reason='cftime not installed') def test_assert_all_valid_date_type(date_type, index): import cftime if date_type is cftime.DatetimeNoLeap: - mixed_date_types = [date_type(1, 1, 1), - cftime.DatetimeAllLeap(1, 2, 1)] + mixed_date_types = np.array( + [date_type(1, 1, 1), + cftime.DatetimeAllLeap(1, 2, 1)]) else: - mixed_date_types = [date_type(1, 1, 1), - cftime.DatetimeNoLeap(1, 2, 1)] + mixed_date_types = np.array( + [date_type(1, 1, 1), + 
cftime.DatetimeNoLeap(1, 2, 1)]) with pytest.raises(TypeError): assert_all_valid_date_type(mixed_date_types) with pytest.raises(TypeError): - assert_all_valid_date_type([1, date_type(1, 1, 1)]) + assert_all_valid_date_type(np.array([1, date_type(1, 1, 1)])) - assert_all_valid_date_type([date_type(1, 1, 1), date_type(1, 2, 1)]) + assert_all_valid_date_type( + np.array([date_type(1, 1, 1), date_type(1, 2, 1)])) @pytest.mark.skipif(not has_cftime, reason='cftime not installed') @@ -589,3 +610,9 @@ def test_concat_cftimeindex(date_type, enable_cftimeindex): else: assert isinstance(da.indexes['time'], pd.Index) assert not isinstance(da.indexes['time'], CFTimeIndex) + + +@pytest.mark.skipif(not has_cftime, reason='cftime not installed') +def test_empty_cftimeindex(): + index = CFTimeIndex([]) + assert index.date_type is None