diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index f1bd03a097cd0..8d4f2af19701a 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -1,24 +1,25 @@ # Copyright (c) 2012, Lambda Foundry, Inc. # See LICENSE for the license +import os +import sys +import time +import warnings + +from csv import QUOTE_MINIMAL, QUOTE_NONNUMERIC, QUOTE_NONE from libc.stdio cimport fopen, fclose from libc.stdlib cimport malloc, free from libc.string cimport strncpy, strlen, strcmp, strcasecmp -cimport libc.stdio as stdio -import warnings -from csv import QUOTE_MINIMAL, QUOTE_NONNUMERIC, QUOTE_NONE +cimport cython +from cython cimport Py_ssize_t + from cpython cimport (PyObject, PyBytes_FromString, PyBytes_AsString, PyBytes_Check, PyUnicode_Check, PyUnicode_AsUTF8String, PyErr_Occurred, PyErr_Fetch) from cpython.ref cimport Py_XDECREF -from pandas.errors import (ParserError, DtypeWarning, - EmptyDataError, ParserWarning) -# Import CParserError as alias of ParserError for backwards compatibility. -# Ultimately, we want to remove this import. See gh-12665 and gh-14479. -CParserError = ParserError cdef extern from "Python.h": object PyUnicode_FromString(char *v) @@ -29,15 +30,24 @@ cdef extern from "Python.h": cdef extern from "stdlib.h": void memcpy(void *dst, void *src, size_t n) -cimport cython -cimport numpy as cnp +import numpy as np +cimport numpy as cnp from numpy cimport ndarray, uint8_t, uint64_t, int64_t +cnp.import_array() -import numpy as np -cimport util +from util cimport UINT64_MAX, INT64_MAX, INT64_MIN +import lib + +from khash cimport ( + khiter_t, + kh_str_t, kh_init_str, kh_put_str, kh_exist_str, + kh_get_str, kh_destroy_str, + kh_float64_t, kh_get_float64, kh_destroy_float64, + kh_put_float64, kh_init_float64, + kh_strbox_t, kh_put_strbox, kh_get_strbox, kh_init_strbox, + kh_destroy_strbox) -import pandas._libs.lib as lib import pandas.compat as compat from pandas.core.dtypes.common import ( is_categorical_dtype, CategoricalDtype, @@ -47,55 +57,44 @@ from pandas.core.dtypes.common import ( pandas_dtype) from pandas.core.categorical import Categorical from pandas.core.dtypes.concat import union_categoricals - import pandas.io.common as com -import time -import os - -cnp.import_array() +from pandas.errors import (ParserError, DtypeWarning, + EmptyDataError, ParserWarning) -from khash cimport ( - khiter_t, - kh_str_t, kh_init_str, kh_put_str, kh_exist_str, - kh_get_str, kh_destroy_str, - kh_float64_t, kh_get_float64, kh_destroy_float64, - kh_put_float64, kh_init_float64, - kh_strbox_t, kh_put_strbox, kh_get_strbox, kh_init_strbox, - kh_destroy_strbox) +# Import CParserError as alias of ParserError for backwards compatibility. +# Ultimately, we want to remove this import. See gh-12665 and gh-14479. +CParserError = ParserError -import sys cdef bint PY3 = (sys.version_info[0] >= 3) cdef double INF = np.inf cdef double NEGINF = -INF -cdef extern from "headers/stdint.h": - enum: UINT8_MAX - enum: UINT16_MAX - enum: UINT32_MAX - enum: UINT64_MAX - enum: INT8_MIN - enum: INT8_MAX - enum: INT16_MIN - enum: INT16_MAX - enum: INT32_MAX - enum: INT32_MIN - enum: INT64_MAX - enum: INT64_MIN - -cdef extern from "headers/portable.h": - pass cdef extern from "errno.h": int errno +cdef extern from "headers/portable.h": + # I *think* this is here so that strcasecmp is defined on Windows + # so we don't get + # `parsers.obj : error LNK2001: unresolved external symbol strcasecmp` + # in Appveyor. + # In a sane world, the `from libc.string cimport` above would fail + # loudly. + pass + try: basestring except NameError: basestring = str +cdef extern from "src/numpy_helper.h": + object sarr_from_data(cnp.dtype, int length, void* data) + void transfer_object_column(char *dst, char *src, size_t stride, + size_t length) + cdef extern from "parser/tokenizer.h": ctypedef enum ParserState: @@ -2360,7 +2359,7 @@ def _to_structured_array(dict columns, object names, object usecols): # We own the data. buf = malloc(length * stride) - recs = util.sarr_from_data(dt, length, buf) + recs = sarr_from_data(dt, length, buf) assert(recs.flags.owndata) for i in range(nfields): @@ -2385,7 +2384,7 @@ cdef _fill_structured_column(char *dst, char* src, int64_t elsize, int64_t i if incref: - util.transfer_object_column(dst, src, stride, length) + transfer_object_column(dst, src, stride, length) else: for i in range(length): memcpy(dst, src, elsize) diff --git a/pandas/_libs/src/util.pxd b/pandas/_libs/src/util.pxd index e5fe90aa81f7d..be6591a118dc5 100644 --- a/pandas/_libs/src/util.pxd +++ b/pandas/_libs/src/util.pxd @@ -18,9 +18,6 @@ cdef extern from "numpy_helper.h": object get_value_1d(ndarray, Py_ssize_t) char *get_c_string(object) except NULL object char_to_string(char*) - void transfer_object_column(char *dst, char *src, size_t stride, - size_t length) - object sarr_from_data(cnp.dtype, int length, void* data) object unbox_if_zerodim(object arr) ctypedef fused numeric: @@ -100,8 +97,6 @@ cdef inline set_value_at(ndarray arr, object loc, object value): set_value_at_unsafe(arr, loc, value) -cdef inline int is_contiguous(ndarray arr): - return cnp.PyArray_CHKFLAGS(arr, cnp.NPY_C_CONTIGUOUS) cdef inline is_array(object o): return cnp.PyArray_Check(o) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 5e08df7dfe27b..342e282f28d3b 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1,8 +1,5 @@ # -*- coding: utf-8 -*- # cython: profile=False -# cython: linetrace=False -# distutils: define_macros=CYTHON_TRACE=0 -# distutils: define_macros=CYTHON_TRACE_NOGIL=0 cimport numpy as np from numpy cimport int64_t, ndarray, float64_t @@ -10,15 +7,13 @@ import numpy as np np.import_array() -from cpython cimport PyTypeObject, PyFloat_Check - -cdef extern from "Python.h": - cdef PyTypeObject *Py_TYPE(object) +from cpython cimport PyFloat_Check from util cimport (is_integer_object, is_float_object, is_string_object, is_datetime64_object) from cpython.datetime cimport (PyDateTime_Check, PyDate_Check, + PyDateTime_CheckExact, PyDateTime_IMPORT, timedelta, datetime, date) # import datetime C API @@ -47,10 +42,8 @@ UTC = pytz.utc from tslibs.timedeltas cimport cast_from_unit from tslibs.timedeltas import Timedelta -from tslibs.timezones cimport ( - is_utc, is_tzlocal, is_fixed_offset, - treat_tz_as_pytz, - get_dst_info) +from tslibs.timezones cimport (is_utc, is_tzlocal, is_fixed_offset, + treat_tz_as_pytz, get_dst_info) from tslibs.conversion cimport (tz_convert_single, _TSObject, convert_datetime_to_tsobject, get_datetime64_nanos) @@ -204,13 +197,6 @@ def ints_to_pytimedelta(ndarray[int64_t] arr, box=False): return result -cdef PyTypeObject* ts_type = Timestamp - - -cdef inline bint is_timestamp(object o): - return Py_TYPE(o) == ts_type # isinstance(o, Timestamp) - - def _test_parse_iso8601(object ts): """ TESTING ONLY: Parse string into Timestamp using iso8601 parser. Used @@ -333,14 +319,6 @@ def format_array_from_datetime(ndarray[int64_t] values, object tz=None, return result -# const for parsers - -_MONTHS = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', - 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'] -_MONTH_NUMBERS = {k: i for i, k in enumerate(_MONTHS)} -_MONTH_ALIASES = {(k + 1): v for k, v in enumerate(_MONTHS)} - - cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): """ convert the ndarray according to the unit @@ -360,7 +338,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): bint is_ignore = errors=='ignore' bint is_coerce = errors=='coerce' bint is_raise = errors=='raise' - bint need_to_iterate=True + bint need_to_iterate = True ndarray[int64_t] iresult ndarray[object] oresult @@ -383,7 +361,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): mask = iresult == iNaT iresult[mask] = 0 fvalues = iresult.astype('f8') * m - need_to_iterate=False + need_to_iterate = False except: pass @@ -394,7 +372,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'): or (fvalues > _NS_UPPER_BOUND).any()): raise OutOfBoundsDatetime( "cannot convert input with unit '{0}'".format(unit)) - result = (iresult *m).astype('M8[ns]') + result = (iresult * m).astype('M8[ns]') iresult = result.view('i8') iresult[mask] = iNaT return result @@ -545,7 +523,8 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', 'utc=True') else: iresult[i] = pydatetime_to_dt64(val, &dts) - if is_timestamp(val): + if not PyDateTime_CheckExact(val): + # i.e. a Timestamp object iresult[i] += val.nanosecond try: check_dts_bounds(&dts) @@ -752,11 +731,15 @@ cpdef normalize_date(object dt): ------- normalized : datetime.datetime or Timestamp """ - if is_timestamp(dt): - return dt.replace(hour=0, minute=0, second=0, microsecond=0, - nanosecond=0) - elif PyDateTime_Check(dt): - return dt.replace(hour=0, minute=0, second=0, microsecond=0) + if PyDateTime_Check(dt): + if not PyDateTime_CheckExact(dt): + # i.e. a Timestamp object + return dt.replace(hour=0, minute=0, second=0, microsecond=0, + nanosecond=0) + else: + # regular datetime object + return dt.replace(hour=0, minute=0, second=0, microsecond=0) + # TODO: Make sure DST crossing is handled correctly here elif PyDate_Check(dt): return datetime(dt.year, dt.month, dt.day) else: diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 4f6b2f52d4d24..c12a15b71487b 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -32,10 +32,10 @@ from util cimport (is_string_object, is_integer_object, is_float_object) from timedeltas cimport cast_from_unit -from timezones cimport ( - is_utc, is_tzlocal, is_fixed_offset, - treat_tz_as_dateutil, treat_tz_as_pytz, - get_utcoffset, get_dst_info, get_timezone, maybe_get_tz) +from timezones cimport (is_utc, is_tzlocal, is_fixed_offset, + treat_tz_as_dateutil, treat_tz_as_pytz, + get_utcoffset, get_dst_info, + get_timezone, maybe_get_tz) from parsing import parse_datetime_string from nattype import nat_strings, NaT diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index b321ca1659682..950677b3b53db 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -1,8 +1,5 @@ # -*- coding: utf-8 -*- # cython: profile=False -# cython: linetrace=False -# distutils: define_macros=CYTHON_TRACE=0 -# distutils: define_macros=CYTHON_TRACE_NOGIL=0 """ Functions for accessing attributes of Timestamp/datetime64/datetime-like objects and arrays diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 90882eefd9f67..8ce1d9cdf2158 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -1,8 +1,5 @@ # -*- coding: utf-8 -*- # cython: profile=False -# cython: linetrace=False -# distutils: define_macros=CYTHON_TRACE=0 -# distutils: define_macros=CYTHON_TRACE_NOGIL=0 """ Parsing functions for datetime and datetime-like strings. """ diff --git a/pandas/_libs/tslibs/resolution.pyx b/pandas/_libs/tslibs/resolution.pyx index 1c20dbe7f8fc9..d2b518c74a1e3 100644 --- a/pandas/_libs/tslibs/resolution.pyx +++ b/pandas/_libs/tslibs/resolution.pyx @@ -10,20 +10,18 @@ np.import_array() from util cimport is_string_object, get_nat -from pandas._libs.khash cimport ( - khiter_t, - kh_destroy_int64, kh_put_int64, - kh_init_int64, kh_int64_t, - kh_resize_int64, kh_get_int64) +from pandas._libs.khash cimport (khiter_t, + kh_destroy_int64, kh_put_int64, + kh_init_int64, kh_int64_t, + kh_resize_int64, kh_get_int64) from cpython.datetime cimport datetime from np_datetime cimport (pandas_datetimestruct, dtstruct_to_dt64, dt64_to_dtstruct) from frequencies cimport get_freq_code -from timezones cimport ( - is_utc, is_tzlocal, - maybe_get_tz, get_dst_info, get_utcoffset) +from timezones cimport (is_utc, is_tzlocal, + maybe_get_tz, get_dst_info, get_utcoffset) from fields import build_field_sarray from conversion import tz_convert diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index d326f2cb68f24..de9f75344b2bf 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -1,8 +1,5 @@ # -*- coding: utf-8 -*- # cython: profile=False -# cython: linetrace=False -# distutils: define_macros=CYTHON_TRACE=0 -# distutils: define_macros=CYTHON_TRACE_NOGIL=0 cimport cython from cython cimport Py_ssize_t diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index 67ed725436581..9cbcfa4f46008 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -5,7 +5,8 @@ import pandas.util.testing as tm import pandas.core.indexes.period as period from pandas.compat import lrange -from pandas.tseries.frequencies import get_freq, MONTHS +from pandas.tseries.frequencies import get_freq +from pandas._libs.tslibs.resolution import _MONTHS as MONTHS from pandas._libs.tslibs.period import period_ordinal, period_asfreq from pandas import (PeriodIndex, Period, DatetimeIndex, Timestamp, Series, date_range, to_datetime, period_range) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 10c3c0ea507c1..1fd6befd64f57 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -22,8 +22,7 @@ from pandas.core.base import SpecificationError, AbstractMethodError from pandas.errors import UnsupportedFunctionCall from pandas.core.groupby import DataError -from pandas._libs.tslibs.resolution import DAYS -from pandas.tseries.frequencies import MONTHS +from pandas._libs.tslibs.resolution import DAYS, _MONTHS as MONTHS from pandas.tseries.frequencies import to_offset from pandas.core.indexes.datetimes import date_range from pandas.tseries.offsets import Minute, BDay diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index fef88587a7282..460ad3f5591fc 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -17,7 +17,6 @@ from pandas.util._decorators import deprecate_kwarg import pandas.tseries.offsets as offsets -from pandas._libs import tslib from pandas._libs.tslib import Timedelta from pandas._libs.tslibs.frequencies import ( # noqa get_freq_code, _base_and_stride, _period_str_to_code, @@ -26,7 +25,7 @@ from pandas._libs.tslibs.resolution import (Resolution, _FrequencyInferer, _TimedeltaFrequencyInferer) -from pandas._libs.tslibs.parsing import _get_rule_month +from pandas._libs.tslibs.parsing import _get_rule_month, _MONTH_NUMBERS from pytz import AmbiguousTimeError @@ -497,8 +496,8 @@ def _is_annual(rule): def _quarter_months_conform(source, target): - snum = _month_numbers[source] - tnum = _month_numbers[target] + snum = _MONTH_NUMBERS[source] + tnum = _MONTH_NUMBERS[target] return snum % 3 == tnum % 3 @@ -515,7 +514,3 @@ def _is_monthly(rule): def _is_weekly(rule): rule = rule.upper() return rule == 'W' or rule.startswith('W-') - - -MONTHS = tslib._MONTHS -_month_numbers = tslib._MONTH_NUMBERS