@@ -19,6 +19,7 @@ import numpy as np
1919cnp.import_array()
2020
2121import pytz
22+ from dateutil.tz import tzlocal, tzutc as dateutil_utc
2223
2324
2425from util cimport (is_integer_object, is_float_object, is_string_object,
@@ -328,7 +329,7 @@ cpdef array_with_unit_to_datetime(ndarray values, unit, errors='coerce'):
328329 if unit == ' ns' :
329330 if issubclass (values.dtype.type, np.integer):
330331 return values.astype(' M8[ns]' )
331- return array_to_datetime(values.astype(object ), errors = errors)
332+ return array_to_datetime(values.astype(object ), errors = errors)[ 0 ]
332333
333334 m = cast_from_unit(None , unit)
334335
@@ -457,21 +458,58 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
457458 dayfirst = False , yearfirst = False ,
458459 format = None , utc = None ,
459460 require_iso8601 = False ):
461+ """
462+ Converts a 1D array of date-like values to a numpy array of either:
463+ 1) datetime64[ns] data
464+ 2) datetime.datetime objects, if OutOfBoundsDatetime or TypeError
465+ is encountered
466+
467+ Also returns a pytz.FixedOffset if an array of strings with the same
468+ timezone offset is passed and utc=True is not passed. Otherwise, None
469+ is returned
470+
471+ Handles datetime.date, datetime.datetime, np.datetime64 objects, numeric,
472+ strings
473+
474+ Parameters
475+ ----------
476+ values : ndarray of object
477+ date-like objects to convert
478+ errors : str, default 'raise'
479+ error behavior when parsing
480+ dayfirst : bool, default False
481+ dayfirst parsing behavior when encountering datetime strings
482+ yearfirst : bool, default False
483+ yearfirst parsing behavior when encountering datetime strings
484+ format : str, default None
485+ format of the string to parse
486+ utc : bool, default None
487+ indicator whether the dates should be UTC
488+ require_iso8601 : bool, default False
489+ indicator whether the datetime string should be iso8601
490+
491+ Returns
492+ -------
493+ tuple (ndarray, tzoffset)
494+ """
460495 cdef:
461496 Py_ssize_t i, n = len (values)
462- object val, py_dt
497+ object val, py_dt, tz, tz_out = None
463498 ndarray[int64_t] iresult
464499 ndarray[object ] oresult
465500 npy_datetimestruct dts
466501 bint utc_convert = bool (utc)
467502 bint seen_integer = 0
468503 bint seen_string = 0
469504 bint seen_datetime = 0
505+ bint seen_datetime_offset = 0
470506 bint is_raise = errors== ' raise'
471507 bint is_ignore = errors== ' ignore'
472508 bint is_coerce = errors== ' coerce'
473509 _TSObject _ts
474510 int out_local= 0 , out_tzoffset= 0
511+ float offset_seconds
512+ set out_tzoffset_vals = set ()
475513
476514 # specify error conditions
477515 assert is_raise or is_ignore or is_coerce
@@ -584,7 +622,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
584622 raise ValueError (" time data {val} doesn't match "
585623 " format specified"
586624 .format(val = val))
587- return values
625+ return values, tz_out
588626
589627 try :
590628 py_dt = parse_datetime_string(val, dayfirst = dayfirst,
@@ -595,6 +633,30 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
595633 continue
596634 raise TypeError (" invalid string coercion to datetime" )
597635
636+ # If the dateutil parser returned tzinfo, capture it
637+ # to check if all arguments have the same tzinfo
638+ tz = py_dt.tzinfo
639+ if tz is not None :
640+ seen_datetime_offset = 1
641+ if tz == dateutil_utc():
642+ # dateutil.tz.tzutc has no offset-like attribute
643+ # Just add the 0 offset explicitly
644+ out_tzoffset_vals.add(0 )
645+ elif tz == tzlocal():
646+ # An `is` comparison fails for tzlocal, unlike other
647+ # dateutil.tz objects, so it is compared with ==. Also,
648+ # dateutil.tz.tzlocal has no _offset attribute like tzoffset
649+ offset_seconds = tz._dst_offset.total_seconds()
650+ out_tzoffset_vals.add(offset_seconds)
651+ else :
652+ # dateutil.tz.tzoffset objects cannot be hashed
653+ # store the total_seconds() instead
654+ offset_seconds = tz._offset.total_seconds()
655+ out_tzoffset_vals.add(offset_seconds)
656+ else :
657+ # Add a marker for naive string, to track if we are
658+ # parsing mixed naive and aware strings
659+ out_tzoffset_vals.add(' naive' )
598660 try :
599661 _ts = convert_datetime_to_tsobject(py_dt, None )
600662 iresult[i] = _ts.value
@@ -614,8 +676,17 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
614676 # where we left off
615677 value = dtstruct_to_dt64(& dts)
616678 if out_local == 1 :
679+ seen_datetime_offset = 1
680+ # Store the out_tzoffset in seconds
681+ # since we store the total_seconds of
682+ # dateutil.tz.tzoffset objects
683+ out_tzoffset_vals.add(out_tzoffset * 60. )
617684 tz = pytz.FixedOffset(out_tzoffset)
618685 value = tz_convert_single(value, tz, ' UTC' )
686+ else :
687+ # Add a marker for naive string, to track if we are
688+ # parsing mixed naive and aware strings
689+ out_tzoffset_vals.add(' naive' )
619690 iresult[i] = value
620691 try :
621692 check_dts_bounds(& dts)
@@ -631,7 +702,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
631702 raise ValueError (" time data {val} doesn't "
632703 " match format specified"
633704 .format(val = val))
634- return values
705+ return values, tz_out
635706 raise
636707
637708 else :
@@ -657,7 +728,21 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
657728 else :
658729 raise TypeError
659730
660- return result
731+ if seen_datetime_offset and not utc_convert:
732+ # GH 17697
733+ # 1) If all the offsets are equal, return one offset for
734+ # the parsed dates to (maybe) pass to DatetimeIndex
735+ # 2) If the offsets are different, then force the parsing down the
736+ # object path where an array of datetimes
737+ # (with individual dateutil.tzoffsets) are returned
738+ is_same_offsets = len (out_tzoffset_vals) == 1
739+ if not is_same_offsets:
740+ return array_to_datetime_object(values, is_raise,
741+ dayfirst, yearfirst)
742+ else :
743+ tz_offset = out_tzoffset_vals.pop()
744+ tz_out = pytz.FixedOffset(tz_offset / 60. )
745+ return result, tz_out
661746 except OutOfBoundsDatetime:
662747 if is_raise:
663748 raise
@@ -679,36 +764,67 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise',
679764 oresult[i] = val.item()
680765 else :
681766 oresult[i] = val
682- return oresult
767+ return oresult, tz_out
683768 except TypeError :
684- oresult = np.empty(n, dtype = object )
769+ return array_to_datetime_object(values, is_raise, dayfirst, yearfirst )
685770
686- for i in range (n):
687- val = values[i]
688- if checknull_with_nat(val):
689- oresult[i] = val
690- elif is_string_object(val):
691771
692- if len (val) == 0 or val in nat_strings:
693- oresult[i] = ' NaT'
694- continue
cdef array_to_datetime_object(ndarray[object] values, bint is_raise,
                              dayfirst=False, yearfirst=False):
    """
    Fallback for array_to_datetime.

    Attempts to parse datetime strings with dateutil and returns them in an
    object array of datetime.datetime objects.

    Parameters
    ----------
    values : ndarray of object
        date-like objects to convert
    is_raise : bool
        error behavior when parsing: if True, a failure is raised; otherwise
        the input array is handed back unchanged
    dayfirst : bool, default False
        dayfirst parsing behavior when encountering datetime strings
    yearfirst : bool, default False
        yearfirst parsing behavior when encountering datetime strings

    Returns
    -------
    tuple (ndarray, None)
        ``(oresult, None)`` when every element was handled, or
        ``(values, None)`` with the untouched input as soon as one element
        cannot be handled and ``is_raise`` is False. The second item is
        always None.
    """
    cdef:
        Py_ssize_t i, n = len(values)
        object val
        ndarray[object] oresult
        npy_datetimestruct dts

    oresult = np.empty(n, dtype=object)

    # We return an object array and only attempt to parse:
    # 1) NaT or NaT-like values
    # 2) datetime strings, which we return as datetime.datetime
    for i in range(n):
        val = values[i]
        if checknull_with_nat(val):
            # Null-like values are passed through as they are
            oresult[i] = val
        elif is_string_object(val):
            if len(val) == 0 or val in nat_strings:
                # Empty and NaT-like strings are stored as the 'NaT' string
                oresult[i] = 'NaT'
                continue
            try:
                oresult[i] = parse_datetime_string(val, dayfirst=dayfirst,
                                                   yearfirst=yearfirst)
                # presumably fills dts from the parsed datetime so that its
                # bounds can be validated -- confirm against the helpers
                pydatetime_to_dt64(oresult[i], &dts)
                check_dts_bounds(&dts)
            except (ValueError, OverflowError):
                if is_raise:
                    raise
                # Give up on the whole array and return the input untouched
                return values, None
        else:
            # Any other kind of value is not converted here.
            # NOTE(review): this bare ``raise`` is not inside an ``except``
            # block of this function, so it can only re-raise an exception
            # that the caller is currently handling (the call made from the
            # ``except TypeError`` branch of array_to_datetime). The call made
            # for mismatched offsets is not inside a handler -- confirm the
            # intended behavior on that path.
            if is_raise:
                raise
            return values, None
    return oresult, None
712828
713829
714830cdef inline bint _parse_today_now(str val, int64_t* iresult):
0 commit comments