Skip to content

Added function to convert Excel date to datetime. #78

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 26, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 164 additions & 0 deletions tests/test_xldate_to_datetime.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
###############################################################################
#
# Tests for the xlrd xldate.xldate_as_datetime() function.
#

import unittest
from datetime import datetime
from xlrd import xldate

# Values passed as the datemode argument of xldate.xldate_as_datetime() in
# the tests below: a false value selects the standard 1900 epoch and a true
# value selects the Mac/1904 epoch.
not_1904 = False
is_1904 = True


class TestConvertToDateTime(unittest.TestCase):
    """
    Testcases to test the xldate.xldate_as_datetime() function against dates
    extracted from Excel files, with 1900/1904 epochs.

    """

    def _assert_conversions(self, excel_dates, date_format, datemode):
        """
        Convert each Excel date string to a datetime object and compare it
        against the datetime returned by xldate.xldate_as_datetime().

        excel_dates is a sequence of (date string, Excel serial number)
        pairs, date_format is the strptime() format of the date strings and
        datemode is passed through unchanged to xldate_as_datetime().

        """
        for date_string, serial in excel_dates:
            exp = datetime.strptime(date_string, date_format)
            got = xldate.xldate_as_datetime(serial, datemode)

            # Name the failing input: a bare datetime mismatch does not say
            # which serial number in the table produced it.
            self.assertEqual(
                got, exp,
                "xldate %r (datemode=%r): got %s, expected %s"
                % (serial, datemode, got, exp))

    def test_dates_and_times_1900_epoch(self):
        """
        Test the xldate_as_datetime() function for dates and times in
        the Excel standard 1900 epoch.

        """
        # Excel date strings and corresponding serial date numbers taken
        # from an Excel file.
        excel_dates = [
            # Excel's 0.0 date in the 1900 epoch is 1 day before 1900.
            ('1899-12-31T00:00:00.000', 0),

            # Date/time before the false Excel 1900 leapday.
            ('1900-02-28T02:11:11.986', 59.09111094906),

            # Date/time after the false Excel 1900 leapday.
            ('1900-03-01T05:46:44.068', 61.24078782403),

            # Random date/times in Excel's 0-9999.9999+ range.
            ('1982-08-25T00:15:20.213', 30188.010650613425),
            ('2065-04-19T00:16:48.290', 60376.011670023145),
            ('3222-06-11T03:08:08.251', 483014.13065105322),
            ('4379-08-03T06:14:48.580', 905652.26028449077),
            ('5949-12-30T12:59:54.263', 1479232.5416002662),

            # End of Excel's date range.
            ('9999-12-31T23:59:59.000', 2958465.999988426),
        ]

        self._assert_conversions(
            excel_dates, "%Y-%m-%dT%H:%M:%S.%f", not_1904)

    def test_dates_only_1900_epoch(self):
        """
        Test the xldate_as_datetime() function for dates in the Excel
        standard 1900 epoch.

        """
        # Excel date strings and corresponding serial date numbers taken
        # from an Excel file.
        excel_dates = [
            # Excel's day 0 in the 1900 epoch is 1 day before 1900.
            ('1899-12-31', 0),

            # Excel's day 1 in the 1900 epoch.
            ('1900-01-01', 1),

            # Date before the false Excel 1900 leapday.
            ('1900-02-28', 59),

            # Date after the false Excel 1900 leapday.
            ('1900-03-01', 61),

            # Random dates in Excel's 0-9999.9999+ range.
            ('1902-09-27', 1001),
            ('1999-12-31', 36525),
            ('2000-01-01', 36526),
            ('4000-12-31', 767376),
            ('4321-01-01', 884254),
            ('9999-01-01', 2958101),

            # End of Excel's date range.
            ('9999-12-31', 2958465),
        ]

        self._assert_conversions(excel_dates, "%Y-%m-%d", not_1904)

    def test_dates_only_1904_epoch(self):
        """
        Test the xldate_as_datetime() function for dates in the Excel
        Mac/1904 epoch.

        """
        # Excel date strings and corresponding serial date numbers taken
        # from an Excel file.
        excel_dates = [
            # Excel's day 0 in the 1904 epoch.
            ('1904-01-01', 0),

            # Random dates in Excel's 0-9999.9999+ range.
            ('1904-01-31', 30),
            ('1904-08-31', 243),
            ('1999-02-28', 34757),
            ('1999-12-31', 35063),
            ('2000-01-01', 35064),
            ('2400-12-31', 181526),
            ('4000-01-01', 765549),
            ('9999-01-01', 2956639),

            # End of Excel's date range.
            ('9999-12-31', 2957003),
        ]

        self._assert_conversions(excel_dates, "%Y-%m-%d", is_1904)

    def test_times_only(self):
        """
        Test the xldate_as_datetime() function for times only, i.e., the
        fractional part of the Excel date when the serial date is 0.

        """
        # Excel date strings and corresponding serial date numbers taken
        # from an Excel file. The 1899-12-31 date is Excel's day 0.
        excel_dates = [
            # Random times in Excel's 0-0.9999+ range for 1 day.
            ('1899-12-31T00:00:00.000', 0),
            ('1899-12-31T00:15:20.213', 1.0650613425925924E-2),
            ('1899-12-31T02:24:37.095', 0.10042934027777778),
            ('1899-12-31T04:56:35.792', 0.2059698148148148),
            ('1899-12-31T07:31:20.407', 0.31343063657407405),
            ('1899-12-31T09:37:23.945', 0.40097158564814817),
            ('1899-12-31T12:09:48.602', 0.50681252314814818),
            ('1899-12-31T14:37:57.451', 0.60969271990740748),
            ('1899-12-31T17:04:02.415', 0.71113906250000003),
            ('1899-12-31T19:14:24.673', 0.80167445601851861),
            ('1899-12-31T21:39:05.944', 0.90215212962962965),
            ('1899-12-31T23:17:12.632', 0.97028509259259266),
            ('1899-12-31T23:59:59.999', 0.99999998842592586),
        ]

        self._assert_conversions(
            excel_dates, "%Y-%m-%dT%H:%M:%S.%f", not_1904)
40 changes: 40 additions & 0 deletions xlrd/xldate.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,16 @@
# More importantly:
# Noon on Gregorian 1900-03-01 (day 61 in the 1900-based system) is JDN 2415080.0
# Noon on Gregorian 1904-01-02 (day 1 in the 1904-based system) is JDN 2416482.0
import datetime

# Difference between a Julian Day Number and the matching Excel day number,
# one entry per date system: (1900-based, 1904-based).
# NOTE(review): presumably indexed by datemode -- confirm against the callers.
_JDN_delta = (2415080 - 61, 2416482 - 1)
# Sanity check: the two date systems are 1462 days apart.
assert _JDN_delta[1] - _JDN_delta[0] == 1462

# Pre-calculate the datetime epochs for efficiency.
# Day 0 of the 1904-based system.
epoch_1904 = datetime.datetime(1904, 1, 1)
# Day 0 of the 1900-based system; used for Excel dates below 60.
epoch_1900 = datetime.datetime(1899, 12, 31)
# One day earlier than epoch_1900; used for Excel dates of 60 and above to
# work around Excel's 1900 leap year bug.
epoch_1900_minus_1 = datetime.datetime(1899, 12, 30)

class XLDateError(ValueError):
    """Base class for this module's Excel date errors; a ValueError subclass."""


class XLDateNegative(XLDateError):
    """XLDateError subclass; going by its name it signals a negative Excel
    date, but the raise sites are not visible here -- confirm before relying
    on that."""
Expand Down Expand Up @@ -90,6 +96,40 @@ def xldate_as_tuple(xldate, datemode):
else:
return ((yreg // 1461) - 4716, mp + 3, d, hour, minute, second)


##
# Convert an Excel date/time number into a datetime.datetime object.
#
# @param xldate The Excel number
# @param datemode 0: 1900-based, 1: 1904-based.
#
# @return a datetime.datetime() object.
#
def xldate_as_datetime(xldate, datemode):
    """Return the datetime.datetime equivalent of an Excel date/time number.

    xldate is the Excel number. datemode selects the date system: a false
    value (0) means 1900-based, a true value (1) means 1904-based.
    """
    # Choose the epoch that the day count is measured from.
    if datemode:
        base = epoch_1904
    elif xldate < 60:
        base = epoch_1900
    else:
        # Excel's 1900 leap year bug: from day 60 onwards count from one
        # day earlier so that later dates come out right.
        base = epoch_1900_minus_1

    # The integer part is the number of days since the epoch; what is left
    # over is the fraction of a day.
    # NOTE(review): divmod(xldate, 1) was suggested in code review as a
    # possibly faster alternative; no profiling results are recorded here.
    whole_days = int(xldate)
    day_fraction = xldate - whole_days

    # Work in Excel's millisecond resolution: round the fraction to whole
    # milliseconds, then split into seconds and leftover milliseconds.
    total_ms = int(round(day_fraction * 86400000.0))
    whole_seconds, leftover_ms = divmod(total_ms, 1000)

    offset = datetime.timedelta(
        days=whole_days, seconds=whole_seconds, milliseconds=leftover_ms)
    return base + offset


# === conversions from date/time to xl numbers

def _leap(y):
Expand Down