Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion datajoint/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ def read_blob(self, n_bytes=None):
"F": self.read_recarray, # numpy array with fields, including recarrays
"d": self.read_decimal, # a decimal
"t": self.read_datetime, # date, time, or datetime
"T": self.read_int64_datetime, # np.datetime64
"Z": self.read_np_array_dt64,
"u": self.read_uuid, # UUID
}[data_structure_code]
except KeyError:
Expand Down Expand Up @@ -182,7 +184,9 @@ def pack_blob(self, obj):
return self.pack_array(np.array(obj))
if isinstance(obj, (float, int, complex)):
return self.pack_array(np.array(obj))
if isinstance(obj, (datetime.datetime, datetime.date, datetime.time)):
if isinstance(
obj, (datetime.datetime, datetime.date, datetime.time, np.datetime64)
):
return self.pack_datetime(obj)
if isinstance(obj, Decimal):
return self.pack_decimal(obj)
Expand Down Expand Up @@ -240,6 +244,9 @@ def pack_array(self, array):
"""
Serialize an np.ndarray into bytes. Scalars are encoded with ndim=0.
"""
if "datetime64" in array.dtype.name:
self.set_dj0()
return b"Z" + array.astype("datetime64[us]").tobytes()
blob = (
b"A"
+ np.uint64(array.ndim).tobytes()
Expand Down Expand Up @@ -426,6 +433,10 @@ def pack_dict(self, d):
)
)

def read_np_array_dt64(self):
    """
    Deserialize a datetime64 array stored under the "Z" type code.

    The matching writer emits only the raw datetime64[us] bytes after the
    type code, so the values are requested from ``read_value`` as
    little-endian microsecond datetimes.

    :return: whatever ``read_value`` yields for dtype "<M8[us]" with count=-1
    """
    # NOTE(review): count=-1 presumably means "everything left in the blob",
    # and no ndim/shape is stored for this code, so the original array shape
    # cannot be restored here -- confirm against read_value and the writer.
    return self.read_value(dtype="<M8[us]", count=-1)

def read_struct(self):
"""deserialize matlab stuct"""
n_dims = self.read_value()
Expand Down Expand Up @@ -504,12 +515,18 @@ def read_datetime(self):
)
return time and date and datetime.datetime.combine(date, time) or time or date

def read_int64_datetime(self):
    """
    Deserialize a scalar np.datetime64 stored under the "T" type code.

    The writer stores the value as the raw bytes of a datetime64[us]
    (8 bytes), so exactly that dtype is requested from ``read_value``.

    :return: the stored timestamp with microsecond resolution, as returned
        by ``read_value`` for dtype "<M8[us]"
    """
    # Name the dtype explicitly, as the "Z" array reader does, instead of
    # reading with read_value's default and casting afterwards. The default
    # is an integer type whose width presumably follows the dims-width
    # setting (use_32bit_dims) -- TODO confirm; a narrower default would
    # consume only part of the 8-byte payload and decode a wrong timestamp.
    return self.read_value(dtype="<M8[us]")

@staticmethod
def pack_datetime(d):
if isinstance(d, datetime.datetime):
date, time = d.date(), d.time()
elif isinstance(d, datetime.date):
date, time = d, None
elif isinstance(d, np.datetime64):
return b"T" + (d.astype("datetime64[us]")).tobytes()
else:
date, time = None, d
return b"t" + (
Expand Down
17 changes: 17 additions & 0 deletions tests/test_blob.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datajoint as dj
import timeit
import numpy as np
import uuid
from . import schema
Expand Down Expand Up @@ -149,6 +150,9 @@ def test_pack():
x == unpack(pack(x)), "Numpy string array object did not pack/unpack correctly"
)

x = np.datetime64("1998").astype("datetime64[us]")
assert_true(x == unpack(pack(x)))


def test_recarrays():
x = np.array([(1.0, 2), (3.0, 4)], dtype=[("x", float), ("y", int)])
Expand Down Expand Up @@ -222,3 +226,16 @@ def test_insert_longblob():
}
(schema.Longblob & "id=1").delete()
dj.blob.use_32bit_dims = False


def test_datetime_serialization_speed():
    """
    Check that np.datetime64 round-trips through pack/unpack faster than
    datetime.datetime.

    If this fails, serializing/deserializing np.datetime64 values has become
    slower than the regular datetime path.
    """
    from datetime import datetime

    # The timed statements are evaluated in this namespace; adding
    # ``datetime`` explicitly keeps "datetime.now()" resolvable regardless of
    # how (or whether) the module imports it at top level.
    namespace = dict(globals(), datetime=datetime)

    def best_time(statement):
        # A single timing sample is easily distorted by scheduler or GC
        # noise; the minimum over several repeats is the stable figure to
        # compare (see the timeit documentation).
        return min(
            timeit.repeat(statement, number=100, repeat=5, globals=namespace)
        )

    numpy_exe_time = best_time("unpack(pack(np.datetime64('now')))")
    python_exe_time = best_time("unpack(pack(datetime.now()))")
    assert numpy_exe_time < python_exe_time, (
        f"np.datetime64 round trip took {numpy_exe_time:.6f}s, "
        f"datetime round trip took {python_exe_time:.6f}s"
    )