From a14af14997ca56f6db87bfae72d9c8939a7e4872 Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Tue, 17 Aug 2021 11:05:48 -0400 Subject: [PATCH 1/4] rdrecord: correctly infer length for multi-frequency records. If the header file does not specify the record length, it must be inferred from the size of the signal file (dividing the data length by the number of samples per frame.) Previously, this calculation assumed that tsamps_per_frame == n_sig, which is not necessarily the case. Correctly calculate the total spf for the first signal file and use that as the divisor, rather than the number of signals. Rename the "n_sig" parameter of _infer_sig_len to tsamps_per_frame accordingly. --- wfdb/io/_signal.py | 12 ++++++------ wfdb/io/record.py | 11 ++++++++++- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py index 3c895613..1a1bbecf 100644 --- a/wfdb/io/_signal.py +++ b/wfdb/io/_signal.py @@ -2059,7 +2059,7 @@ def describe_list_indices(full_list): return unique_elements, element_indices -def _infer_sig_len(file_name, fmt, n_sig, dir_name, pn_dir=None): +def _infer_sig_len(file_name, fmt, tsamps_per_frame, dir_name, pn_dir=None): """ Infer the length of a signal from a dat file. @@ -2069,8 +2069,8 @@ def _infer_sig_len(file_name, fmt, n_sig, dir_name, pn_dir=None): Name of the dat file. fmt : str WFDB fmt of the dat file. - n_sig : int - Number of signals contained in the dat file. + tsamps_per_frame : int + Total number of samples per frame contained in the dat file. dir_name : str The full directory where the dat file(s) are located, if the dat file(s) are local. @@ -2081,11 +2081,11 @@ def _infer_sig_len(file_name, fmt, n_sig, dir_name, pn_dir=None): Returns ------- sig_len : int - The length of the signal. + The length of the signal file in frames. Notes ----- - sig_len * n_sig * bytes_per_sample == file_size + sig_len * tsamps_per_frame * bytes_per_sample == file_size """ if pn_dir is None: @@ -2094,7 +2094,7 @@ def _infer_sig_len(file_name, fmt, n_sig, dir_name, pn_dir=None): file_size = download._remote_file_size(file_name=file_name, pn_dir=pn_dir) - sig_len = int(file_size / (BYTES_PER_SAMPLE[fmt] * n_sig)) + sig_len = int(file_size / (BYTES_PER_SAMPLE[fmt] * tsamps_per_frame)) return sig_len diff --git a/wfdb/io/record.py b/wfdb/io/record.py index f140e2ef..d1370493 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -3441,9 +3441,18 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None, if record.n_sig == 0: record.sig_len = 0 else: + # Calculate total number of samples per frame in the + # first dat file. + tsamps_per_frame = 0 + for fname, spf in zip(record.file_name, + record.samps_per_frame): + if fname == record.file_name[0]: + tsamps_per_frame += spf + + # Calculate length from size of the dat file. record.sig_len = _signal._infer_sig_len( file_name=record.file_name[0], fmt=record.fmt[0], - n_sig=record.file_name.count(record.file_name[0]), + tsamps_per_frame=tsamps_per_frame, dir_name=dir_name, pn_dir=pn_dir) sampto = record.sig_len From 1e07f0d599718eff637a9edf60f69c2335062b7e Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Tue, 17 Aug 2021 11:10:54 -0400 Subject: [PATCH 2/4] test_infer_sig_len: test inferring length of multi-frequency record. If the header file does not specify the signal length, rdrecord should calculate it from the size of the signal file. Test that this works correctly for multi-frequency records. --- sample-data/drive02-no-len.hea | 6 ++++++ tests/test_record.py | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) create mode 100644 sample-data/drive02-no-len.hea diff --git a/sample-data/drive02-no-len.hea b/sample-data/drive02-no-len.hea new file mode 100644 index 00000000..cae31a6a --- /dev/null +++ b/sample-data/drive02-no-len.hea @@ -0,0 +1,6 @@ +drive02-no-len 5 15.5 +drive02.dat 16x32 1000 16 0 -1236 14736 0 ECG +drive02.dat 16x2 1000 16 0 1802 13501 0 foot GSR +drive02.dat 16 1.0001/bpm 16 0 75 -19070 0 HR +drive02.dat 16 100 16 0 0 -9226 0 marker +drive02.dat 16x2 500 16 0 5804 -14191 0 RESP diff --git a/tests/test_record.py b/tests/test_record.py index 1c9f7413..bdcfbdd3 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -701,9 +701,9 @@ def test_infer_sig_len(self): Read two headers. The records should be the same. """ - record = wfdb.rdrecord('sample-data/100') - record_2 = wfdb.rdrecord('sample-data/100-no-len') - record_2.record_name = '100' + record = wfdb.rdrecord('sample-data/drive02') + record_2 = wfdb.rdrecord('sample-data/drive02-no-len') + record_2.record_name = record.record_name assert record_2.__eq__(record) From a95227fa2da2faf63ad6dc07e31804b9f797c464 Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Tue, 17 Aug 2021 11:12:19 -0400 Subject: [PATCH 3/4] rdrecord: correctly infer length for signal files with a prolog. If the header file does not specify the record length, it must be inferred from the size of the signal file (dividing the data length by the number of samples per frame.) Previously, this calculation assumed that the file did not contain a prolog. Correctly calculate the data length by subtracting the prolog length (byte offset) from the size of the file. --- wfdb/io/_signal.py | 10 ++++++++-- wfdb/io/record.py | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py index 1a1bbecf..fa3e3235 100644 --- a/wfdb/io/_signal.py +++ b/wfdb/io/_signal.py @@ -2059,7 +2059,8 @@ def describe_list_indices(full_list): return unique_elements, element_indices -def _infer_sig_len(file_name, fmt, tsamps_per_frame, dir_name, pn_dir=None): +def _infer_sig_len(file_name, fmt, tsamps_per_frame, byte_offset, + dir_name, pn_dir=None): """ Infer the length of a signal from a dat file. @@ -2071,6 +2072,8 @@ def _infer_sig_len(file_name, fmt, tsamps_per_frame, dir_name, pn_dir=None): WFDB fmt of the dat file. tsamps_per_frame : int Total number of samples per frame contained in the dat file. + byte_offset : int or None + The byte offset of the dat file. None is equivalent to zero. dir_name : str The full directory where the dat file(s) are located, if the dat file(s) are local. @@ -2094,7 +2097,10 @@ def _infer_sig_len(file_name, fmt, tsamps_per_frame, dir_name, pn_dir=None): file_size = download._remote_file_size(file_name=file_name, pn_dir=pn_dir) - sig_len = int(file_size / (BYTES_PER_SAMPLE[fmt] * tsamps_per_frame)) + if byte_offset is None: + byte_offset = 0 + data_size = file_size - byte_offset + sig_len = int(data_size / (BYTES_PER_SAMPLE[fmt] * tsamps_per_frame)) return sig_len diff --git a/wfdb/io/record.py b/wfdb/io/record.py index d1370493..93c74d9a 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -3453,6 +3453,7 @@ def rdrecord(record_name, sampfrom=0, sampto=None, channels=None, record.sig_len = _signal._infer_sig_len( file_name=record.file_name[0], fmt=record.fmt[0], tsamps_per_frame=tsamps_per_frame, + byte_offset=record.byte_offset[0], dir_name=dir_name, pn_dir=pn_dir) sampto = record.sig_len From e36dcbf384628d40459224964067409fc397eadb Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Tue, 17 Aug 2021 11:16:21 -0400 Subject: [PATCH 4/4] test_infer_sig_len: test inferring length of signal file with prolog. If the header file does not specify the signal length, rdrecord should calculate it from the size of the signal file. Test that this works correctly for a signal file with a prolog. --- sample-data/a103l-no-len.hea | 6 ++++++ tests/test_record.py | 6 ++++++ 2 files changed, 12 insertions(+) create mode 100644 sample-data/a103l-no-len.hea diff --git a/sample-data/a103l-no-len.hea b/sample-data/a103l-no-len.hea new file mode 100644 index 00000000..d768afae --- /dev/null +++ b/sample-data/a103l-no-len.hea @@ -0,0 +1,6 @@ +a103l-no-len 3 250 +a103l.mat 16+24 7247/mV 16 0 -171 -27403 0 II +a103l.mat 16+24 1.052e+04/mV 16 0 9127 -301 0 V +a103l.mat 16+24 1.253e+04/NU 16 0 6042 -17391 0 PLETH +#Asystole +#False alarm diff --git a/tests/test_record.py b/tests/test_record.py index bdcfbdd3..bb8c9d42 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -707,6 +707,12 @@ def test_infer_sig_len(self): assert record_2.__eq__(record) + record = wfdb.rdrecord('sample-data/a103l') + record_2 = wfdb.rdrecord('sample-data/a103l-no-len') + record_2.record_name = record.record_name + + assert record_2.__eq__(record) + class TestDownload(unittest.TestCase): # Test that we can download records with no "dat" file