diff --git a/sample-data/huge.qrs b/sample-data/huge.qrs new file mode 100644 index 00000000..f48e8e03 Binary files /dev/null and b/sample-data/huge.qrs differ diff --git a/tests/test_annotation.py b/tests/test_annotation.py index 32f39082..c7d0f4d3 100644 --- a/tests/test_annotation.py +++ b/tests/test_annotation.py @@ -1,10 +1,13 @@ +import os import re +import unittest import numpy as np import wfdb -class test_annotation(): + +class TestAnnotation(unittest.TestCase): """ Testing read and write of WFDB annotations, including Physionet streaming. @@ -183,3 +186,34 @@ def test_3(self): assert (comp == [True] * 6) assert annotation.__eq__(pn_annotation) assert annotation.__eq__(write_annotation) + + def test_4(self): + """ + Read and write annotations with large time skips + + Annotation file created by: + echo "xxxxxxxxx 10000000000 N 0 0 0" | wrann -r huge -a qrs + """ + annotation = wfdb.rdann('sample-data/huge', 'qrs') + self.assertEqual(annotation.sample[0], 10000000000) + annotation.wrann() + + annotation1 = wfdb.rdann('sample-data/huge', 'qrs') + annotation2 = wfdb.rdann('huge', 'qrs') + self.assertEqual(annotation1, annotation2) + + @classmethod + def tearDownClass(cls): + writefiles = [ + '100.atr', + '1003.atr', + '12726.anI', + 'huge.qrs', + ] + for file in writefiles: + if os.path.isfile(file): + os.remove(file) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_record.py b/tests/test_record.py index 89fbc941..b71ffba1 100644 --- a/tests/test_record.py +++ b/tests/test_record.py @@ -521,9 +521,9 @@ def test_header_with_non_utf8(self): @classmethod def tearDownClass(cls): "Clean up written files" - writefiles = ['03700181.dat','03700181.hea','100.atr','100.dat', - '100.hea','1003.atr','100_3chan.dat','100_3chan.hea', - '12726.anI','a103l.hea','a103l.mat','s0010_re.dat', + writefiles = ['03700181.dat','03700181.hea','100.dat', + '100.hea','100_3chan.dat','100_3chan.hea', + 'a103l.hea','a103l.mat','s0010_re.dat', 's0010_re.hea','s0010_re.xyz','test01_00s.dat', 'test01_00s.hea','test01_00s_skewframe.hea', 'n8_evoked_raw_95_F1_R9.dat', 'n8_evoked_raw_95_F1_R9.hea'] diff --git a/wfdb/io/annotation.py b/wfdb/io/annotation.py index 7ae619bb..b3d30e1d 100644 --- a/wfdb/io/annotation.py +++ b/wfdb/io/annotation.py @@ -466,8 +466,6 @@ def check_field(self, field): raise ValueError("The 'sample' field must only contain non-negative integers") if min(sampdiffs) < 0 : raise ValueError("The 'sample' field must contain monotonically increasing sample numbers") - if max(sampdiffs) > 2147483648: - raise ValueError('WFDB annotation files cannot store sample differences greater than 2**31') elif field == 'label_store': if min(item) < 1 or max(item) > 49: @@ -1370,19 +1368,30 @@ def field2bytes(field, value): # sample difference sd = value[0] - # Add SKIP element if value is too large for single byte - if sd>1023: - # 8 bytes in total: - # - [0, 59>>2] indicates SKIP - # - Next 4 gives sample difference - # - Final 2 give 0 and sym - data_bytes = [0, 236, (sd&16711680)>>16, (sd&4278190080)>>24, sd&255, (sd&65280)>>8, 0, 4*typecode] - # Just need samp and sym - else: - # - First byte stores low 8 bits of samp - # - Second byte stores high 2 bits of samp - # and sym - data_bytes = [sd & 255, ((sd & 768) >> 8) + 4*typecode] + data_bytes = [] + + # Add SKIP element(s) if the sample difference is too large to + # be stored in the annotation type word. + # + # Each SKIP element consists of three words (6 bytes): + # - Bytes 0-1 contain the SKIP indicator (59 << 10) + # - Bytes 2-3 contain the high 16 bits of the sample difference + # - Bytes 4-5 contain the low 16 bits of the sample difference + # If the total difference exceeds 2**31 - 1, multiple skips must + # be used. + while sd > 1023: + n = min(sd, 0x7fffffff) + data_bytes += [0, 59 << 2, + (n >> 16) & 255, + (n >> 24) & 255, + (n >> 0) & 255, + (n >> 8) & 255] + sd -= n + + # Annotation type itself is stored as a single word: + # - bits 0 to 9 store the sample difference (0 to 1023) + # - bits 10 to 15 store the type code + data_bytes += [sd & 255, ((sd & 768) >> 8) + 4 * typecode] elif field == 'num': # First byte stores num @@ -1653,8 +1662,11 @@ def rdann(record_name, extension, sampfrom=0, sampto=None, shift_samps=False, subtype, chan, num, aux_note) # Convert lists to numpy arrays dtype='int' - (sample, label_store, subtype, - chan, num) = lists_to_int_arrays(sample, label_store, subtype, chan, num) + (label_store, subtype, + chan, num) = lists_to_int_arrays(label_store, subtype, chan, num) + + # Convert sample numbers to a numpy array of 'int64' + sample = np.array(sample, dtype='int64') # Try to get fs from the header file if it is not contained in the # annotation file @@ -1748,8 +1760,8 @@ def load_byte_pairs(record_name, extension, pn_dir): Returns ------- - filebytes : str - The input filestream converted to bytes. + filebytes : ndarray + The input filestream converted to an Nx2 array of unsigned bytes. """ # local file @@ -1769,8 +1781,8 @@ def proc_ann_bytes(filebytes, sampto): Parameters ---------- - filebytes : str - The input filestream converted to bytes. + filebytes : ndarray + The input filestream converted to an Nx2 array of unsigned bytes. sampto : int The maximum sample number for annotations to be returned. @@ -1852,8 +1864,8 @@ def proc_core_fields(filebytes, bpi): Parameters ---------- - filebytes : str - The input filestream converted to bytes. + filebytes : ndarray + The input filestream converted to an Nx2 array of unsigned bytes. bpi : int The index to start the conversion. @@ -1869,31 +1881,28 @@ def proc_core_fields(filebytes, bpi): The index to start the conversion. """ - label_store = filebytes[bpi, 1] >> 2 + sample_diff = 0 # The current byte pair will contain either the actual d_sample + annotation store value, # or 0 + SKIP. - - # Not a skip - it is the actual sample number + annotation type store value - if label_store != 59: - sample_diff = filebytes[bpi, 0] + 256 * (filebytes[bpi, 1] & 3) - bpi = bpi + 1 - # Skip. Note: Could there be another skip after the first? - else: + while filebytes[bpi, 1] >> 2 == 59: # 4 bytes storing dt - sample_diff = 65536 * filebytes[bpi + 1,0] + 16777216 * filebytes[bpi + 1,1] \ - + filebytes[bpi + 2,0] + 256 * filebytes[bpi + 2,1] + skip_diff = ((int(filebytes[bpi + 1, 0]) << 16) + + (int(filebytes[bpi + 1, 1]) << 24) + + (int(filebytes[bpi + 2, 0]) << 0) + + (int(filebytes[bpi + 2, 1]) << 8)) # Data type is long integer (stored in two's complement). Range -2**31 to 2**31 - 1 - if sample_diff > 2147483647: - sample_diff = sample_diff - 4294967296 + if skip_diff > 2147483647: + skip_diff = skip_diff - 4294967296 - # After the 4 bytes, the next pair's samp is also added - sample_diff = sample_diff + filebytes[bpi + 3, 0] + 256 * (filebytes[bpi + 3, 1] & 3) + sample_diff += skip_diff + bpi = bpi + 3 - # The label is stored after the 4 bytes. Samples here should be 0. - label_store = filebytes[bpi + 3, 1] >> 2 - bpi = bpi + 4 + # Not a skip - it is the actual sample number + annotation type store value + label_store = filebytes[bpi, 1] >> 2 + sample_diff += int(filebytes[bpi, 0] + 256 * (filebytes[bpi, 1] & 3)) + bpi = bpi + 1 return sample_diff, label_store, bpi