
Commit 5bcf012

Merge pull request #1 from effigies/mnt/reshape-gifti-ascii-data
RF: Consistently apply data type, shape and index order
2 parents: 6ffeeac + afbcc88

3 files changed: +41 additions, -44 deletions

nibabel/gifti/gifti.py

Lines changed: 1 addition & 1 deletion
@@ -745,7 +745,7 @@ def agg_data(self, intent_code=None):
         >>> triangles_2 = surf_img.agg_data('triangle')
         >>> triangles_3 = surf_img.agg_data(1009)  # Numeric code for pointset
         >>> print(np.array2string(triangles))
-        [0 1 2]
+        [[0 1 2]]
         >>> np.array_equal(triangles, triangles_2)
         True
         >>> np.array_equal(triangles, triangles_3)
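The doctest update above captures the user-visible effect of this change: ASCII-encoded arrays are no longer squeezed, so data keeps the dimensions declared in the GIFTI header. A minimal sketch of what a caller would observe, assuming a hypothetical surface file 'surf.gii' whose triangle array is declared as 1x3:

import nibabel as nib
import numpy as np

# 'surf.gii' is a hypothetical example file, not part of this change.
surf_img = nib.load('surf.gii')
triangles = surf_img.agg_data('triangle')

# Previously a 1x3 ASCII-encoded triangle array came back squeezed to (3,);
# it now keeps its declared shape.
print(triangles.shape)               # (1, 3)
print(np.array2string(triangles))    # [[0 1 2]]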

nibabel/gifti/parse_gifti_fast.py

Lines changed: 29 additions & 41 deletions
@@ -68,21 +68,21 @@ def read_data_block(darray, fname, data, mmap):
     if mmap is True:
         mmap = 'c'
     enclabel = gifti_encoding_codes.label[darray.encoding]
-    dtype = data_type_codes.type[darray.datatype]
 
+    if enclabel not in ('ASCII', 'B64BIN', 'B64GZ', 'External'):
+        raise GiftiParseError(f'Unknown encoding {darray.encoding}')
+
+    # Encode the endianness in the dtype
+    byteorder = gifti_endian_codes.byteorder[darray.endian]
+    dtype = data_type_codes.dtype[darray.datatype].newbyteorder(byteorder)
+
+    shape = tuple(darray.dims)
+    order = array_index_order_codes.npcode[darray.ind_ord]
+
+    # GIFTI_ENCODING_ASCII
     if enclabel == 'ASCII':
-        # GIFTI_ENCODING_ASCII
-        c = StringIO(data)
-        da = np.loadtxt(c, dtype=dtype)
-        # Reshape to dims specified in GiftiDataArray attributes, but preserve
-        # existing behaviour of loading as 1D for arrays with a dimension of
-        # length 1
-        da = da.reshape(darray.dims).squeeze()
-        return da  # independent of the endianness
-    elif enclabel not in ('B64BIN', 'B64GZ', 'External'):
-        return 0
-
-    # GIFTI_ENCODING_EXTBIN
+        return np.loadtxt(StringIO(data), dtype=dtype, ndmin=1).reshape(shape, order=order)
+
     # We assume that the external data file is raw uncompressed binary, with
     # the data type/endianness/ordering specified by the other DataArray
     # attributes
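After this hunk the byte order is folded into the dtype up front (via newbyteorder) and the ASCII branch collapses to a single np.loadtxt call, with ndmin=1 guarding against 0-d results and the reshape applying the declared dims and index order. A standalone sketch of that path, with hypothetical values standing in for the GiftiDataArray attributes:

from io import StringIO
import numpy as np

data = '0 1 2'        # ASCII payload as it appears in the XML (hypothetical)
byteorder = 'little'  # stand-in for gifti_endian_codes.byteorder[darray.endian]
dtype = np.dtype('int32').newbyteorder(byteorder)  # endianness carried by the dtype
shape = (1, 3)        # stand-in for tuple(darray.dims)
order = 'C'           # stand-in for array_index_order_codes.npcode[darray.ind_ord]

# ndmin=1 prevents a 0-d result; reshape applies the declared dims and index order
arr = np.loadtxt(StringIO(data), dtype=dtype, ndmin=1).reshape(shape, order=order)
print(arr)            # [[0 1 2]]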
@@ -98,53 +98,41 @@ def read_data_block(darray, fname, data, mmap):
         newarr = None
         if mmap:
             try:
-                newarr = np.memmap(
+                return np.memmap(
                     ext_fname,
                     dtype=dtype,
                     mode=mmap,
                     offset=darray.ext_offset,
-                    shape=tuple(darray.dims),
+                    shape=shape,
+                    order=order,
                 )
             # If the memmap fails, we ignore the error and load the data into
             # memory below
             except (AttributeError, TypeError, ValueError):
                 pass
         # mmap=False or np.memmap failed
         if newarr is None:
-            # We can replace this with a call to np.fromfile in numpy>=1.17,
-            # as an "offset" parameter was added in that version.
-            with open(ext_fname, 'rb') as f:
-                f.seek(darray.ext_offset)
-                nbytes = np.prod(darray.dims) * dtype().itemsize
-                buff = f.read(nbytes)
-                newarr = np.frombuffer(buff, dtype=dtype)
+            return np.fromfile(
+                ext_fname,
+                dtype=dtype,
+                count=np.prod(darray.dims),
+                offset=darray.ext_offset,
+            ).reshape(shape, order=order)
 
     # Numpy arrays created from bytes objects are read-only.
     # Neither b64decode nor decompress will return bytearrays, and there
     # are not equivalents to fobj.readinto to allow us to pass them, so
     # there is not a simple way to avoid making copies.
     # If this becomes a problem, we should write a decoding interface with
     # a tunable chunk size.
+    dec = base64.b64decode(data.encode('ascii'))
+    if enclabel == 'B64BIN':
+        buff = bytearray(dec)
     else:
-        dec = base64.b64decode(data.encode('ascii'))
-        if enclabel == 'B64BIN':
-            # GIFTI_ENCODING_B64BIN
-            buff = bytearray(dec)
-        else:
-            # GIFTI_ENCODING_B64GZ
-            buff = bytearray(zlib.decompress(dec))
-        del dec
-        newarr = np.frombuffer(buff, dtype=dtype)
-
-    sh = tuple(darray.dims)
-    if len(newarr.shape) != len(sh):
-        newarr = newarr.reshape(sh, order=array_index_order_codes.npcode[darray.ind_ord])
-
-    # check if we need to byteswap
-    required_byteorder = gifti_endian_codes.byteorder[darray.endian]
-    if required_byteorder in ('big', 'little') and required_byteorder != sys.byteorder:
-        newarr = newarr.byteswap()
-    return newarr
+        # GIFTI_ENCODING_B64GZ
+        buff = bytearray(zlib.decompress(dec))
+    del dec
+    return np.frombuffer(buff, dtype=dtype).reshape(shape, order=order)
 
 
 def _str2int(in_str):
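The external-binary and Base64 branches now also return directly: np.fromfile (whose offset parameter arrived in numpy 1.17, as the removed comment notes) replaces the manual seek/read, and a single reshape(shape, order=order) on an endianness-aware dtype removes the need for the trailing reshape-and-byteswap block. A standalone sketch of the B64GZ branch with a hypothetical payload; in the parser these values come from the GiftiDataArray attributes:

import base64
import zlib
import numpy as np

# Build a fake compressed payload the way a GIFTI writer would.
raw = np.arange(6, dtype=np.int32).tobytes()
data = base64.b64encode(zlib.compress(raw)).decode('ascii')

dtype = np.dtype('int32').newbyteorder('little')  # stand-in for the darray dtype
shape, order = (2, 3), 'F'                        # stand-ins for dims and index order

dec = base64.b64decode(data.encode('ascii'))
buff = bytearray(zlib.decompress(dec))            # bytearray keeps the array writeable
arr = np.frombuffer(buff, dtype=dtype).reshape(shape, order=order)
print(arr.shape)                                  # (2, 3)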

nibabel/gifti/tests/test_parse_gifti_fast.py

Lines changed: 11 additions & 2 deletions
@@ -41,7 +41,16 @@
 DATA_FILE7 = pjoin(IO_DATA_PATH, 'external.gii')
 DATA_FILE8 = pjoin(IO_DATA_PATH, 'ascii_flat_data.gii')
 
-datafiles = [DATA_FILE1, DATA_FILE2, DATA_FILE3, DATA_FILE4, DATA_FILE5, DATA_FILE6, DATA_FILE7, DATA_FILE8]
+datafiles = [
+    DATA_FILE1,
+    DATA_FILE2,
+    DATA_FILE3,
+    DATA_FILE4,
+    DATA_FILE5,
+    DATA_FILE6,
+    DATA_FILE7,
+    DATA_FILE8,
+]
 numDA = [2, 1, 1, 1, 2, 1, 2, 2]
 
 DATA_FILE1_darr1 = np.array(
@@ -51,7 +60,7 @@
         [-17.614349, -65.401642, 21.071466],
     ]
 )
-DATA_FILE1_darr2 = np.array([0, 1, 2])
+DATA_FILE1_darr2 = np.array([[0, 1, 2]])
 
 DATA_FILE2_darr1 = np.array(
     [
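With the loader no longer squeezing, the expected second data array of DATA_FILE1 becomes 2-D. A hedged sketch of the kind of assertion this expectation feeds, assuming the module-level names above are in scope and that darrays[1] is the triangle array:

import numpy as np
import nibabel as nib

# DATA_FILE1 and DATA_FILE1_darr2 are the module-level names defined above.
img = nib.load(DATA_FILE1)
np.testing.assert_array_equal(img.darrays[1].data, DATA_FILE1_darr2)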
