Skip to content

Commit 3b9fbfc

Browse files
bottlerfacebook-github-bot
authored andcommitted
Read heterogeneous nonlist PLY properties as arrays
Summary: In the original implementation, I had considered PLY properties where there are mixed types of elements in a property to be rare and basically unimportant, so the implementation is very naive. If we want to support pointcloud PLY files, we need to handle at least the subcase where there are no lists efficiently because this seems to be very common there. Reviewed By: nikhilaravi, gkioxari Differential Revision: D22573315 fbshipit-source-id: db6f29446d4e555a2e2b37d38c8e4450d061465b
1 parent 89532a8 commit 3b9fbfc

File tree

2 files changed

+267
-56
lines changed

2 files changed

+267
-56
lines changed

pytorch3d/io/ply_io.py

Lines changed: 213 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55

66
"""This module implements utility functions for loading and saving meshes."""
7+
import itertools
78
import struct
89
import sys
910
import warnings
@@ -232,13 +233,20 @@ def _read_ply_fixed_size_element_ascii(f, definition: _PlyElementType):
232233
Given an element which has no lists and one type, read the
233234
corresponding data.
234235
236+
For example
237+
238+
element vertex 8
239+
property float x
240+
property float y
241+
property float z
242+
235243
Args:
236244
f: file-like object being read.
237245
definition: The element object which describes what we are reading.
238246
239247
Returns:
240-
2D numpy array corresponding to the data. The rows are the different
241-
values. There is one column for each property.
248+
1-element list containing a 2D numpy array corresponding to the data.
249+
The rows are the different values. There is one column for each property.
242250
"""
243251
np_type = _PLY_TYPES[definition.properties[0].data_type].np_type
244252
old_offset = f.tell()
@@ -251,11 +259,62 @@ def _read_ply_fixed_size_element_ascii(f, definition: _PlyElementType):
251259
)
252260
if not len(data): # np.loadtxt() seeks even on empty data
253261
f.seek(old_offset)
254-
if definition.count and data.shape[1] != len(definition.properties):
262+
if data.shape[1] != len(definition.properties):
255263
raise ValueError("Inconsistent data for %s." % definition.name)
256264
if data.shape[0] != definition.count:
257265
raise ValueError("Not enough data for %s." % definition.name)
258-
return data
266+
return [data]
267+
268+
269+
def _read_ply_nolist_element_ascii(f, definition: _PlyElementType):
    """
    Read an ascii element that contains no list properties but whose
    properties are not all of one type.

    The whole table is parsed once as float64 and then cut into column
    groups, one per run of adjacent properties with the same type. Each
    group is converted to the numpy type of its properties.

    For example, given

        element vertex 8
        property float x
        property float y
        property float z
        property uchar red
        property uchar green
        property uchar blue

    two arrays are returned: the first holds (x,y,z) and the second
    holds (red,green,blue).

    Args:
        f: file-like object being read.
        definition: The element object which describes what we are reading.

    Returns:
        List of 2D numpy arrays corresponding to the data, one per run of
        same-typed adjacent properties, in property order.

    Raises:
        ValueError: if the number of columns differs from the number of
            properties, or the number of rows differs from definition.count.
    """
    start_position = f.tell()
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore", message=".* Empty input file.*", category=UserWarning
        )
        table = np.loadtxt(
            f, dtype=np.float64, comments=None, ndmin=2, max_rows=definition.count
        )
    if len(table) == 0:
        # np.loadtxt() moves the file position even when it read nothing,
        # so put it back where it was.
        f.seek(start_position)

    n_rows, n_columns = table.shape[0], table.shape[1]
    if n_columns != len(definition.properties):
        raise ValueError("Inconsistent data for %s." % definition.name)
    if n_rows != definition.count:
        raise ValueError("Not enough data for %s." % definition.name)

    arrays = []
    first_column = 0
    type_names = (prop.data_type for prop in definition.properties)
    for type_name, run in itertools.groupby(type_names):
        # Only the length of each run of equal types is needed.
        last_column = first_column + len(list(run))
        block = table[:, first_column:last_column]
        arrays.append(block.astype(_PLY_TYPES[type_name].np_type))
        first_column = last_column
    return arrays
259318

260319

261320
def _try_read_ply_constant_list_ascii(f, definition: _PlyElementType):
@@ -264,6 +323,28 @@ def _try_read_ply_constant_list_ascii(f, definition: _PlyElementType):
264323
corresponding data assuming every value has the same length.
265324
If the data is ragged, return None and leave f undisturbed.
266325
326+
For example, if the element is
327+
328+
element face 2
329+
property list uchar int vertex_index
330+
331+
and the data is
332+
333+
4 0 1 2 3
334+
4 7 6 5 4
335+
336+
then the function will return
337+
338+
[[0, 1, 2, 3],
339+
[7, 6, 5, 4]]
340+
341+
but if the data is
342+
343+
4 0 1 2 3
344+
3 6 5 4
345+
346+
then the function will return None.
347+
267348
Args:
268349
f: file-like object being read.
269350
definition: The element object which describes what we are reading.
@@ -349,8 +430,12 @@ def _read_ply_element_ascii(f, definition: _PlyElementType):
349430
each occurrence of the element, and the inner lists have one value per
350431
property.
351432
"""
433+
if not definition.count:
434+
return []
352435
if definition.is_constant_type_fixed_size():
353436
return _read_ply_fixed_size_element_ascii(f, definition)
437+
if definition.is_fixed_size():
438+
return _read_ply_nolist_element_ascii(f, definition)
354439
if definition.try_constant_list():
355440
data = _try_read_ply_constant_list_ascii(f, definition)
356441
if data is not None:
@@ -372,75 +457,123 @@ def _read_ply_element_ascii(f, definition: _PlyElementType):
372457
return data
373458

374459

460+
def _read_raw_array(f, aim: str, length: int, dtype=np.uint8, dtype_size=None):
    """
    Read [length] elements from a file.

    Args:
        f: file object
        aim: name of target for error message
        length: number of elements
        dtype: numpy type
        dtype_size: number of bytes per element. If None (the default) it is
            derived from dtype, so that the BytesIO branch and the
            np.fromfile branch always agree on how much data [length]
            elements occupy.

    Returns:
        new 1D numpy array of [length] elements of type dtype.

    Raises:
        ValueError: if fewer than [length] elements could be read.
    """
    if dtype_size is None:
        # A fixed default of 1 is only right for single-byte types; with a
        # wider dtype it would make the BytesIO branch read too few bytes
        # and silently return fewer than [length] elements.
        dtype_size = np.dtype(dtype).itemsize

    if isinstance(f, BytesIO):
        # np.fromfile is faster but won't work on a BytesIO
        needed_bytes = length * dtype_size
        bytes_data = bytearray(needed_bytes)
        n_bytes_read = f.readinto(bytes_data)
        if n_bytes_read != needed_bytes:
            raise ValueError("Not enough data for %s." % aim)
        data = np.frombuffer(bytes_data, dtype=dtype)
    else:
        data = np.fromfile(f, dtype=dtype, count=length)
        if data.shape[0] != length:
            raise ValueError("Not enough data for %s." % aim)
    return data
488+
489+
375490
def _read_ply_fixed_size_element_binary(
    f, definition: _PlyElementType, big_endian: bool
):
    """
    Read a binary element that has no list properties and whose properties
    all share a single type.

    For example

        element vertex 8
        property float x
        property float y
        property float z

    Args:
        f: file-like object being read.
        definition: The element object which describes what we are reading.
        big_endian: (bool) whether the document is encoded as big endian.

    Returns:
        1-element list containing a 2D numpy array corresponding to the data.
        The rows are the different values. There is one column for each property.

    Raises:
        ValueError: (from _read_raw_array) if the file holds too little data.
    """
    # Every property has the same type, so the first one describes them all.
    type_info = _PLY_TYPES[definition.properties[0].data_type]
    n_rows = definition.count
    n_columns = len(definition.properties)
    flat = _read_raw_array(
        f, definition.name, n_rows * n_columns, type_info.np_type, type_info.size
    )

    host_is_big_endian = sys.byteorder == "big"
    if host_is_big_endian != big_endian:
        # File and host byte orders differ.
        flat = flat.byteswap()
    return [flat.reshape(n_rows, n_columns)]
411523

412524

413-
def _read_ply_element_struct(f, definition: _PlyElementType, endian_str: str):
525+
def _read_ply_element_binary_nolists(f, definition: _PlyElementType, big_endian: bool):
    """
    Read a binary element that has no list properties, returning one numpy
    array for each run of adjacent properties with the same type.

    For example, given

        element vertex 8
        property float x
        property float y
        property float z
        property uchar red
        property uchar green
        property uchar blue

    two arrays are returned: the first holds (x,y,z) and the second
    holds (red,green,blue).

    Args:
        f: file-like object being read.
        definition: The element object which describes what we are reading.
        big_endian: (bool) whether the document is encoded as big endian.

    Returns:
        List of 2D numpy arrays corresponding to the data. The rows are the
        different values; each array has one column per property in its run.

    Raises:
        ValueError: (from _read_raw_array) if the file holds too little data.
    """
    row_bytes = sum(_PLY_TYPES[prop.data_type].size for prop in definition.properties)
    # Read the element as raw bytes, one row of the byte matrix per occurrence.
    raw = _read_raw_array(f, definition.name, row_bytes * definition.count)
    raw = raw.reshape(-1, row_bytes)

    must_swap = (sys.byteorder == "big") != big_endian

    arrays = []
    start_byte = 0
    type_names = (prop.data_type for prop in definition.properties)
    for type_name, run in itertools.groupby(type_names):
        n_columns = len(list(run))
        type_info = _PLY_TYPES[type_name]
        stop_byte = start_byte + n_columns * type_info.size

        # what we want to do is
        #     raw[:, start_byte:stop_byte].view(type_info.np_type)
        # but it fails in the general case
        # because of https://github.com/numpy/numpy/issues/9496.
        # Instead, reinterpret the first row only and stride over the rest.
        first_row = raw[:1, start_byte:stop_byte].view(type_info.np_type)
        columns = np.lib.stride_tricks.as_strided(
            first_row,
            shape=(definition.count, n_columns),
            strides=(raw.strides[0], type_info.size),
        )

        if must_swap:
            columns = columns.byteswap()
        arrays.append(columns)
        start_byte = stop_byte
    return arrays
444577

445578

446579
def _try_read_ply_constant_list_binary(
@@ -451,6 +584,28 @@ def _try_read_ply_constant_list_binary(
451584
corresponding data assuming every value has the same length.
452585
If the data is ragged, return None and leave f undisturbed.
453586
587+
For example, if the element is
588+
589+
element face 2
590+
property list uchar int vertex_index
591+
592+
and the data is
593+
594+
4 0 1 2 3
595+
4 7 6 5 4
596+
597+
then the function will return
598+
599+
[[0, 1, 2, 3],
600+
[7, 6, 5, 4]]
601+
602+
but if the data is
603+
604+
4 0 1 2 3
605+
3 6 5 4
606+
607+
then the function will return None.
608+
454609
Args:
455610
f: file-like object being read.
456611
definition: The element object which describes what we are reading.
@@ -460,8 +615,6 @@ def _try_read_ply_constant_list_binary(
460615
If every element has the same size, 2D numpy array corresponding to the
461616
data. The rows are the different values. Otherwise None.
462617
"""
463-
if definition.count == 0:
464-
return []
465618
property = definition.properties[0]
466619
endian_str = ">" if big_endian else "<"
467620
length_format = endian_str + _PLY_TYPES[property.list_size_type].struct_char
@@ -515,18 +668,20 @@ def _read_ply_element_binary(f, definition: _PlyElementType, big_endian: bool) -
515668
each occurrence of the element, and the inner lists have one value per
516669
property.
517670
"""
518-
endian_str = ">" if big_endian else "<"
671+
if not definition.count:
672+
return []
519673

520674
if definition.is_constant_type_fixed_size():
521675
return _read_ply_fixed_size_element_binary(f, definition, big_endian)
522676
if definition.is_fixed_size():
523-
return _read_ply_element_struct(f, definition, endian_str)
677+
return _read_ply_element_binary_nolists(f, definition, big_endian)
524678
if definition.try_constant_list():
525679
data = _try_read_ply_constant_list_binary(f, definition, big_endian)
526680
if data is not None:
527681
return data
528682

529683
# We failed to read the element as a lump, must process each line manually.
684+
endian_str = ">" if big_endian else "<"
530685
property_structs = []
531686
for property in definition.properties:
532687
initial_type = property.list_size_type or property.data_type
@@ -606,6 +761,7 @@ def _load_ply_raw(f, path_manager: PathManager) -> Tuple[_PlyHeader, dict]:
606761
elements: A dictionary of element names to values. If an element is
607762
regular, in the sense of having no lists or being one
608763
uniformly-sized list, then the value will be a 2D numpy array.
764+
If it has no lists but more than one type, it will be a list of arrays.
609765
If not, it is a list of the relevant property values.
610766
"""
611767
with _open_file(f, path_manager, "rb") as f:
@@ -670,11 +826,20 @@ def load_ply(f, path_manager: Optional[PathManager] = None):
670826
if face is None:
671827
raise ValueError("The ply file has no face element.")
672828

673-
if len(vertex) and (
674-
not isinstance(vertex, np.ndarray) or vertex.ndim != 2 or vertex.shape[1] != 3
675-
):
829+
if not isinstance(vertex, list) or len(vertex) > 1:
676830
raise ValueError("Invalid vertices in file.")
677-
verts = _make_tensor(vertex, cols=3, dtype=torch.float32)
831+
832+
if len(vertex):
833+
vertex0 = vertex[0]
834+
if len(vertex0) and (
835+
not isinstance(vertex0, np.ndarray)
836+
or vertex0.ndim != 2
837+
or vertex0.shape[1] != 3
838+
):
839+
raise ValueError("Invalid vertices in file.")
840+
else:
841+
vertex0 = []
842+
verts = _make_tensor(vertex0, cols=3, dtype=torch.float32)
678843

679844
face_head = next(head for head in header.elements if head.name == "face")
680845
if len(face_head.properties) != 1 or face_head.properties[0].list_size_type is None:

0 commit comments

Comments
 (0)