Skip to content

Commit a897aee

Browse files
bpo-32072: Fix issues with binary plists. (#4455)
* Fixed saving bytearrays. * Identical objects will be saved only once. * Equal references will be loaded as identical objects. * Added support for saving and loading recursive data structures.
1 parent b4d1e1f commit a897aee

File tree

3 files changed

+112
-36
lines changed

3 files changed

+112
-36
lines changed

Lib/plistlib.py

Lines changed: 51 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,8 @@ def __init__(self, message="Invalid file"):
525525

526526
_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}
527527

528+
_undefined = object()
529+
528530
class _BinaryPlistParser:
529531
"""
530532
Read or write a binary plist file, following the description of the binary
@@ -555,7 +557,8 @@ def parse(self, fp):
555557
) = struct.unpack('>6xBBQQQ', trailer)
556558
self._fp.seek(offset_table_offset)
557559
self._object_offsets = self._read_ints(num_objects, offset_size)
558-
return self._read_object(self._object_offsets[top_object])
560+
self._objects = [_undefined] * num_objects
561+
return self._read_object(top_object)
559562

560563
except (OSError, IndexError, struct.error, OverflowError,
561564
UnicodeDecodeError):
@@ -584,71 +587,78 @@ def _read_ints(self, n, size):
584587
def _read_refs(self, n):
585588
return self._read_ints(n, self._ref_size)
586589

587-
def _read_object(self, offset):
590+
def _read_object(self, ref):
588591
"""
589-
read the object at offset.
592+
read the object by reference.
590593
591594
May recursively read sub-objects (content of an array/dict/set)
592595
"""
596+
result = self._objects[ref]
597+
if result is not _undefined:
598+
return result
599+
600+
offset = self._object_offsets[ref]
593601
self._fp.seek(offset)
594602
token = self._fp.read(1)[0]
595603
tokenH, tokenL = token & 0xF0, token & 0x0F
596604

597605
if token == 0x00:
598-
return None
606+
result = None
599607

600608
elif token == 0x08:
601-
return False
609+
result = False
602610

603611
elif token == 0x09:
604-
return True
612+
result = True
605613

606614
# The referenced source code also mentions URL (0x0c, 0x0d) and
607615
# UUID (0x0e), but neither can be generated using the Cocoa libraries.
608616

609617
elif token == 0x0f:
610-
return b''
618+
result = b''
611619

612620
elif tokenH == 0x10: # int
613-
return int.from_bytes(self._fp.read(1 << tokenL),
614-
'big', signed=tokenL >= 3)
621+
result = int.from_bytes(self._fp.read(1 << tokenL),
622+
'big', signed=tokenL >= 3)
615623

616624
elif token == 0x22: # real
617-
return struct.unpack('>f', self._fp.read(4))[0]
625+
result = struct.unpack('>f', self._fp.read(4))[0]
618626

619627
elif token == 0x23: # real
620-
return struct.unpack('>d', self._fp.read(8))[0]
628+
result = struct.unpack('>d', self._fp.read(8))[0]
621629

622630
elif token == 0x33: # date
623631
f = struct.unpack('>d', self._fp.read(8))[0]
624632
# timestamp 0 of binary plists corresponds to 1/1/2001
625633
# (year of Mac OS X 10.0), instead of 1/1/1970.
626-
return datetime.datetime(2001, 1, 1) + datetime.timedelta(seconds=f)
634+
result = (datetime.datetime(2001, 1, 1) +
635+
datetime.timedelta(seconds=f))
627636

628637
elif tokenH == 0x40: # data
629638
s = self._get_size(tokenL)
630639
if self._use_builtin_types:
631-
return self._fp.read(s)
640+
result = self._fp.read(s)
632641
else:
633-
return Data(self._fp.read(s))
642+
result = Data(self._fp.read(s))
634643

635644
elif tokenH == 0x50: # ascii string
636645
s = self._get_size(tokenL)
637646
result = self._fp.read(s).decode('ascii')
638-
return result
647+
result = result
639648

640649
elif tokenH == 0x60: # unicode string
641650
s = self._get_size(tokenL)
642-
return self._fp.read(s * 2).decode('utf-16be')
651+
result = self._fp.read(s * 2).decode('utf-16be')
643652

644653
# tokenH == 0x80 is documented as 'UID' and appears to be used for
645654
# keyed-archiving, not in plists.
646655

647656
elif tokenH == 0xA0: # array
648657
s = self._get_size(tokenL)
649658
obj_refs = self._read_refs(s)
650-
return [self._read_object(self._object_offsets[x])
651-
for x in obj_refs]
659+
result = []
660+
self._objects[ref] = result
661+
result.extend(self._read_object(x) for x in obj_refs)
652662

653663
# tokenH == 0xB0 is documented as 'ordset', but is not actually
654664
# implemented in the Apple reference code.
@@ -661,12 +671,15 @@ def _read_object(self, offset):
661671
key_refs = self._read_refs(s)
662672
obj_refs = self._read_refs(s)
663673
result = self._dict_type()
674+
self._objects[ref] = result
664675
for k, o in zip(key_refs, obj_refs):
665-
result[self._read_object(self._object_offsets[k])
666-
] = self._read_object(self._object_offsets[o])
667-
return result
676+
result[self._read_object(k)] = self._read_object(o)
668677

669-
raise InvalidFileException()
678+
else:
679+
raise InvalidFileException()
680+
681+
self._objects[ref] = result
682+
return result
670683

671684
def _count_to_size(count):
672685
if count < 1 << 8:
@@ -681,6 +694,8 @@ def _count_to_size(count):
681694
else:
682695
return 8
683696

697+
_scalars = (str, int, float, datetime.datetime, bytes)
698+
684699
class _BinaryPlistWriter (object):
685700
def __init__(self, fp, sort_keys, skipkeys):
686701
self._fp = fp
@@ -736,24 +751,25 @@ def _flatten(self, value):
736751
# First check if the object is in the object table, not used for
737752
# containers to ensure that two subcontainers with the same contents
738753
# will be serialized as distinct values.
739-
if isinstance(value, (
740-
str, int, float, datetime.datetime, bytes, bytearray)):
754+
if isinstance(value, _scalars):
741755
if (type(value), value) in self._objtable:
742756
return
743757

744758
elif isinstance(value, Data):
745759
if (type(value.data), value.data) in self._objtable:
746760
return
747761

762+
elif id(value) in self._objidtable:
763+
return
764+
748765
# Add to objectreference map
749766
refnum = len(self._objlist)
750767
self._objlist.append(value)
751-
try:
752-
if isinstance(value, Data):
753-
self._objtable[(type(value.data), value.data)] = refnum
754-
else:
755-
self._objtable[(type(value), value)] = refnum
756-
except TypeError:
768+
if isinstance(value, _scalars):
769+
self._objtable[(type(value), value)] = refnum
770+
elif isinstance(value, Data):
771+
self._objtable[(type(value.data), value.data)] = refnum
772+
else:
757773
self._objidtable[id(value)] = refnum
758774

759775
# And finally recurse into containers
@@ -780,12 +796,11 @@ def _flatten(self, value):
780796
self._flatten(o)
781797

782798
def _getrefnum(self, value):
783-
try:
784-
if isinstance(value, Data):
785-
return self._objtable[(type(value.data), value.data)]
786-
else:
787-
return self._objtable[(type(value), value)]
788-
except TypeError:
799+
if isinstance(value, _scalars):
800+
return self._objtable[(type(value), value)]
801+
elif isinstance(value, Data):
802+
return self._objtable[(type(value.data), value.data)]
803+
else:
789804
return self._objidtable[id(value)]
790805

791806
def _write_size(self, token, size):

Lib/test/test_plistlib.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,17 @@ def test_int(self):
169169
self.assertRaises(OverflowError, plistlib.dumps,
170170
pl, fmt=fmt)
171171

172+
def test_bytearray(self):
173+
for pl in (b'<binary gunk>', b"<lots of binary gunk>\0\1\2\3" * 10):
174+
for fmt in ALL_FORMATS:
175+
with self.subTest(pl=pl, fmt=fmt):
176+
data = plistlib.dumps(bytearray(pl), fmt=fmt)
177+
pl2 = plistlib.loads(data)
178+
self.assertIsInstance(pl2, bytes)
179+
self.assertEqual(pl2, pl)
180+
data2 = plistlib.dumps(pl2, fmt=fmt)
181+
self.assertEqual(data, data2)
182+
172183
def test_bytes(self):
173184
pl = self._create()
174185
data = plistlib.dumps(pl)
@@ -431,6 +442,9 @@ def test_xml_encodings(self):
431442
pl2 = plistlib.loads(data)
432443
self.assertEqual(dict(pl), dict(pl2))
433444

445+
446+
class TestBinaryPlistlib(unittest.TestCase):
447+
434448
def test_nonstandard_refs_size(self):
435449
# Issue #21538: Refs and offsets are 24-bit integers
436450
data = (b'bplist00'
@@ -443,6 +457,47 @@ def test_nonstandard_refs_size(self):
443457
b'\x00\x00\x00\x00\x00\x00\x00\x13')
444458
self.assertEqual(plistlib.loads(data), {'a': 'b'})
445459

460+
def test_dump_duplicates(self):
461+
# Test effectiveness of saving duplicated objects
462+
for x in (None, False, True, 12345, 123.45, 'abcde', b'abcde',
463+
datetime.datetime(2004, 10, 26, 10, 33, 33),
464+
plistlib.Data(b'abcde'), bytearray(b'abcde'),
465+
[12, 345], (12, 345), {'12': 345}):
466+
with self.subTest(x=x):
467+
data = plistlib.dumps([x]*1000, fmt=plistlib.FMT_BINARY)
468+
self.assertLess(len(data), 1100, repr(data))
469+
470+
def test_identity(self):
471+
for x in (None, False, True, 12345, 123.45, 'abcde', b'abcde',
472+
datetime.datetime(2004, 10, 26, 10, 33, 33),
473+
plistlib.Data(b'abcde'), bytearray(b'abcde'),
474+
[12, 345], (12, 345), {'12': 345}):
475+
with self.subTest(x=x):
476+
data = plistlib.dumps([x]*2, fmt=plistlib.FMT_BINARY)
477+
a, b = plistlib.loads(data)
478+
if isinstance(x, tuple):
479+
x = list(x)
480+
self.assertEqual(a, x)
481+
self.assertEqual(b, x)
482+
self.assertIs(a, b)
483+
484+
def test_cycles(self):
485+
# recursive list
486+
a = []
487+
a.append(a)
488+
b = plistlib.loads(plistlib.dumps(a, fmt=plistlib.FMT_BINARY))
489+
self.assertIs(b[0], b)
490+
# recursive tuple
491+
a = ([],)
492+
a[0].append(a)
493+
b = plistlib.loads(plistlib.dumps(a, fmt=plistlib.FMT_BINARY))
494+
self.assertIs(b[0][0], b)
495+
# recursive dict
496+
a = {}
497+
a['x'] = a
498+
b = plistlib.loads(plistlib.dumps(a, fmt=plistlib.FMT_BINARY))
499+
self.assertIs(b['x'], b)
500+
446501
def test_large_timestamp(self):
447502
# Issue #26709: 32-bit timestamp out of range
448503
for ts in -2**31-1, 2**31:
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Fixed issues with binary plists:
2+
3+
* Fixed saving bytearrays.
4+
* Identical objects will be saved only once.
5+
* Equal references will be loaded as identical objects.
6+
* Added support for saving and loading recursive data structures.

0 commit comments

Comments
 (0)