Skip to content

Commit 8cd3108

Browse files
miss-islington authored and
serhiy-storchaka committed
bpo-32072: Fix issues with binary plists. (GH-4455) (#4654)
* Fixed saving bytearrays. * Identical objects will be saved only once. * Equal references will be loaded as identical objects. * Added support for saving and loading recursive data structures. (cherry picked from commit a897aee)
1 parent c91bf74 commit 8cd3108

File tree

3 files changed

+112
-36
lines changed

3 files changed

+112
-36
lines changed

Lib/plistlib.py

Lines changed: 51 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,8 @@ def __init__(self, message="Invalid file"):
590590

591591
_BINARY_FORMAT = {1: 'B', 2: 'H', 4: 'L', 8: 'Q'}
592592

593+
_undefined = object()
594+
593595
class _BinaryPlistParser:
594596
"""
595597
Read or write a binary plist file, following the description of the binary
@@ -620,7 +622,8 @@ def parse(self, fp):
620622
) = struct.unpack('>6xBBQQQ', trailer)
621623
self._fp.seek(offset_table_offset)
622624
self._object_offsets = self._read_ints(num_objects, offset_size)
623-
return self._read_object(self._object_offsets[top_object])
625+
self._objects = [_undefined] * num_objects
626+
return self._read_object(top_object)
624627

625628
except (OSError, IndexError, struct.error, OverflowError,
626629
UnicodeDecodeError):
@@ -649,71 +652,78 @@ def _read_ints(self, n, size):
649652
def _read_refs(self, n):
650653
return self._read_ints(n, self._ref_size)
651654

652-
def _read_object(self, offset):
655+
def _read_object(self, ref):
653656
"""
654-
read the object at offset.
657+
read the object by reference.
655658
656659
May recursively read sub-objects (content of an array/dict/set)
657660
"""
661+
result = self._objects[ref]
662+
if result is not _undefined:
663+
return result
664+
665+
offset = self._object_offsets[ref]
658666
self._fp.seek(offset)
659667
token = self._fp.read(1)[0]
660668
tokenH, tokenL = token & 0xF0, token & 0x0F
661669

662670
if token == 0x00:
663-
return None
671+
result = None
664672

665673
elif token == 0x08:
666-
return False
674+
result = False
667675

668676
elif token == 0x09:
669-
return True
677+
result = True
670678

671679
# The referenced source code also mentions URL (0x0c, 0x0d) and
672680
# UUID (0x0e), but neither can be generated using the Cocoa libraries.
673681

674682
elif token == 0x0f:
675-
return b''
683+
result = b''
676684

677685
elif tokenH == 0x10: # int
678-
return int.from_bytes(self._fp.read(1 << tokenL),
679-
'big', signed=tokenL >= 3)
686+
result = int.from_bytes(self._fp.read(1 << tokenL),
687+
'big', signed=tokenL >= 3)
680688

681689
elif token == 0x22: # real
682-
return struct.unpack('>f', self._fp.read(4))[0]
690+
result = struct.unpack('>f', self._fp.read(4))[0]
683691

684692
elif token == 0x23: # real
685-
return struct.unpack('>d', self._fp.read(8))[0]
693+
result = struct.unpack('>d', self._fp.read(8))[0]
686694

687695
elif token == 0x33: # date
688696
f = struct.unpack('>d', self._fp.read(8))[0]
689697
# timestamp 0 of binary plists corresponds to 1/1/2001
690698
# (year of Mac OS X 10.0), instead of 1/1/1970.
691-
return datetime.datetime(2001, 1, 1) + datetime.timedelta(seconds=f)
699+
result = (datetime.datetime(2001, 1, 1) +
700+
datetime.timedelta(seconds=f))
692701

693702
elif tokenH == 0x40: # data
694703
s = self._get_size(tokenL)
695704
if self._use_builtin_types:
696-
return self._fp.read(s)
705+
result = self._fp.read(s)
697706
else:
698-
return Data(self._fp.read(s))
707+
result = Data(self._fp.read(s))
699708

700709
elif tokenH == 0x50: # ascii string
701710
s = self._get_size(tokenL)
702711
result = self._fp.read(s).decode('ascii')
703-
return result
712+
result = result
704713

705714
elif tokenH == 0x60: # unicode string
706715
s = self._get_size(tokenL)
707-
return self._fp.read(s * 2).decode('utf-16be')
716+
result = self._fp.read(s * 2).decode('utf-16be')
708717

709718
# tokenH == 0x80 is documented as 'UID' and appears to be used for
710719
# keyed-archiving, not in plists.
711720

712721
elif tokenH == 0xA0: # array
713722
s = self._get_size(tokenL)
714723
obj_refs = self._read_refs(s)
715-
return [self._read_object(self._object_offsets[x])
716-
for x in obj_refs]
724+
result = []
725+
self._objects[ref] = result
726+
result.extend(self._read_object(x) for x in obj_refs)
717727

718728
# tokenH == 0xB0 is documented as 'ordset', but is not actually
719729
# implemented in the Apple reference code.
@@ -726,12 +736,15 @@ def _read_object(self, offset):
726736
key_refs = self._read_refs(s)
727737
obj_refs = self._read_refs(s)
728738
result = self._dict_type()
739+
self._objects[ref] = result
729740
for k, o in zip(key_refs, obj_refs):
730-
result[self._read_object(self._object_offsets[k])
731-
] = self._read_object(self._object_offsets[o])
732-
return result
741+
result[self._read_object(k)] = self._read_object(o)
733742

734-
raise InvalidFileException()
743+
else:
744+
raise InvalidFileException()
745+
746+
self._objects[ref] = result
747+
return result
735748

736749
def _count_to_size(count):
737750
if count < 1 << 8:
@@ -746,6 +759,8 @@ def _count_to_size(count):
746759
else:
747760
return 8
748761

762+
_scalars = (str, int, float, datetime.datetime, bytes)
763+
749764
class _BinaryPlistWriter (object):
750765
def __init__(self, fp, sort_keys, skipkeys):
751766
self._fp = fp
@@ -801,24 +816,25 @@ def _flatten(self, value):
801816
# First check if the object is in the object table, not used for
802817
# containers to ensure that two subcontainers with the same contents
803818
# will be serialized as distinct values.
804-
if isinstance(value, (
805-
str, int, float, datetime.datetime, bytes, bytearray)):
819+
if isinstance(value, _scalars):
806820
if (type(value), value) in self._objtable:
807821
return
808822

809823
elif isinstance(value, Data):
810824
if (type(value.data), value.data) in self._objtable:
811825
return
812826

827+
elif id(value) in self._objidtable:
828+
return
829+
813830
# Add to objectreference map
814831
refnum = len(self._objlist)
815832
self._objlist.append(value)
816-
try:
817-
if isinstance(value, Data):
818-
self._objtable[(type(value.data), value.data)] = refnum
819-
else:
820-
self._objtable[(type(value), value)] = refnum
821-
except TypeError:
833+
if isinstance(value, _scalars):
834+
self._objtable[(type(value), value)] = refnum
835+
elif isinstance(value, Data):
836+
self._objtable[(type(value.data), value.data)] = refnum
837+
else:
822838
self._objidtable[id(value)] = refnum
823839

824840
# And finally recurse into containers
@@ -845,12 +861,11 @@ def _flatten(self, value):
845861
self._flatten(o)
846862

847863
def _getrefnum(self, value):
848-
try:
849-
if isinstance(value, Data):
850-
return self._objtable[(type(value.data), value.data)]
851-
else:
852-
return self._objtable[(type(value), value)]
853-
except TypeError:
864+
if isinstance(value, _scalars):
865+
return self._objtable[(type(value), value)]
866+
elif isinstance(value, Data):
867+
return self._objtable[(type(value.data), value.data)]
868+
else:
854869
return self._objidtable[id(value)]
855870

856871
def _write_size(self, token, size):

Lib/test/test_plistlib.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,17 @@ def test_int(self):
169169
self.assertRaises(OverflowError, plistlib.dumps,
170170
pl, fmt=fmt)
171171

172+
def test_bytearray(self):
173+
for pl in (b'<binary gunk>', b"<lots of binary gunk>\0\1\2\3" * 10):
174+
for fmt in ALL_FORMATS:
175+
with self.subTest(pl=pl, fmt=fmt):
176+
data = plistlib.dumps(bytearray(pl), fmt=fmt)
177+
pl2 = plistlib.loads(data)
178+
self.assertIsInstance(pl2, bytes)
179+
self.assertEqual(pl2, pl)
180+
data2 = plistlib.dumps(pl2, fmt=fmt)
181+
self.assertEqual(data, data2)
182+
172183
def test_bytes(self):
173184
pl = self._create()
174185
data = plistlib.dumps(pl)
@@ -432,6 +443,9 @@ def test_xml_encodings(self):
432443
pl2 = plistlib.loads(data)
433444
self.assertEqual(dict(pl), dict(pl2))
434445

446+
447+
class TestBinaryPlistlib(unittest.TestCase):
448+
435449
def test_nonstandard_refs_size(self):
436450
# Issue #21538: Refs and offsets are 24-bit integers
437451
data = (b'bplist00'
@@ -444,6 +458,47 @@ def test_nonstandard_refs_size(self):
444458
b'\x00\x00\x00\x00\x00\x00\x00\x13')
445459
self.assertEqual(plistlib.loads(data), {'a': 'b'})
446460

461+
def test_dump_duplicates(self):
462+
# Test effectiveness of saving duplicated objects
463+
for x in (None, False, True, 12345, 123.45, 'abcde', b'abcde',
464+
datetime.datetime(2004, 10, 26, 10, 33, 33),
465+
plistlib.Data(b'abcde'), bytearray(b'abcde'),
466+
[12, 345], (12, 345), {'12': 345}):
467+
with self.subTest(x=x):
468+
data = plistlib.dumps([x]*1000, fmt=plistlib.FMT_BINARY)
469+
self.assertLess(len(data), 1100, repr(data))
470+
471+
def test_identity(self):
472+
for x in (None, False, True, 12345, 123.45, 'abcde', b'abcde',
473+
datetime.datetime(2004, 10, 26, 10, 33, 33),
474+
plistlib.Data(b'abcde'), bytearray(b'abcde'),
475+
[12, 345], (12, 345), {'12': 345}):
476+
with self.subTest(x=x):
477+
data = plistlib.dumps([x]*2, fmt=plistlib.FMT_BINARY)
478+
a, b = plistlib.loads(data)
479+
if isinstance(x, tuple):
480+
x = list(x)
481+
self.assertEqual(a, x)
482+
self.assertEqual(b, x)
483+
self.assertIs(a, b)
484+
485+
def test_cycles(self):
486+
# recursive list
487+
a = []
488+
a.append(a)
489+
b = plistlib.loads(plistlib.dumps(a, fmt=plistlib.FMT_BINARY))
490+
self.assertIs(b[0], b)
491+
# recursive tuple
492+
a = ([],)
493+
a[0].append(a)
494+
b = plistlib.loads(plistlib.dumps(a, fmt=plistlib.FMT_BINARY))
495+
self.assertIs(b[0][0], b)
496+
# recursive dict
497+
a = {}
498+
a['x'] = a
499+
b = plistlib.loads(plistlib.dumps(a, fmt=plistlib.FMT_BINARY))
500+
self.assertIs(b['x'], b)
501+
447502
def test_large_timestamp(self):
448503
# Issue #26709: 32-bit timestamp out of range
449504
for ts in -2**31-1, 2**31:
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Fixed issues with binary plists:
2+
3+
* Fixed saving bytearrays.
4+
* Identical objects will be saved only once.
5+
* Equal references will be loaded as identical objects.
6+
* Added support for saving and loading recursive data structures.

0 commit comments

Comments
 (0)