@@ -590,6 +590,8 @@ def __init__(self, message="Invalid file"):
590
590
591
591
_BINARY_FORMAT = {1 : 'B' , 2 : 'H' , 4 : 'L' , 8 : 'Q' }
592
592
593
+ _undefined = object ()
594
+
593
595
class _BinaryPlistParser :
594
596
"""
595
597
Read or write a binary plist file, following the description of the binary
@@ -620,7 +622,8 @@ def parse(self, fp):
620
622
) = struct .unpack ('>6xBBQQQ' , trailer )
621
623
self ._fp .seek (offset_table_offset )
622
624
self ._object_offsets = self ._read_ints (num_objects , offset_size )
623
- return self ._read_object (self ._object_offsets [top_object ])
625
+ self ._objects = [_undefined ] * num_objects
626
+ return self ._read_object (top_object )
624
627
625
628
except (OSError , IndexError , struct .error , OverflowError ,
626
629
UnicodeDecodeError ):
@@ -649,71 +652,78 @@ def _read_ints(self, n, size):
649
652
def _read_refs (self , n ):
650
653
return self ._read_ints (n , self ._ref_size )
651
654
652
- def _read_object (self , offset ):
655
+ def _read_object (self , ref ):
653
656
"""
654
- read the object at offset .
657
+ read the object by reference .
655
658
656
659
May recursively read sub-objects (content of an array/dict/set)
657
660
"""
661
+ result = self ._objects [ref ]
662
+ if result is not _undefined :
663
+ return result
664
+
665
+ offset = self ._object_offsets [ref ]
658
666
self ._fp .seek (offset )
659
667
token = self ._fp .read (1 )[0 ]
660
668
tokenH , tokenL = token & 0xF0 , token & 0x0F
661
669
662
670
if token == 0x00 :
663
- return None
671
+ result = None
664
672
665
673
elif token == 0x08 :
666
- return False
674
+ result = False
667
675
668
676
elif token == 0x09 :
669
- return True
677
+ result = True
670
678
671
679
# The referenced source code also mentions URL (0x0c, 0x0d) and
672
680
# UUID (0x0e), but neither can be generated using the Cocoa libraries.
673
681
674
682
elif token == 0x0f :
675
- return b''
683
+ result = b''
676
684
677
685
elif tokenH == 0x10 : # int
678
- return int .from_bytes (self ._fp .read (1 << tokenL ),
679
- 'big' , signed = tokenL >= 3 )
686
+ result = int .from_bytes (self ._fp .read (1 << tokenL ),
687
+ 'big' , signed = tokenL >= 3 )
680
688
681
689
elif token == 0x22 : # real
682
- return struct .unpack ('>f' , self ._fp .read (4 ))[0 ]
690
+ result = struct .unpack ('>f' , self ._fp .read (4 ))[0 ]
683
691
684
692
elif token == 0x23 : # real
685
- return struct .unpack ('>d' , self ._fp .read (8 ))[0 ]
693
+ result = struct .unpack ('>d' , self ._fp .read (8 ))[0 ]
686
694
687
695
elif token == 0x33 : # date
688
696
f = struct .unpack ('>d' , self ._fp .read (8 ))[0 ]
689
697
# timestamp 0 of binary plists corresponds to 1/1/2001
690
698
# (year of Mac OS X 10.0), instead of 1/1/1970.
691
- return datetime .datetime (2001 , 1 , 1 ) + datetime .timedelta (seconds = f )
699
+ result = (datetime .datetime (2001 , 1 , 1 ) +
700
+ datetime .timedelta (seconds = f ))
692
701
693
702
elif tokenH == 0x40 : # data
694
703
s = self ._get_size (tokenL )
695
704
if self ._use_builtin_types :
696
- return self ._fp .read (s )
705
+ result = self ._fp .read (s )
697
706
else :
698
- return Data (self ._fp .read (s ))
707
+ result = Data (self ._fp .read (s ))
699
708
700
709
elif tokenH == 0x50 : # ascii string
701
710
s = self ._get_size (tokenL )
702
711
result = self ._fp .read (s ).decode ('ascii' )
703
- return result
712
+ result = result
704
713
705
714
elif tokenH == 0x60 : # unicode string
706
715
s = self ._get_size (tokenL )
707
- return self ._fp .read (s * 2 ).decode ('utf-16be' )
716
+ result = self ._fp .read (s * 2 ).decode ('utf-16be' )
708
717
709
718
# tokenH == 0x80 is documented as 'UID' and appears to be used for
710
719
# keyed-archiving, not in plists.
711
720
712
721
elif tokenH == 0xA0 : # array
713
722
s = self ._get_size (tokenL )
714
723
obj_refs = self ._read_refs (s )
715
- return [self ._read_object (self ._object_offsets [x ])
716
- for x in obj_refs ]
724
+ result = []
725
+ self ._objects [ref ] = result
726
+ result .extend (self ._read_object (x ) for x in obj_refs )
717
727
718
728
# tokenH == 0xB0 is documented as 'ordset', but is not actually
719
729
# implemented in the Apple reference code.
@@ -726,12 +736,15 @@ def _read_object(self, offset):
726
736
key_refs = self ._read_refs (s )
727
737
obj_refs = self ._read_refs (s )
728
738
result = self ._dict_type ()
739
+ self ._objects [ref ] = result
729
740
for k , o in zip (key_refs , obj_refs ):
730
- result [self ._read_object (self ._object_offsets [k ])
731
- ] = self ._read_object (self ._object_offsets [o ])
732
- return result
741
+ result [self ._read_object (k )] = self ._read_object (o )
733
742
734
- raise InvalidFileException ()
743
+ else :
744
+ raise InvalidFileException ()
745
+
746
+ self ._objects [ref ] = result
747
+ return result
735
748
736
749
def _count_to_size (count ):
737
750
if count < 1 << 8 :
@@ -746,6 +759,8 @@ def _count_to_size(count):
746
759
else :
747
760
return 8
748
761
762
+ _scalars = (str , int , float , datetime .datetime , bytes )
763
+
749
764
class _BinaryPlistWriter (object ):
750
765
def __init__ (self , fp , sort_keys , skipkeys ):
751
766
self ._fp = fp
@@ -801,24 +816,25 @@ def _flatten(self, value):
801
816
# First check if the object is in the object table, not used for
802
817
# containers to ensure that two subcontainers with the same contents
803
818
# will be serialized as distinct values.
804
- if isinstance (value , (
805
- str , int , float , datetime .datetime , bytes , bytearray )):
819
+ if isinstance (value , _scalars ):
806
820
if (type (value ), value ) in self ._objtable :
807
821
return
808
822
809
823
elif isinstance (value , Data ):
810
824
if (type (value .data ), value .data ) in self ._objtable :
811
825
return
812
826
827
+ elif id (value ) in self ._objidtable :
828
+ return
829
+
813
830
# Add to objectreference map
814
831
refnum = len (self ._objlist )
815
832
self ._objlist .append (value )
816
- try :
817
- if isinstance (value , Data ):
818
- self ._objtable [(type (value .data ), value .data )] = refnum
819
- else :
820
- self ._objtable [(type (value ), value )] = refnum
821
- except TypeError :
833
+ if isinstance (value , _scalars ):
834
+ self ._objtable [(type (value ), value )] = refnum
835
+ elif isinstance (value , Data ):
836
+ self ._objtable [(type (value .data ), value .data )] = refnum
837
+ else :
822
838
self ._objidtable [id (value )] = refnum
823
839
824
840
# And finally recurse into containers
@@ -845,12 +861,11 @@ def _flatten(self, value):
845
861
self ._flatten (o )
846
862
847
863
def _getrefnum(self, value):
    """Return the reference number stored for *value*.

    Scalars (instances of the types in ``_scalars``) and ``Data``
    payloads are looked up in ``self._objtable`` under a
    ``(type, value)`` key; including the type keeps values that compare
    equal across types (``1``, ``1.0``, ``True``) in separate entries.
    Any other value is looked up by identity in ``self._objidtable``.

    A value that was never registered propagates the lookup error from
    the table unchanged.
    """
    if isinstance(value, _scalars):
        key = (type(value), value)
    elif isinstance(value, Data):
        # Data is only a wrapper; its entry is keyed by the wrapped payload.
        key = (type(value.data), value.data)
    else:
        # Not keyed by value: these entries are identified by id() alone.
        return self._objidtable[id(value)]
    return self._objtable[key]
855
870
856
871
def _write_size (self , token , size ):
0 commit comments