@@ -525,6 +525,8 @@ def __init__(self, message="Invalid file"):
525
525
526
526
_BINARY_FORMAT = {1 : 'B' , 2 : 'H' , 4 : 'L' , 8 : 'Q' }
527
527
528
+ _undefined = object ()
529
+
528
530
class _BinaryPlistParser :
529
531
"""
530
532
Read or write a binary plist file, following the description of the binary
@@ -555,7 +557,8 @@ def parse(self, fp):
555
557
) = struct .unpack ('>6xBBQQQ' , trailer )
556
558
self ._fp .seek (offset_table_offset )
557
559
self ._object_offsets = self ._read_ints (num_objects , offset_size )
558
- return self ._read_object (self ._object_offsets [top_object ])
560
+ self ._objects = [_undefined ] * num_objects
561
+ return self ._read_object (top_object )
559
562
560
563
except (OSError , IndexError , struct .error , OverflowError ,
561
564
UnicodeDecodeError ):
@@ -584,71 +587,78 @@ def _read_ints(self, n, size):
584
587
def _read_refs (self , n ):
585
588
return self ._read_ints (n , self ._ref_size )
586
589
587
- def _read_object (self , offset ):
590
+ def _read_object (self , ref ):
588
591
"""
589
- read the object at offset .
592
+ read the object by reference .
590
593
591
594
May recursively read sub-objects (content of an array/dict/set)
592
595
"""
596
+ result = self ._objects [ref ]
597
+ if result is not _undefined :
598
+ return result
599
+
600
+ offset = self ._object_offsets [ref ]
593
601
self ._fp .seek (offset )
594
602
token = self ._fp .read (1 )[0 ]
595
603
tokenH , tokenL = token & 0xF0 , token & 0x0F
596
604
597
605
if token == 0x00 :
598
- return None
606
+ result = None
599
607
600
608
elif token == 0x08 :
601
- return False
609
+ result = False
602
610
603
611
elif token == 0x09 :
604
- return True
612
+ result = True
605
613
606
614
# The referenced source code also mentions URL (0x0c, 0x0d) and
607
615
# UUID (0x0e), but neither can be generated using the Cocoa libraries.
608
616
609
617
elif token == 0x0f :
610
- return b''
618
+ result = b''
611
619
612
620
elif tokenH == 0x10 : # int
613
- return int .from_bytes (self ._fp .read (1 << tokenL ),
614
- 'big' , signed = tokenL >= 3 )
621
+ result = int .from_bytes (self ._fp .read (1 << tokenL ),
622
+ 'big' , signed = tokenL >= 3 )
615
623
616
624
elif token == 0x22 : # real
617
- return struct .unpack ('>f' , self ._fp .read (4 ))[0 ]
625
+ result = struct .unpack ('>f' , self ._fp .read (4 ))[0 ]
618
626
619
627
elif token == 0x23 : # real
620
- return struct .unpack ('>d' , self ._fp .read (8 ))[0 ]
628
+ result = struct .unpack ('>d' , self ._fp .read (8 ))[0 ]
621
629
622
630
elif token == 0x33 : # date
623
631
f = struct .unpack ('>d' , self ._fp .read (8 ))[0 ]
624
632
# timestamp 0 of binary plists corresponds to 1/1/2001
625
633
# (year of Mac OS X 10.0), instead of 1/1/1970.
626
- return datetime .datetime (2001 , 1 , 1 ) + datetime .timedelta (seconds = f )
634
+ result = (datetime .datetime (2001 , 1 , 1 ) +
635
+ datetime .timedelta (seconds = f ))
627
636
628
637
elif tokenH == 0x40 : # data
629
638
s = self ._get_size (tokenL )
630
639
if self ._use_builtin_types :
631
- return self ._fp .read (s )
640
+ result = self ._fp .read (s )
632
641
else :
633
- return Data (self ._fp .read (s ))
642
+ result = Data (self ._fp .read (s ))
634
643
635
644
elif tokenH == 0x50 : # ascii string
636
645
s = self ._get_size (tokenL )
637
646
result = self ._fp .read (s ).decode ('ascii' )
638
- return result
647
+ result = result
639
648
640
649
elif tokenH == 0x60 : # unicode string
641
650
s = self ._get_size (tokenL )
642
- return self ._fp .read (s * 2 ).decode ('utf-16be' )
651
+ result = self ._fp .read (s * 2 ).decode ('utf-16be' )
643
652
644
653
# tokenH == 0x80 is documented as 'UID' and appears to be used for
645
654
# keyed-archiving, not in plists.
646
655
647
656
elif tokenH == 0xA0 : # array
648
657
s = self ._get_size (tokenL )
649
658
obj_refs = self ._read_refs (s )
650
- return [self ._read_object (self ._object_offsets [x ])
651
- for x in obj_refs ]
659
+ result = []
660
+ self ._objects [ref ] = result
661
+ result .extend (self ._read_object (x ) for x in obj_refs )
652
662
653
663
# tokenH == 0xB0 is documented as 'ordset', but is not actually
654
664
# implemented in the Apple reference code.
@@ -661,12 +671,15 @@ def _read_object(self, offset):
661
671
key_refs = self ._read_refs (s )
662
672
obj_refs = self ._read_refs (s )
663
673
result = self ._dict_type ()
674
+ self ._objects [ref ] = result
664
675
for k , o in zip (key_refs , obj_refs ):
665
- result [self ._read_object (self ._object_offsets [k ])
666
- ] = self ._read_object (self ._object_offsets [o ])
667
- return result
676
+ result [self ._read_object (k )] = self ._read_object (o )
668
677
669
- raise InvalidFileException ()
678
+ else :
679
+ raise InvalidFileException ()
680
+
681
+ self ._objects [ref ] = result
682
+ return result
670
683
671
684
def _count_to_size (count ):
672
685
if count < 1 << 8 :
@@ -681,6 +694,8 @@ def _count_to_size(count):
681
694
else :
682
695
return 8
683
696
697
+ _scalars = (str , int , float , datetime .datetime , bytes )
698
+
684
699
class _BinaryPlistWriter (object ):
685
700
def __init__ (self , fp , sort_keys , skipkeys ):
686
701
self ._fp = fp
@@ -736,24 +751,25 @@ def _flatten(self, value):
736
751
# First check if the object is in the object table, not used for
737
752
# containers to ensure that two subcontainers with the same contents
738
753
# will be serialized as distinct values.
739
- if isinstance (value , (
740
- str , int , float , datetime .datetime , bytes , bytearray )):
754
+ if isinstance (value , _scalars ):
741
755
if (type (value ), value ) in self ._objtable :
742
756
return
743
757
744
758
elif isinstance (value , Data ):
745
759
if (type (value .data ), value .data ) in self ._objtable :
746
760
return
747
761
762
+ elif id (value ) in self ._objidtable :
763
+ return
764
+
748
765
# Add to objectreference map
749
766
refnum = len (self ._objlist )
750
767
self ._objlist .append (value )
751
- try :
752
- if isinstance (value , Data ):
753
- self ._objtable [(type (value .data ), value .data )] = refnum
754
- else :
755
- self ._objtable [(type (value ), value )] = refnum
756
- except TypeError :
768
+ if isinstance (value , _scalars ):
769
+ self ._objtable [(type (value ), value )] = refnum
770
+ elif isinstance (value , Data ):
771
+ self ._objtable [(type (value .data ), value .data )] = refnum
772
+ else :
757
773
self ._objidtable [id (value )] = refnum
758
774
759
775
# And finally recurse into containers
@@ -780,12 +796,11 @@ def _flatten(self, value):
780
796
self ._flatten (o )
781
797
782
798
def _getrefnum(self, value):
    """Return the reference number recorded for *value*.

    Scalars are looked up in ``_objtable`` by ``(type, value)``, ``Data``
    wrappers by the type and value of their ``data`` payload, and every
    other object by ``id()`` in ``_objidtable``.

    Raises ``KeyError`` if *value* has no entry in the relevant table.
    """
    if isinstance(value, _scalars):
        return self._objtable[(type(value), value)]

    if isinstance(value, Data):
        payload = value.data
        return self._objtable[(type(payload), payload)]

    # Containers and other unhashable values are tracked by identity.
    return self._objidtable[id(value)]
790
805
791
806
def _write_size (self , token , size ):
0 commit comments