@@ -705,8 +705,8 @@ def _seek_to_extent(self, extent):
705705 self ._cdfp .seek (extent * self .logical_block_size )
706706
707707 @functools .lru_cache (maxsize = 256 )
708- def _find_iso_record (self , iso_path ):
709- # type: (bytes) -> dr.DirectoryRecord
708+ def _find_iso_record (self , iso_path , encoding = 'utf-8' ):
709+ # type: (bytes, str ) -> dr.DirectoryRecord
710710 """
711711 An internal method to find a directory record on the ISO given an ISO
712712 path. If the entry is found, it returns the directory record object
@@ -718,11 +718,11 @@ def _find_iso_record(self, iso_path):
718718 Returns:
719719 The directory record entry representing the entry on the ISO.
720720 """
721- return _find_dr_record_by_name (self .pvd , iso_path , 'utf-8' )
721+ return _find_dr_record_by_name (self .pvd , iso_path , encoding )
722722
723723 @functools .lru_cache (maxsize = 256 )
724- def _find_rr_record (self , rr_path ):
725- # type: (bytes) -> dr.DirectoryRecord
724+ def _find_rr_record (self , rr_path , encoding = 'utf-8' ):
725+ # type: (bytes, str ) -> dr.DirectoryRecord
726726 """
727727 An internal method to find a directory record on the ISO given a Rock
728728 Ridge path. If the entry is found, it returns the directory record
@@ -742,7 +742,7 @@ def _find_rr_record(self, rr_path):
742742
743743 splitpath = utils .split_path (rr_path )
744744
745- currpath = splitpath .pop (0 ).decode ('utf-8' ).encode ('utf-8' )
745+ currpath = splitpath .pop (0 ).decode ('utf-8' ).encode (encoding )
746746
747747 entry = root_dir_record
748748
@@ -793,13 +793,13 @@ def _find_rr_record(self, rr_path):
793793 if not child .is_dir ():
794794 break
795795 entry = child
796- currpath = splitpath .pop (0 ).decode ('utf-8' ).encode ('utf-8' )
796+ currpath = splitpath .pop (0 ).decode ('utf-8' ).encode (encoding )
797797
798798 raise pycdlibexception .PyCdlibInvalidInput ('Could not find path' )
799799
800800 @functools .lru_cache (maxsize = 256 )
801- def _find_joliet_record (self , joliet_path ):
802- # type: (bytes) -> dr.DirectoryRecord
801+ def _find_joliet_record (self , joliet_path , encoding = 'utf-16_be' ):
802+ # type: (bytes, str ) -> dr.DirectoryRecord
803803 """
804804 An internal method to find a directory record on the ISO given a Joliet
805805 path. If the entry is found, it returns the directory record object
@@ -813,7 +813,7 @@ def _find_joliet_record(self, joliet_path):
813813 """
814814 if self .joliet_vd is None :
815815 raise pycdlibexception .PyCdlibInternalError ('Joliet path requested on non-Joliet ISO' )
816- return _find_dr_record_by_name (self .joliet_vd , joliet_path , 'utf-16_be' )
816+ return _find_dr_record_by_name (self .joliet_vd , joliet_path , encoding )
817817
818818 @functools .lru_cache (maxsize = 256 )
819819 def _find_udf_record (self , udf_path ):
@@ -2412,8 +2412,8 @@ def _udf_get_file_from_iso_fp(self, outfp, blocksize, udf_path):
24122412 utils .copy_data (data_len , blocksize , data_fp , outfp )
24132413
24142414 def _get_file_from_iso_fp (self , outfp , blocksize , iso_path , rr_path ,
2415- joliet_path ):
2416- # type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes]) -> None
2415+ joliet_path , encoding = None ):
2416+ # type: (BinaryIO, int, Optional[bytes], Optional[bytes], Optional[bytes], Optional[str] ) -> None
24172417 """
24182418 An internal method to fetch a single file from the ISO and write it out
24192419 to the file object.
@@ -2433,13 +2433,16 @@ def _get_file_from_iso_fp(self, outfp, blocksize, iso_path, rr_path,
24332433 if joliet_path is not None :
24342434 if self .joliet_vd is None :
24352435 raise pycdlibexception .PyCdlibInvalidInput ('Cannot fetch a joliet_path from a non-Joliet ISO' )
2436- found_record = self ._find_joliet_record (joliet_path )
2436+ encoding = encoding or 'utf-16_be'
2437+ found_record = self ._find_joliet_record (joliet_path , encoding )
24372438 elif rr_path is not None :
24382439 if not self .rock_ridge :
24392440 raise pycdlibexception .PyCdlibInvalidInput ('Cannot fetch a rr_path from a non-Rock Ridge ISO' )
2440- found_record = self ._find_rr_record (rr_path )
2441+ encoding = encoding or 'utf-8'
2442+ found_record = self ._find_rr_record (rr_path , encoding )
24412443 elif iso_path is not None :
2442- found_record = self ._find_iso_record (iso_path )
2444+ encoding = encoding or 'utf-8'
2445+ found_record = self ._find_iso_record (iso_path , encoding )
24432446 else :
24442447 raise pycdlibexception .PyCdlibInternalError ('Invalid path passed to get_file_from_iso_fp' )
24452448
@@ -3471,8 +3474,8 @@ def _rm_joliet_dir(self, joliet_path):
34713474
34723475 return num_bytes_to_remove
34733476
3474- def _get_iso_entry (self , iso_path ):
3475- # type: (bytes) -> dr.DirectoryRecord
3477+ def _get_iso_entry (self , iso_path , encoding = 'utf-8' ):
3478+ # type: (bytes, str ) -> dr.DirectoryRecord
34763479 """
34773480 Internal method to get the directory record for an ISO path.
34783481
@@ -3484,10 +3487,10 @@ def _get_iso_entry(self, iso_path):
34843487 if self ._needs_reshuffle :
34853488 self ._reshuffle_extents ()
34863489
3487- return self ._find_iso_record (iso_path )
3490+ return self ._find_iso_record (iso_path , encoding )
34883491
3489- def _get_rr_entry (self , rr_path ):
3490- # type: (bytes) -> dr.DirectoryRecord
3492+ def _get_rr_entry (self , rr_path , encoding = 'utf-8' ):
3493+ # type: (bytes, str ) -> dr.DirectoryRecord
34913494 """
34923495 Internal method to get the directory record for a Rock Ridge path.
34933496
@@ -3500,10 +3503,10 @@ def _get_rr_entry(self, rr_path):
35003503 if self ._needs_reshuffle :
35013504 self ._reshuffle_extents ()
35023505
3503- return self ._find_rr_record (rr_path )
3506+ return self ._find_rr_record (rr_path , encoding )
35043507
3505- def _get_joliet_entry (self , joliet_path ):
3506- # type: (bytes) -> dr.DirectoryRecord
3508+ def _get_joliet_entry (self , joliet_path , encoding = 'utf-16_be' ):
3509+ # type: (bytes, str ) -> dr.DirectoryRecord
35073510 """
35083511 Internal method to get the directory record for a Joliet path.
35093512
@@ -3516,7 +3519,7 @@ def _get_joliet_entry(self, joliet_path):
35163519 if self ._needs_reshuffle :
35173520 self ._reshuffle_extents ()
35183521
3519- return self ._find_joliet_record (joliet_path )
3522+ return self ._find_joliet_record (joliet_path , encoding )
35203523
35213524 def _get_udf_entry (self , udf_path ):
35223525 # type: (str) -> udfmod.UDFFileEntry
@@ -4172,6 +4175,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
41724175 with iso_path, rr_path, and udf_path).
41734176 udf_path - The absolute UDF path to lookup on the ISO (exclusive with
41744177 iso_path, rr_path, and joliet_path).
4178+ encoding - The encoding to use when looking up the path on the ISO; if unset, 'utf-16_be' is used for joliet_path and 'utf-8' for iso_path and rr_path (not passed to the udf_path lookup).
41754179 Returns:
41764180 Nothing.
41774181 """
@@ -4183,6 +4187,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
41834187 iso_path = None
41844188 rr_path = None
41854189 udf_path = None
4190+ encoding = None
41864191 num_paths = 0
41874192 for key , value in kwargs .items ():
41884193 if key == 'blocksize' :
@@ -4213,6 +4218,8 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
42134218 num_paths += 1
42144219 elif value is not None :
42154220 raise pycdlibexception .PyCdlibInvalidInput ('udf_path must be a string' )
4221+ elif key == 'encoding' :
4222+ encoding = value
42164223 else :
42174224 raise pycdlibexception .PyCdlibInvalidInput ('Unknown keyword %s' % (key ))
42184225
@@ -4223,7 +4230,7 @@ def get_file_from_iso_fp(self, outfp, **kwargs):
42234230 self ._udf_get_file_from_iso_fp (outfp , blocksize , udf_path )
42244231 else :
42254232 self ._get_file_from_iso_fp (outfp , blocksize , iso_path , rr_path ,
4226- joliet_path )
4233+ joliet_path , encoding )
42274234
42284235 def get_and_write (self , iso_path , local_path , blocksize = 8192 ):
42294236 # type: (str, str, int) -> None
@@ -5459,6 +5466,8 @@ def list_children(self, **kwargs):
54595466 if key in ('joliet_path' , 'rr_path' , 'iso_path' , 'udf_path' ):
54605467 if value is not None :
54615468 num_paths += 1
5469+ elif key == 'encoding' :  # exact match; `key in ('encoding')` is a substring test on a str, not tuple membership
5470+ continue
54625471 else :
54635472 raise pycdlibexception .PyCdlibInvalidInput ("Invalid keyword, must be one of 'iso_path', 'rr_path', 'joliet_path', or 'udf_path'" )
54645473
@@ -5476,12 +5485,15 @@ def list_children(self, **kwargs):
54765485 else :
54775486 use_rr = False
54785487 if 'joliet_path' in kwargs :
5479- rec = self ._get_joliet_entry (self ._normalize_joliet_path (kwargs ['joliet_path' ]))
5488+ kwargs ['encoding' ] = kwargs .get ('encoding' , None ) or 'utf-16_be'
5489+ rec = self ._get_joliet_entry (self ._normalize_joliet_path (kwargs ['joliet_path' ]), kwargs ['encoding' ])
54805490 elif 'rr_path' in kwargs :
5481- rec = self ._get_rr_entry (utils .normpath (kwargs ['rr_path' ]))
5491+ kwargs ['encoding' ] = kwargs .get ('encoding' , None ) or 'utf-8'
5492+ rec = self ._get_rr_entry (utils .normpath (kwargs ['rr_path' ]), kwargs ['encoding' ])
54825493 use_rr = True
54835494 else :
5484- rec = self ._get_iso_entry (utils .normpath (kwargs ['iso_path' ]))
5495+ kwargs ['encoding' ] = kwargs .get ('encoding' , None ) or 'utf-8'
5496+ rec = self ._get_iso_entry (utils .normpath (kwargs ['iso_path' ]), kwargs ['encoding' ])
54855497
54865498 for c in _yield_children (rec , use_rr ):
54875499 yield c
@@ -5626,8 +5638,8 @@ def rm_isohybrid(self):
56265638
56275639 self .isohybrid_mbr = None
56285640
5629- def full_path_from_dirrecord (self , rec , rockridge = False ):
5630- # type: (Union[dr.DirectoryRecord, udfmod.UDFFileEntry], bool) -> str
5641+ def full_path_from_dirrecord (self , rec , rockridge = False , user_encoding = None ):
5642+ # type: (Union[dr.DirectoryRecord, udfmod.UDFFileEntry], bool, Optional[str] ) -> str
56315643 """
56325644 Get the absolute path of a directory record.
56335645
@@ -5646,6 +5658,8 @@ def full_path_from_dirrecord(self, rec, rockridge=False):
56465658 if self .joliet_vd is not None and id (rec .vd ) == id (self .joliet_vd ):
56475659 encoding = 'utf-16_be'
56485660
5661+ if user_encoding :
5662+ encoding = user_encoding
56495663 # A root entry has no Rock Ridge entry, even on a Rock Ridge ISO.
56505664 # Always return / here.
56515665 if rec .is_root :
@@ -5685,6 +5699,8 @@ def full_path_from_dirrecord(self, rec, rockridge=False):
56855699 encoding = rec .file_ident .encoding
56865700 else :
56875701 encoding = 'utf-8'
5702+ if user_encoding :
5703+ encoding = user_encoding
56885704 udf_rec = rec # type: Optional[udfmod.UDFFileEntry]
56895705 while udf_rec is not None :
56905706 ident = udf_rec .file_identifier ()
@@ -5893,13 +5909,13 @@ def walk(self, **kwargs):
58935909 while dirs :
58945910 dir_record = dirs .popleft ()
58955911
5896- relpath = self .full_path_from_dirrecord (dir_record ,
5897- rockridge = path_type == 'rr_path' )
5912+ relpath = self .full_path_from_dirrecord (dir_record , rockridge = path_type == 'rr_path' ,
5913+ user_encoding = user_encoding )
58985914 dirlist = []
58995915 filelist = []
59005916 dirdict = {}
59015917
5902- for child in reversed (list (self .list_children (** {path_type : relpath }))):
5918+ for child in reversed (list (self .list_children (** {path_type : relpath , 'encoding' : kwargs . get ( 'encoding' , None ) }))):
59035919 if child is None or child .is_dot () or child .is_dotdot ():
59045920 continue
59055921
0 commit comments