66import shlex
77import shutil
88import subprocess
9- import tarfile
109from time import sleep
1110from typing import Any , IO , Optional , Union
1211
@@ -186,31 +185,51 @@ def make_cache_object(dir_path: Path, path: Path) -> CacheObject:
186185
187186
188187class Inventory :
189- """Encapsulate the tarfile TarFile object and file stream
188+ """Encapsulate the file stream and subprocess.Popen object
190189
191190 This encapsulation allows cleaner downstream handling, so that we can close
192191 both the extractfile file stream and the tarball object itself when we're
193192 done. This eliminates interference with later operations.
194193 """
195194
196- def __init__ (self , stream : IO [bytes ], tar_ref : Optional [ tarfile . TarFile ] = None ):
195+ def __init__ (self , stream : IO [bytes ], subproc : subprocess . Popen = None ):
197196 """Construct an instance to track extracted inventory
198197
199198 This encapsulates many byte stream operations so that it can be used
200199 as if it were a byte stream.
201200
202201 Args:
203202 stream: the data stream of a specific tarball member
204- tar_ref : the TarFile object
203+ subproc : the subprocess producing the stream
205204 """
206- self .tarfile = tar_ref
205+ self .subproc = subproc
207206 self .stream = stream
208207
209208 def close (self ):
210- """Close both the byte stream and, if open, a tarfile object"""
209+ """Close the byte stream and clean up the Popen object"""
210+ if self .subproc :
211+ if self .subproc .poll () is None :
212+ # The subprocess is still running: kill it, drain the outputs,
213+ # and wait for its termination. (If the timeout on the wait()
214+ # is exceeded, it will raise subprocess.TimeoutExpired.)
215+ self .subproc .kill ()
216+ if self .subproc .stdout :
217+ while self .subproc .stdout .read (4096 ):
218+ pass
219+ if self .subproc .stderr :
220+ while self .subproc .stderr .read (4096 ):
221+ pass
222+ self .subproc .wait (60.0 )
223+
224+ # Release our reference to the subprocess.Popen object so that the
225+ # object can be reclaimed.
226+ self .subproc = None
227+
228+ # Explicitly close the stream, in case there never was an associated
229+ # subprocess. (If there was an associated subprocess, the streams are
230+ # now empty, and we'll assume that they are closed when the Popen
231+ # object is reclaimed.)
211232 self .stream .close ()
212- if self .tarfile :
213- self .tarfile .close ()
214233
215234 def getbuffer (self ):
216235 """Return the underlying byte buffer (used by send_file)"""
@@ -230,7 +249,7 @@ def seek(self, *args, **kwargs) -> int:
230249
231250 def __repr__ (self ) -> str :
232251 """Return a string representation"""
233- return f"<Stream { self .stream } from { self .tarfile } >"
252+ return f"<Stream { self .stream } from { self .subproc } >"
234253
235254 def __iter__ (self ):
236255 """Allow iterating through lines in the buffer"""
@@ -516,18 +535,19 @@ def extract(tarball_path: Path, path: str) -> Inventory:
516535
517536 Returns:
518537 An inventory object that mimics an IO[bytes] object while also
519- maintaining a reference to the tarfile TarFile object to be
520- closed later.
538+ maintaining a reference to the subprocess.Popen object to be
539+ cleaned up later.
521540
522541 Raise:
523- TarballNotFound on failure opening the tarball
524542 CacheExtractBadPath if the target cannot be extracted
543+ TarballUnpackError on other tar-command failures
525544 Any exception raised by subprocess.Popen()
526- RuntimeError on unexpected failures (see message)
527545 """
528546 tar_path = shutil .which ("tar" )
529547 if tar_path is None :
530- raise RuntimeError ("External 'tar' executable not found" )
548+ raise TarballUnpackError (
549+ tarball_path , "External 'tar' executable not found"
550+ )
531551
532552 # The external tar utility offers better capabilities than the
533553 # Standard Library package, so run it in a subprocess: extract
@@ -550,31 +570,29 @@ def extract(tarball_path: Path, path: str) -> Inventory:
550570
551571 # If the return code is None (meaning the command is still running) or
552572 # is zero (meaning it completed successfully), then return the stream
553- # containing the extracted file to our caller.
573+ # containing the extracted file to our caller, and return the Popen
574+ # object to ensure it stays "alive" until our caller is done with the
575+ # stream.
554576 if not tarproc .returncode :
555- # Since we own the `tarproc` object, we don't need to return a
556- # value for the second part of the Inventory object (this is an
557- # artifact from when we used the Standard Library tarfile
558- # package).
559- return Inventory (tarproc .stdout , None )
577+ return Inventory (tarproc .stdout , tarproc )
560578
561579 # The tar command was invoked successfully (otherwise, the Popen()
562580 # constructor would have raised an exception), but it exited with
563581 # an error code. We have to glean what went wrong by looking at
564- # stderror , which is fragile but the only option. Rather than
582+ # stderr , which is fragile but the only option. Rather than
565583 # relying on looking for specific text, we assume that, if the
566584 # error references the tar file, the file was not found (or is
567585 # otherwise inaccessible) and if the error references the archive
568586 # member, then it was a bad path. (Failing those, report a generic
569587 # failure.)
570588 error_text = tarproc .stderr .read ().decode ()
571- if str (tarball_path ) in error_text :
572- # "tar: /path/to/bad_tarball.tar.xz: Cannot open: No such file or directory"
573- raise TarballNotFound (str (tarball_path ))
574589 if path in error_text :
575590 # "tar: missing_member.txt: Not found in archive"
576591 raise CacheExtractBadPath (tarball_path , path )
577- raise RuntimeError (f"Unexpected error from { tar_path } : { error_text !r} " )
592+ # "tar: /path/to/bad_tarball.tar.xz: Cannot open: No such file or directory"
593+ raise TarballUnpackError (
594+ tarball_path , "Unexpected error from {tar_path}: {error_text!r}"
595+ )
578596
579597 def get_inventory (self , path : str ) -> Optional [JSONOBJECT ]:
580598 """Access the file stream of a tarball member file.
@@ -592,9 +610,9 @@ def get_inventory(self, path: str) -> Optional[JSONOBJECT]:
592610 "stream" : Inventory (self .tarball_path .open ("rb" ), None ),
593611 }
594612 else :
595- file_path = Path ( self .name ) / path
596- stream = Tarball .extract (self .tarball_path , str ( file_path ) )
597- info = {"name" : file_path . name , "type" : CacheType .FILE , "stream" : stream }
613+ file_path = f" { self .name } / { path } "
614+ stream = Tarball .extract (self .tarball_path , file_path )
615+ info = {"name" : file_path , "type" : CacheType .FILE , "stream" : stream }
598616
599617 return info
600618
0 commit comments