|
7 | 7 | import shutil |
8 | 8 | import subprocess |
9 | 9 | import tarfile |
| 10 | +from time import sleep |
10 | 11 | from typing import Any, IO, Optional, Union |
11 | 12 |
|
12 | 13 | from pbench.common import MetadataLog, selinux |
@@ -521,19 +522,59 @@ def extract(tarball_path: Path, path: str) -> Inventory: |
521 | 522 | Raise: |
522 | 523 | TarballNotFound on failure opening the tarball |
523 | 524 | CacheExtractBadPath if the target cannot be extracted |
| 525 | + Any exception raised by subprocess.Popen() |
| 526 | + RuntimeError on unexpected failures (see message) |
524 | 527 | """ |
525 | | - try: |
526 | | - tar = tarfile.open(tarball_path, "r:*") |
527 | | - except Exception as exc: |
528 | | - raise TarballNotFound(str(tarball_path)) from exc |
529 | | - try: |
530 | | - stream = tar.extractfile(str(path)) |
531 | | - except Exception as exc: |
532 | | - raise CacheExtractBadPath(tarball_path, path) from exc |
533 | | - else: |
534 | | - if not stream: |
535 | | - raise CacheExtractBadPath(tarball_path, path) |
536 | | - return Inventory(stream, tar) |
| 528 | + tar_path = shutil.which("tar") |
| 529 | + if tar_path is None: |
| 530 | + raise RuntimeError("External 'tar' executable not found") |
| 531 | + |
| 532 | + # The external tar utility offers better capabilities than the |
| 533 | + # Standard Library package, so run it in a subprocess: extract |
| 534 | + # the target member from the specified tar archive and direct it to |
| 535 | + # stdout; we expect only one occurrence of the target member, so stop |
| 536 | + # processing as soon as we find it instead of looking for additional |
| 537 | + # instances of it later in the archive -- this is a huge savings when |
| 538 | + # the archive is very large. |
| 539 | + tarproc = subprocess.Popen( |
| 540 | + [str(tar_path), "xOf", tarball_path, "--occurrence=1", path], |
| 541 | + stdin=subprocess.DEVNULL, |
| 542 | + stdout=subprocess.PIPE, |
| 543 | + stderr=subprocess.PIPE |
| 544 | + ) |
| 545 | + |
| 546 | + # Wait for one of two things to happen: either the subprocess produces |
| 547 | + # some output or it exits. |
| 548 | + while not tarproc.stdout.peek() and tarproc.poll() is None: |
| 549 | + sleep(0.02) |
| 550 | + |
| 551 | + # If the return code is None (meaning the command is still running) or |
| 552 | + # is zero (meaning it completed successfully), then return the stream |
| 553 | + # containing the extracted file to our caller. |
| 554 | + if not tarproc.returncode: |
| 555 | + # Since we own the `tarproc` object, we don't need to return a |
| 556 | + # value for the second part of the Inventory object (this is an |
| 557 | + # artifact from when we used the Standard Library tarfile |
| 558 | + # package). |
| 559 | + return Inventory(tarproc.stdout, None) |
| 560 | + |
| 561 | + # The tar command was invoked successfully (otherwise, the Popen() |
| 562 | + # constructor would have raised an exception), but it exited with |
| 563 | + # an error code. We have to glean what went wrong by looking at |
| 564 | + # stderr, which is fragile but the only option. Rather than |
| 565 | + # relying on looking for specific text, we assume that, if the |
| 566 | + # error references the tar file, the file was not found (or is |
| 567 | + # otherwise inaccessible) and if the error references the archive |
| 568 | + # member, then it was a bad path. (Failing those, report a generic |
| 569 | + # failure.) |
| 570 | + error_text = tarproc.stderr.read().decode() |
| 571 | + if str(tarball_path) in error_text: |
| 572 | + # "tar: /path/to/bad_tarball.tar.xz: Cannot open: No such file or directory" |
| 573 | + raise TarballNotFound(str(tarball_path)) |
| 574 | + if path in error_text: |
| 575 | + # "tar: missing_member.txt: Not found in archive" |
| 576 | + raise CacheExtractBadPath(tarball_path, path) |
| 577 | + raise RuntimeError(f"Unexpected error from {tar_path}: {error_text!r}") |
537 | 578 |
|
538 | 579 | def get_inventory(self, path: str) -> Optional[JSONOBJECT]: |
539 | 580 | """Access the file stream of a tarball member file. |
|
0 commit comments