diff --git a/src/core/src/bootstrap/Constants.py b/src/core/src/bootstrap/Constants.py index 2ae56eb8..bce9d0ef 100644 --- a/src/core/src/bootstrap/Constants.py +++ b/src/core/src/bootstrap/Constants.py @@ -51,6 +51,7 @@ def __iter__(self): # Max values MAX_AUTO_ASSESSMENT_LOGFILE_SIZE_IN_BYTES = 5*1024*1024 MAX_AUTO_ASSESSMENT_WAIT_FOR_MAIN_CORE_EXEC_IN_MINUTES = 3 * 60 + MAX_RETRY_ATTEMPTS_FOR_ERROR_MITIGATION = 10 class SystemPaths(EnumBackport): SYSTEMD_ROOT = "/etc/systemd/system/" @@ -268,7 +269,7 @@ class RebootStatus(EnumBackport): STARTED = "Started" COMPLETED = "Completed" FAILED = "Failed" - + # Enum for VM Cloud Type class VMCloudType(EnumBackport): UNKNOWN = "Unknown" diff --git a/src/core/src/package_managers/YumPackageManager.py b/src/core/src/package_managers/YumPackageManager.py index 33a5cf91..60df32c0 100644 --- a/src/core/src/package_managers/YumPackageManager.py +++ b/src/core/src/package_managers/YumPackageManager.py @@ -94,9 +94,9 @@ def __init__(self, env_layer, execution_config, composite_logger, telemetry_writ self.known_errors_and_fixes = {"SSL peer rejected your certificate as expired": self.fix_ssl_certificate_issue, "Error: Cannot retrieve repository metadata (repomd.xml) for repository": self.fix_ssl_certificate_issue, "Error: Failed to download metadata for repo": self.fix_ssl_certificate_issue} - + self.yum_update_client_package = "sudo yum update -y --disablerepo='*' --enablerepo='*microsoft*'" - + self.package_install_expected_avg_time_in_seconds = 90 # As per telemetry data, the average time to install package is around 90 seconds for yum. def refresh_repo(self): @@ -108,7 +108,7 @@ def invoke_package_manager_advanced(self, command, raise_on_exception=True): self.composite_logger.log_verbose("[YPM] Invoking package manager. [Command={0}]".format(str(command))) code, out = self.env_layer.run_command_output(command, False, False) - code, out = self.try_mitigate_issues_if_any(command, code, out) + code, out = self.try_mitigate_issues_if_any(command, code, out, raise_on_exception) if code not in [self.yum_exitcode_ok, self.yum_exitcode_no_applicable_packages, self.yum_exitcode_updates_available]: self.composite_logger.log_warning('[ERROR] Customer environment error. [Command={0}][Code={1}][Output={2}]'.format(command, str(code), str(out))) @@ -848,14 +848,36 @@ def is_auto_update_service_installed(self, install_check_cmd): # endregion # region Handling known errors - def try_mitigate_issues_if_any(self, command, code, out): - """ Attempt to fix the errors occurred while executing a command. Repeat check until no issues found """ + def try_mitigate_issues_if_any(self, command, code, out, raise_on_exception=True, seen_errors=None, retry_count=0, max_retries=Constants.MAX_RETRY_ATTEMPTS_FOR_ERROR_MITIGATION): + """ Attempt to fix the errors occurred while executing a command. Repeat check until no issues found + Args: + raise_on_exception (bool): If true, should raise exception on issue mitigation failures. + seen_errors (Any): Hash set used to maintain a list of errors strings seen in the call stack. + retry_count (int): Count of number of retries made to resolve errors. + max_retries (int): Maximum number of retries allowed before exiting the retry loop. + """ + if seen_errors is None: + seen_errors = set() + + # Keep an upper bound on the size of the call stack to prevent an unbounded loop if error mitigation fails. + if retry_count >= max_retries: + self.log_error_mitigation_failure(out, raise_on_exception) + return code, out + if "Error" in out or "Errno" in out: + + # Preemptively exit the retry loop if the same error string is repeating in the call stack. + # This implies that self.check_known_issues_and_attempt_fix may have failed to mitigate the error. + if out in seen_errors: + self.log_error_mitigation_failure(out, raise_on_exception) + return code, out + + seen_errors.add(out) issue_mitigated = self.check_known_issues_and_attempt_fix(out) if issue_mitigated: self.composite_logger.log_debug('Post mitigation, invoking package manager again using: ' + command) code_after_fix_attempt, out_after_fix_attempt = self.env_layer.run_command_output(command, False, False) - return self.try_mitigate_issues_if_any(command, code_after_fix_attempt, out_after_fix_attempt) + return self.try_mitigate_issues_if_any(command, code_after_fix_attempt, out_after_fix_attempt, raise_on_exception, seen_errors, retry_count + 1, max_retries) return code, out def check_known_issues_and_attempt_fix(self, output): @@ -883,6 +905,13 @@ def fix_ssl_certificate_issue(self): self.composite_logger.log_verbose("\n\n==[SUCCESS]===============================================================") self.composite_logger.log_debug("Client package update complete. [Code={0}][Out={1}]".format(str(code), out)) self.composite_logger.log_verbose("==========================================================================\n\n") + + def log_error_mitigation_failure(self, output, raise_on_exception=True): + self.composite_logger.log_error("[YPM] Customer Environment Error: Unable to auto-mitigate known issue. Please investigate and address. [Out={0}]".format(output)) + if raise_on_exception: + error_msg = 'Customer environment error (Unable to auto-mitigate known issue): [Out={0}]'.format(output) + self.status_handler.add_error_to_status(error_msg, Constants.PatchOperationErrorCodes.PACKAGE_MANAGER_FAILURE) + raise Exception(error_msg, "[{0}]".format(Constants.ERROR_ADDED_TO_STATUS)) # endregion # region Reboot Management diff --git a/src/core/tests/Test_YumPackageManager.py b/src/core/tests/Test_YumPackageManager.py index 5e4f486f..08d40414 100644 --- a/src/core/tests/Test_YumPackageManager.py +++ b/src/core/tests/Test_YumPackageManager.py @@ -487,6 +487,47 @@ def test_ssl_certificate_issue_type3_fix_fail(self): self.assertRaises(Exception, package_manager.invoke_package_manager, package_manager.yum_check) + def test_auto_issue_mitigation_should_raise_exception_if_error_repeats(self): + self.runtime.set_legacy_test_type('IssueMitigationRetryExitAfterMultipleAttempts') + + package_manager = self.container.get('package_manager') + self.assertTrue(package_manager) + + self.assertRaises(Exception, package_manager.invoke_package_manager, package_manager.yum_check) + + def test_auto_issue_mitigation_should_raise_exception_if_retries_are_exhausted(self): + self.runtime.set_legacy_test_type('IssueMitigationRetryExitAfterMultipleAttempts') + + package_manager = self.container.get('package_manager') + self.assertTrue(package_manager) + + with self.assertRaises(Exception): + package_manager.try_mitigate_issues_if_any('testcmd', 0, 'Test out', retry_count = Constants.MAX_RETRY_ATTEMPTS_FOR_ERROR_MITIGATION + 1) + + def test_auto_issue_mitigation_when_error_repeats_raise_exception_disabled(self): + expected_out = "Error: Failed to download metadata for repo 'rhui-rhel-8-for-x86_64-baseos-rhui-rpms': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried" + self.runtime.set_legacy_test_type('IssueMitigationRetryExitAfterMultipleAttempts') + + package_manager = self.container.get('package_manager') + self.assertTrue(package_manager) + + code, out = package_manager.try_mitigate_issues_if_any('testcmd', 0, expected_out, raise_on_exception = False) + + self.assertEqual(out, expected_out) + self.assertTrue(code >= 0) + + def test_auto_issue_mitigation_when_retries_are_exhausted_raise_exception_disabled(self): + expected_out = "Error: Failed to download metadata for repo 'rhui-rhel-8-for-x86_64-baseos-rhui-rpms': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried" + self.runtime.set_legacy_test_type('IssueMitigationRetryExitAfterMultipleAttempts') + + package_manager = self.container.get('package_manager') + self.assertTrue(package_manager) + + code, out = package_manager.try_mitigate_issues_if_any('testcmd', 0, expected_out, retry_count = Constants.MAX_RETRY_ATTEMPTS_FOR_ERROR_MITIGATION + 1, raise_on_exception = False) + + self.assertEqual(out, expected_out) + self.assertTrue(code >= 0) + def test_disable_auto_os_updates_with_uninstalled_services(self): # no services are installed on the machine. expected o/p: function will complete successfully. Backup file will be created with default values, no auto OS update configuration settings will be updated as there are none self.runtime.set_legacy_test_type('SadPath') diff --git a/src/core/tests/library/LegacyEnvLayerExtensions.py b/src/core/tests/library/LegacyEnvLayerExtensions.py index b2448898..55575399 100644 --- a/src/core/tests/library/LegacyEnvLayerExtensions.py +++ b/src/core/tests/library/LegacyEnvLayerExtensions.py @@ -981,6 +981,10 @@ def run_command_output(self, cmd, no_output=False, chk_err=True): else: code = 0 output = "Error: Cannot retrieve repository metadata (repomd.xml) for repository: addons. Please verify its path and try again" + elif self.legacy_test_type == 'IssueMitigationRetryExitAfterMultipleAttempts': + if self.legacy_package_manager_name is Constants.YUM: + code = 0 + output = "Error: Failed to download metadata for repo 'rhui-rhel-8-for-x86_64-baseos-rhui-rpms': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried" elif self.legacy_test_type == 'DependencyInstallSuccessfully': if self.legacy_package_manager_name is Constants.APT: # Total 7 packages: git-man, git, grub-efi-amd64-signed, testPkg1, testPkg2, testPkg3 and grub-efi-amd64-bin @@ -1062,7 +1066,7 @@ def run_command_output(self, cmd, no_output=False, chk_err=True): if self.legacy_package_manager_name is Constants.APT: # Total 7 packages: git-man, git, grub-efi-amd64-signed, testPkg1, testPkg2, testPkg3 and grub-efi-amd64-bin # grub-efi-amd64-signed is dependent on grub-efi-amd64-bin - # Installation of grub-efi-amd64-bin fails and as grub-efi-amd64-signed is dependent, it also failed + # Installation of grub-efi-amd64-bin fails and as grub-efi-amd64-signed is dependent, it also failed # Rest all packages install successfully if cmd.find("dist-upgrade") > -1: code = 0