Skip to content
Merged
3 changes: 2 additions & 1 deletion src/core/src/bootstrap/Constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def __iter__(self):
# Max values
MAX_AUTO_ASSESSMENT_LOGFILE_SIZE_IN_BYTES = 5*1024*1024
MAX_AUTO_ASSESSMENT_WAIT_FOR_MAIN_CORE_EXEC_IN_MINUTES = 3 * 60
MAX_RETRY_ATTEMPTS_FOR_ERROR_MITIGATION = 10

class SystemPaths(EnumBackport):
SYSTEMD_ROOT = "/etc/systemd/system/"
Expand Down Expand Up @@ -268,7 +269,7 @@ class RebootStatus(EnumBackport):
STARTED = "Started"
COMPLETED = "Completed"
FAILED = "Failed"

# Enum for VM Cloud Type
class VMCloudType(EnumBackport):
UNKNOWN = "Unknown"
Expand Down
41 changes: 35 additions & 6 deletions src/core/src/package_managers/YumPackageManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,9 @@ def __init__(self, env_layer, execution_config, composite_logger, telemetry_writ
self.known_errors_and_fixes = {"SSL peer rejected your certificate as expired": self.fix_ssl_certificate_issue,
"Error: Cannot retrieve repository metadata (repomd.xml) for repository": self.fix_ssl_certificate_issue,
"Error: Failed to download metadata for repo": self.fix_ssl_certificate_issue}

self.yum_update_client_package = "sudo yum update -y --disablerepo='*' --enablerepo='*microsoft*'"

self.package_install_expected_avg_time_in_seconds = 90 # As per telemetry data, the average time to install package is around 90 seconds for yum.

def refresh_repo(self):
Expand All @@ -108,7 +108,7 @@ def invoke_package_manager_advanced(self, command, raise_on_exception=True):
self.composite_logger.log_verbose("[YPM] Invoking package manager. [Command={0}]".format(str(command)))
code, out = self.env_layer.run_command_output(command, False, False)

code, out = self.try_mitigate_issues_if_any(command, code, out)
code, out = self.try_mitigate_issues_if_any(command, code, out, raise_on_exception)

if code not in [self.yum_exitcode_ok, self.yum_exitcode_no_applicable_packages, self.yum_exitcode_updates_available]:
self.composite_logger.log_warning('[ERROR] Customer environment error. [Command={0}][Code={1}][Output={2}]'.format(command, str(code), str(out)))
Expand Down Expand Up @@ -848,14 +848,36 @@ def is_auto_update_service_installed(self, install_check_cmd):
# endregion

# region Handling known errors
def try_mitigate_issues_if_any(self, command, code, out, raise_on_exception=True, seen_errors=None, retry_count=0, max_retries=Constants.MAX_RETRY_ATTEMPTS_FOR_ERROR_MITIGATION):
    """ Attempt to fix known errors that occurred while executing a command, re-running the command after each applied fix.

    Args:
        command (str): Command whose output is being inspected; it is re-run after a fix is applied.
        code: Exit code from the most recent run of the command.
        out (str): Output from the most recent run of the command.
        raise_on_exception (bool): If true, an exception is raised when mitigation has to be abandoned.
        seen_errors (set): Outputs already encountered during this mitigation attempt. A new set is created when None.
        retry_count (int): Number of retries already made to resolve errors.
        max_retries (int): Maximum number of retries allowed before giving up.

    Returns:
        tuple: (code, out) from the last run of the command.

    Raises:
        Exception: Raised through log_error_mitigation_failure when raise_on_exception is true and mitigation is abandoned.
    """
    if seen_errors is None:
        seen_errors = set()

    # Iterative form of the retry: every pass inspects the latest (code, out) pair with the same bookkeeping.
    while True:
        # Keep an upper bound on the number of attempts to prevent an unbounded loop if error mitigation fails.
        # NOTE(review): the bound is checked before the output is inspected, so exhausting the retries is reported
        # as a failure even when the latest output carries no error marker - confirm this is intended.
        if retry_count >= max_retries:
            self.log_error_mitigation_failure(out, raise_on_exception)
            return code, out

        # Nothing to mitigate when the output carries no error marker.
        if "Error" not in out and "Errno" not in out:
            return code, out

        # Exit early if the exact same output repeats: the previously applied fix did not change the outcome.
        if out in seen_errors:
            self.log_error_mitigation_failure(out, raise_on_exception)
            return code, out

        seen_errors.add(out)
        if not self.check_known_issues_and_attempt_fix(out):
            # No fix was applied for this output; hand the result back to the caller unchanged.
            return code, out

        self.composite_logger.log_debug('Post mitigation, invoking package manager again using: ' + command)
        code, out = self.env_layer.run_command_output(command, False, False)
        retry_count += 1

def check_known_issues_and_attempt_fix(self, output):
Expand Down Expand Up @@ -883,6 +905,13 @@ def fix_ssl_certificate_issue(self):
self.composite_logger.log_verbose("\n\n==[SUCCESS]===============================================================")
self.composite_logger.log_debug("Client package update complete. [Code={0}][Out={1}]".format(str(code), out))
self.composite_logger.log_verbose("==========================================================================\n\n")

def log_error_mitigation_failure(self, output, raise_on_exception=True):
    """ Log that a known issue could not be auto-mitigated and, if requested, record it in status and raise.

    Args:
        output (str): Command output to include in the log and error messages.
        raise_on_exception (bool): If true, the error is added to status and an exception is raised after logging.

    Raises:
        Exception: When raise_on_exception is true.
    """
    log_line = "[YPM] Customer Environment Error: Unable to auto-mitigate known issue. Please investigate and address. [Out={0}]".format(output)
    self.composite_logger.log_error(log_line)

    # Log-only mode: the caller asked not to fail on mitigation problems.
    if not raise_on_exception:
        return

    error_msg = 'Customer environment error (Unable to auto-mitigate known issue): [Out={0}]'.format(output)
    self.status_handler.add_error_to_status(error_msg, Constants.PatchOperationErrorCodes.PACKAGE_MANAGER_FAILURE)
    # The second exception argument tags the error as already recorded in status.
    status_tag = "[{0}]".format(Constants.ERROR_ADDED_TO_STATUS)
    raise Exception(error_msg, status_tag)
# endregion

# region Reboot Management
Expand Down
41 changes: 41 additions & 0 deletions src/core/tests/Test_YumPackageManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,47 @@ def test_ssl_certificate_issue_type3_fix_fail(self):

self.assertRaises(Exception, package_manager.invoke_package_manager, package_manager.yum_check)

def test_auto_issue_mitigation_should_raise_exception_if_error_repeats(self):
    """ Invoking the package manager under the 'IssueMitigationRetryExitAfterMultipleAttempts' test type must raise. """
    self.runtime.set_legacy_test_type('IssueMitigationRetryExitAfterMultipleAttempts')

    package_manager = self.container.get('package_manager')
    self.assertTrue(package_manager)

    # Resolve the callable and its argument up front so only the invocation itself is expected to raise.
    invoke = package_manager.invoke_package_manager
    yum_check_cmd = package_manager.yum_check
    with self.assertRaises(Exception):
        invoke(yum_check_cmd)

def test_auto_issue_mitigation_should_raise_exception_if_retries_are_exhausted(self):
    """ Calling the mitigation routine with a retry count already past the maximum must raise. """
    self.runtime.set_legacy_test_type('IssueMitigationRetryExitAfterMultipleAttempts')

    package_manager = self.container.get('package_manager')
    self.assertTrue(package_manager)

    with self.assertRaises(Exception):
        # Start one past the configured limit so the retry bound is hit immediately.
        exhausted_retry_count = Constants.MAX_RETRY_ATTEMPTS_FOR_ERROR_MITIGATION + 1
        package_manager.try_mitigate_issues_if_any('testcmd', 0, 'Test out', retry_count=exhausted_retry_count)

def test_auto_issue_mitigation_when_error_repeats_raise_exception_disabled(self):
    """ With raise_on_exception disabled, the mitigation routine returns the error output instead of raising. """
    self.runtime.set_legacy_test_type('IssueMitigationRetryExitAfterMultipleAttempts')

    package_manager = self.container.get('package_manager')
    self.assertTrue(package_manager)

    expected_out = "Error: Failed to download metadata for repo 'rhui-rhel-8-for-x86_64-baseos-rhui-rpms': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried"
    result = package_manager.try_mitigate_issues_if_any('testcmd', 0, expected_out, raise_on_exception=False)
    code, out = result

    self.assertEqual(out, expected_out)
    self.assertGreaterEqual(code, 0)

def test_auto_issue_mitigation_when_retries_are_exhausted_raise_exception_disabled(self):
    """ With raise_on_exception disabled and retries already exhausted, the inputs are returned instead of raising. """
    self.runtime.set_legacy_test_type('IssueMitigationRetryExitAfterMultipleAttempts')

    package_manager = self.container.get('package_manager')
    self.assertTrue(package_manager)

    expected_out = "Error: Failed to download metadata for repo 'rhui-rhel-8-for-x86_64-baseos-rhui-rpms': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried"
    # Start one past the configured limit so the retry bound is hit immediately.
    exhausted_retry_count = Constants.MAX_RETRY_ATTEMPTS_FOR_ERROR_MITIGATION + 1
    result = package_manager.try_mitigate_issues_if_any('testcmd', 0, expected_out, retry_count=exhausted_retry_count, raise_on_exception=False)
    code, out = result

    self.assertEqual(out, expected_out)
    self.assertGreaterEqual(code, 0)

def test_disable_auto_os_updates_with_uninstalled_services(self):
# no services are installed on the machine. expected o/p: function will complete successfully. Backup file will be created with default values, no auto OS update configuration settings will be updated as there are none
self.runtime.set_legacy_test_type('SadPath')
Expand Down
6 changes: 5 additions & 1 deletion src/core/tests/library/LegacyEnvLayerExtensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -981,6 +981,10 @@ def run_command_output(self, cmd, no_output=False, chk_err=True):
else:
code = 0
output = "Error: Cannot retrieve repository metadata (repomd.xml) for repository: addons. Please verify its path and try again"
elif self.legacy_test_type == 'IssueMitigationRetryExitAfterMultipleAttempts':
if self.legacy_package_manager_name is Constants.YUM:
code = 0
output = "Error: Failed to download metadata for repo 'rhui-rhel-8-for-x86_64-baseos-rhui-rpms': Cannot download repomd.xml: Cannot download repodata/repomd.xml: All mirrors were tried"
elif self.legacy_test_type == 'DependencyInstallSuccessfully':
if self.legacy_package_manager_name is Constants.APT:
# Total 7 packages: git-man, git, grub-efi-amd64-signed, testPkg1, testPkg2, testPkg3 and grub-efi-amd64-bin
Expand Down Expand Up @@ -1062,7 +1066,7 @@ def run_command_output(self, cmd, no_output=False, chk_err=True):
if self.legacy_package_manager_name is Constants.APT:
# Total 7 packages: git-man, git, grub-efi-amd64-signed, testPkg1, testPkg2, testPkg3 and grub-efi-amd64-bin
# grub-efi-amd64-signed is dependent on grub-efi-amd64-bin
# Installation of grub-efi-amd64-bin fails and as grub-efi-amd64-signed is dependent, it also failed
# Installation of grub-efi-amd64-bin fails and as grub-efi-amd64-signed is dependent, it also failed
# Rest all packages install successfully
if cmd.find("dist-upgrade") > -1:
code = 0
Expand Down