Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 34 additions & 10 deletions src/core/src/CoreMain.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def __init__(self, argv):
patch_operation_requested = Constants.UNKNOWN
patch_assessment_successful = False
patch_installation_successful = False
overall_patch_installation_operation_successful = False

try:
# Level 2 bootstrapping
Expand All @@ -41,16 +42,24 @@ def __init__(self, argv):
lifecycle_manager, status_handler = bootstrapper.build_core_components(container)
composite_logger.log_debug("Completed building out full container.\n\n")

# Basic environment check
# The status handler's current operation is set to either assessment or installation when those operations begin. It is defaulted to assessment here because assessment is always the first operation to run.
# This ensures that any error occurring before assessment starts is logged within the error objects of the assessment substatus.
if status_handler.get_current_operation() is None:
status_handler.set_current_operation(Constants.ASSESSMENT)

# Environment startup
bootstrapper.bootstrap_splash_text()
bootstrapper.basic_environment_health_check()
lifecycle_manager.execution_start_check() # terminates if this instance shouldn't be running (redundant)
lifecycle_manager.execution_start_check() # terminates if this instance shouldn't be running (redundant)

# Execution config retrieval
composite_logger.log_debug("Obtaining execution configuration...")
execution_config = container.get('execution_config')
telemetry_writer.set_operation_id(execution_config.activity_id)
patch_operation_requested = execution_config.operation.lower()

# Basic environment check
bootstrapper.basic_environment_health_check()

patch_assessor = container.get('patch_assessor')
package_manager = container.get('package_manager')

Expand All @@ -70,6 +79,11 @@ def __init__(self, argv):
patch_assessment_successful = False
patch_assessment_successful = patch_assessor.start_assessment()

# PatchInstallationSummary to be marked as completed successfully only after the implicit (i.e. 2nd) assessment is completed, as per CRP's restrictions
if patch_assessment_successful and patch_installation_successful:
patch_installer.mark_installation_completed()
overall_patch_installation_operation_successful = True

except Exception as error:
# Privileged operation handling for non-production use
if Constants.EnvLayer.PRIVILEGED_OP_MARKER in repr(error):
Expand All @@ -84,19 +98,16 @@ def __init__(self, argv):
if telemetry_writer is not None:
telemetry_writer.write_event("EXCEPTION: " + repr(error), Constants.TelemetryEventLevel.Error)
if status_handler is not None:
composite_logger.log_debug(' - Status handler pending writes flags [I=' + str(patch_installation_successful) + ', A=' + str(patch_assessment_successful) + ']')
if patch_operation_requested == Constants.INSTALLATION.lower() and not patch_installation_successful:
status_handler.set_installation_substatus_json(status=Constants.STATUS_ERROR)
composite_logger.log_debug(' -- Persisted failed installation substatus.')
if not patch_assessment_successful:
status_handler.set_assessment_substatus_json(status=Constants.STATUS_ERROR)
composite_logger.log_debug(' -- Persisted failed assessment substatus.')
composite_logger.log_debug(' - Status handler pending writes flags [I=' + str(overall_patch_installation_operation_successful) + ', A=' + str(patch_assessment_successful) + ']')

# Add any pending errors to appropriate substatus
if Constants.ERROR_ADDED_TO_STATUS not in repr(error):
status_handler.add_error_to_status("Terminal exception {0}".format(repr(error)), Constants.PatchOperationErrorCodes.OPERATION_FAILED)
else:
status_handler.add_error_to_status("Execution terminated due to last reported error.", Constants.PatchOperationErrorCodes.OPERATION_FAILED)

self.update_patch_substatus_if_pending(patch_operation_requested, overall_patch_installation_operation_successful, patch_assessment_successful, status_handler, composite_logger)

else:
composite_logger.log_error(' - Status handler is not initialized, and status data cannot be written.')
composite_logger.log_debug("Completed exception handling.\n")
Expand All @@ -109,3 +120,16 @@ def __init__(self, argv):

stdout_file_mirror.stop()
file_logger.close(message_at_close="<End of output>")

@staticmethod
def update_patch_substatus_if_pending(patch_operation_requested, overall_patch_installation_operation_successful, patch_assessment_successful, status_handler, composite_logger):
    """ Persists an error status for each substatus whose operation has not completed successfully.

    - Installation substatus is set to error when installation was the requested operation and it was not
      marked as successful overall. If assessment also failed, an explanatory error is first added under the
      installation operation.
    - Assessment substatus is set to error whenever assessment was not successful, regardless of the
      operation requested.

    patch_operation_requested is compared against the lower-cased Constants.INSTALLATION value. """
    assessment_failed = not patch_assessment_successful
    installation_pending = patch_operation_requested == Constants.INSTALLATION.lower() and not overall_patch_installation_operation_successful

    if installation_pending:
        if assessment_failed:
            # Switch the current operation so the explanatory error is attached to installation
            status_handler.set_current_operation(Constants.INSTALLATION)
            status_handler.add_error_to_status("Installation failed due to assessment failure. Please refer the error details in assessment substatus")
        status_handler.set_installation_substatus_json(status=Constants.STATUS_ERROR)
        composite_logger.log_debug(' -- Persisted failed installation substatus.')

    if assessment_failed:
        status_handler.set_assessment_substatus_json(status=Constants.STATUS_ERROR)
        composite_logger.log_debug(' -- Persisted failed assessment substatus.')
Comment on lines +125 to +134
Copy link
Collaborator

@kjohn-msft kjohn-msft Jun 2, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would have been best encapsulated within the separate assessor and installer classes and invoked as needed. But that can't be done neatly as they may not have been initialized correctly. For completeness of positioning, it shouldn't be inside status handler as it's a level of detail that is not for status handler. It's not for CoreMain as it's usurping some logic that belongs elsewhere.

Maybe not for this PR, but consider what can be done for this.


5 changes: 3 additions & 2 deletions src/core/src/bootstrap/Constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,9 @@ class TelemetryEventLevel(EnumBackport):
LogAlways = "LogAlways"

TELEMETRY_TASK_NAME = "ExtensionCoreLog"
TELEMETRY_AT_AGENT_NOT_COMPATIBLE_ERROR_MSG = "The minimum Azure Linux Agent version prerequisite for Linux patching was not met. Please update the Azure Linux Agent on this machine following instructions here: http://aka.ms/UpdateLinuxAgent"
TELEMETRY_AT_AGENT_COMPATIBLE_MSG = "The minimum Azure Linux Agent version prerequisite for Linux patching was met."

TELEMETRY_AT_AGENT_NOT_COMPATIBLE_ERROR_MSG = "Unsupported older Azure Linux Agent version. To resolve: http://aka.ms/UpdateLinuxAgent"
TELEMETRY_AT_AGENT_COMPATIBLE_MSG = "Minimum Azure Linux Agent version prerequisite met"

UTC_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

Expand Down
38 changes: 22 additions & 16 deletions src/core/src/core_logic/PatchInstaller.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,22 +94,8 @@ def start_installation(self, simulate=False):

# Combining maintenance
overall_patch_installation_successful = bool(update_run_successful and not maintenance_window_exceeded)
if overall_patch_installation_successful:
self.status_handler.set_installation_substatus_json(status=Constants.STATUS_SUCCESS)
# update patch metadata in status for auto patching request, to be reported to healthstore
if self.execution_config.maintenance_run_id is not None:
try:
#todo: temp fix to test auto patching, this will be reset to using the maintenanceRunId string as is, once the corresponding changes in RSM are made
# patch_version = str(self.execution_config.maintenance_run_id)
patch_version = datetime.datetime.strptime(self.execution_config.maintenance_run_id.split(" ")[0], "%m/%d/%Y").strftime('%Y.%m.%d')
self.status_handler.set_patch_metadata_for_healthstore_substatus_json(patch_version=patch_version if patch_version is not None and patch_version != "" else Constants.PATCH_VERSION_UNKNOWN,
report_to_healthstore=True,
wait_after_update=False)
except ValueError as e:
error_message = "Maintenance Run Id is in incorrect format. Expected=[DateTimeUTC]. Actual=[{0}]. Error=[{1}]".format(str(self.execution_config.maintenance_run_id), repr(e))
self.composite_logger.log_error(error_message)
raise Exception(error_message)
else:

if not overall_patch_installation_successful:
self.status_handler.set_installation_substatus_json(status=Constants.STATUS_ERROR)
# NOTE: For auto patching requests, no need to report patch metadata to healthstore in case of failure

Expand Down Expand Up @@ -282,6 +268,26 @@ def is_reboot_pending(self):
self.composite_logger.log_error('Error while checking for reboot pending: ' + repr(error))
return True # defaults for safety

def mark_installation_completed(self):
    """ Marks the installation operation as completed: sets the PatchInstallationSummary substatus to success and,
    when a maintenance run id is present (auto patching request), records the patch metadata to be sent to healthstore.
    This is done outside of start_installation due to a restriction in CRP, where the installation substatus should be
    marked as completed only after the implicit (2nd) assessment operation.

    Raises an Exception if the maintenance run id does not start with a date in the %m/%d/%Y format. """
    # Required for status handler to log errors, that occur while marking installation completed, in installation substatus
    self.status_handler.set_current_operation(Constants.INSTALLATION)
    self.status_handler.set_installation_substatus_json(status=Constants.STATUS_SUCCESS)

    maintenance_run_id = self.execution_config.maintenance_run_id
    if maintenance_run_id is None:
        return  # no maintenance run id, so there is no patch metadata to report

    # update patch metadata in status for auto patching request, to be reported to healthstore
    try:
        # todo: temp fix to test auto patching; this will be reset to using the maintenanceRunId string as is
        # (i.e. str(maintenance_run_id)), once the corresponding changes in RSM are made
        run_date = datetime.datetime.strptime(maintenance_run_id.split(" ")[0], "%m/%d/%Y")
        patch_version = run_date.strftime('%Y.%m.%d')
        if patch_version is None or patch_version == "":
            patch_version = Constants.PATCH_VERSION_UNKNOWN
        self.status_handler.set_patch_metadata_for_healthstore_substatus_json(patch_version=patch_version,
                                                                              report_to_healthstore=True,
                                                                              wait_after_update=False)
    except ValueError as e:
        error_message = "Maintenance Run Id is in incorrect format. Expected=[DateTimeUTC]. Actual=[{0}]. Error=[{1}]".format(str(maintenance_run_id), repr(e))
        self.composite_logger.log_error(error_message)
        raise Exception(error_message)

# region Installation Progress support
def perform_status_reconciliation_conditionally(self, package_manager, condition=True):
"""Periodically based on the condition check, writes out success records as required; returns count of detected installs.
Expand Down
3 changes: 3 additions & 0 deletions src/core/src/service_interfaces/StatusHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,9 @@ def __write_status_file(self):
# Stores the given operation in the handler's private __current_operation attribute.
# NOTE(review): comments at the callers suggest this value decides which substatus (assessment or
# installation) subsequently reported errors are logged under - confirm against add_error_to_status.
def set_current_operation(self, operation):
self.__current_operation = operation

# Returns the value currently held in the private __current_operation attribute (the one assigned by
# set_current_operation). CoreMain checks the result against None before defaulting it to assessment,
# so callers should be prepared for None.
def get_current_operation(self):
return self.__current_operation

def __get_total_error_count_from_prev_status(self, error_message):
try:
return int(re.search('(.+?) error/s reported.', error_message).group(1))
Expand Down
4 changes: 2 additions & 2 deletions src/core/tests/TestCoreMain.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ def test_assessment_operation_fail_due_to_no_telemetry(self):
self.assertTrue(substatus_file_data[0]["name"] == Constants.PATCH_ASSESSMENT_SUMMARY)
self.assertTrue(substatus_file_data[0]["status"] == Constants.STATUS_ERROR.lower())
self.assertEqual(len(json.loads(substatus_file_data[0]["formattedMessage"]["message"])["errors"]["details"]), 1)
self.assertTrue("The minimum Azure Linux Agent version prerequisite for Linux patching was not met" in json.loads(substatus_file_data[0]["formattedMessage"]["message"])["errors"]["details"][0]["message"])
self.assertTrue(Constants.TELEMETRY_AT_AGENT_NOT_COMPATIBLE_ERROR_MSG in json.loads(substatus_file_data[0]["formattedMessage"]["message"])["errors"]["details"][0]["message"])
runtime.stop()

def test_installation_operation_fail_due_to_no_telemetry(self):
Expand All @@ -295,7 +295,7 @@ def test_installation_operation_fail_due_to_no_telemetry(self):
self.assertTrue(substatus_file_data[0]["name"] == Constants.PATCH_ASSESSMENT_SUMMARY)
self.assertTrue(substatus_file_data[0]["status"] == Constants.STATUS_ERROR.lower())
self.assertEqual(len(json.loads(substatus_file_data[0]["formattedMessage"]["message"])["errors"]["details"]), 1)
self.assertTrue("The minimum Azure Linux Agent version prerequisite for Linux patching was not met" in json.loads(substatus_file_data[0]["formattedMessage"]["message"])["errors"]["details"][0]["message"])
self.assertTrue(Constants.TELEMETRY_AT_AGENT_NOT_COMPATIBLE_ERROR_MSG in json.loads(substatus_file_data[0]["formattedMessage"]["message"])["errors"]["details"][0]["message"])
self.assertTrue(substatus_file_data[1]["name"] == Constants.PATCH_INSTALLATION_SUMMARY)
self.assertTrue(substatus_file_data[1]["status"] == Constants.STATUS_ERROR.lower())
runtime.stop()
Expand Down