Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions src/core/src/bootstrap/Constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,28 @@ class ConfigSettings(EnumBackport):
# patch versions for healthstore when there is no maintenance run id
PATCH_VERSION_UNKNOWN = "UNKNOWN"

# Strings used in perf logs
class PerfLogTrackerParams:
TASK = "task"
TASK_STATUS = "task_status"
PACKAGE_MANAGER = "package_manager"
NUMBER_OF_TRIALS = "number_of_trials"
ERROR_MSG = "error_msg"
INSTALLED_PATCH_COUNT = "installed_patch_count"
PATCH_OPERATION_SUCCESSFUL = "patch_operation_successful"
MAINTENANCE_WINDOW = "maintenance_window"
PERC_MAINTENANCE_WINDOW_USED = "perc_maintenance_window_used"
MAINTENANCE_WINDOW_EXCEEDED = "maintenance_window_exceeded"
START_TIME = "start_time"
END_TIME = "end_time"
TIME_TAKEN = "time_taken"
MACHINE_INFO = "machine_info"
MESSAGE = "message"

class TaskStatus(EnumBackport):
SUCCEEDED = "succeeded"
FAILED = "failed"

# Patch Modes for Configure Patching
class PatchModes(EnumBackport):
IMAGE_DEFAULT = "ImageDefault"
Expand Down
22 changes: 22 additions & 0 deletions src/core/src/core_logic/MaintenanceWindow.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,25 @@ def is_package_install_time_available(self, remaining_time_in_minutes=None):
else:
self.composite_logger.log_warning("Time Remaining: " + str(timedelta(seconds=int(remaining_time_in_minutes * 60))) + ", Cutoff time: " + str(timedelta(minutes=cutoff_time_in_minutes)) + " [Out of time!]")
return False

def get_percentage_maintenance_window_used(self):
"""Calculate percentage of maintenance window used"""
try:
current_time = self.env_layer.datetime.datetime_utcnow()
start_time = self.env_layer.datetime.utc_to_standard_datetime(self.start_time)
if current_time < start_time:
raise Exception("Start time {0} is greater than current time {1}".format(str(start_time), str(current_time)))
dur = datetime.datetime.strptime(self.duration, "%H:%M:%S")
dura = timedelta(hours=dur.hour, minutes=dur.minute, seconds=dur.second)
total_time_in_minutes = self.env_layer.datetime.total_minutes_from_time_delta(dura)
elapsed_time_in_minutes = self.env_layer.datetime.total_minutes_from_time_delta(current_time - start_time)
percent_maintenance_window_used = (elapsed_time_in_minutes / total_time_in_minutes) * 100
except Exception as error:
error_msg = "Error calculating percentage of maintenance window used."
self.composite_logger.log_error("\n" + error_msg)
self.status_handler.add_error_to_status(error_msg, Constants.PatchOperationErrorCodes.DEFAULT_ERROR)
if Constants.ERROR_ADDED_TO_STATUS not in repr(error):
error.args = (error.args, "[{0}]".format(Constants.ERROR_ADDED_TO_STATUS))
raise

return percent_maintenance_window_used
15 changes: 15 additions & 0 deletions src/core/src/core_logic/PatchAssessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import shutil
import time
from core.src.bootstrap.Constants import Constants
from core.src.core_logic.Stopwatch import Stopwatch


class PatchAssessor(object):
Expand All @@ -34,6 +35,7 @@ def __init__(self, env_layer, execution_config, composite_logger, telemetry_writ
self.status_handler = status_handler
self.lifecycle_manager = lifecycle_manager
self.package_manager = package_manager
self.package_manager_name = self.package_manager.__class__.__name__
self.assessment_state_file_path = os.path.join(self.execution_config.config_folder, Constants.ASSESSMENT_STATE_FILE)

def start_assessment(self):
Expand All @@ -49,6 +51,9 @@ def start_assessment(self):
self.composite_logger.log('\nStarting patch assessment...')
self.write_assessment_state() # success / failure does not matter, only that an attempt started

self.stopwatch = Stopwatch(self.env_layer, self.telemetry_writer, self.composite_logger)
self.stopwatch.start()

self.status_handler.set_assessment_substatus_json(status=Constants.STATUS_TRANSITIONING)
self.composite_logger.log("\nMachine Id: " + self.env_layer.platform.node())
self.composite_logger.log("Activity Id: " + self.execution_config.activity_id)
Expand All @@ -57,11 +62,13 @@ def start_assessment(self):
self.composite_logger.log("\n\nGetting available patches...")
self.package_manager.refresh_repo()
self.status_handler.reset_assessment_data()
number_of_tries = 0

for i in range(0, Constants.MAX_ASSESSMENT_RETRY_COUNT):
try:
if self.lifecycle_manager is not None:
self.lifecycle_manager.lifecycle_status_check() # may terminate the code abruptly, as designed
number_of_tries = number_of_tries + 1
packages, package_versions = self.package_manager.get_all_updates()
self.telemetry_writer.write_event("Full assessment: " + str(packages), Constants.TelemetryEventLevel.Verbose)
self.status_handler.set_package_assessment_status(packages, package_versions)
Expand All @@ -81,15 +88,23 @@ def start_assessment(self):
else:
error_msg = 'Error retrieving available patches: ' + repr(error)
self.composite_logger.log_error(error_msg)
self.write_assessment_perf_logs(number_of_tries, Constants.TaskStatus.FAILED, error_msg)
self.status_handler.add_error_to_status(error_msg, Constants.PatchOperationErrorCodes.DEFAULT_ERROR)
if Constants.ERROR_ADDED_TO_STATUS not in repr(error):
error.args = (error.args, "[{0}]".format(Constants.ERROR_ADDED_TO_STATUS))
self.status_handler.set_assessment_substatus_json(status=Constants.STATUS_ERROR)
raise

self.write_assessment_perf_logs(number_of_tries, Constants.TaskStatus.SUCCEEDED, "")
self.composite_logger.log("\nPatch assessment completed.\n")
return True

def write_assessment_perf_logs(self, number_of_tries, task_status, error_msg):
assessment_perf_log = {Constants.PerfLogTrackerParams.TASK: Constants.ASSESSMENT, Constants.PerfLogTrackerParams.TASK_STATUS: str(task_status),
Constants.PerfLogTrackerParams.ERROR_MSG: error_msg, Constants.PerfLogTrackerParams.PACKAGE_MANAGER: self.package_manager_name,
Constants.PerfLogTrackerParams.NUMBER_OF_TRIALS: str(number_of_tries)}
self.stopwatch.stop_and_write_telemetry(str(assessment_perf_log))

def raise_if_telemetry_unsupported(self):
if self.lifecycle_manager.get_vm_cloud_type() == Constants.VMCloudType.ARC and self.execution_config.operation not in [Constants.ASSESSMENT, Constants.INSTALLATION]:
self.composite_logger.log("Skipping telemetry compatibility check for Arc cloud type when operation is not manual")
Expand Down
27 changes: 25 additions & 2 deletions src/core/src/core_logic/PatchInstaller.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import os
import time
from core.src.bootstrap.Constants import Constants

from core.src.core_logic.Stopwatch import Stopwatch

class PatchInstaller(object):
"""" Wrapper class for a single patch installation operation """
Expand All @@ -33,6 +33,7 @@ def __init__(self, env_layer, execution_config, composite_logger, telemetry_writ
self.lifecycle_manager = lifecycle_manager

self.package_manager = package_manager
self.package_manager_name = self.package_manager.__class__.__name__
self.package_filter = package_filter
self.maintenance_window = maintenance_window
self.reboot_manager = reboot_manager
Expand All @@ -51,6 +52,9 @@ def start_installation(self, simulate=False):

self.composite_logger.log('\nStarting patch installation...')

self.stopwatch = Stopwatch(self.env_layer, self.telemetry_writer, self.composite_logger)
self.stopwatch.start()

self.composite_logger.log("\nMachine Id: " + self.env_layer.platform.node())
self.composite_logger.log("Activity Id: " + self.execution_config.activity_id)
self.composite_logger.log("Operation request time: " + self.execution_config.start_time + ", Maintenance Window Duration: " + self.execution_config.duration)
Expand All @@ -72,22 +76,26 @@ def start_installation(self, simulate=False):

# Install Updates
installed_update_count, update_run_successful, maintenance_window_exceeded = self.install_updates(maintenance_window, package_manager, simulate)

retry_count = 1
# Repeat patch installation if flagged as required and time is available
if not maintenance_window_exceeded and package_manager.get_package_manager_setting(Constants.PACKAGE_MGR_SETTING_REPEAT_PATCH_OPERATION, False):
self.composite_logger.log("\nInstalled update count (first round): " + str(installed_update_count))
self.composite_logger.log("\nPatch installation run will be repeated as the package manager recommended it --------------------------------------------->")
package_manager.set_package_manager_setting(Constants.PACKAGE_MGR_SETTING_REPEAT_PATCH_OPERATION, False) # Resetting
new_installed_update_count, update_run_successful, maintenance_window_exceeded = self.install_updates(maintenance_window, package_manager, simulate)
installed_update_count += new_installed_update_count
retry_count = retry_count + 1

if package_manager.get_package_manager_setting(Constants.PACKAGE_MGR_SETTING_REPEAT_PATCH_OPERATION, False): # We should not see this again
error_msg = "Unexpected repeated package manager update occurred. Please re-run the update deployment."
self.status_handler.add_error_to_status(error_msg, Constants.PatchOperationErrorCodes.PACKAGE_MANAGER_FAILURE)
self.write_installer_perf_logs(update_run_successful, installed_update_count, retry_count, maintenance_window, maintenance_window_exceeded, Constants.TaskStatus.FAILED, error_msg)
raise Exception(error_msg, "[{0}]".format(Constants.ERROR_ADDED_TO_STATUS))

self.composite_logger.log("\nInstalled update count: " + str(installed_update_count) + " (including dependencies)")

self.write_installer_perf_logs(update_run_successful, installed_update_count, retry_count, maintenance_window, maintenance_window_exceeded, Constants.TaskStatus.SUCCEEDED, "")

# Reboot as per setting and environment state
reboot_manager.start_reboot_if_required_and_time_available(maintenance_window.get_remaining_time_in_minutes(None, False))
maintenance_window_exceeded = maintenance_window_exceeded or reboot_manager.maintenance_window_exceeded_flag
Expand All @@ -98,6 +106,21 @@ def start_installation(self, simulate=False):

return overall_patch_installation_successful

def write_installer_perf_logs(self, patch_operation_successful, installed_patch_count, number_of_rounds, maintenance_window, maintenance_window_exceeded, task_status, error_msg):
perc_maintenance_window_used = -1

try:
perc_maintenance_window_used = maintenance_window.get_percentage_maintenance_window_used()
except Exception as error:
self.composite_logger.log_debug("Error in writing patch installation performance logs. Error is: " + repr(error))

patch_installation_perf_log = {Constants.PerfLogTrackerParams.TASK: Constants.INSTALLATION, Constants.PerfLogTrackerParams.TASK_STATUS: str(task_status), Constants.PerfLogTrackerParams.ERROR_MSG: error_msg,
Constants.PerfLogTrackerParams.PACKAGE_MANAGER: self.package_manager_name, Constants.PerfLogTrackerParams.PATCH_OPERATION_SUCCESSFUL: str(patch_operation_successful),
Constants.PerfLogTrackerParams.INSTALLED_PATCH_COUNT: str(installed_patch_count), Constants.PerfLogTrackerParams.NUMBER_OF_TRIALS: str(number_of_rounds),
Constants.PerfLogTrackerParams.MAINTENANCE_WINDOW: str(maintenance_window.duration), Constants.PerfLogTrackerParams.PERC_MAINTENANCE_WINDOW_USED: str(perc_maintenance_window_used),
Constants.PerfLogTrackerParams.MAINTENANCE_WINDOW_EXCEEDED: str(maintenance_window_exceeded)}
self.stopwatch.stop_and_write_telemetry(str(patch_installation_perf_log))

def raise_if_telemetry_unsupported(self):
if self.lifecycle_manager.get_vm_cloud_type() == Constants.VMCloudType.ARC and self.execution_config.operation not in [Constants.ASSESSMENT, Constants.INSTALLATION]:
self.composite_logger.log("Skipping telemetry compatibility check for Arc cloud type when operation is not manual")
Expand Down
70 changes: 70 additions & 0 deletions src/core/src/core_logic/Stopwatch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Copyright 2023 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requires Python 2.7+

from core.src.bootstrap.Constants import Constants

class Stopwatch(object):
"""Implements the stopwatch logic"""

class StopwatchException(Constants.EnumBackport):
# Stopwatch exception strings
STARTED_ALREADY = "Stopwatch is already started"
NOT_STARTED = "Stopwatch is not started"
STOPPED_ALREADY = "Stopwatch is already stoppped"

def __init__(self, env_layer, telemetry_writer, composite_logger):
self.env_layer = env_layer
self.telemetry_writer = telemetry_writer
self.composite_logger = composite_logger
self.start_time = None
self.end_time = None
self.time_taken = None
self.task_details = None

def __del__(self):
# if start_time is None that means Stopwatch is not started and hence no need to log
# call stop only if end_time is None otherwise stop() is already called.
if self.start_time is not None and self.end_time is None:
self.stop()
self.set_task_details("")
self.composite_logger.log("Stopwatch details before instance is destroyed: " + str(self.task_details))

def start(self):
if self.start_time is not None:
self.composite_logger.log_debug(str(Stopwatch.StopwatchException.STARTED_ALREADY))
self.start_time = self.env_layer.datetime.datetime_utcnow()
self.end_time = None
self.time_taken = None
self.task_details = None

def stop(self):
if self.end_time is not None:
self.composite_logger.log_debug(str(Stopwatch.StopwatchException.STOPPED_ALREADY))
self.end_time = self.env_layer.datetime.datetime_utcnow()
if self.start_time is None:
self.composite_logger.log_debug(str(Stopwatch.StopwatchException.NOT_STARTED))
self.start_time = self.end_time

self.time_taken = self.env_layer.datetime.total_minutes_from_time_delta(self.end_time - self.start_time)

def stop_and_write_telemetry(self, message):
self.stop()
self.set_task_details(message)
self.composite_logger.log("Stopwatch details: " + str(self.task_details))

def set_task_details(self, message):
self.task_details = {Constants.PerfLogTrackerParams.START_TIME: str(self.start_time), Constants.PerfLogTrackerParams.END_TIME: str(self.end_time), Constants.PerfLogTrackerParams.TIME_TAKEN: str(self.time_taken),
Constants.PerfLogTrackerParams.MACHINE_INFO: self.telemetry_writer.machine_info, Constants.PerfLogTrackerParams.MESSAGE: str(message)}
8 changes: 4 additions & 4 deletions src/core/src/service_interfaces/TelemetryWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
class TelemetryWriter(object):
"""Class for writing telemetry data to data transports"""


def __init__(self, env_layer, composite_logger, events_folder_path, telemetry_supported):
self.env_layer = env_layer
self.composite_logger = composite_logger
Expand All @@ -45,7 +46,7 @@ def __init__(self, env_layer, composite_logger, events_folder_path, telemetry_su
self.__is_telemetry_supported = telemetry_supported and self.events_folder_path is not None

self.write_event('Started Linux patch core operation.', Constants.TelemetryEventLevel.Informational)
self.write_machine_config_info()
self.set_machine_config_info()

def write_config_info(self, config_info, config_type='unknown'):
# Configuration info
Expand Down Expand Up @@ -73,16 +74,15 @@ def write_package_info(self, package_name, package_ver, package_size, install_du
self.write_event(message, Constants.TelemetryEventLevel.Informational)

# Composed payload
def write_machine_config_info(self):
def set_machine_config_info(self):
# Machine info - sent only once at the start of the run
machine_info = {
self.machine_info = {
'platform_name': str(self.env_layer.platform.linux_distribution()[0]),
'platform_version': str(self.env_layer.platform.linux_distribution()[1]),
'machine_cpu': self.get_machine_processor(),
'machine_arch': str(self.env_layer.platform.machine()),
'disk_type': self.get_disk_type()
}
return self.write_config_info(machine_info, 'machine_config')

def write_execution_error(self, cmd, code, output):
# Expected to log any errors from a cmd execution, including package manager execution errors
Expand Down
28 changes: 28 additions & 0 deletions src/core/tests/Test_MaintenanceWindow.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,5 +79,33 @@ def test_check_available_time_after_duration_complete(self):
self.assertEqual(runtime.maintenance_window.is_package_install_time_available(), False)
runtime.stop()

def test_get_percentage_maintenance_window_used(self):
argument_composer = ArgumentComposer()
argument_composer.start_time = (datetime.datetime.utcnow() - datetime.timedelta(hours=0, minutes=18)).strftime("%Y-%m-%dT%H:%M:%S.9999Z")
argument_composer.maximum_duration = "PT3H"
runtime = RuntimeCompositor(argument_composer.get_composed_arguments(), True)
perc_maintenance_window_used = runtime.maintenance_window.get_percentage_maintenance_window_used()
# 18 minutes of maintenance window used out of 3 hours (180 minutes). So, it should be 10%.
# The value should be slightly greater than 10 as it takes some time to trigger the method get_percentage_maintenance_window_used
self.assertGreaterEqual(perc_maintenance_window_used, 10)
self.assertLessEqual(perc_maintenance_window_used, 11)
runtime.stop()

def test_get_percentage_maintenance_window_used_Fail(self):
argument_composer = ArgumentComposer()
# ZeroDivisionError should be thrown as duration is 0
argument_composer.maximum_duration = "PT0H"
runtime = RuntimeCompositor(argument_composer.get_composed_arguments(), True)
self.assertRaises(Exception, runtime.maintenance_window.get_percentage_maintenance_window_used)
runtime.stop()

def test_get_percentage_maintenance_window_used_start_time_greater_exception(self):
argument_composer = ArgumentComposer()
# Setting start time 1 hour later than current time
argument_composer.start_time = (datetime.datetime.utcnow() + datetime.timedelta(hours=1)).strftime("%Y-%m-%dT%H:%M:%S.9999Z")
runtime = RuntimeCompositor(argument_composer.get_composed_arguments(), True)
self.assertRaises(Exception, runtime.maintenance_window.get_percentage_maintenance_window_used)
runtime.stop()

if __name__ == '__main__':
unittest.main()
Loading