|
8 | 8 | from pcluster.aws.common import AWSClientError |
9 | 9 | from pcluster.validators.s3_validators import S3BucketRegionValidator, S3BucketUriValidator, UrlValidator |
10 | 10 | from tests.pcluster.validators.utils import assert_failure_messages |
| 11 | +import datetime |
11 | 12 |
|
12 | 13 |
|
13 | 14 | def test_url_validator(): |
14 | 15 | dynamodb_client = boto3.client("dynamodb", region_name="us-east-1") |
15 | 16 | current_time = int(time.time()) |
16 | | - one_month_ago = current_time - (0.5 * 24 * 60 * 60) |
| 17 | + one_month_ago = current_time - (300 * 24 * 60 * 60) |
17 | 18 |
|
18 | 19 | filter_expression = "#call_start_time >= :one_month_ago" |
19 | 20 | expression_attribute_values = {":one_month_ago": {"N": str(one_month_ago)}} |
@@ -95,28 +96,74 @@ def _remove_os_from_string(x): |
95 | 96 | def _get_statistics_by_category( |
96 | 97 | all_items, category_name, statistics_name, category_name_processing=None, statistics_processing=None |
97 | 98 | ): |
98 | | - os_cluster_creation_times = {} |
99 | | - for item in all_items: |
100 | | - if item["call_status"]["S"] != "passed": |
101 | | - continue |
102 | | - if statistics_name not in item: |
103 | | - continue |
104 | | - cluster_creation_time = item[statistics_name]["N"] |
105 | | - if cluster_creation_time == "0": |
106 | | - continue |
107 | | - os = item[category_name]["S"] |
108 | | - if category_name_processing: |
109 | | - os = category_name_processing(os) |
110 | | - if os not in os_cluster_creation_times: |
111 | | - os_cluster_creation_times[os] = [float(cluster_creation_time)] |
112 | | - else: |
113 | | - os_cluster_creation_times[os].append(float(cluster_creation_time)) |
| 99 | + more_data = True |
| 100 | + lastest_time = float(all_items[0]["call_start_time"]["N"]) |
| 101 | + window_length = 8 |
114 | 102 | result = {} |
115 | | - for os, cluster_creation_times in os_cluster_creation_times.items(): |
116 | | - cluster_creation_times.sort(reverse=True) |
117 | | - result[os] = sum(cluster_creation_times) / len(cluster_creation_times) |
118 | | - return sorted(result.items(), key=lambda x: x[1], reverse=True) |
| 103 | + while more_data: |
| 104 | + more_data = False |
| 105 | + os_cluster_creation_times = {} |
| 106 | + for item in all_items: |
| 107 | + if item["call_status"]["S"] != "passed": |
| 108 | + continue |
| 109 | + if statistics_name not in item: |
| 110 | + continue |
| 111 | + if float(item["call_start_time"]["N"]) < lastest_time - (window_length * 24 * 60 * 60): |
| 112 | + more_data = True |
| 113 | + continue |
| 114 | + if float(item["call_start_time"]["N"]) > lastest_time: |
| 115 | + continue |
| 116 | + cluster_creation_time = item[statistics_name]["N"] |
| 117 | + if cluster_creation_time == "0": |
| 118 | + continue |
| 119 | + os = item[category_name]["S"] |
| 120 | + if category_name_processing: |
| 121 | + os = category_name_processing(os) |
| 122 | + if os not in os_cluster_creation_times: |
| 123 | + os_cluster_creation_times[os] = [float(cluster_creation_time)] |
| 124 | + else: |
| 125 | + os_cluster_creation_times[os].append(float(cluster_creation_time)) |
| 126 | + for os, cluster_creation_times in os_cluster_creation_times.items(): |
| 127 | + if os not in result: |
| 128 | + result[os] = [] |
| 129 | + os_time_key = f"{os}-time" |
| 130 | + if os_time_key not in result: |
| 131 | + result[os_time_key] = [] |
| 132 | + result[os].insert(0, sum(cluster_creation_times) / len(cluster_creation_times)) |
| 133 | + result[os_time_key].insert(0, datetime.datetime.fromtimestamp(lastest_time).strftime("%Y-%m-%d")) |
| 134 | + if os_cluster_creation_times: |
| 135 | + more_data = True |
| 136 | + lastest_time = lastest_time - 24 * 60 * 60 |
| 137 | + print(lastest_time) |
| 138 | + |
| 139 | + plot_statistics(result, statistics_name) |
| 140 | + return result |
| 141 | + # return sorted(result.items(), key=lambda x: x[1], reverse=True) |
| 142 | + |
| 143 | +import matplotlib.pyplot as plt |
def plot_statistics(result, statistics_name):
    """Plot one line per category from a rolling-statistics result and show the figure.

    :param result: dict holding, for every category, a list of values under the
        category name and the matching x-axis labels under ``f"{category}-time"``.
    :param statistics_name: used as the figure title.
    """
    plt.figure(figsize=(12, 6))

    for category, values in result.items():
        # Keys ending in "-time" hold the x-axis labels of another series, not data.
        # Match on the suffix only so a category that merely contains "-time" is kept.
        if category.endswith("-time"):
            continue
        x_values = result[f"{category}-time"]
        plt.plot(x_values, values, marker='o', label=category)

    plt.title(statistics_name)
    plt.xlabel('Latest timestamp')
    plt.ylabel('Average Creation Time')
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.legend()

    # Rotate x-axis labels for better readability
    plt.xticks(rotation=45)

    # Adjust layout to prevent label cutoff
    plt.tight_layout()

    plt.show()
120 | 167 |
|
121 | 168 | def _get_launch_time(logs, instance_id): |
122 | 169 | for log in logs: |
|
0 commit comments