Skip to content

Commit 189434f

Browse files
Improve to calculate historical data and plot graphs
1 parent d2584b0 commit 189434f

File tree

1 file changed

+68
-21
lines changed

1 file changed

+68
-21
lines changed

cli/tests/pcluster/validators/test_s3_validators_local.py

Lines changed: 68 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,13 @@
88
from pcluster.aws.common import AWSClientError
99
from pcluster.validators.s3_validators import S3BucketRegionValidator, S3BucketUriValidator, UrlValidator
1010
from tests.pcluster.validators.utils import assert_failure_messages
11+
import datetime
1112

1213

1314
def test_url_validator():
1415
dynamodb_client = boto3.client("dynamodb", region_name="us-east-1")
1516
current_time = int(time.time())
16-
one_month_ago = current_time - (0.5 * 24 * 60 * 60)
17+
one_month_ago = current_time - (300 * 24 * 60 * 60)
1718

1819
filter_expression = "#call_start_time >= :one_month_ago"
1920
expression_attribute_values = {":one_month_ago": {"N": str(one_month_ago)}}
@@ -95,28 +96,74 @@ def _remove_os_from_string(x):
9596
def _get_statistics_by_category(
9697
all_items, category_name, statistics_name, category_name_processing=None, statistics_processing=None
9798
):
98-
os_cluster_creation_times = {}
99-
for item in all_items:
100-
if item["call_status"]["S"] != "passed":
101-
continue
102-
if statistics_name not in item:
103-
continue
104-
cluster_creation_time = item[statistics_name]["N"]
105-
if cluster_creation_time == "0":
106-
continue
107-
os = item[category_name]["S"]
108-
if category_name_processing:
109-
os = category_name_processing(os)
110-
if os not in os_cluster_creation_times:
111-
os_cluster_creation_times[os] = [float(cluster_creation_time)]
112-
else:
113-
os_cluster_creation_times[os].append(float(cluster_creation_time))
99+
more_data = True
100+
lastest_time = float(all_items[0]["call_start_time"]["N"])
101+
window_length = 8
114102
result = {}
115-
for os, cluster_creation_times in os_cluster_creation_times.items():
116-
cluster_creation_times.sort(reverse=True)
117-
result[os] = sum(cluster_creation_times) / len(cluster_creation_times)
118-
return sorted(result.items(), key=lambda x: x[1], reverse=True)
103+
while more_data:
104+
more_data = False
105+
os_cluster_creation_times = {}
106+
for item in all_items:
107+
if item["call_status"]["S"] != "passed":
108+
continue
109+
if statistics_name not in item:
110+
continue
111+
if float(item["call_start_time"]["N"]) < lastest_time - (window_length * 24 * 60 * 60):
112+
more_data = True
113+
continue
114+
if float(item["call_start_time"]["N"]) > lastest_time:
115+
continue
116+
cluster_creation_time = item[statistics_name]["N"]
117+
if cluster_creation_time == "0":
118+
continue
119+
os = item[category_name]["S"]
120+
if category_name_processing:
121+
os = category_name_processing(os)
122+
if os not in os_cluster_creation_times:
123+
os_cluster_creation_times[os] = [float(cluster_creation_time)]
124+
else:
125+
os_cluster_creation_times[os].append(float(cluster_creation_time))
126+
for os, cluster_creation_times in os_cluster_creation_times.items():
127+
if os not in result:
128+
result[os] = []
129+
os_time_key = f"{os}-time"
130+
if os_time_key not in result:
131+
result[os_time_key] = []
132+
result[os].insert(0, sum(cluster_creation_times) / len(cluster_creation_times))
133+
result[os_time_key].insert(0, datetime.datetime.fromtimestamp(lastest_time).strftime("%Y-%m-%d"))
134+
if os_cluster_creation_times:
135+
more_data = True
136+
lastest_time = lastest_time - 24 * 60 * 60
137+
print(lastest_time)
138+
139+
plot_statistics(result, statistics_name)
140+
return result
141+
# return sorted(result.items(), key=lambda x: x[1], reverse=True)
142+
143+
import matplotlib.pyplot as plt
144+
def plot_statistics(result, statistics_name):
    """Plot one line per category from the series in ``result`` and show the figure.

    Args:
        result: Dict holding, for every category, a list of values under the
            category name and the matching x-axis labels under
            ``"<category>-time"``.
        statistics_name: Text used as the figure title.
    """
    time_suffix = "-time"
    plt.figure(figsize=(12, 6))

    for category, values in result.items():
        # "<category>-time" entries carry x-axis labels, not data series. Only a
        # trailing suffix whose base category exists is treated as such, so a
        # category that merely contains "-time" in its name is still plotted.
        if category.endswith(time_suffix) and category[: -len(time_suffix)] in result:
            continue
        x_values = result[f"{category}{time_suffix}"]
        plt.plot(x_values, values, marker="o", label=category)

    plt.title(statistics_name)
    plt.xlabel("Latest timestamp")
    plt.ylabel("Average Creation Time")
    plt.grid(True, linestyle="--", alpha=0.7)
    plt.legend()

    # Rotate x-axis labels for better readability
    plt.xticks(rotation=45)

    # Adjust layout to prevent label cutoff
    plt.tight_layout()

    plt.show()
120167

121168
def _get_launch_time(logs, instance_id):
122169
for log in logs:

0 commit comments

Comments
 (0)