+import ast
+import datetime
+import json
 import time
 from collections import defaultdict

@@ -83,6 +85,57 @@ def test_url_validator(): |
     print(all_items)


+
+def test_performance_validator():
+    dynamodb_client = boto3.client("dynamodb", region_name="us-east-1")
+    current_time = int(time.time())
+    # Only look at results from the last 30 days
+    one_month_ago = current_time - (30 * 24 * 60 * 60)
+
+    filter_expression = "#timestamp >= :one_month_ago"
+    expression_attribute_values = {":one_month_ago": {"N": str(one_month_ago)}}
+    projection_expression = "#instance, #name, #os, #result, #timestamp"
+    # Alias every projected attribute: "name", "os", "result" and "timestamp"
+    # are DynamoDB reserved words.
+    expression_attribute_names = {
+        "#instance": "instance",
+        "#name": "name",
+        "#os": "os",
+        "#result": "result",
+        "#timestamp": "timestamp",
+    }
+    all_items = []
+    last_evaluated_key = None
+    while True:
+        # Parameters for the scan operation
+        scan_params = {
+            "TableName": "ParallelCluster-PerformanceTest-Metadata",
+            "ProjectionExpression": projection_expression,
+            "FilterExpression": filter_expression,
+            "ExpressionAttributeNames": expression_attribute_names,
+            "ExpressionAttributeValues": expression_attribute_values,
+        }
+
+        # Add ExclusiveStartKey if we're not on the first iteration
+        if last_evaluated_key:
+            scan_params["ExclusiveStartKey"] = last_evaluated_key
+
+        response = dynamodb_client.scan(**scan_params)
+        all_items.extend(response.get("Items", []))
+
+        # Check if there are more items to fetch
+        last_evaluated_key = response.get("LastEvaluatedKey")
+        if not last_evaluated_key:
+            break
+    # Newest results first
+    all_items.sort(key=lambda x: int(x["timestamp"]["N"]), reverse=True)
+    items_by_name = defaultdict(list)
+    for item in all_items:
+        items_by_name[item["name"]["S"]].append(item)
+    result = defaultdict(dict)
+    for name, items in items_by_name.items():
+        result[name] = _get_statistics_by_node_num(items)
+    print(all_items)
+
+
 def _mean(x):
     return sum(x) / len(x)

@@ -140,6 +193,28 @@ def _get_statistics_by_category( |
     return result
     # return sorted(result.items(), key=lambda x: x[1], reverse=True)

+def _get_statistics_by_node_num(all_items):
+    result = {}
+    for item in all_items:
+        # The "result" attribute is a Python-literal string of (node_num, performance) pairs
+        this_result = ast.literal_eval(item["result"]["S"])
+        for node_num, performance in this_result:
+            if node_num not in result:
+                result[node_num] = {}
+            os = item["os"]["S"]
+            os_time_key = f"{os}-time"
+            if os not in result[node_num]:
+                result[node_num][os] = []
+                result[node_num][os_time_key] = []
+            result[node_num][os].append(performance)
+            result[node_num][os_time_key].append(
+                datetime.datetime.fromtimestamp(int(item["timestamp"]["N"])).strftime("%Y-%m-%d %H:%M")
+            )
+    for node_num, node_num_result in result.items():
+        plot_statistics(node_num_result, node_num)
+    return result
+    # return sorted(result.items(), key=lambda x: x[1], reverse=True)
+
+
 import matplotlib.pyplot as plt
 def plot_statistics(result, statistics_name):
     plt.figure(figsize=(12, 6))