From bc5f3e777d1c820f36bc3542b6666952f63ed4a0 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 14 Feb 2025 23:31:47 +0000 Subject: [PATCH 1/2] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?= =?UTF-8?q?anges=20to=20main=20this=20commit=20is=20based=20on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.4 [skip ci] --- .ci/metrics/metrics.py | 71 ++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/.ci/metrics/metrics.py b/.ci/metrics/metrics.py index 70b787665a8b9..27b8def5783fc 100644 --- a/.ci/metrics/metrics.py +++ b/.ci/metrics/metrics.py @@ -47,36 +47,51 @@ def get_sampled_workflow_metrics(github_repo: github.Repository): # Other states are available (pending, waiting, etc), but the meaning # is not documented (See #70540). # "queued" seems to be the info we want. - queued_workflow_count = len( - [ - x - for x in github_repo.get_workflow_runs(status="queued") - if x.name in WORKFLOWS_TO_TRACK - ] - ) - running_workflow_count = len( - [ - x - for x in github_repo.get_workflow_runs(status="in_progress") - if x.name in WORKFLOWS_TO_TRACK - ] - ) + queued_job_counts = {} + for queued_workflow in github_repo.get_workflow_runs(status="queued"): + if queued_workflow.name not in WORKFLOWS_TO_TRACK: + continue + for queued_workflow_job in queued_workflow.jobs(): + job_name = queued_workflow_job.name + if queued_workflow_job.status != "queued": + continue + + if job_name not in queued_job_counts: + queued_job_counts[job_name] = 1 + else: + queued_job_counts[job_name] += 1 + + running_job_counts = {} + for running_workflow in github_repo.get_workflow_runs(status="in_progress"): + if running_workflow.name not in WORKFLOWS_TO_TRACK: + continue + for running_workflow_job in running_workflow.jobs(): + job_name = running_workflow_job.name + if running_workflow_job.status != "in_progress": + continue + + if job_name not in running_job_counts: + running_job_counts[job_name] = 1 + else: + running_job_counts[job_name] += 1 workflow_metrics = [] - workflow_metrics.append( - GaugeMetric( - "workflow_queue_size", - queued_workflow_count, - time.time_ns(), + for queued_job in queued_job_counts: + workflow_metrics.append( + GaugeMetric( + f"workflow_queue_size_{queued_job}", + queued_job_counts[queued_job], + time.time_ns(), + ) ) - ) - workflow_metrics.append( - GaugeMetric( - "running_workflow_count", - running_workflow_count, - time.time_ns(), + for running_job in running_job_counts: + workflow_metrics.append( + GaugeMetric( + f"running_workflow_count_{running_job}", + running_job_counts[running_job], + time.time_ns(), + ) ) - ) # Always send a hearbeat metric so we can monitor is this container is still able to log to Grafana. workflow_metrics.append( GaugeMetric("metrics_container_heartbeat", 1, time.time_ns()) @@ -250,10 +265,6 @@ def main(): while True: current_metrics = get_per_workflow_metrics(github_repo, workflows_to_track) current_metrics += get_sampled_workflow_metrics(github_repo) - # Always send a hearbeat metric so we can monitor is this container is still able to log to Grafana. - current_metrics.append( - GaugeMetric("metrics_container_heartbeat", 1, time.time_ns()) - ) upload_metrics(current_metrics, grafana_metrics_userid, grafana_api_key) print(f"Uploaded {len(current_metrics)} metrics", file=sys.stderr) From 66a49568e85067dac064cc70dd20e7819d5d356e Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Sat, 15 Feb 2025 03:07:56 +0000 Subject: [PATCH 2/2] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?= =?UTF-8?q?anges=20introduced=20through=20rebase?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.4 [skip ci] --- .ci/metrics/metrics.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/.ci/metrics/metrics.py b/.ci/metrics/metrics.py index 27b8def5783fc..ed7b908399abd 100644 --- a/.ci/metrics/metrics.py +++ b/.ci/metrics/metrics.py @@ -43,25 +43,30 @@ def get_sampled_workflow_metrics(github_repo: github.Repository): Returns a list of GaugeMetric objects, containing the relevant metrics about the workflow """ + queued_job_counts = {} + running_job_counts = {} # Other states are available (pending, waiting, etc), but the meaning # is not documented (See #70540). # "queued" seems to be the info we want. - queued_job_counts = {} for queued_workflow in github_repo.get_workflow_runs(status="queued"): if queued_workflow.name not in WORKFLOWS_TO_TRACK: continue for queued_workflow_job in queued_workflow.jobs(): job_name = queued_workflow_job.name - if queued_workflow_job.status != "queued": - continue + # Workflows marked as queued can potentially only have some jobs + # queued, so make sure to also count jobs currently in progress. + if queued_workflow_job.status == "queued": + if job_name not in queued_job_counts: + queued_job_counts[job_name] = 1 + else: + queued_job_counts[job_name] += 1 + elif queued_workflow_job.status == "in_progress": + if job_name not in running_job_counts: + running_job_counts[job_name] = 1 + else: + running_job_counts[job_name] += 1 - if job_name not in queued_job_counts: - queued_job_counts[job_name] = 1 - else: - queued_job_counts[job_name] += 1 - - running_job_counts = {} for running_workflow in github_repo.get_workflow_runs(status="in_progress"): if running_workflow.name not in WORKFLOWS_TO_TRACK: continue