diff --git a/torchci/clickhouse_queries/vllm/build_failed_jobs/params.json b/torchci/clickhouse_queries/vllm/build_failed_jobs/params.json
new file mode 100644
index 0000000000..f71304d0f1
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/build_failed_jobs/params.json
@@ -0,0 +1,14 @@
+{
+  "params": {
+    "repo": "String",
+    "pipelineName": "String",
+    "buildNumber": "UInt32"
+  },
+  "tests": [
+    {
+      "repo": "https://github.com/vllm-project/vllm.git",
+      "pipelineName": "CI",
+      "buildNumber": 12345
+    }
+  ]
+}
diff --git a/torchci/clickhouse_queries/vllm/build_failed_jobs/query.sql b/torchci/clickhouse_queries/vllm/build_failed_jobs/query.sql
new file mode 100644
index 0000000000..e0a0df49d0
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/build_failed_jobs/query.sql
@@ -0,0 +1,32 @@
+-- vLLM: hard-failed jobs of one build
+-- Inputs: repo (pipeline repository URL), pipelineName, buildNumber
+-- A job is listed when its state is 'failed' (case-insensitive) and it is not
+-- flagged soft_failed; only main-branch builds are searched.
+-- Output: one row per job with name, state, timestamps, URLs, exit status and
+-- duration, sorted by job name.
+
+SELECT
+    tupleElement(job, 'name') AS job_name,
+    tupleElement(job, 'state') AS job_state,
+    tupleElement(job, 'soft_failed') AS soft_failed,
+    tupleElement(job, 'started_at') AS job_started_at,
+    tupleElement(job, 'finished_at') AS job_finished_at,
+    tupleElement(job, 'web_url') AS job_url,
+    tupleElement(job, 'exit_status') AS exit_status,
+    -- Wall-clock job time: seconds converted to fractional hours
+    dateDiff(
+        'second', tupleElement(job, 'started_at'), tupleElement(job, 'finished_at')
+    ) / 3600.0 AS duration_hours,
+    toUInt32(tupleElement(build, 'number')) AS build_number,
+    tupleElement(build, 'web_url') AS build_url
+FROM vllm.vllm_buildkite_jobs
+WHERE
+    -- Hard failures only
+    lowerUTF8(tupleElement(job, 'state')) = 'failed'
+    AND tupleElement(job, 'soft_failed') = FALSE
+    -- Scope: the requested build of the requested pipeline
+    AND toUInt32(tupleElement(build, 'number')) = {buildNumber: UInt32}
+    AND tupleElement(pipeline, 'repository') = {repo: String}
+    AND tupleElement(pipeline, 'name') = {pipelineName: String}
+    AND tupleElement(build, 'branch') = 'main'
+ORDER BY job_name ASC
diff 
--git a/torchci/clickhouse_queries/vllm/continuous_builds/params.json b/torchci/clickhouse_queries/vllm/continuous_builds/params.json
new file mode 100644
index 0000000000..f186818776
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/continuous_builds/params.json
@@ -0,0 +1,16 @@
+{
+  "params": {
+    "repo": "String",
+    "pipelineName": "String",
+    "startTime": "DateTime64(3)",
+    "stopTime": "DateTime64(3)"
+  },
+  "tests": [
+    {
+      "repo": "https://github.com/vllm-project/vllm.git",
+      "pipelineName": "CI",
+      "startTime": "2025-11-17T00:00:00.000",
+      "stopTime": "2025-11-24T00:00:00.000"
+    }
+  ]
+}
diff --git a/torchci/clickhouse_queries/vllm/continuous_builds/query.sql b/torchci/clickhouse_queries/vllm/continuous_builds/query.sql
new file mode 100644
index 0000000000..f6fe5739e8
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/continuous_builds/query.sql
@@ -0,0 +1,94 @@
+-- vLLM continuous builds list (daily and nightly scheduled runs)
+-- Returns main-branch builds that finished in [startTime, stopTime) and whose
+-- message matches one of the scheduled-run patterns, with per-build job counts.
+--
+-- Job counts are aggregated once per build in `job_counts` and joined on the
+-- build number. Scalar subqueries in the SELECT list are avoided on purpose:
+-- an unqualified `build` inside such a subquery would bind to the jobs table's
+-- own column (innermost scope first), so the intended per-build predicate
+-- would compare each job row with itself and yield repo-wide totals.
+-- Jobs are also restricted to the requested pipeline, because Buildkite build
+-- numbers are only unique within a pipeline.
+
+WITH scheduled_builds AS (
+    SELECT
+        toUInt32(tupleElement(build, 'number')) AS build_number,
+        tupleElement(build, 'id') AS build_id,
+        tupleElement(build, 'state') AS build_state,
+        tupleElement(build, 'web_url') AS build_url,
+        tupleElement(build, 'started_at') AS build_started_at,
+        tupleElement(build, 'finished_at') AS build_finished_at,
+        tupleElement(build, 'message') AS build_message,
+        tupleElement(build, 'commit') AS commit,
+        -- Determine build type from the scheduled-run marker in the message
+        if(
+            positionCaseInsensitive(
+                tupleElement(build, 'message'), 'Full CI run - daily'
+            )
+            > 0,
+            'Daily',
+            if(
+                positionCaseInsensitive(
+                    tupleElement(build, 'message'), 'Nightly run - All tests'
+                )
+                > 0,
+                'Nightly',
+                'Other'
+            )
+        ) AS build_type
+    FROM vllm.vllm_buildkite_builds
+    WHERE
+        tupleElement(pipeline, 'repository') = {repo: String}
+        AND tupleElement(pipeline, 'name') = {pipelineName: String}
+        AND tupleElement(build, 'branch') = 'main'
+        AND tupleElement(build, 'finished_at') IS NOT NULL
+        AND tupleElement(build, 'finished_at') >= {startTime: DateTime64(3)}
+        AND tupleElement(build, 'finished_at') < {stopTime: DateTime64(3)}
+        AND (
+            positionCaseInsensitive(
+                tupleElement(build, 'message'), 'Full CI run - daily'
+            )
+            > 0
+            OR positionCaseInsensitive(
+                tupleElement(build, 'message'), 'Nightly run - All tests'
+            )
+            > 0
+        )
+),
+
+-- One row per build number: all jobs and hard-failed (non-soft) jobs
+job_counts AS (
+    SELECT
+        toUInt32(tupleElement(build, 'number')) AS job_build_number,
+        count() AS job_total,
+        countIf(
+            lowerUTF8(tupleElement(job, 'state')) = 'failed'
+            AND tupleElement(job, 'soft_failed') = FALSE
+        ) AS job_failed_hard
+    FROM vllm.vllm_buildkite_jobs
+    WHERE
+        tupleElement(pipeline, 'repository') = {repo: String}
+        AND tupleElement(pipeline, 'name') = {pipelineName: String}
+        AND toUInt32(tupleElement(build, 'number')) IN (
+            SELECT build_number FROM scheduled_builds
+        )
+    GROUP BY job_build_number
+)
+
+SELECT DISTINCT
+    b.build_number AS build_number,
+    b.build_id AS build_id,
+    b.build_state AS build_state,
+    b.build_url AS build_url,
+    b.build_started_at AS build_started_at,
+    b.build_finished_at AS build_finished_at,
+    b.build_message AS build_message,
+    b.commit AS commit,
+    b.build_type AS build_type,
+    -- Builds without any job rows report 0 rather than NULL
+    coalesce(jc.job_total, 0) AS total_jobs,
+    coalesce(jc.job_failed_hard, 0) AS failed_jobs_count
+FROM scheduled_builds AS b
+LEFT JOIN job_counts AS jc
+    ON b.build_number = jc.job_build_number
+ORDER BY build_finished_at DESC
diff --git a/torchci/clickhouse_queries/vllm/docker_build_runtime/params.json b/torchci/clickhouse_queries/vllm/docker_build_runtime/params.json
new file mode 100644
index 0000000000..187cdc41eb
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/docker_build_runtime/params.json
@@ -0,0 +1,16 @@
+{
+  "params": {
+    "repo": "String",
+    "jobName": "String",
+    "startTime": "DateTime64(3)",
+    "stopTime": "DateTime64(3)"
+  },
+  "tests": [
+    {
+      "repo": "https://github.com/vllm-project/vllm.git",
+      "jobName": ":docker: build image",
+      "startTime": "2025-10-01T00:00:00.000",
+      "stopTime": "2025-11-01T00:00:00.000"
+    }
+  ]
+}
diff --git a/torchci/clickhouse_queries/vllm/docker_build_runtime/query.sql b/torchci/clickhouse_queries/vllm/docker_build_runtime/query.sql
new file mode 100644
index 0000000000..e63b7853d4
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/docker_build_runtime/query.sql
@@ -0,0 +1,32 @@
+-- vLLM Single Job Runtime Trends 
(main branch only)
+-- Runtime in minutes of every finished run of one named job
+-- The job is chosen via jobName (default use: ":docker: build image")
+
+WITH job_runs AS (
+    SELECT
+        tupleElement(job, 'started_at') AS job_started_at,
+        tupleElement(job, 'finished_at') AS job_finished_at,
+        -- Cast like the sibling vLLM queries do for build numbers
+        toUInt32(tupleElement(build, 'number')) AS build_number
+    FROM vllm.vllm_buildkite_jobs
+    WHERE
+        tupleElement(pipeline, 'repository') = {repo: String }
+        AND tupleElement(build, 'branch') = 'main'
+        AND tupleElement(job, 'name') = {jobName: String}
+        AND tupleElement(job, 'started_at') IS NOT NULL
+        AND tupleElement(job, 'finished_at') IS NOT NULL
+        AND tupleElement(job, 'started_at') >= {startTime: DateTime64(3) }
+        AND tupleElement(job, 'started_at') < {stopTime: DateTime64(3) }
+        -- Only these four states are kept; every other state is skipped
+        AND lowerUTF8(tupleElement(job, 'state')) IN (
+            'passed', 'finished', 'success', 'failed'
+        )
+)
+
+SELECT
+    job_started_at AS timestamp,
+    build_number,
+    round(dateDiff('second', job_started_at, job_finished_at) / 60.0, 2)
+        AS runtime_minutes
+FROM job_runs
+ORDER BY job_started_at ASC
diff --git a/torchci/clickhouse_queries/vllm/job_list/params.json b/torchci/clickhouse_queries/vllm/job_list/params.json
new file mode 100644
index 0000000000..d81f11f2a2
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/job_list/params.json
@@ -0,0 +1,18 @@
+{
+  "params": {
+    "repo": "String",
+    "pipelineName": "String",
+    "startTime": "DateTime64(3)",
+    "stopTime": "DateTime64(3)",
+    "jobGroups": "Array(String)"
+  },
+  "tests": [
+    {
+      "repo": "https://github.com/vllm-project/vllm.git",
+      "pipelineName": "CI",
+      "startTime": "2025-11-17T00:00:00.000",
+      "stopTime": "2025-11-24T00:00:00.000",
+      "jobGroups": ["amd", "torch_nightly", "main"]
+    }
+  ]
+}
diff --git a/torchci/clickhouse_queries/vllm/job_list/query.sql b/torchci/clickhouse_queries/vllm/job_list/query.sql
new file mode 100644
index 0000000000..ed0fe1d2b4
--- /dev/null
+++ 
b/torchci/clickhouse_queries/vllm/job_list/query.sql
@@ -0,0 +1,43 @@
+-- vLLM job list for build exploration (main branch only)
+-- Feeds the job selector of the JobBuildsPanel component: one row per job
+-- name with run, pass and fail counts plus the latest finish time.
+-- jobGroups selects which families are listed: 'amd', 'torch_nightly',
+-- and 'main' (every job whose name matches neither of the other two).
+
+WITH candidate_jobs AS (
+    SELECT
+        tupleElement(job, 'name') AS job_name,
+        lowerUTF8(tupleElement(job, 'state')) AS state_lc,
+        tupleElement(job, 'finished_at') AS job_finished_at,
+        -- Group membership is decided by a case-insensitive substring match
+        positionCaseInsensitive(tupleElement(job, 'name'), 'AMD') > 0
+            AS is_amd,
+        positionCaseInsensitive(tupleElement(job, 'name'), 'Torch Nightly') > 0
+            AS is_torch_nightly
+    FROM vllm.vllm_buildkite_jobs
+    WHERE
+        tupleElement(pipeline, 'repository') = {repo: String}
+        AND tupleElement(pipeline, 'name') = {pipelineName: String}
+        AND tupleElement(build, 'branch') = 'main'
+        AND tupleElement(job, 'finished_at') IS NOT NULL
+        AND tupleElement(job, 'finished_at') >= {startTime: DateTime64(3)}
+        AND tupleElement(job, 'finished_at') < {stopTime: DateTime64(3)}
+)
+
+SELECT
+    job_name,
+    count() AS total_runs,
+    countIf(state_lc IN ('passed', 'finished', 'success')) AS passed_count,
+    countIf(state_lc = 'failed') AS failed_count,
+    max(job_finished_at) AS last_run_at
+FROM candidate_jobs
+WHERE
+    (has({jobGroups: Array(String)}, 'amd') AND is_amd)
+    OR (has({jobGroups: Array(String)}, 'torch_nightly') AND is_torch_nightly)
+    OR (
+        has({jobGroups: Array(String)}, 'main')
+        AND NOT is_amd
+        AND NOT is_torch_nightly
+    )
+GROUP BY job_name
+ORDER BY last_run_at DESC, total_runs DESC
diff --git a/torchci/clickhouse_queries/vllm/job_runtime_trends/params.json b/torchci/clickhouse_queries/vllm/job_runtime_trends/params.json
new file mode 100644
index 0000000000..5f8714d676
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/job_runtime_trends/params.json
@@ -0,0 +1,16 @@
+{
+  "params": {
+    "repo": "String",
+    "startTime": "DateTime64(3)",
+    "stopTime": "DateTime64(3)",
+    "jobGroups": "Array(String)"
+  },
+  "tests": [
+    {
+      "repo": "https://github.com/vllm-project/vllm.git",
+      "startTime": "2025-10-01T00:00:00.000",
+      "stopTime": "2025-10-08T00:00:00.000",
+      "jobGroups": ["main", "amd", "torch_nightly"]
+    }
+  ]
+}
diff --git a/torchci/clickhouse_queries/vllm/job_runtime_trends/query.sql b/torchci/clickhouse_queries/vllm/job_runtime_trends/query.sql
new file mode 100644
index 0000000000..48c347009a
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/job_runtime_trends/query.sql
@@ -0,0 +1,63 @@
+-- vLLM Job Runtime Trends (main branch only)
+-- Per job and per calendar day: number of runs plus mean, p90 and max
+-- runtime in minutes, rounded to two decimals.
+-- jobGroups selects which families are included: 'amd', 'torch_nightly',
+-- and 'main' (every job whose name matches neither of the other two).
+
+WITH job_runs AS (
+    SELECT
+        tupleElement(job, 'name') AS job_name,
+        tupleElement(job, 'started_at') AS job_started_at,
+        -- Runtime is computed once here and aggregated three ways below
+        dateDiff(
+            'second',
+            tupleElement(job, 'started_at'),
+            tupleElement(job, 'finished_at')
+        ) / 60.0 AS runtime_minutes
+    FROM vllm.vllm_buildkite_jobs
+    WHERE
+        tupleElement(pipeline, 'repository') = {repo: String }
+        AND tupleElement(build, 'branch') = 'main'
+        AND tupleElement(job, 'started_at') IS NOT NULL
+        AND tupleElement(job, 'finished_at') IS NOT NULL
+        AND tupleElement(job, 'started_at') >= {startTime: DateTime64(3) }
+        AND tupleElement(job, 'started_at') < {stopTime: DateTime64(3) }
+        AND lowerUTF8(tupleElement(job, 'state')) IN (
+            'passed', 'finished', 'success', 'failed'
+        )
+        -- Job group filtering: AMD, Torch Nightly, or Main
+        AND (
+            (
+                has({jobGroups: Array(String)}, 'amd')
+                AND positionCaseInsensitive(tupleElement(job, 'name'), 'AMD')
+                > 0
+            )
+            OR (
+                has({jobGroups: Array(String)}, 'torch_nightly')
+                AND positionCaseInsensitive(
+                    tupleElement(job, 'name'), 'Torch Nightly'
+                )
+                > 0
+            )
+            OR (
+                has({jobGroups: Array(String)}, 'main')
+                AND positionCaseInsensitive(tupleElement(job, 'name'), 'AMD')
+                = 0
+                AND positionCaseInsensitive(
+                    tupleElement(job, 'name'), 'Torch Nightly'
+                )
+                = 0
+            )
+        )
+)
+
+SELECT
+    job_name,
+    toDate(job_started_at) AS date,
+    count() AS count,
+    round(avg(runtime_minutes), 2) AS mean_runtime_minutes,
+    round(quantile(0.9)(runtime_minutes), 2) AS p90_runtime_minutes,
+    round(max(runtime_minutes), 2) AS max_runtime_minutes
+FROM job_runs
+GROUP BY job_name, date
+ORDER BY job_name ASC, date ASC
diff --git a/torchci/clickhouse_queries/vllm/recent_job_builds/params.json b/torchci/clickhouse_queries/vllm/recent_job_builds/params.json
new file mode 100644
index 0000000000..26bfd65387
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/recent_job_builds/params.json
@@ -0,0 +1,18 @@
+{
+  "params": {
+    "repo": "String",
+    "pipelineName": "String",
+    "jobName": "String",
+    "startTime": "DateTime64(3)",
+    "stopTime": "DateTime64(3)"
+  },
+  "tests": [
+    {
+      "repo": "https://github.com/vllm-project/vllm.git",
+      "pipelineName": "CI",
+      "jobName": "Test Example Job",
+      "startTime": "2025-11-17T00:00:00.000",
+      "stopTime": "2025-11-24T00:00:00.000"
+    }
+  ]
+}
diff --git a/torchci/clickhouse_queries/vllm/recent_job_builds/query.sql b/torchci/clickhouse_queries/vllm/recent_job_builds/query.sql
new file mode 100644
index 0000000000..fd53902a8b
--- /dev/null
+++ b/torchci/clickhouse_queries/vllm/recent_job_builds/query.sql
@@ -0,0 +1,56 @@
+-- vLLM recent builds for a specific job
+-- Returns all builds within the time range for a given job name
+-- Shows build details: number, state, duration, timestamps, etc.
+-- Only main-branch builds are considered; rows come back ordered with the
+-- most recently finished job first.
+
+-- Raw job and build fields for the requested job inside the time window
+WITH matching_jobs AS (
+    SELECT
+        -- Job-level fields
+        tupleElement(job, 'name') AS job_name,
+        tupleElement(job, 'state') AS job_state,
+        tupleElement(job, 'soft_failed') AS soft_failed,
+        tupleElement(job, 'started_at') AS job_started_at,
+        tupleElement(job, 'finished_at') AS job_finished_at,
+        tupleElement(job, 'web_url') AS job_url,
+        -- Fields of the build the job ran in
+        toUInt32(tupleElement(build, 'number')) AS build_number,
+        tupleElement(build, 'id') AS build_id,
+        tupleElement(build, 'state') AS build_state,
+        tupleElement(build, 'web_url') AS build_url,
+        tupleElement(build, 'started_at') AS build_started_at,
+        tupleElement(build, 'finished_at') AS build_finished_at,
+        tupleElement(build, 'commit') AS commit,
+        tupleElement(build, 'message') AS commit_message
+    FROM vllm.vllm_buildkite_jobs
+    WHERE
+        tupleElement(job, 'name') = {jobName: String}
+        AND tupleElement(pipeline, 'repository') = {repo: String}
+        AND tupleElement(pipeline, 'name') = {pipelineName: String}
+        AND tupleElement(build, 'branch') = 'main'
+        AND tupleElement(job, 'finished_at') IS NOT NULL
+        AND tupleElement(job, 'finished_at') >= {startTime: DateTime64(3)}
+        AND tupleElement(job, 'finished_at') < {stopTime: DateTime64(3)}
+)
+
+SELECT
+    build_number,
+    build_id,
+    build_state,
+    build_url,
+    build_started_at,
+    build_finished_at,
+    commit,
+    commit_message,
+    job_name,
+    job_state,
+    soft_failed,
+    job_started_at,
+    job_finished_at,
+    job_url,
+    -- Job wall-clock time: seconds converted to fractional hours
+    dateDiff('second', job_started_at, job_finished_at) / 3600.0
+        AS duration_hours
+FROM matching_jobs
+ORDER BY job_finished_at DESC
diff --git a/torchci/components/metrics/vllm/CiDurationsPanel.tsx b/torchci/components/metrics/vllm/CiDurationsPanel.tsx
index 63b8c49425..30e5db0462 100644
--- a/torchci/components/metrics/vllm/CiDurationsPanel.tsx
+++ b/torchci/components/metrics/vllm/CiDurationsPanel.tsx
@@ -230,28 +230,11 @@ export default function CiDurationsPanel({ 
...getLineSeries(dailyMeanSuccess, dailyMeanNonCanceled), ...getScatterSeriesByState(source), ], - dataZoom: [ - { - type: "slider", - show: true, - xAxisIndex: 0, - bottom: 0, - start: 0, - end: 100, - height: 25, - }, - { - type: "inside", - xAxisIndex: 0, - start: 0, - end: 100, - }, - ], }; return ( Math.pow(x - mean, 2)); + const variance = _.mean(squaredDiffs); + const volatility = Math.sqrt(variance); + + // Count state transitions + const transitions = healthValues.reduce((count, current, index) => { + if (index === 0) return 0; + const previous = healthValues[index - 1]; + return current !== previous ? count + 1 : count; + }, 0); + + // Calculate penalties + const volatilityPenalty = volatility * 50; + const transitionPenalty = Math.min(transitions / healthValues.length, 1) * 50; + + // Return score as percentage (0-1) + return Math.max(0, 100 - volatilityPenalty - transitionPenalty) / 100; +} + +// Helper function to format tooltip +function formatTooltip(params: any, stabilityData: any[]): string { + if (!Array.isArray(params) || params.length === 0) return ""; + + const date = params[0].axisValue; + const dataIndex = params[0].dataIndex; + const data = stabilityData[dataIndex]; + + if (!data) return ""; + + let result = `${date}
`; + result += `${params[0].marker} Stability Score: ${( + data.score * 100 + ).toFixed(1)}%
`; + result += ``; + result += `Volatility: ${(data.volatility * 100).toFixed(1)}% | `; + result += `Transitions: ${data.transitions}`; + result += ``; + + return result; +} + +// Helper function to get line series +function getLineSeries(data: any[]): any[] { + return [ + { + name: "Stability Score", + type: "line", + data: data.map((d) => d.score), + smooth: true, + symbol: "circle", + symbolSize: 6, + lineStyle: { width: 2 }, + itemStyle: { + color: (params: any) => { + const score = params.data; + if (score >= 0.7) return COLOR_SUCCESS; + if (score >= 0.5) return COLOR_WARNING; + return COLOR_ERROR; + }, + }, + areaStyle: { + opacity: 0.2, + color: { + type: "linear", + x: 0, + y: 0, + x2: 0, + y2: 1, + colorStops: [ + { offset: 0, color: COLOR_SUCCESS }, + { offset: 0.5, color: COLOR_WARNING }, + { offset: 1, color: COLOR_ERROR }, + ], + }, + }, + markLine: { + silent: true, + symbol: "none", + lineStyle: { + type: "dashed", + color: COLOR_WARNING, + width: 1, + }, + label: { + formatter: "Target: 70%", + position: "end", + }, + data: [{ yAxis: 0.7 }], + }, + }, + ]; +} + +export default function CiStabilityTrendPanel({ + data, +}: { + data: TrunkHealthData[] | undefined; +}) { + const { darkMode } = useDarkMode(); + + // Group builds by day and determine daily health status + const buildsByDay = _.groupBy( + data || [], + (d) => d.build_started_at?.slice(0, 10) || "" + ); + + const dailyHealth = Object.entries(buildsByDay) + .map(([day, builds]) => { + if (!day) return null; + const sortedBuilds = _.sortBy(builds, "build_started_at"); + const mostRecent = sortedBuilds[sortedBuilds.length - 1]; + return { + date: day, + isGreen: mostRecent?.is_green === 1 ? 
1 : 0, + }; + }) + .filter((d) => d !== null) + .sort((a, b) => a!.date.localeCompare(b!.date)) as { + date: string; + isGreen: number; + }[]; + + // Calculate rolling stability score (7-day window) + const windowSize = 7; + const stabilityData = dailyHealth + .map((day, index) => { + if (index < windowSize - 1) return null; // Not enough data for window + + // Get window of health values + const windowData = dailyHealth + .slice(Math.max(0, index - windowSize + 1), index + 1) + .map((d) => d.isGreen); + + // Calculate volatility + const mean = _.mean(windowData); + const squaredDiffs = windowData.map((x) => Math.pow(x - mean, 2)); + const variance = _.mean(squaredDiffs); + const volatility = Math.sqrt(variance); + + // Count transitions + const transitions = windowData.reduce((count, current, idx) => { + if (idx === 0) return 0; + const previous = windowData[idx - 1]; + return current !== previous ? count + 1 : count; + }, 0); + + const score = calculateStabilityScore(windowData); + + return { + date: day.date, + score, + volatility, + transitions, + }; + }) + .filter((d) => d !== null) as { + date: string; + score: number; + volatility: number; + transitions: number; + }[]; + + const dates = stabilityData.map((d) => dayjs(d.date).format("MMM D")); + + const options: EChartsOption = { + title: { + text: "CI Stability Score Over Time", + subtext: `7-day rolling window (target: ≥70%)`, + left: "center", + }, + grid: GRID_DEFAULT, + xAxis: { + type: "category", + data: dates, + name: "Date", + nameLocation: "middle", + nameGap: 35, + axisLabel: { + rotate: 45, + fontSize: 10, + }, + }, + yAxis: { + type: "value", + name: "Stability Score", + nameLocation: "middle", + nameGap: 45, + min: 0, + max: 1, + axisLabel: { + formatter: (value: number) => `${(value * 100).toFixed(0)}%`, + }, + }, + series: stabilityData.length > 0 ? 
getLineSeries(stabilityData) : [], + tooltip: getCrosshairTooltipConfig(darkMode, (params: any) => + formatTooltip(params, stabilityData) + ), + }; + + return ( + + ); +} diff --git a/torchci/components/metrics/vllm/ContinuousBuildTracker.tsx b/torchci/components/metrics/vllm/ContinuousBuildTracker.tsx new file mode 100644 index 0000000000..d869ba7e06 --- /dev/null +++ b/torchci/components/metrics/vllm/ContinuousBuildTracker.tsx @@ -0,0 +1,386 @@ +import OpenInNewIcon from "@mui/icons-material/OpenInNew"; +import { + Box, + Chip, + Link, + Paper, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + Typography, +} from "@mui/material"; +import dayjs from "dayjs"; +import { useDarkMode } from "lib/DarkModeContext"; +import { useClickHouseAPIImmutable } from "lib/GeneralUtils"; +import React, { useState } from "react"; +import { + COLOR_ERROR, + COLOR_GRAY, + COLOR_SUCCESS, + COLOR_WARNING, + PIPELINE_NAME, + VLLM_REPO_URL, +} from "./constants"; + +interface ContinuousBuildData { + build_number: number; + build_id: string; + build_state: string; + build_url: string; + build_started_at: string | null; + build_finished_at: string | null; + build_message: string; + commit: string; + build_type: string; + total_jobs: number; + failed_jobs_count: number; +} + +interface FailedJobData { + job_name: string; + job_state: string; + soft_failed: boolean; + job_started_at: string | null; + job_finished_at: string | null; + job_url: string; + exit_status: number | null; + duration_hours: number | null; + build_number: number; + build_url: string; +} + +// Helper function to format duration +function formatDuration(hours: number | null | undefined): string { + if (hours === null || hours === undefined) return "-"; + if (hours < 1) { + return `${(hours * 60).toFixed(0)}m`; + } + return `${hours.toFixed(2)}h`; +} + +// Helper function to get state color +function getStateColor(state: string): { bg: string; text: string } { + const stateLower = 
state.toLowerCase(); + if ( + stateLower === "passed" || + stateLower === "finished" || + stateLower === "success" + ) { + return { bg: COLOR_SUCCESS, text: "#fff" }; + } + if (stateLower === "failed" || stateLower === "failing") { + return { bg: COLOR_ERROR, text: "#fff" }; + } + if (stateLower === "canceled" || stateLower === "cancelled") { + return { bg: COLOR_GRAY, text: "#fff" }; + } + if (stateLower === "running") { + return { bg: COLOR_WARNING, text: "#fff" }; + } + return { bg: "#999", text: "#fff" }; +} + +// Helper function to get build type chip color +function getBuildTypeColor(buildType: string): string { + if (buildType === "Daily") { + return COLOR_SUCCESS; + } + if (buildType === "Nightly") { + return COLOR_WARNING; + } + return COLOR_GRAY; +} + +export default function ContinuousBuildTracker({ + data, + timeParams, +}: { + data: ContinuousBuildData[] | undefined; + timeParams: { startTime: string; stopTime: string }; +}) { + const { darkMode } = useDarkMode(); + const [selectedBuildNumber, setSelectedBuildNumber] = useState( + null + ); + + // Fetch failed jobs for selected build + const { data: failedJobsData } = useClickHouseAPIImmutable( + "vllm/build_failed_jobs", + { + repo: VLLM_REPO_URL, + pipelineName: PIPELINE_NAME, + buildNumber: selectedBuildNumber || 0, + }, + selectedBuildNumber !== null + ); + + const builds = (data || []) as ContinuousBuildData[]; + + // Auto-select first build if nothing is selected or if selected build is no longer in the list + React.useEffect(() => { + if (builds.length > 0) { + if ( + selectedBuildNumber === null || + !builds.some((b) => b.build_number === selectedBuildNumber) + ) { + setSelectedBuildNumber(builds[0].build_number); + } + } + }, [builds, selectedBuildNumber]); + + // Handle row click + function handleRowClick(buildNumber: number) { + setSelectedBuildNumber(buildNumber); + } + + const failedJobs = (failedJobsData || []) as FailedJobData[]; + const selectedBuild = builds.find( + (b) => 
b.build_number === selectedBuildNumber + ); + + return ( + + + {/* Builds table on the left */} + + + Continuous Builds + + + + + + Build # + Type + Status + Failed Jobs + Finished At + Link + + + + {builds.length === 0 && ( + + + No continuous builds found in selected time range + + + )} + {builds.map((build) => { + const stateColors = getStateColor(build.build_state); + return ( + handleRowClick(build.build_number)} + selected={selectedBuildNumber === build.build_number} + sx={{ + cursor: "pointer", + "&.Mui-selected": { + backgroundColor: darkMode + ? "rgba(144, 202, 249, 0.16)" + : "rgba(25, 118, 210, 0.12)", + }, + "&.Mui-selected:hover": { + backgroundColor: darkMode + ? "rgba(144, 202, 249, 0.24)" + : "rgba(25, 118, 210, 0.18)", + }, + }} + > + + {build.build_number} + + + + + + + + 0 + ? COLOR_ERROR + : COLOR_SUCCESS, + fontWeight: "bold", + }} + > + {build.failed_jobs_count} / {build.total_jobs} + + + {build.build_finished_at + ? dayjs(build.build_finished_at).format( + "M/D/YY h:mm A" + ) + : "-"} + + + e.stopPropagation()} + > + } + size="small" + clickable + sx={{ fontSize: "0.7rem", height: 22 }} + /> + + + + ); + })} + +
+
+
+ + {/* Failed jobs table on the right */} + + + {selectedBuild && ( + + + Failed Jobs - Build #{selectedBuild.build_number} + + + {selectedBuild.build_type} build from{" "} + {dayjs(selectedBuild.build_finished_at).format( + "M/D/YY h:mm A" + )} + + + )} + + + + + + Job Name + Duration + Exit Code + Finished At + Link + + + + {failedJobs.length === 0 && ( + + + {selectedBuild + ? selectedBuild.failed_jobs_count === 0 + ? "No failed jobs - build passed! ✓" + : "Loading failed jobs..." + : "Select a build to view failed jobs"} + + + )} + {failedJobs.map((job, idx) => ( + + + {job.job_name} + + + {formatDuration(job.duration_hours)} + + + {job.exit_status ?? "-"} + + + {job.job_finished_at + ? dayjs(job.job_finished_at).format("M/D/YY h:mm A") + : "-"} + + + + } + size="small" + clickable + sx={{ fontSize: "0.7rem", height: 22 }} + /> + + + + ))} + +
+
+
+
+
+ ); +} diff --git a/torchci/components/metrics/vllm/DockerBuildRuntimePanel.tsx b/torchci/components/metrics/vllm/DockerBuildRuntimePanel.tsx new file mode 100644 index 0000000000..0124f6c617 --- /dev/null +++ b/torchci/components/metrics/vllm/DockerBuildRuntimePanel.tsx @@ -0,0 +1,164 @@ +import dayjs from "dayjs"; +import { EChartsOption } from "echarts"; +import { useDarkMode } from "lib/DarkModeContext"; +import _ from "lodash"; +import { ChartPaper } from "./chartUtils"; +import { COLOR_SUCCESS, COLOR_WARNING } from "./constants"; + +interface DockerBuildData { + timestamp: string; + build_number: number; + runtime_minutes: number; +} + +// Helper function to format tooltip +function formatTooltip(params: any): string { + if (!params || !params.data) return ""; + + const data = params.data; + + // Handle both scatter (array) and line (object) series + let timestamp, runtime, buildNumber; + + if (Array.isArray(data)) { + timestamp = data[0]; + runtime = data[1]; + buildNumber = data[2]; + } else { + // For line series (daily average) + timestamp = data.day; + runtime = data.value; + buildNumber = null; + } + + if (!timestamp || runtime === undefined) return ""; + + const formattedTime = dayjs(timestamp).format("M/D/YY h:mm A"); + + let result = buildNumber + ? `Build #${buildNumber}
` + : `Daily Average
`; + result += `Time: ${formattedTime}
`; + result += `Runtime: ${runtime.toFixed(1)} min`; + + return result; +} + +// Helper function to handle click events +function handleBuildClick(params: any) { + if (params?.componentType === "series") { + const data = Array.isArray(params.data) ? params.data : [params.data]; + const buildNumber = data[2]; + if (buildNumber !== undefined && buildNumber !== null) { + const url = `https://buildkite.com/vllm/ci/builds/${buildNumber}/`; + if (typeof window !== "undefined") { + window.open(url, "_blank"); + } + } + } +} + +export default function DockerBuildRuntimePanel({ + data, +}: { + data: DockerBuildData[] | undefined; +}) { + const { darkMode } = useDarkMode(); + + // Process data for chart + const chartData = (data || []).map((d) => [ + dayjs(d.timestamp).toISOString(), + d.runtime_minutes, + d.build_number, + ]); + + // Calculate daily average for trend line + const groupedByDay = _.groupBy(data || [], (d) => + dayjs(d.timestamp).format("YYYY-MM-DD") + ); + + const dailyAvg = Object.entries(groupedByDay) + .map(([day, records]) => { + const avgRuntime = _.meanBy(records, "runtime_minutes"); + return { + day, + value: Number(avgRuntime.toFixed(1)), + }; + }) + .sort((a, b) => (a.day < b.day ? -1 : 1)); + + // Calculate statistics + const runtimes = (data || []).map((d) => d.runtime_minutes); + const avgRuntime = runtimes.length ? _.mean(runtimes).toFixed(1) : "N/A"; + const p90Runtime = runtimes.length + ? 
runtimes + .sort((a, b) => a - b) + [Math.floor(runtimes.length * 0.9)].toFixed(1) + : "N/A"; + + const options: EChartsOption = { + title: { + text: "Docker Build Image Runtime", + subtext: `Avg: ${avgRuntime}m | P90: ${p90Runtime}m | Total builds: ${runtimes.length}`, + textStyle: { + fontSize: 14, + }, + }, + legend: { + top: 24, + data: ["Individual Builds", "Daily Average"], + }, + grid: { top: 60, right: 20, bottom: 80, left: 60 }, + dataset: [{ source: chartData }, { source: dailyAvg }], + xAxis: { + type: "time", + axisLabel: { + hideOverlap: true, + formatter: (value: number) => dayjs(value).format("M/D"), + }, + }, + yAxis: { + type: "value", + name: "Runtime (minutes)", + nameLocation: "middle", + nameGap: 45, + nameRotate: 90, + axisLabel: { + formatter: (value: number) => `${value}m`, + }, + }, + series: [ + { + name: "Individual Builds", + type: "scatter", + datasetIndex: 0, + symbolSize: 6, + itemStyle: { color: COLOR_SUCCESS, opacity: 0.6 }, + }, + { + name: "Daily Average", + type: "line", + datasetIndex: 1, + smooth: true, + encode: { x: "day", y: "value" }, + lineStyle: { color: COLOR_WARNING, width: 2 }, + itemStyle: { color: COLOR_WARNING }, + showSymbol: true, + symbolSize: 4, + }, + ], + tooltip: { + trigger: "item", + formatter: formatTooltip, + }, + }; + + return ( + + ); +} diff --git a/torchci/components/metrics/vllm/DurationDistributionPanel.tsx b/torchci/components/metrics/vllm/DurationDistributionPanel.tsx index b4f9e4ba65..bf261e365d 100644 --- a/torchci/components/metrics/vllm/DurationDistributionPanel.tsx +++ b/torchci/components/metrics/vllm/DurationDistributionPanel.tsx @@ -164,28 +164,11 @@ export default function DurationDistributionPanel({ axisPointer: { type: "shadow" }, formatter: formatDistributionTooltip, }, - dataZoom: [ - { - type: "slider", - show: true, - xAxisIndex: 0, - bottom: 0, - start: 0, - end: 100, - height: 25, - }, - { - type: "inside", - xAxisIndex: 0, - start: 0, - end: 100, - }, - ], }; return ( diff --git 
a/torchci/components/metrics/vllm/JobBuildsPanel.tsx b/torchci/components/metrics/vllm/JobBuildsPanel.tsx new file mode 100644 index 0000000000..a19f2b799d --- /dev/null +++ b/torchci/components/metrics/vllm/JobBuildsPanel.tsx @@ -0,0 +1,449 @@ +import OpenInNewIcon from "@mui/icons-material/OpenInNew"; +import { + Box, + Chip, + Link, + Paper, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + TableSortLabel, + TextField, + Tooltip, +} from "@mui/material"; +import dayjs from "dayjs"; +import { useDarkMode } from "lib/DarkModeContext"; +import { useClickHouseAPIImmutable } from "lib/GeneralUtils"; +import React, { useState } from "react"; +import { + COLOR_ERROR, + COLOR_GRAY, + COLOR_SUCCESS, + COLOR_WARNING, + PIPELINE_NAME, + VLLM_REPO_URL, +} from "./constants"; + +interface JobListData { + job_name: string; + total_runs: number; + passed_count: number; + failed_count: number; + last_run_at: string; +} + +interface RecentBuildData { + build_number: number; + build_id: string; + build_state: string; + build_url: string; + build_started_at: string | null; + build_finished_at: string | null; + commit: string; + commit_message: string; + job_name: string; + job_state: string; + soft_failed: boolean; + job_started_at: string | null; + job_finished_at: string | null; + job_url: string; + duration_hours: number | null; +} + +type JobSortField = + | "job_name" + | "total_runs" + | "passed_count" + | "failed_count" + | "last_run_at"; +type SortOrder = "asc" | "desc"; + +// Helper function to format duration +function formatDuration(hours: number | null | undefined): string { + if (hours === null || hours === undefined) return "-"; + if (hours < 1) { + return `${(hours * 60).toFixed(0)}m`; + } + return `${hours.toFixed(2)}h`; +} + +// Helper function to get state color +function getStateColor( + state: string, + softFailed: boolean +): { bg: string; text: string } { + const stateLower = state.toLowerCase(); + if ( + stateLower === "passed" 
|| + stateLower === "finished" || + stateLower === "success" + ) { + return { bg: COLOR_SUCCESS, text: "#fff" }; + } + if (stateLower === "failed") { + if (softFailed) { + return { bg: COLOR_WARNING, text: "#fff" }; + } + return { bg: COLOR_ERROR, text: "#fff" }; + } + if (stateLower === "canceled" || stateLower === "cancelled") { + return { bg: COLOR_GRAY, text: "#fff" }; + } + return { bg: "#999", text: "#fff" }; +} + +// Helper function to get state label +function getStateLabel(state: string, softFailed: boolean): string { + const stateLower = state.toLowerCase(); + if (stateLower === "failed" && softFailed) { + return "Soft Failed"; + } + return state.charAt(0).toUpperCase() + state.slice(1).toLowerCase(); +} + +export default function JobBuildsPanel({ + data, + timeParams, + jobGroups, +}: { + data: JobListData[] | undefined; + timeParams: { startTime: string; stopTime: string }; + jobGroups: string[]; +}) { + const { darkMode } = useDarkMode(); + const [sortField, setSortField] = useState("last_run_at"); + const [sortOrder, setSortOrder] = useState("desc"); + const [searchQuery, setSearchQuery] = useState(""); + const [selectedJob, setSelectedJob] = useState(null); + + // Fetch recent builds for selected job + const { data: recentBuildsData } = useClickHouseAPIImmutable( + "vllm/recent_job_builds", + { + ...timeParams, + repo: VLLM_REPO_URL, + pipelineName: PIPELINE_NAME, + jobName: selectedJob || "", + }, + selectedJob !== null + ); + + // Filter by search query + const filteredJobs = (data || []).filter((job) => + job.job_name.toLowerCase().includes(searchQuery.toLowerCase()) + ); + + // Sort the filtered data + const sortedJobs = [...filteredJobs].sort((a, b) => { + let aValue: number | string = a[sortField]; + let bValue: number | string = b[sortField]; + + if (sortField === "job_name") { + aValue = (aValue as string).toLowerCase(); + bValue = (bValue as string).toLowerCase(); + return sortOrder === "asc" + ? aValue < bValue + ? 
-1 + : 1 + : aValue > bValue + ? -1 + : 1; + } + + return sortOrder === "asc" + ? (aValue as number) - (bValue as number) + : (bValue as number) - (aValue as number); + }); + + // Auto-select first job if nothing is selected or if selected job is no longer in the list + React.useEffect(() => { + if (sortedJobs.length > 0) { + if (!selectedJob || !sortedJobs.some((j) => j.job_name === selectedJob)) { + setSelectedJob(sortedJobs[0].job_name); + } + } + }, [sortedJobs, selectedJob]); + + // Handle sort request + function handleSort(field: JobSortField) { + if (sortField === field) { + setSortOrder(sortOrder === "asc" ? "desc" : "asc"); + } else { + setSortField(field); + setSortOrder("desc"); + } + } + + // Handle row click + function handleRowClick(jobName: string) { + setSelectedJob(jobName); + } + + const recentBuilds = (recentBuildsData || []) as RecentBuildData[]; + + return ( + + + {/* Jobs table on the left */} + + setSearchQuery(e.target.value)} + sx={{ mb: 1 }} + fullWidth + /> + + + + + + handleSort("job_name")} + > + Job Name + + + + handleSort("total_runs")} + > + Runs + + + + handleSort("passed_count")} + > + ✓ + + + + handleSort("failed_count")} + > + ✗ + + + + + + {sortedJobs.map((job) => ( + handleRowClick(job.job_name)} + selected={selectedJob === job.job_name} + sx={{ + cursor: "pointer", + "&.Mui-selected": { + backgroundColor: darkMode + ? "rgba(144, 202, 249, 0.16)" + : "rgba(25, 118, 210, 0.12)", + }, + "&.Mui-selected:hover": { + backgroundColor: darkMode + ? "rgba(144, 202, 249, 0.24)" + : "rgba(25, 118, 210, 0.18)", + }, + }} + > + + {job.job_name} + + {job.total_runs} + + {job.passed_count} + + + {job.failed_count} + + + ))} + +
+
+
+ + {/* Recent builds table on the right */} + + + {selectedJob && ( + + Recent Builds: {selectedJob} + + )} + + + + + + Build # + Status + Duration + Finished At + Commit + Links + + + + {recentBuilds.length === 0 && ( + + + {selectedJob + ? "No recent builds found" + : "Select a job to view builds"} + + + )} + {recentBuilds.map((build) => { + const stateColors = getStateColor( + build.job_state, + build.soft_failed + ); + return ( + + + {build.build_number} + + + + + + {formatDuration(build.duration_hours)} + + + {build.job_finished_at + ? dayjs(build.job_finished_at).format("M/D/YY h:mm A") + : "-"} + + + + + {build.commit.slice(0, 7)} + + + + + + + } + size="small" + clickable + sx={{ fontSize: "0.7rem", height: 22 }} + /> + + + } + size="small" + clickable + sx={{ fontSize: "0.7rem", height: 22 }} + /> + + + + + ); + })} + +
+
+
+
+
+ ); +} diff --git a/torchci/components/metrics/vllm/JobRuntimePanel.tsx b/torchci/components/metrics/vllm/JobRuntimePanel.tsx new file mode 100644 index 0000000000..80ed0c5dc1 --- /dev/null +++ b/torchci/components/metrics/vllm/JobRuntimePanel.tsx @@ -0,0 +1,397 @@ +import { + Box, + Paper, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + TableSortLabel, + TextField, +} from "@mui/material"; +import dayjs from "dayjs"; +import { EChartsOption } from "echarts"; +import ReactECharts from "echarts-for-react"; +import { useDarkMode } from "lib/DarkModeContext"; +import React, { useState } from "react"; +import { getCrosshairTooltipConfig, getReactEChartsProps } from "./chartUtils"; +import { COLOR_SUCCESS, COLOR_WARNING } from "./constants"; + +interface JobRuntimeData { + job_name: string; + date: string; + count: number; + mean_runtime_minutes: number; + p90_runtime_minutes: number; + max_runtime_minutes: number; +} + +interface JobAggregatedStats { + job_name: string; + count: number; + mean: number; + p90: number; + max: number; +} + +type SortField = "job_name" | "count" | "mean" | "p90" | "max"; +type SortOrder = "asc" | "desc"; + +// Helper function to aggregate job statistics across all dates +function aggregateJobStats(data: JobRuntimeData[]): JobAggregatedStats[] { + const jobMap = new Map(); + + // Group by job name + data.forEach((row) => { + if (!jobMap.has(row.job_name)) { + jobMap.set(row.job_name, []); + } + jobMap.get(row.job_name)!.push(row); + }); + + // Aggregate statistics + const result: JobAggregatedStats[] = []; + jobMap.forEach((rows, jobName) => { + const totalCount = rows.reduce((sum, r) => sum + r.count, 0); + const avgMean = + rows.reduce((sum, r) => sum + r.mean_runtime_minutes * r.count, 0) / + totalCount; + const avgP90 = + rows.reduce((sum, r) => sum + r.p90_runtime_minutes * r.count, 0) / + totalCount; + const overallMax = Math.max(...rows.map((r) => r.max_runtime_minutes)); + + result.push({ + job_name: 
jobName, + count: totalCount, + mean: avgMean, + p90: avgP90, + max: overallMax, + }); + }); + + return result; +} + +// Helper function to format runtime with unit +function formatRuntime(minutes: number | null | undefined): string { + if (minutes === null || minutes === undefined) return "-"; + return minutes.toFixed(1) + "m"; +} + +// Helper function to format tooltip +function formatChartTooltip(params: any): string { + if (!Array.isArray(params) || params.length === 0) return ""; + + const date = params[0].axisValue; + let result = `${date}
`; + + params.forEach((p: any) => { + if (p.value !== undefined && p.value !== null) { + result += `${p.marker} ${p.seriesName}: ${p.value.toFixed( + 1 + )}m
`; + } + }); + + return result; +} + +// Helper function to get line chart series +function getLineSeries( + dates: string[], + meanData: number[], + p90Data: number[] +): any[] { + return [ + { + name: "Mean Runtime", + type: "line", + data: meanData, + smooth: true, + symbol: "circle", + symbolSize: 6, + itemStyle: { color: COLOR_SUCCESS }, + lineStyle: { width: 2 }, + emphasis: { focus: "series" }, + }, + { + name: "P90 Runtime", + type: "line", + data: p90Data, + smooth: true, + symbol: "diamond", + symbolSize: 7, + itemStyle: { color: COLOR_WARNING }, + lineStyle: { width: 2, type: "dashed" }, + emphasis: { focus: "series" }, + }, + ]; +} + +export default function JobRuntimePanel({ + data, +}: { + data: JobRuntimeData[] | undefined; +}) { + const { darkMode } = useDarkMode(); + const [sortField, setSortField] = useState("mean"); + const [sortOrder, setSortOrder] = useState("desc"); + const [searchQuery, setSearchQuery] = useState(""); + const [selectedJob, setSelectedJob] = useState(null); + + // Aggregate statistics for the table + const aggregatedStats = aggregateJobStats(data || []); + + // Filter by search query + const filteredStats = aggregatedStats.filter((job) => + job.job_name.toLowerCase().includes(searchQuery.toLowerCase()) + ); + + // Sort the filtered data + const sortedStats = [...filteredStats].sort((a, b) => { + let aValue: number | string = a[sortField]; + let bValue: number | string = b[sortField]; + + if (sortField === "job_name") { + aValue = (aValue as string).toLowerCase(); + bValue = (bValue as string).toLowerCase(); + return sortOrder === "asc" + ? aValue < bValue + ? -1 + : 1 + : aValue > bValue + ? -1 + : 1; + } + + return sortOrder === "asc" + ? 
(aValue as number) - (bValue as number) + : (bValue as number) - (aValue as number); + }); + + // Auto-select first job if nothing is selected or if selected job is no longer in the list + React.useEffect(() => { + if (sortedStats.length > 0) { + if ( + !selectedJob || + !sortedStats.some((s) => s.job_name === selectedJob) + ) { + setSelectedJob(sortedStats[0].job_name); + } + } + }, [sortedStats, selectedJob]); + + // Handle sort request + function handleSort(field: SortField) { + if (sortField === field) { + setSortOrder(sortOrder === "asc" ? "desc" : "asc"); + } else { + setSortField(field); + setSortOrder("desc"); + } + } + + // Handle row click + function handleRowClick(jobName: string) { + setSelectedJob(jobName); + } + + // Prepare chart data for selected job + const selectedJobData = + selectedJob && data + ? data + .filter((d) => d.job_name === selectedJob) + .sort((a, b) => a.date.localeCompare(b.date)) + : []; + + const chartDates = selectedJobData.map((d) => dayjs(d.date).format("MMM D")); + const chartMeanData = selectedJobData.map((d) => d.mean_runtime_minutes); + const chartP90Data = selectedJobData.map((d) => d.p90_runtime_minutes); + + const chartOptions: EChartsOption = { + title: { + text: selectedJob ? "Runtime Trend" : "Select a job to view", + subtext: selectedJob || "Click a row in the table", + textStyle: { + fontSize: 14, + }, + subtextStyle: { + fontSize: 16, + fontWeight: "bold", + color: darkMode ? "#fff" : "#333", + }, + }, + legend: { + top: 40, + data: ["Mean Runtime", "P90 Runtime"], + }, + grid: { top: 80, right: 20, bottom: 60, left: 60 }, + xAxis: { + type: "category", + data: chartDates, + name: "Date", + nameLocation: "middle", + nameGap: 35, + axisLabel: { + rotate: 45, + fontSize: 10, + }, + }, + yAxis: { + type: "value", + name: "Runtime (minutes)", + nameLocation: "middle", + nameGap: 45, + axisLabel: { + formatter: (value: number) => `${value}m`, + }, + }, + series: + selectedJobData.length > 0 + ? 
getLineSeries(chartDates, chartMeanData, chartP90Data) + : [], + tooltip: getCrosshairTooltipConfig(darkMode, formatChartTooltip), + }; + + return ( + + + {/* Table on the left */} + + setSearchQuery(e.target.value)} + sx={{ mb: 1 }} + fullWidth + /> + + + + + + handleSort("job_name")} + > + Job Name + + + + handleSort("count")} + > + Count + + + + handleSort("mean")} + > + Mean + + + + handleSort("p90")} + > + P90 + + + + handleSort("max")} + > + Max + + + + + + {sortedStats.map((job) => ( + handleRowClick(job.job_name)} + selected={selectedJob === job.job_name} + sx={{ + cursor: "pointer", + "&.Mui-selected": { + backgroundColor: darkMode + ? "rgba(144, 202, 249, 0.16)" + : "rgba(25, 118, 210, 0.12)", + }, + "&.Mui-selected:hover": { + backgroundColor: darkMode + ? "rgba(144, 202, 249, 0.24)" + : "rgba(25, 118, 210, 0.18)", + }, + }} + > + + {job.job_name} + + {job.count} + + {formatRuntime(job.mean)} + + + {formatRuntime(job.p90)} + + + {formatRuntime(job.max)} + + + ))} + +
+
+
+ + {/* Chart on the right */} + + + +
+
+ ); +} diff --git a/torchci/components/metrics/vllm/constants.ts b/torchci/components/metrics/vllm/constants.ts index c1e85785e5..c5a5409904 100644 --- a/torchci/components/metrics/vllm/constants.ts +++ b/torchci/components/metrics/vllm/constants.ts @@ -1,6 +1,60 @@ -// Shared color constants for vLLM metrics charts +// Shared constants for vLLM metrics dashboard and charts -// Data visualization colors +// ============================================================================ +// Layout Constants +// ============================================================================ +export const ROW_HEIGHT = 375; +export const METRIC_CARD_HEIGHT = 200; +export const JOB_RUNTIME_PANEL_HEIGHT = ROW_HEIGHT + 150; +export const JOB_BUILDS_PANEL_HEIGHT = 600; + +// ============================================================================ +// Repository Constants +// ============================================================================ +export const VLLM_REPO_URL = "https://github.com/vllm-project/vllm.git"; +export const VLLM_REPO_SHORT = "vllm-project/vllm"; +export const PIPELINE_NAME = "CI"; + +// ============================================================================ +// Query Parameter Defaults +// ============================================================================ +export const DEFAULT_MIN_RUNS_RETRY_STATS = 5; +export const DEFAULT_MIN_RUNS_JOB_RELIABILITY = 3; + +// ============================================================================ +// Tab Styling Configuration +// ============================================================================ +export const TAB_CONFIG = { + containerSx: (darkMode: boolean) => ({ + borderBottom: 2, + borderColor: "divider", + mb: 3, + mt: 2, + bgcolor: darkMode ? 
"rgba(255, 255, 255, 0.05)" : "rgba(0, 0, 0, 0.02)", + borderRadius: "8px 8px 0 0", + px: 2, + }), + tabsSx: { + "& .MuiTab-root": { + fontSize: "1rem", + fontWeight: 600, + minHeight: 56, + textTransform: "none", + px: 3, + }, + "& .Mui-selected": { + fontWeight: 700, + }, + }, + indicatorSx: { + height: 3, + borderRadius: "3px 3px 0 0", + }, +}; + +// ============================================================================ +// Data Visualization Colors +// ============================================================================ export const COLOR_SUCCESS = "#3ba272"; // Green - for successful/passing states export const COLOR_ERROR = "#ee6666"; // Red - for failures/errors export const COLOR_WARNING = "#fc9403"; // Orange - for warnings/manual actions diff --git a/torchci/pages/metrics/vllm.tsx b/torchci/pages/metrics/vllm.tsx index 5ab03bd87c..d93972f792 100644 --- a/torchci/pages/metrics/vllm.tsx +++ b/torchci/pages/metrics/vllm.tsx @@ -7,15 +7,22 @@ import { Link, Skeleton, Stack, + Tab, + Tabs, Typography, } from "@mui/material"; import CiDurationsPanel from "components/metrics/vllm/CiDurationsPanel"; +import CiStabilityTrendPanel from "components/metrics/vllm/CiStabilityTrendPanel"; import CommitsOnRedTrendPanel from "components/metrics/vllm/CommitsOnRedTrendPanel"; +import ContinuousBuildTracker from "components/metrics/vllm/ContinuousBuildTracker"; +import DockerBuildRuntimePanel from "components/metrics/vllm/DockerBuildRuntimePanel"; import DurationDistributionPanel from "components/metrics/vllm/DurationDistributionPanel"; +import JobBuildsPanel from "components/metrics/vllm/JobBuildsPanel"; import JobGroupFilter, { JobGroup, } from "components/metrics/vllm/JobGroupFilter"; import JobReliabilityPanel from "components/metrics/vllm/JobReliabilityPanel"; +import JobRuntimePanel from "components/metrics/vllm/JobRuntimePanel"; import MergesPanel from "components/metrics/vllm/MergesPanel"; import MostRetriedJobsTable from 
"components/metrics/vllm/MostRetriedJobsTable"; import QueueWaitPerBuildPanel from "components/metrics/vllm/QueueWaitPerBuildPanel"; @@ -32,6 +39,18 @@ import { VllmDualScalarPanel, VllmScalarPanel, } from "components/metrics/vllm/VllmScalarPanel"; +import { + DEFAULT_MIN_RUNS_JOB_RELIABILITY, + DEFAULT_MIN_RUNS_RETRY_STATS, + JOB_BUILDS_PANEL_HEIGHT, + JOB_RUNTIME_PANEL_HEIGHT, + METRIC_CARD_HEIGHT, + PIPELINE_NAME, + ROW_HEIGHT, + TAB_CONFIG, + VLLM_REPO_SHORT, + VLLM_REPO_URL, +} from "components/metrics/vllm/constants"; import dayjs from "dayjs"; import { useDarkMode } from "lib/DarkModeContext"; import { useClickHouseAPIImmutable } from "lib/GeneralUtils"; @@ -39,11 +58,6 @@ import _ from "lodash"; import React, { useState } from "react"; import { TimeRangePicker } from "../metrics"; -const ROW_HEIGHT = 375; -const METRIC_CARD_HEIGHT = 200; // Height for key metric cards (reduced by ~20% from default) - -// moved MergesPanel and CiDurationsPanel to components - // Helper function to safely extract PR cycle data values function getPrCycleValue( data: any[] | undefined, @@ -218,6 +232,11 @@ export default function Page() { "torch_nightly", "main", ]); + const [selectedTab, setSelectedTab] = useState(0); + + const handleTabChange = (_: React.SyntheticEvent, newValue: number) => { + setSelectedTab(newValue); + }; const timeParams = { startTime: startTime.utc().format("YYYY-MM-DDTHH:mm:ss.SSS"), @@ -239,7 +258,7 @@ export default function Page() { { ...timeParams, granularity: "day", - repo: "vllm-project/vllm", + repo: VLLM_REPO_SHORT, } ); @@ -247,9 +266,8 @@ export default function Page() { "vllm/ci_run_duration", { ...timeParams, - // Buildkite uses full repo URL with .git in vLLM dataset - repo: "https://github.com/vllm-project/vllm.git", - pipelineName: "CI", + repo: VLLM_REPO_URL, + pipelineName: PIPELINE_NAME, } ); @@ -257,8 +275,8 @@ export default function Page() { "vllm/ci_run_duration", { ...prevTimeParams, - repo: 
"https://github.com/vllm-project/vllm.git", - pipelineName: "CI", + repo: VLLM_REPO_URL, + pipelineName: PIPELINE_NAME, } ); @@ -322,7 +340,7 @@ export default function Page() { "vllm/pr_cycle_time_breakdown", { ...timeParams, - repo: "vllm-project/vllm", + repo: VLLM_REPO_SHORT, } ); @@ -330,7 +348,7 @@ export default function Page() { "vllm/pr_cycle_time_breakdown", { ...prevTimeParams, - repo: "vllm-project/vllm", + repo: VLLM_REPO_SHORT, } ); @@ -339,8 +357,8 @@ export default function Page() { { ...timeParams, granularity: "day", - repo: "https://github.com/vllm-project/vllm.git", - pipelineName: "CI", + repo: VLLM_REPO_URL, + pipelineName: PIPELINE_NAME, jobGroups: selectedJobGroups, } ); @@ -348,8 +366,8 @@ export default function Page() { const { data: retryData } = useClickHouseAPIImmutable("vllm/rebuild_rate", { ...timeParams, granularity: "day", - repo: "https://github.com/vllm-project/vllm.git", - pipelineName: "CI", + repo: VLLM_REPO_URL, + pipelineName: PIPELINE_NAME, jobGroups: selectedJobGroups, }); @@ -357,9 +375,9 @@ export default function Page() { "vllm/job_retry_stats", { ...timeParams, - repo: "https://github.com/vllm-project/vllm.git", - pipelineName: "CI", - minRuns: 5, + repo: VLLM_REPO_URL, + pipelineName: PIPELINE_NAME, + minRuns: DEFAULT_MIN_RUNS_RETRY_STATS, jobGroups: selectedJobGroups, } ); @@ -368,20 +386,54 @@ export default function Page() { "vllm/job_reliability", { ...timeParams, - repo: "https://github.com/vllm-project/vllm.git", - pipelineName: "CI", - minRuns: 3, + repo: VLLM_REPO_URL, + pipelineName: PIPELINE_NAME, + minRuns: DEFAULT_MIN_RUNS_JOB_RELIABILITY, + jobGroups: selectedJobGroups, + } + ); + + const { data: jobRuntimeTrendsData } = useClickHouseAPIImmutable( + "vllm/job_runtime_trends", + { + ...timeParams, + repo: VLLM_REPO_URL, jobGroups: selectedJobGroups, } ); + const { data: jobListData } = useClickHouseAPIImmutable("vllm/job_list", { + ...timeParams, + repo: VLLM_REPO_URL, + pipelineName: PIPELINE_NAME, + 
jobGroups: selectedJobGroups, + }); + + const { data: continuousBuildsData } = useClickHouseAPIImmutable( + "vllm/continuous_builds", + { + ...timeParams, + repo: VLLM_REPO_URL, + pipelineName: PIPELINE_NAME, + } + ); + + const { data: dockerBuildRuntimeData } = useClickHouseAPIImmutable( + "vllm/docker_build_runtime", + { + ...timeParams, + repo: VLLM_REPO_URL, + jobName: ":docker: build image", + } + ); + const { data: trunkHealthData } = useClickHouseAPIImmutable( "vllm/trunk_health", { ...timeParams, granularity: "day", - repo: "https://github.com/vllm-project/vllm.git", - pipelineName: "CI", + repo: VLLM_REPO_URL, + pipelineName: PIPELINE_NAME, jobGroups: selectedJobGroups, } ); @@ -390,8 +442,8 @@ export default function Page() { "vllm/trunk_recovery_time", { ...timeParams, - repo: "https://github.com/vllm-project/vllm.git", - pipelineName: "CI", + repo: VLLM_REPO_URL, + pipelineName: PIPELINE_NAME, jobGroups: selectedJobGroups, } ); @@ -402,8 +454,8 @@ export default function Page() { { ...prevTimeParams, granularity: "day", - repo: "https://github.com/vllm-project/vllm.git", - pipelineName: "CI", + repo: VLLM_REPO_URL, + pipelineName: PIPELINE_NAME, jobGroups: selectedJobGroups, } ); @@ -413,8 +465,8 @@ export default function Page() { { ...prevTimeParams, granularity: "day", - repo: "https://github.com/vllm-project/vllm.git", - pipelineName: "CI", + repo: VLLM_REPO_URL, + pipelineName: PIPELINE_NAME, jobGroups: selectedJobGroups, } ); @@ -424,7 +476,7 @@ export default function Page() { { ...prevTimeParams, granularity: "day", - repo: "vllm-project/vllm", + repo: VLLM_REPO_SHORT, } ); @@ -521,6 +573,60 @@ export default function Page() { ? null : 1 - trunkHealthPct; + // Calculate CI health volatility metrics + // Volatility = standard deviation of daily trunk health percentages + const dailyHealthPercentages = + trunkHealthData === undefined + ? 
undefined + : Object.entries(buildsByDay).map(([day, builds]) => { + const sortedBuilds = _.sortBy(builds, "build_started_at"); + const mostRecent = sortedBuilds[sortedBuilds.length - 1]; + return mostRecent?.is_green === 1 ? 1.0 : 0.0; + }); + + const ciHealthVolatility = + dailyHealthPercentages === undefined + ? undefined + : dailyHealthPercentages.length === 0 + ? null + : (() => { + const mean = _.mean(dailyHealthPercentages); + const squaredDiffs = dailyHealthPercentages.map((x) => + Math.pow(x - mean, 2) + ); + const variance = _.mean(squaredDiffs); + return Math.sqrt(variance); + })(); + + // Count state transitions (green->red or red->green) + const stateTransitions = + dailyHealthPercentages === undefined + ? undefined + : dailyHealthPercentages.length <= 1 + ? 0 + : dailyHealthPercentages.reduce((count: number, current, index) => { + if (index === 0) return 0; + const previous = dailyHealthPercentages[index - 1]; + return current !== previous ? count + 1 : count; + }, 0); + + // Calculate stability score (lower volatility + fewer transitions = higher score) + // Score from 0-100, where 100 is perfect stability + const ciStabilityScore = + ciHealthVolatility === undefined || stateTransitions === undefined + ? undefined + : ciHealthVolatility === null || stateTransitions === null + ? null + : (() => { + const volatilityPenalty = ciHealthVolatility * 50; // 0-50 penalty + const transitionPenalty = + Math.min( + stateTransitions / (dailyHealthPercentages?.length || 1), + 1 + ) * 50; // 0-50 penalty + return Math.max(0, 100 - volatilityPenalty - transitionPenalty) / 100; + })(); + // Calculate previous period metrics for deltas const prevReliabilityPoints = (prevReliabilityData || []) as any[]; const prevTotalPassed = _.sumBy(prevReliabilityPoints, "passed_count"); @@ -560,6 +666,56 @@ export default function Page() { ? 
null : 1 - prevTrunkHealthPct; + // Calculate previous period volatility metrics + const prevDailyHealthPercentages = + prevTrunkHealthData === undefined + ? undefined + : Object.entries(prevBuildsByDay).map(([day, builds]) => { + const sortedBuilds = _.sortBy(builds, "build_started_at"); + const mostRecent = sortedBuilds[sortedBuilds.length - 1]; + return mostRecent?.is_green === 1 ? 1.0 : 0.0; + }); + + const prevCiHealthVolatility = + prevDailyHealthPercentages === undefined + ? undefined + : prevDailyHealthPercentages.length === 0 + ? null + : (() => { + const mean = _.mean(prevDailyHealthPercentages); + const squaredDiffs = prevDailyHealthPercentages.map((x) => + Math.pow(x - mean, 2) + ); + const variance = _.mean(squaredDiffs); + return Math.sqrt(variance); + })(); + + const prevStateTransitions = + prevDailyHealthPercentages === undefined + ? undefined + : prevDailyHealthPercentages.length <= 1 + ? 0 + : prevDailyHealthPercentages.reduce((count: number, current, index) => { + if (index === 0) return 0; + const previous = prevDailyHealthPercentages[index - 1]; + return current !== previous ? count + 1 : count; + }, 0); + + const prevCiStabilityScore = + prevCiHealthVolatility === undefined || prevStateTransitions === undefined + ? undefined + : prevCiHealthVolatility === null || prevStateTransitions === null + ? null + : (() => { + const volatilityPenalty = prevCiHealthVolatility * 50; + const transitionPenalty = + Math.min( + prevStateTransitions / (prevDailyHealthPercentages?.length || 1), + 1 + ) * 50; + return Math.max(0, 100 - volatilityPenalty - transitionPenalty) / 100; + })(); + const prevManualMergedFailures = prevMergesData === undefined || prevMergesData.length === 0 ? 
0 @@ -627,6 +783,12 @@ export default function Page() { prevManualMergedPct ); + // Calculate deltas for volatility metrics + const ciStabilityScoreDelta = calculateDelta( + ciStabilityScore, + prevCiStabilityScore + ); + // Calculate deltas for time to first review const prevTimeToReviewP50 = getPrCycleValue( prevPrCycleData, @@ -673,7 +835,7 @@ export default function Page() { : _.meanBy(recoveryTimes, "recovery_hours"); return ( -
+ - {/* Section 1: Key Metrics Summary Cards */} + {/* Overview - Always Visible */} Key Metrics Overview @@ -760,6 +922,21 @@ export default function Page() { }, ]} /> + (v ?? 1) < 0.7, + tooltip: + "Measures consistency of trunk health over time (0-100%). Penalizes both volatility (daily health swings) and frequent state changes (green↔red flips). Higher is better. Low scores indicate unpredictable CI that frequently oscillates between passing and failing.", + delta: ciStabilityScoreDelta, + }, + ]} + /> - {/* Section 2: CI Reliability */} - - - CI Reliability - - - - (v ?? 1) < 0.85, - tooltip: - "Percentage of main branch builds with zero hard test failures. Builds with only soft failures (flaky tests) count as passed. Canceled builds excluded from calculation.", - delta: overallSuccessRateDelta, - }, - ]} - /> - (v ?? 0) > 10, - tooltip: - "Count of main branch CI runs with hard test failures (soft failures excluded) in selected time period.", - delta: totalFailedDelta, - }, - ]} - /> - (v ?? 0) > 0.01, - tooltip: - "Percentage of jobs that were manually or automatically retried. Low values (<1%) indicate stable infrastructure. High values may indicate flaky tests or infrastructure issues.", - delta: null, // TODO: Add delta when we have previous retry data - }, - ]} - /> - - - - - - - - - - - - - - - - - - - - - - - - - - - - Trunk Health - - - - (v ?? 0) > 12, - tooltip: - "Average time trunk stays broken before being fixed. Measured from when trunk first breaks (success→failure) to when it's fixed (failure→success). Includes nights, weekends, and investigation time. Lower is better.", - delta: null, // TODO: Calculate when we have previous recovery data - }, - ]} - /> - - - - - - - - - - - - - - - - - + {/* Tabs for detailed sections */} + + + + + + + + + - {/* Section 3: CI Duration Analysis */} - - - CI Duration Analysis - - - - - - - - - - - - - - - - {/* Section 3b: Queue Utilization & Cost */} - - - Queue Utilization & Cost - - - - {isQueueLoading ? 
( - <> + {/* Tab 0: Reliability */} + {selectedTab === 0 && ( + <> + + (v ?? 1) < 0.85, + tooltip: + "Percentage of main branch builds with zero hard test failures. Builds with only soft failures (flaky tests) count as passed. Canceled builds excluded from calculation.", + delta: overallSuccessRateDelta, + }, + ]} + /> + (v ?? 0) > 10, + tooltip: + "Count of main branch CI runs with hard test failures (soft failures excluded) in selected time period.", + delta: totalFailedDelta, + }, + ]} + /> + + (v ?? 0) > (dailyHealthPercentages?.length || 1) * 0.3, + tooltip: + "Number of times trunk flipped between green (healthy) and red (broken) states. Lower is better. High values indicate frequent CI instability. Calculated over the selected time period.", + delta: null, + }, + ]} + /> + (v ?? 0) > 0.01, + tooltip: + "Percentage of jobs that were manually or automatically retried. Low values (<1%) indicate stable infrastructure. High values may indicate flaky tests or infrastructure issues.", + delta: null, // TODO: Add delta when we have previous retry data + }, + ]} + /> + (v ?? 0) > 12, + tooltip: + "Average time trunk stays broken before being fixed. Measured from when trunk first breaks (success→failure) to when it's fixed (failure→success). Includes nights, weekends, and investigation time. Lower is better.", + delta: null, // TODO: Calculate when we have previous recovery data + }, + ]} + /> + + - + - + - - ) : ( - <> + + - + - + - - )} - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + )} - {/* Section 4: PR Cycle Metrics */} - - - PR Cycle Metrics - - - - (v ?? 0) > 0.5, - tooltip: - "Percentage of merged PRs where a human clicked 'Merge' button instead of using GitHub auto-merge. Includes both clean manual merges AND force merges. High values may indicate slow merge queues or low CI trust.", - delta: manualMergedPctDelta, - }, - ]} - /> - (v ?? 0) > 24, - badThreshold2: (v) => (v ?? 
0) > 72, - tooltip: - "Time from PR ready (labeled 'ready' or created) to first human review comment. P50 = median, P90 = 90th percentile. Excludes bot reviews.", - delta: timeToReviewP50Delta, - delta2: timeToReviewP90Delta, - }, - ]} - /> - (v ?? 0) > 48, - badThreshold2: (v) => (v ?? 0) > 120, - tooltip: - "Time from first human review to first approval from a maintainer (MEMBER/OWNER/COLLABORATOR). P50 = median, P90 = 90th percentile.", - delta: timeToApprovalP50Delta, - delta2: timeToApprovalP90Delta, - }, - ]} - /> - - - - - - -
+ {/* Tab 1: Duration Analysis */} + {selectedTab === 1 && ( + <> + + + + + + + + + + + + + + + + + + + + + + + )} + + {/* Tab 2: Source Control */} + {selectedTab === 2 && ( + <> + + (v ?? 0) > 0.5, + tooltip: + "Percentage of merged PRs where a human clicked 'Merge' button instead of using GitHub auto-merge. Includes both clean manual merges AND force merges. High values may indicate slow merge queues or low CI trust.", + delta: manualMergedPctDelta, + }, + ]} + /> + (v ?? 0) > 24, + badThreshold2: (v) => (v ?? 0) > 72, + tooltip: + "Time from PR ready (labeled 'ready' or created) to first human review comment. P50 = median, P90 = 90th percentile. Excludes bot reviews.", + delta: timeToReviewP50Delta, + delta2: timeToReviewP90Delta, + }, + ]} + /> + (v ?? 0) > 48, + badThreshold2: (v) => (v ?? 0) > 120, + tooltip: + "Time from first human review to first approval from a maintainer (MEMBER/OWNER/COLLABORATOR). P50 = median, P90 = 90th percentile.", + delta: timeToApprovalP50Delta, + delta2: timeToApprovalP90Delta, + }, + ]} + /> + + + + + + + + )} + + {/* Tab 3: Utilization & Cost */} + {selectedTab === 3 && ( + <> + + {isQueueLoading ? ( + <> + + + + + + + + ) : ( + <> + + + + + + + + )} + + + )} + + {/* Tab 4: CI Builds */} + {selectedTab === 4 && ( + <> + + + + + + + + + + + + )} + ); }