Skip to content

Commit 51c52bc

Browse files
committed
[CI] Improve CI with testing for opencl and ptx, also add rerun bot
1 parent e6735f9 commit 51c52bc

File tree

5 files changed

+296
-76
lines changed

5 files changed

+296
-76
lines changed

.github/workflows/build-and-run.yml

Lines changed: 107 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -5,48 +5,66 @@ on:
55
branches: [ main ]
66
pull_request:
77
branches: [ main ]
8-
types: [opened, synchronize, reopened]
8+
types: [opened, synchronize, reopened]
99

10+
env:
11+
JAVA_HOME: /opt/jenkins/jdks/graal-23.1.0/jdk-21.0.3
12+
TORNADO_ROOT: ${{ github.workspace }}/GPULlama3.java/external/tornadovm
13+
LLAMA_ROOT: ${{ github.workspace }}
14+
GRAAL_JARS: /opt/graalJars
15+
MODELS_DIR: /opt/models
1016

1117
jobs:
12-
build-and-run:
18+
code-quality:
1319
runs-on: self-hosted
14-
15-
env:
16-
JAVA_HOME: /opt/jenkins/jdks/graal-23.1.0/jdk-21.0.3
17-
TORNADO_ROOT: ${{ github.workspace }}/GPULlama3.java/external/tornadovm
18-
LLAMA_ROOT: ${{ github.workspace }}
19-
20+
timeout-minutes: 30
21+
2022
steps:
2123
- name: Checkout GPULlama3
2224
uses: actions/checkout@v4
23-
with:
24-
fetch-depth: 0
2525

2626
- name: Check code formatting (Spotless)
2727
run: |
2828
cd ${{ github.workspace }}
29-
#./mvnw -T12C -Pspotless spotless:check
30-
31-
- name: Clone TornadoVM explicitly
29+
# ./mvnw -T12C -Pspotless spotless:check
30+
31+
build-and-run:
32+
runs-on: [self-hosted]
33+
needs: code-quality
34+
timeout-minutes: 30
35+
36+
strategy:
37+
fail-fast: true
38+
matrix:
39+
backend:
40+
- name: opencl
41+
- name: ptx
42+
43+
steps:
44+
- name: Checkout GPULlama3
45+
uses: actions/checkout@v4
46+
47+
- name: Clone TornadoVM master
3248
run: |
33-
git clone --depth 1 --branch develop \
49+
git clone --depth 1 --branch master \
3450
https://github.com/beehive-lab/TornadoVM.git \
35-
GPULlama3.java/external/tornadovm
51+
$TORNADO_ROOT
3652
- name: Set up Python venv for TornadoVM
3753
run: |
38-
python3 -m venv GPULlama3.java/external/tornadovm/venv
39-
source GPULlama3.java/external/tornadovm/venv/bin/activate
54+
python3 -m venv $TORNADO_ROOT/venv
55+
source $TORNADO_ROOT/venv/bin/activate
4056
python --version
4157
- name: Build TornadoVM
4258
run: |
43-
set -x
44-
cd GPULlama3.java/external/tornadovm
59+
cd $TORNADO_ROOT
60+
mkdir -p graalJars && cp $GRAAL_JARS/* graalJars/
4561
source venv/bin/activate
4662
echo "=== Building TornadoVM ==="
47-
make
63+
64+
make BACKEND=${{ matrix.backend.name }}
65+
4866
echo "=== Searching for TornadoVM SDK directory ==="
49-
SDK_DIR=$(find dist -type d -maxdepth 3 -path "*/tornadovm-*-opencl" | head -n 1)
67+
SDK_DIR=$(find dist -type d -maxdepth 3 -path "*/tornadovm-*-${{ matrix.backend.name }}" | head -n 1)
5068
if [ -z "$SDK_DIR" ]; then
5169
echo "::error::Could not locate TornadoVM SDK directory!"
5270
find dist -maxdepth 5 -type d
@@ -66,59 +84,80 @@ jobs:
6684
echo "=== Checking tornado CLI ==="
6785
which tornado || { echo "::error::tornado not in PATH"; exit 1; }
6886
tornado --devices
69-
- name: Build GPULlama3
87+
- name: Build GPULlama3.java
7088
run: |
71-
set -x
7289
cd ${{ github.workspace }}
7390
echo "Using TORNADO_SDK=$TORNADO_SDK"
7491
export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
75-
which tornado || { echo "::error::tornado unavailable during GPULlama3 build"; exit 1; }
7692
tornado --version
77-
make
78-
79-
test-models:
80-
runs-on: self-hosted
81-
needs: build-and-run
82-
83-
strategy:
84-
fail-fast: false
85-
matrix:
86-
model:
87-
- /opt/models/DeepSeek-R1-Distill-Qwen-1.5B-F16.gguf
88-
- /opt/models/DeepSeek-R1-Distill-Qwen-1.5B-Q8_0.gguf
89-
- /opt/models/Llama-3.2-1B-Instruct-F16.gguf
90-
- /opt/models/Llama-3.2-1B-Instruct-Q8_0.gguf
91-
- /opt/models/Llama-3.2-3B-Instruct-F16.gguf
92-
- /opt/models/Llama-3.2-3B-Instruct-Q8_0.gguf
93-
- /opt/models/Mistral-7B-Instruct-v0.3.fp16.gguf
94-
- /opt/models/Mistral-7B-Instruct-v0.3.Q8_0.gguf
95-
- /opt/models/Phi-3-mini-4k-instruct-fp16.gguf
96-
- /opt/models/Phi-3-mini-4k-instruct-Q8_0.gguf
97-
- /opt/models/Qwen2.5-0.5B-Instruct-f16.gguf
98-
- /opt/models/Qwen2.5-0.5B-Instruct-Q8_0.gguf
99-
- /opt/models/qwen2.5-1.5b-instruct-fp16.gguf
100-
- /opt/models/qwen2.5-1.5b-instruct-q8_0.gguf
101-
- /opt/models/Qwen3-0.6B-f16.gguf
102-
- /opt/models/Qwen3-0.6B-Q8_0.gguf
103-
- /opt/models/Qwen3-4B-f16.gguf
104-
- /opt/models/Qwen3-4B-Q8_0.gguf
105-
106-
env:
107-
JAVA_HOME: /opt/jenkins/jdks/graal-23.1.0/jdk-21.0.3
108-
TORNADO_SDK: ${{ needs.build-and-run.outputs.tornado_sdk }}
109-
110-
steps:
111-
- name: Checkout GPULlama3
112-
uses: actions/checkout@v4
113-
114-
- name: Run inference for ${{ matrix.model }}
93+
./mvnw clean package -DskipTests
94+
- name: FP16 - Run Llama-3.2-1B-Instruct-F16.gguf
11595
run: |
116-
set -x
11796
cd ${{ github.workspace }}
118-
11997
export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
120-
echo "Using Tornado SDK: $TORNADO_SDK"
121-
122-
./llama-tornado --gpu --opencl \
123-
--model "${{ matrix.model }}" \
98+
./llama-tornado --gpu --${{ matrix.backend.name }} \
99+
--model $MODELS_DIR/Llama-3.2-1B-Instruct-F16.gguf \
100+
--prompt "Say hello"
# Smoke-test inference steps for the remaining FP16 and Q8 models.
# Every step runs ./llama-tornado from the workspace root with the backend
# selected by the job matrix and a fixed short prompt.
# NOTE(review): $TORNADO_SDK is not set in any of these steps; presumably an
# earlier step of this job exports it - confirm.
- name: FP16 - Run Qwen3-4B-f16.gguf
  run: |
    cd ${{ github.workspace }}
    export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
    ./llama-tornado --gpu --${{ matrix.backend.name }} \
      --model "$MODELS_DIR/Qwen3-4B-f16.gguf" \
      --prompt "Say hello"
- name: FP16 - Run Mistral-7B-Instruct-v0.3.fp16.gguf
  run: |
    cd ${{ github.workspace }}
    export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
    ./llama-tornado --gpu --${{ matrix.backend.name }} \
      --model "$MODELS_DIR/Mistral-7B-Instruct-v0.3.fp16.gguf" \
      --prompt "Say hello"
- name: FP16 - Run Qwen2.5-1.5b-instruct-fp16.gguf
  run: |
    cd ${{ github.workspace }}
    export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
    ./llama-tornado --gpu --${{ matrix.backend.name }} \
      --model "$MODELS_DIR/qwen2.5-1.5b-instruct-fp16.gguf" \
      --prompt "Say hello"
- name: FP16 - Run Phi-3-mini-4k-instruct-fp16.gguf
  run: |
    cd ${{ github.workspace }}
    export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
    # The model path is built from $MODELS_DIR alone, like every sibling step
    # (the stray leading "/" that produced "//opt/models/..." is gone).
    ./llama-tornado --gpu --${{ matrix.backend.name }} \
      --model "$MODELS_DIR/Phi-3-mini-4k-instruct-fp16.gguf" \
      --prompt "Say hello"
- name: Q8 - Run Llama-3.2-1B-Instruct-Q8_0.gguf
  run: |
    cd ${{ github.workspace }}
    export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
    ./llama-tornado --gpu --${{ matrix.backend.name }} \
      --model "$MODELS_DIR/Llama-3.2-1B-Instruct-Q8_0.gguf" \
      --prompt "Say hello"
- name: Q8 - Run Qwen3-0.6B-Q8_0.gguf
  run: |
    cd ${{ github.workspace }}
    export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
    ./llama-tornado --gpu --${{ matrix.backend.name }} \
      --model "$MODELS_DIR/Qwen3-0.6B-Q8_0.gguf" \
      --prompt "Say hello"
- name: Q8 - Run Phi-3-mini-4k-instruct-Q8_0.gguf
  run: |
    cd ${{ github.workspace }}
    export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
    ./llama-tornado --gpu --${{ matrix.backend.name }} \
      --model "$MODELS_DIR/Phi-3-mini-4k-instruct-Q8_0.gguf" \
      --prompt "Say hello"
- name: Q8 - Run Qwen2.5-1.5b-instruct-q8_0.gguf
  run: |
    cd ${{ github.workspace }}
    export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
    ./llama-tornado --gpu --${{ matrix.backend.name }} \
      --model "$MODELS_DIR/qwen2.5-1.5b-instruct-q8_0.gguf" \
      --prompt "Say hello"
# Step name carries the same "Run" prefix as the other model steps.
- name: Q8 - Run Mistral-7B-Instruct-v0.3.Q8_0.gguf
  run: |
    cd ${{ github.workspace }}
    export PATH="$TORNADO_SDK/bin:$JAVA_HOME/bin:$PATH"
    ./llama-tornado --gpu --${{ matrix.backend.name }} \
      --model "$MODELS_DIR/Mistral-7B-Instruct-v0.3.Q8_0.gguf" \
      --prompt "Say hello"
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
# Comment-driven rerun bot.
#
# Triggered by a newly created comment whose body contains "/rerun" on a pull
# request. It looks up the PR head SHA, lists the workflow runs for that SHA,
# and re-runs the ones selected by the command argument:
#   /rerun            failed, cancelled or timed-out runs (same as "failed")
#   /rerun all        every completed run
#   /rerun <name>     completed runs whose name contains <name>
#   /rerun help       post the command table and do nothing else
name: Rerun Workflows

on:
  issue_comment:
    types: [created]

jobs:
  rerun:
    name: Rerun CI Workflows
    # Only run on PR comments (not issue comments) with /rerun command.
    # NOTE(review): nothing here restricts who may comment; consider also
    # gating on github.event.comment.author_association - confirm the policy.
    if: |
      github.event.issue.pull_request &&
      contains(github.event.comment.body, '/rerun')
    runs-on: ubuntu-latest
    permissions:
      actions: write
      pull-requests: write
      contents: read

    steps:
      # Answers "/rerun help" with the command table and sets is_help so that
      # every later step is skipped.
      - name: Check for help command
        id: help
        uses: actions/github-script@v7
        with:
          # The template-literal lines below are deliberately kept at the
          # script's base indentation: any extra leading whitespace would
          # become part of the posted Markdown.
          script: |
            const comment = context.payload.comment.body;
            if (comment.match(/\/rerun\s+help/i)) {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                body: `## 🔄 Rerun Workflow Commands

            | Command | Description |
            |---------|-------------|
            | \`/rerun\` | Rerun only **failed/cancelled/timed-out** workflows |
            | \`/rerun all\` | Rerun **all** workflows for this PR |
            | \`/rerun failed\` | Same as \`/rerun\` |
            | \`/rerun <name>\` | Rerun workflows matching \`<name>\` (e.g. \`/rerun ci\`, \`/rerun build\`) |
            | \`/rerun help\` | Show this help message |

            **Note:** Only completed workflows can be rerun. In-progress workflows are skipped.`
              });
              core.setOutput('is_help', 'true');
            } else {
              core.setOutput('is_help', 'false');
            }

      # Resolves the pull request behind the comment and exposes its head
      # commit SHA and head branch name as step outputs.
      - name: Get PR SHA
        if: steps.help.outputs.is_help != 'true'
        id: pr
        uses: actions/github-script@v7
        with:
          script: |
            const { data: pr } = await github.rest.pulls.get({
              owner: context.repo.owner,
              repo: context.repo.repo,
              pull_number: context.issue.number
            });
            core.setOutput('sha', pr.head.sha);
            core.setOutput('head_ref', pr.head.ref);
            console.log(`PR #${context.issue.number} SHA: ${pr.head.sha}`);
            console.log(`PR head ref: ${pr.head.ref}`);

      # Acknowledges the command with a rocket reaction on the comment.
      - name: Add reaction to comment
        if: steps.help.outputs.is_help != 'true'
        uses: actions/github-script@v7
        with:
          script: |
            await github.rest.reactions.createForIssueComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              comment_id: context.payload.comment.id,
              content: 'rocket'
            });

      # Posts a comment naming the parsed mode and the user who triggered it.
      - name: Post start comment
        if: steps.help.outputs.is_help != 'true'
        uses: actions/github-script@v7
        with:
          script: |
            const comment = context.payload.comment.body;
            const rerunMatch = comment.match(/\/rerun\s*(\S+)?/);
            const rerunArg = rerunMatch && rerunMatch[1] ? rerunMatch[1] : 'failed';

            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body: `🚀 **Workflow rerun started**\n\nMode: \`${rerunArg}\`\nTriggered by: @${context.payload.comment.user.login}\n\n[View Actions](https://github.com/${context.repo.owner}/${context.repo.repo}/actions)`
            });

      # Selects and re-runs the workflow runs recorded for the PR head SHA.
      - name: Rerun failed workflows
        if: steps.help.outputs.is_help != 'true'
        uses: actions/github-script@v7
        env:
          # Handed over through the environment instead of being expanded
          # inside the script text. The PR branch name is contributor-controlled
          # and must never be interpolated into the script, so it is not
          # passed at all (the script does not need it).
          PR_HEAD_SHA: ${{ steps.pr.outputs.sha }}
        with:
          script: |
            const sha = process.env.PR_HEAD_SHA;

            // Get all workflow runs for this PR's head SHA
            const { data: runs } = await github.rest.actions.listWorkflowRunsForRepo({
              owner: context.repo.owner,
              repo: context.repo.repo,
              head_sha: sha,
              per_page: 100
            });

            console.log(`Found ${runs.total_count} workflow runs for SHA ${sha}`);

            if (runs.total_count === 0) {
              console.log('No workflow runs found for this PR');
              return;
            }

            // Parse command for specific workflow filter
            // Supports: /rerun, /rerun all, /rerun failed, /rerun <workflow-name>
            const comment = context.payload.comment.body;
            const rerunMatch = comment.match(/\/rerun\s*(\S+)?/);
            const rerunArg = rerunMatch && rerunMatch[1] ? rerunMatch[1].toLowerCase() : 'failed';

            console.log(`Rerun mode: ${rerunArg}`);

            let rerunCount = 0;

            for (const run of runs.workflow_runs) {
              const shouldRerun =
                rerunArg === 'all' ||
                (rerunArg === 'failed' && ['failure', 'cancelled', 'timed_out'].includes(run.conclusion)) ||
                run.name.toLowerCase().includes(rerunArg);

              if (!shouldRerun) {
                console.log(`Skipping ${run.name} (status: ${run.status}, conclusion: ${run.conclusion})`);
                continue;
              }

              // Only rerun completed workflows
              if (run.status !== 'completed') {
                console.log(`Skipping ${run.name} - still ${run.status}`);
                continue;
              }

              try {
                console.log(`Rerunning workflow: ${run.name} (ID: ${run.id})`);

                // Re-run only the failed jobs when the run did not succeed,
                // otherwise re-run the whole workflow
                if (['failure', 'cancelled', 'timed_out'].includes(run.conclusion)) {
                  await github.rest.actions.reRunWorkflowFailedJobs({
                    owner: context.repo.owner,
                    repo: context.repo.repo,
                    run_id: run.id
                  });
                } else {
                  await github.rest.actions.reRunWorkflow({
                    owner: context.repo.owner,
                    repo: context.repo.repo,
                    run_id: run.id
                  });
                }
                rerunCount++;
              } catch (error) {
                // Best effort: one run that cannot be restarted must not stop
                // the others, but surface it as a warning instead of a plain log
                core.warning(`Failed to rerun ${run.name}: ${error.message}`);
              }
            }

            console.log(`Reran ${rerunCount} workflow(s)`);

      # Always reports the job status back on the PR (skipped for help).
      - name: Post completion comment
        if: always() && steps.help.outputs.is_help != 'true'
        uses: actions/github-script@v7
        with:
          script: |
            const status = '${{ job.status }}';
            const emoji = status === 'success' ? '✅' : '❌';

            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body: `${emoji} **Workflow rerun ${status}**\n\n[View Actions](https://github.com/${context.repo.owner}/${context.repo.repo}/actions)`
            });

0 commit comments

Comments
 (0)