Skip to content

Commit 93434fd

Browse files
committed
ci: bench: add mermaid charts as a fallback in case the images cannot be uploaded
1 parent 5c0b2a2 commit 93434fd

File tree

2 files changed

+129
-36
lines changed

2 files changed

+129
-36
lines changed

.github/workflows/bench.yml

Lines changed: 90 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,18 @@ jobs:
117117
118118
cat results.github.env >> $GITHUB_ENV
119119
120+
# Remove dataset as we do not want it in the artefact
121+
rm ShareGPT_V3_unfiltered_cleaned_split.json
122+
123+
- uses: actions/upload-artifact@v4
124+
with:
125+
name: benchmark-results
126+
compression-level: 9
127+
path: |
128+
examples/server/bench/*.png
129+
examples/server/bench/*.json
130+
examples/server/bench/*.log
131+
120132
- name: Commit status
121133
uses: Sibz/github-status-action@v1
122134
with:
@@ -128,6 +140,7 @@ jobs:
128140

129141
- name: Upload benchmark images
130142
uses: devicons/public-upload-to-imgur@v2.2.2
143+
continue-on-error: true # Important as it looks unstable: 503
131144
id: imgur_step
132145
with:
133146
client_id: ${{secrets.IMGUR_CLIENT_ID}}
@@ -136,44 +149,95 @@ jobs:
136149
examples/server/bench/predicted_tokens_seconds.png
137150
examples/server/bench/kv_cache_usage_ratio.png
138151
examples/server/bench/requests_processing.png
139-
examples/server/bench/requests_deferred.png
152+
153+
- name: Extract mermaid
154+
id: set_mermaid
155+
run: |
156+
set -eux
157+
158+
cd examples/server/bench
159+
PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
160+
echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
161+
echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
162+
echo "EOF" >> $GITHUB_ENV
163+
164+
PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid)
165+
echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
166+
echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV
167+
echo "EOF" >> $GITHUB_ENV
168+
169+
KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid)
170+
echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV
171+
echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV
172+
echo "EOF" >> $GITHUB_ENV
173+
174+
REQUESTS_PROCESSING=$(cat requests_processing.mermaid)
175+
echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV
176+
echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
177+
echo "EOF" >> $GITHUB_ENV
140178
141179
- name: Comment PR
142180
uses: mshick/add-pr-comment@v2
143181
id: comment_pr
144182
if: ${{ github.event.pull_request != '' }}
183+
continue-on-error: true
145184
with:
146185
message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
147186
message: |
148187
📈 **llama.cpp server** benchmark for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
188+
189+
- ${{ env.BENCH_GRAPH_XLABEL }}
190+
- req_avg=${{ env.HTTP_REQ_DURATION_AVG }} pp_avg=${{ env.LLAMACPP_PROMPT_TOKENS_AVG }} tks_avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}
191+
192+
149193
<p align="center">
150-
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" alt="prompt_tokens_seconds" />
151-
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" alt="predicted_tokens_seconds"/>
194+
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" alt="prompt_tokens_seconds" />
195+
196+
<details>
197+
<summary>More</summary>
198+
199+
```mermaid
200+
${{ env.PROMPT_TOKENS_SECONDS }}
201+
```
202+
203+
</details>
204+
205+
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" alt="predicted_tokens_seconds"/>
206+
207+
<details>
208+
<summary>More</summary>
209+
210+
```mermaid
211+
${{ env.PREDICTED_TOKENS_SECONDS }}
212+
```
213+
214+
</details>
215+
152216
</p>
153217
<details>
154218
<summary>Details</summary>
155219
<p align="center">
156-
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" alt="kv_cache_usage_ratio" />
157-
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" alt="requests_processing"/>
158-
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[4] }}" alt="requests_deferred"/>
159-
</p>
160-
</detail>
161-
162-
- name: Upload results
163-
if: ${{ github.event.pull_request }}
164-
uses: edunad/actions-image@v2.0.0
165-
with:
166-
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
167-
path: 'examples/server/bench/*.png'
168-
title: |
169-
llama.cpp server benchmark results for ${{ github.job }} on ${{ env.RUNNER_LABEL }}: ${{ env.LLAMACPP_TOKENS_SECOND_AVG}}tk/s
170-
annotationLevel: 'success'
220+
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" alt="kv_cache_usage_ratio" />
171221
172-
- uses: actions/upload-artifact@v4
173-
with:
174-
name: benchmark-results
175-
compression-level: 9
176-
path: |
177-
examples/server/bench/*.png
178-
examples/server/bench/*.json
179-
examples/server/bench/*.log
222+
<details>
223+
<summary>More</summary>
224+
225+
```mermaid
226+
${{ env.KV_CACHE_USAGE_RATIO }}
227+
```
228+
229+
</details>
230+
231+
<img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" alt="requests_processing"/>
232+
233+
<details>
234+
<summary>More</summary>
235+
236+
```mermaid
237+
${{ env.REQUESTS_PROCESSING }}
238+
```
239+
240+
</details>
241+
242+
</p>
243+
</details>

examples/server/bench/bench.py

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,12 @@ def main(args_in: list[str] | None = None) -> None:
101101
while is_server_listening(args.host, args.port):
102102
time.sleep(0.1)
103103

104+
title = (f"llama.cpp {args.name} on {args.runner_label}\n "
105+
f"duration={args.duration} {iterations} iterations")
106+
xlabel = (f"{args.hf_repo}/{args.hf_file}\n"
107+
f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size} pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n"
108+
f"branch={args.branch} commit={args.commit}")
109+
104110
# Prometheus
105111
end_time = time.time()
106112
if is_server_listening("0.0.0.0", 9090):
@@ -121,23 +127,20 @@ def main(args_in: list[str] | None = None) -> None:
121127
values = metric_data['data']['result'][0]['values']
122128
timestamps, metric_values = zip(*values)
123129
metric_values = [float(value) for value in metric_values]
124-
timestamps = [datetime.fromtimestamp(int(ts)) for ts in timestamps]
130+
timestamps_dt = [datetime.fromtimestamp(int(ts)) for ts in timestamps]
125131
plt.figure(figsize=(16, 10), dpi=80)
126-
plt.plot(timestamps, metric_values, label=metric)
132+
plt.plot(timestamps_dt, metric_values, label=metric)
127133
plt.xticks(rotation=0, fontsize=14, horizontalalignment='center', alpha=.7)
128134
plt.yticks(fontsize=12, alpha=.7)
129135

130-
plt.title(f"llama.cpp {args.name} on {args.runner_label}\n"
131-
f"duration={args.duration} {iterations} iterations",
136+
ylabel = f"llamacpp:{metric}"
137+
plt.title(title,
132138
fontsize=14, wrap=True)
133139
plt.grid(axis='both', alpha=.3)
134-
plt.ylabel(f"llamacpp:{metric}", fontsize=22)
135-
plt.xlabel(f"{args.hf_repo}/{args.hf_file}\n"
136-
f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size}\n"
137-
f"pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n"
138-
f"branch={args.branch} commit={args.commit}", fontsize=14, wrap=True)
140+
plt.ylabel(ylabel, fontsize=22)
141+
plt.xlabel(xlabel, fontsize=14, wrap=True)
139142
plt.gca().xaxis.set_major_locator(matplotlib.dates.MinuteLocator())
140-
plt.gca().xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y%m%d %H:%M:%S"))
143+
plt.gca().xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y-%m-%d %H:%M:%S"))
141144
plt.gcf().autofmt_xdate()
142145

143146
# Remove borders
@@ -150,6 +153,27 @@ def main(args_in: list[str] | None = None) -> None:
150153
plt.savefig(f'{metric}.png')
151154
plt.close()
152155

156+
# Also write the metric as a Mermaid chart, as a fallback in case the image is unavailable
157+
with (open(f"{metric}.mermaid", 'w') as mermaid_f):
158+
mermaid = (
159+
f"""---
160+
config:
161+
xyChart:
162+
titleFontSize: 12
163+
width: 900
164+
height: 600
165+
themeVariables:
166+
xyChart:
167+
titleColor: "#000000"
168+
---
169+
xychart-beta
170+
title "{title}"
171+
y-axis "llamacpp:{metric}"
172+
x-axis "llamacpp:{metric}" {int(min(timestamps))} --> {int(max(timestamps))}
173+
line [{', '.join([str(round(float(value))) for value in metric_values])}]
174+
""")
175+
mermaid_f.write(mermaid)
176+
153177
# 140 chars max for commit status description
154178
bench_results = {
155179
"req": {
@@ -169,6 +193,11 @@ def main(args_in: list[str] | None = None) -> None:
169193
github_env.write(f"BENCH_RESULTS={json.dumps(bench_results, indent=None, separators=(',', ':') )}\n")
170194
github_env.write(f"BENCH_ITERATIONS={iterations}\n")
171195

196+
title = title.replace('\n', ' ')
197+
xlabel = xlabel.replace('\n', ' ')
198+
github_env.write(f"BENCH_GRAPH_TITLE={title}\n")
199+
github_env.write(f"BENCH_GRAPH_XLABEL={xlabel}\n")
200+
172201

173202
def start_benchmark(args):
174203
k6_path = 'k6'

0 commit comments

Comments
 (0)