Commit d4bd981

ci: test mermaid
1 parent f75166e commit d4bd981

2 files changed: +74 additions, -44 deletions


.github/workflows/bench.yml

Lines changed: 12 additions & 1 deletion
@@ -102,7 +102,7 @@ jobs:
               --branch ${{ github.head_ref || github.ref_name }} \
               --commit ${{ github.sha }} \
               --scenario script.js \
-              --duration 10m \
+              --duration 2m \
               --hf-repo ggml-org/models \
               --hf-file phi-2/ggml-model-q4_0.gguf \
               --model-path-prefix /models \
@@ -116,6 +116,11 @@ jobs:
               --max-tokens 2048

           cat results.github.env >> $GITHUB_ENV
+
+          echo PROMPT_TOKENS_SECONDS_=${PROMPT_TOKENS_SECONDS//<br>/\n} >> $GITHUB_ENV
+          echo PREDICTED_TOKENS_SECONDS_=${PREDICTED_TOKENS_SECONDS//<br>/\n} >> $GITHUB_ENV
+          echo KV_CACHE_USAGE_RATIO_=${KV_CACHE_USAGE_RATIO//<br>/\n} >> $GITHUB_ENV
+          echo REQUESTS_PROCESSING_=${REQUESTS_PROCESSING//<br>/\n} >> $GITHUB_ENV

       - uses: actions/upload-artifact@v4
         with:
@@ -156,14 +161,20 @@ jobs:
           message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
           message: |
             📈 **llama.cpp server** benchmark for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
+            - ${{ env.BENCH_GRAPH_TITLE }}
+            - ${{ env.BENCH_GRAPH_YLABEL }}
             <p align="center">
                 <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" alt="prompt_tokens_seconds" />
+                ${{ env.PROMPT_TOKENS_SECONDS_ }}
                 <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" alt="predicted_tokens_seconds"/>
+                ${{ env.PREDICTED_TOKENS_SECONDS_ }}
             </p>
             <details>
             <summary>Details</summary>
             <p align="center">
                 <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" alt="kv_cache_usage_ratio" />
+                ${{ env.KV_CACHE_USAGE_RATIO_ }}
                 <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" alt="requests_processing"/>
+                ${{ env.REQUESTS_PROCESSING_ }}
             </p>
             </details>
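
Note on the four `echo ... >> $GITHUB_ENV` lines above: they re-export each flattened mermaid block with `<br>` swapped back toward newlines so the comment template can embed it. One caveat: `GITHUB_ENV` treats each line as a single `NAME=value` pair, and genuinely multiline values require GitHub's documented `NAME<<EOF ... EOF` delimiter syntax, so whether the bash `${VAR//<br>/\n}` expansion yields real newlines here depends on quoting. The sketch below is not part of the commit; it only illustrates the round-trip that bench.py (writer) and this workflow step (reader) are performing, with `<br>` standing in for newlines.

```python
# Illustrative sketch only: the <br>-for-newline round-trip used by the
# commit to squeeze a multiline mermaid block into one GITHUB_ENV line.

def flatten(block: str) -> str:
    # bench.py side: encode newlines so the value fits on one env line
    return block.replace('\n', '<br>')

def restore(value: str) -> str:
    # workflow side: the effect the ${VAR//<br>/\n} expansion aims for
    return value.replace('<br>', '\n')

mermaid = '```mermaid\nxychart-beta\n    line [1, 2, 3]\n```'
assert restore(flatten(mermaid)) == mermaid
```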

examples/server/bench/bench.py

Lines changed: 62 additions & 43 deletions
@@ -107,49 +107,63 @@ def main(args_in: list[str] | None = None) -> None:
     metrics = ['prompt_tokens_seconds', 'predicted_tokens_seconds',
                'kv_cache_usage_ratio', 'requests_processing', 'requests_deferred']

-    for metric in metrics:
-        resp = requests.get(f"http://localhost:9090/api/v1/query_range",
-                            params={'query': 'llamacpp:' + metric, 'start': start_time, 'end': end_time, 'step': 2})
-
-        with open(f"{metric}.json", 'w') as metric_json:
-            metric_json.write(resp.text)
-
-        if resp.status_code != 200:
-            print(f"bench: unable to extract prometheus metric {metric}: {resp.text}")
-        else:
-            metric_data = resp.json()
-            values = metric_data['data']['result'][0]['values']
-            timestamps, metric_values = zip(*values)
-            metric_values = [float(value) for value in metric_values]
-            timestamps = [datetime.fromtimestamp(int(ts)) for ts in timestamps]
-            plt.figure(figsize=(16, 10), dpi=80)
-            plt.plot(timestamps, metric_values, label=metric)
-            plt.xticks(rotation=0, fontsize=14, horizontalalignment='center', alpha=.7)
-            plt.yticks(fontsize=12, alpha=.7)
-
-
-            plt.title(f"llama.cpp {args.name} on {args.runner_label}\n"
-                      f"duration={args.duration} {iterations} iterations",
-                      fontsize=14, wrap=True)
-            plt.grid(axis='both', alpha=.3)
-            plt.ylabel(f"llamacpp:{metric}", fontsize=22)
-            plt.xlabel(f"{args.hf_repo}/{args.hf_file}\n"
-                       f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size}\n"
-                       f"pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n"
-                       f"branch={args.branch} commit={args.commit}", fontsize=14, wrap=True)
-            plt.gca().xaxis.set_major_locator(matplotlib.dates.MinuteLocator())
-            plt.gca().xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y%m%d %H:%M:%S"))
-            plt.gcf().autofmt_xdate()
-
-            # Remove borders
-            plt.gca().spines["top"].set_alpha(0.0)
-            plt.gca().spines["bottom"].set_alpha(0.3)
-            plt.gca().spines["right"].set_alpha(0.0)
-            plt.gca().spines["left"].set_alpha(0.3)
-
-            # Save the plot as a PNG image
-            plt.savefig(f'{metric}.png')
-            plt.close()
+    with open("results.github.env", 'a') as github_env:
+        for metric in metrics:
+            resp = requests.get(f"http://localhost:9090/api/v1/query_range",
+                                params={'query': 'llamacpp:' + metric, 'start': start_time, 'end': end_time, 'step': 2})
+
+            with open(f"{metric}.json", 'w') as metric_json:
+                metric_json.write(resp.text)
+
+            if resp.status_code != 200:
+                print(f"bench: unable to extract prometheus metric {metric}: {resp.text}")
+            else:
+                metric_data = resp.json()
+                values = metric_data['data']['result'][0]['values']
+                timestamps, metric_values = zip(*values)
+                metric_values = [float(value) for value in metric_values]
+                timestamps_dt = [datetime.fromtimestamp(int(ts)) for ts in timestamps]
+                plt.figure(figsize=(16, 10), dpi=80)
+                plt.plot(timestamps_dt, metric_values, label=metric)
+                plt.xticks(rotation=0, fontsize=14, horizontalalignment='center', alpha=.7)
+                plt.yticks(fontsize=12, alpha=.7)
+
+                title = (f"llama.cpp {args.name} on {args.runner_label}\n "
+                         f"duration={args.duration} {iterations} iterations")
+                ylabel = f"llamacpp:{metric}"
+                xlabel = (f"{args.hf_repo}/{args.hf_file}\n"
+                          f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size} pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n"
+                          f"branch={args.branch} commit={args.commit}")
+                plt.title(title,
+                          fontsize=14, wrap=True)
+                plt.grid(axis='both', alpha=.3)
+                plt.ylabel(ylabel, fontsize=22)
+                plt.xlabel(xlabel, fontsize=14, wrap=True)
+                plt.gca().xaxis.set_major_locator(matplotlib.dates.MinuteLocator())
+                plt.gca().xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y%m%d %H:%M:%S"))
+                plt.gcf().autofmt_xdate()
+
+                # Remove borders
+                plt.gca().spines["top"].set_alpha(0.0)
+                plt.gca().spines["bottom"].set_alpha(0.3)
+                plt.gca().spines["right"].set_alpha(0.0)
+                plt.gca().spines["left"].set_alpha(0.3)
+
+                # Save the plot as a PNG image
+                plt.savefig(f'{metric}.png')
+                plt.close()
+
+                # Mermaid format in case image failed
+                mermaid = f"""```mermaid
+xychart-beta
+    title "{title}"
+    x-axis "{xlabel}" ["{'", "'.join([datetime.fromtimestamp(int(ts)).strftime("%Y%m%d %H:%M:%S") for ts in timestamps])}"]
+    y-axis "{ylabel}"
+    line [{', '.join([str(round(float(value))) for value in metric_values])}]
+```
+"""
+                mermaid = mermaid.replace('\n', "<br>")
+                github_env.write(f"{metric.upper()}={mermaid}\n")

     # 140 chars max for commit status description
     bench_results = {
@@ -170,6 +184,11 @@ def main(args_in: list[str] | None = None) -> None:
         github_env.write(f"BENCH_RESULTS={json.dumps(bench_results, indent=None, separators=(',', ':') )}\n")
         github_env.write(f"BENCH_ITERATIONS={iterations}\n")

+        title = title.replace('\n', '<br>')
+        ylabel = ylabel.replace('\n', '<br>')
+        github_env.write(f"BENCH_GRAPH_TITLE={title}\n")
+        github_env.write(f"BENCH_GRAPH_YLABEL={ylabel}\n")
+

 def start_benchmark(args):
     k6_path = 'k6'
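
For reference, here is the mermaid fallback from the loop above in isolation: a minimal, runnable sketch with made-up sample values (the real script takes the timestamps and values from the Prometheus `query_range` response).

```python
from datetime import datetime

# Minimal sketch of the xychart-beta fallback built in the loop above.
# The sample data here is invented; bench.py pulls it from Prometheus.
timestamps = [1710000000, 1710000060, 1710000120]  # unix seconds
metric_values = [12.3, 15.8, 14.1]
title = "llama.cpp bench on runner"
xlabel = "time"
ylabel = "llamacpp:prompt_tokens_seconds"

labels = '", "'.join(datetime.fromtimestamp(ts).strftime("%Y%m%d %H:%M:%S")
                     for ts in timestamps)
mermaid = (
    "```mermaid\n"
    "xychart-beta\n"
    f'    title "{title}"\n'
    f'    x-axis "{xlabel}" ["{labels}"]\n'
    f'    y-axis "{ylabel}"\n'
    f"    line [{', '.join(str(round(v)) for v in metric_values)}]\n"
    "```\n"
)
# Flatten so the block survives a single-line NAME=value GITHUB_ENV entry:
print(mermaid.replace('\n', '<br>'))
```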
