
Commit 48d2674

server: bench: init
1 parent f4fec0d commit 48d2674

2 files changed: +17 -12 lines changed


.github/workflows/bench.yml

Lines changed: 10 additions & 8 deletions
@@ -103,10 +103,10 @@ jobs:
          BENCH_K6_BIN_PATH=./k6 python bench.py \
              --runner-label ${{ env.RUNNER_LABEL }} \
              --name ${{ github.job }} \
-             --branch ${{ github.ref_name }} \
+             --branch ${{ github.head_ref || github.ref_name }} \
              --commit ${{ github.sha }} \
              --scenario script.js \
-             --duration 5m \
+             --duration 1m \
              --hf-repo ggml-org/models \
              --hf-file phi-2/ggml-model-q4_0.gguf \
              --model-path-prefix /models \
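The --branch change passes ${{ github.head_ref || github.ref_name }}: github.head_ref is only populated on pull_request events (it names the PR's source branch), while on a plain push it is empty and the expression falls back to github.ref_name; the duration drop from 5m to 1m simply shortens the CI run. A minimal Python sketch of the fallback behaviour, with hypothetical branch names:

# Hypothetical illustration of the `head_ref || ref_name` fallback used above.
# In GitHub Actions expressions an empty string is falsy, so `||` yields the
# right-hand operand whenever head_ref is unset (i.e. outside pull_request runs).
def branch_label(head_ref: str, ref_name: str) -> str:
    return head_ref or ref_name

# pull_request event: ref_name is the synthetic merge ref, head_ref the real branch
assert branch_label("my-feature-branch", "1234/merge") == "my-feature-branch"
# push event: head_ref is empty, fall back to the pushed branch
assert branch_label("", "master") == "master"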
@@ -120,6 +120,8 @@ jobs:
              --max-tokens 2048

          cat results.github.env >> $GITHUB_ENV
+
+         cat results.github.env

      - name: Commit status
        uses: Sibz/github-status-action@v1
@@ -149,17 +151,17 @@ jobs:
        with:
          message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
          message: |
-           📈 **llama.cpp** server benchmark for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** - ${{ env.LLAMACPP_TOKENS_SECOND_AVG}}tk/s 🚀
+           📈 **llama.cpp** server benchmark for _${{ github.job }}_ on **${{ env.RUNNER_LABEL }}**: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
            <p align="center">
-           <img width="400" height="300" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" alt="prompt_tokens_seconds" />
-           <img width="400" height="300" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" alt="predicted_tokens_seconds"/>
+           <img width="80%" height="80%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" alt="prompt_tokens_seconds" />
+           <img width="80%" height="80%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" alt="predicted_tokens_seconds"/>
            </p>
            <details>
            <summary>Details</summary>
            <p align="center">
-           <img width="400" height="300" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" alt="kv_cache_usage_ratio" />
-           <img width="400" height="300" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" alt="requests_processing"/>
-           <img width="400" height="300" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[4] }}" alt="requests_deferred"/>
+           <img width="80%" height="80%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" alt="kv_cache_usage_ratio" />
+           <img width="80%" height="80%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" alt="requests_processing"/>
+           <img width="80%" height="80%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[4] }}" alt="requests_deferred"/>
            </p>
            </detail>

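For context on the added cat: bench.py writes plain KEY=value lines into results.github.env, and `cat results.github.env >> $GITHUB_ENV` re-exports those pairs as env.* variables for the later commit-status and comment steps (that is where env.BENCH_ITERATIONS in the message above comes from); the new bare `cat results.github.env` only echoes the same file into the job log. A minimal sketch of the file format, using hypothetical values:

# Hypothetical values; only the KEY=value-per-line format written by bench.py matters here.
results = {
    "BENCH_ITERATIONS": 42,
    "LLAMACPP_TOKENS_SECOND_AVG": 123.4,
}

with open("results.github.env", "w") as github_env:
    for key, value in results.items():
        # each line becomes an env.* variable once appended to $GITHUB_ENV
        github_env.write(f"{key}={value}\n")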

examples/server/bench/bench.py

Lines changed: 7 additions & 4 deletions
@@ -60,6 +60,7 @@ def main(args_in: list[str] | None = None) -> None:
    try:
        start_benchmark(args)

+       iterations = 0
        with open("results.github.env", 'w') as github_env:
            # parse output
            with open('k6-results.json', 'r') as bench_results:
@@ -74,6 +75,7 @@ def main(args_in: list[str] | None = None) -> None:
                    github_env.write(
                        f"{escape_metric_name(metric_name)}_{escape_metric_name(metric_metric)}={value}\n")
                token_seconds = data['metrics']['llamacpp_tokens_second']['avg']
+               iterations = data['root_group']['checks']['success completion']['passes']

    except Exception:
        print("bench: error :")
@@ -120,13 +122,14 @@ def main(args_in: list[str] | None = None) -> None:
        plt.xticks(rotation=0, fontsize=14, horizontalalignment='center', alpha=.7)
        plt.yticks(fontsize=12, alpha=.7)

-       plt.title(f"llama.cpp {args.name} on {args.runner_label} {round(token_seconds, 2)}tk/s\n"
+       plt.title(f"llama.cpp {args.name} on {args.runner_label} {iterations} iterations\n"
                  f"duration={args.duration}",
                  fontsize=14, wrap=True)
        plt.grid(axis='both', alpha=.3)
        plt.ylabel(f"llamacpp:{metric}", fontsize=22)
-       plt.xlabel(f"hf-repo={args.hf_repo} hf-file={args.hf_file} parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size}\n"
-                  f" pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n"
+       plt.xlabel(f"{args.hf_repo}/{args.hf_file}\n"
+                  f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size}\n"
+                  f"pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n"
                   f"branch={args.branch} commit={args.commit}", fontsize=14, wrap=True)
        plt.gcf().autofmt_xdate()

@@ -157,7 +160,7 @@ def main(args_in: list[str] | None = None) -> None:
        }
        with open("results.github.env", 'a') as github_env:
            github_env.write(f"BENCH_RESULTS={json.dumps(bench_results, indent=None, separators=(',', ':') )}")
-           github_env.write(f"BENCH_ITERATIONS={data['root_group']['checks']['success completion']['passes']}")
+           github_env.write(f"BENCH_ITERATIONS={iterations}")


def start_benchmark(args):
