
Commit 48d2674

server: bench: init
1 parent f4fec0d commit 48d2674

2 files changed: +17 -12 lines changed


.github/workflows/bench.yml

Lines changed: 10 additions & 8 deletions
@@ -103,10 +103,10 @@ jobs:
          BENCH_K6_BIN_PATH=./k6 python bench.py \
              --runner-label ${{ env.RUNNER_LABEL }} \
              --name ${{ github.job }} \
-             --branch ${{ github.ref_name }} \
+             --branch ${{ github.head_ref || github.ref_name }} \
              --commit ${{ github.sha }} \
              --scenario script.js \
-             --duration 5m \
+             --duration 1m \
              --hf-repo ggml-org/models \
              --hf-file phi-2/ggml-model-q4_0.gguf \
              --model-path-prefix /models \
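The --branch change passes ${{ github.head_ref || github.ref_name }}: github.head_ref is only populated on pull_request events (it names the PR's source branch), while on a plain push it is empty and the expression falls back to github.ref_name; the duration drop from 5m to 1m simply shortens the CI run. A minimal Python sketch of the fallback behaviour, with hypothetical branch names:

# Hypothetical illustration of the `head_ref || ref_name` fallback used above.
# In GitHub Actions expressions an empty string is falsy, so `||` yields the
# right-hand operand whenever head_ref is unset (i.e. outside pull_request runs).
def branch_label(head_ref: str, ref_name: str) -> str:
    return head_ref or ref_name

# pull_request event: ref_name is the synthetic merge ref, head_ref the real branch
assert branch_label("my-feature-branch", "1234/merge") == "my-feature-branch"
# push event: head_ref is empty, fall back to the pushed branch
assert branch_label("", "master") == "master"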
@@ -120,6 +120,8 @@ jobs:
              --max-tokens 2048

          cat results.github.env >> $GITHUB_ENV
+
+         cat results.github.env

      - name: Commit status
        uses: Sibz/github-status-action@v1
@@ -149,17 +151,17 @@ jobs:
        with:
          message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
          message: |
-           📈 **llama.cpp** server benchmark for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** - ${{ env.LLAMACPP_TOKENS_SECOND_AVG}}tk/s 🚀
+           📈 **llama.cpp** server benchmark for _${{ github.job }}_ on **${{ env.RUNNER_LABEL }}**: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
            <p align="center">
-           <img width="400" height="300" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" alt="prompt_tokens_seconds" />
-           <img width="400" height="300" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" alt="predicted_tokens_seconds"/>
+           <img width="80%" height="80%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" alt="prompt_tokens_seconds" />
+           <img width="80%" height="80%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" alt="predicted_tokens_seconds"/>
            </p>
            <details>
            <summary>Details</summary>
            <p align="center">
-           <img width="400" height="300" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" alt="kv_cache_usage_ratio" />
-           <img width="400" height="300" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" alt="requests_processing"/>
-           <img width="400" height="300" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[4] }}" alt="requests_deferred"/>
+           <img width="80%" height="80%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" alt="kv_cache_usage_ratio" />
+           <img width="80%" height="80%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" alt="requests_processing"/>
+           <img width="80%" height="80%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[4] }}" alt="requests_deferred"/>
            </p>
            </detail>

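For context on the added cat: bench.py writes plain KEY=value lines into results.github.env, and `cat results.github.env >> $GITHUB_ENV` re-exports those pairs as env.* variables for the later commit-status and comment steps (that is where env.BENCH_ITERATIONS in the message above comes from); the new bare `cat results.github.env` only echoes the same file into the job log. A minimal sketch of the file format, using hypothetical values:

# Hypothetical values; only the KEY=value-per-line format written by bench.py matters here.
results = {
    "BENCH_ITERATIONS": 42,
    "LLAMACPP_TOKENS_SECOND_AVG": 123.4,
}

with open("results.github.env", "w") as github_env:
    for key, value in results.items():
        # each line becomes an env.* variable once appended to $GITHUB_ENV
        github_env.write(f"{key}={value}\n")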

examples/server/bench/bench.py

Lines changed: 7 additions & 4 deletions
@@ -60,6 +60,7 @@ def main(args_in: list[str] | None = None) -> None:
    try:
        start_benchmark(args)

+       iterations = 0
        with open("results.github.env", 'w') as github_env:
            # parse output
            with open('k6-results.json', 'r') as bench_results:
@@ -74,6 +75,7 @@ def main(args_in: list[str] | None = None) -> None:
                    github_env.write(
                        f"{escape_metric_name(metric_name)}_{escape_metric_name(metric_metric)}={value}\n")
                token_seconds = data['metrics']['llamacpp_tokens_second']['avg']
+               iterations = data['root_group']['checks']['success completion']['passes']

    except Exception:
        print("bench: error :")
@@ -120,13 +122,14 @@ def main(args_in: list[str] | None = None) -> None:
        plt.xticks(rotation=0, fontsize=14, horizontalalignment='center', alpha=.7)
        plt.yticks(fontsize=12, alpha=.7)

-       plt.title(f"llama.cpp {args.name} on {args.runner_label} {round(token_seconds, 2)}tk/s\n"
+       plt.title(f"llama.cpp {args.name} on {args.runner_label} {iterations} iterations\n"
                  f"duration={args.duration}",
                  fontsize=14, wrap=True)
        plt.grid(axis='both', alpha=.3)
        plt.ylabel(f"llamacpp:{metric}", fontsize=22)
-       plt.xlabel(f"hf-repo={args.hf_repo} hf-file={args.hf_file} parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size}\n"
-                  f" pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n"
+       plt.xlabel(f"{args.hf_repo}/{args.hf_file}\n"
+                  f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size}\n"
+                  f"pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n"
                   f"branch={args.branch} commit={args.commit}", fontsize=14, wrap=True)
        plt.gcf().autofmt_xdate()

@@ -157,7 +160,7 @@ def main(args_in: list[str] | None = None) -> None:
        }
        with open("results.github.env", 'a') as github_env:
            github_env.write(f"BENCH_RESULTS={json.dumps(bench_results, indent=None, separators=(',', ':') )}")
-           github_env.write(f"BENCH_ITERATIONS={data['root_group']['checks']['success completion']['passes']}")
+           github_env.write(f"BENCH_ITERATIONS={iterations}")


def start_benchmark(args):
