ci: test mermaid

phymbert · phymbert · commit d4bd981bf12c · 2024-03-25T23:12:10.000+01:00
diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
@@ -102,7 +102,7 @@ jobs:
               --branch ${{ github.head_ref || github.ref_name }} \
               --commit ${{ github.sha }} \
               --scenario script.js \
-              --duration 10m \
+              --duration 2m \
               --hf-repo ggml-org/models	 \
               --hf-file phi-2/ggml-model-q4_0.gguf \
               --model-path-prefix /models \
@@ -116,6 +116,11 @@ jobs:
               --max-tokens 2048
 
           cat results.github.env >> $GITHUB_ENV
+          
+          echo PROMPT_TOKENS_SECONDS_=${PROMPT_TOKENS_SECONDS//<br>/\n} >> $GITHUB_ENV
+          echo PREDICTED_TOKENS_SECONDS_=${PREDICTED_TOKENS_SECONDS//<br>/\n} >> $GITHUB_ENV
+          echo KV_CACHE_USAGE_RATIO_=${KV_CACHE_USAGE_RATIO//<br>/\n} >> $GITHUB_ENV
+          echo REQUESTS_PROCESSING_=${REQUESTS_PROCESSING//<br>/\n} >> $GITHUB_ENV
 
       - uses: actions/upload-artifact@v4
         with:
@@ -156,14 +161,20 @@ jobs:
           message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
           message: |
             📈 **llama.cpp server** benchmark for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
+            - ${{ env.BENCH_GRAPH_TITLE }}
+            - ${{ env.BENCH_GRAPH_YLABEL }}
             <p align="center">
                 <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" alt="prompt_tokens_seconds" />
+                ${{ env.PROMPT_TOKENS_SECONDS_ }}
                 <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" alt="predicted_tokens_seconds"/>
+                ${{ env.PREDICTED_TOKENS_SECONDS_ }}
             </p>
             <details>
                 <summary>Details</summary>
                 <p align="center">
                     <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" alt="kv_cache_usage_ratio" />
+                    ${{ env.KV_CACHE_USAGE_RATIO_ }}
                     <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" alt="requests_processing"/>
+                    ${{ env.REQUESTS_PROCESSING_ }}
                 </p>
             </detail>
diff --git a/examples/server/bench/bench.py b/examples/server/bench/bench.py
@@ -107,49 +107,63 @@ def main(args_in: list[str] | None = None) -> None:
         metrics = ['prompt_tokens_seconds', 'predicted_tokens_seconds',
                    'kv_cache_usage_ratio', 'requests_processing', 'requests_deferred']
 
-        for metric in metrics:
-            resp = requests.get(f"http://localhost:9090/api/v1/query_range",
-                                params={'query': 'llamacpp:' + metric, 'start': start_time, 'end': end_time, 'step': 2})
-
-            with open(f"{metric}.json", 'w') as metric_json:
-                metric_json.write(resp.text)
-
-            if resp.status_code != 200:
-                print(f"bench: unable to extract prometheus metric {metric}: {resp.text}")
-            else:
-                metric_data = resp.json()
-                values = metric_data['data']['result'][0]['values']
-                timestamps, metric_values = zip(*values)
-                metric_values = [float(value) for value in metric_values]
-                timestamps = [datetime.fromtimestamp(int(ts)) for ts in timestamps]
-                plt.figure(figsize=(16, 10), dpi=80)
-                plt.plot(timestamps, metric_values, label=metric)
-                plt.xticks(rotation=0, fontsize=14, horizontalalignment='center', alpha=.7)
-                plt.yticks(fontsize=12, alpha=.7)
-
-
-                plt.title(f"llama.cpp {args.name} on {args.runner_label}\n"
-                          f"duration={args.duration} {iterations} iterations",
-                          fontsize=14, wrap=True)
-                plt.grid(axis='both', alpha=.3)
-                plt.ylabel(f"llamacpp:{metric}", fontsize=22)
-                plt.xlabel(f"{args.hf_repo}/{args.hf_file}\n"
-                           f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size}\n"
-                           f"pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n"
-                           f"branch={args.branch} commit={args.commit}", fontsize=14, wrap=True)
-                plt.gca().xaxis.set_major_locator(matplotlib.dates.MinuteLocator())
-                plt.gca().xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y%m%d %H:%M:%S"))
-                plt.gcf().autofmt_xdate()
-
-                # Remove borders
-                plt.gca().spines["top"].set_alpha(0.0)
-                plt.gca().spines["bottom"].set_alpha(0.3)
-                plt.gca().spines["right"].set_alpha(0.0)
-                plt.gca().spines["left"].set_alpha(0.3)
-
-                # Save the plot as a PNG image
-                plt.savefig(f'{metric}.png')
-                plt.close()
+        with open("results.github.env", 'a') as github_env:
+            for metric in metrics:
+                resp = requests.get(f"http://localhost:9090/api/v1/query_range",
+                                    params={'query': 'llamacpp:' + metric, 'start': start_time, 'end': end_time, 'step': 2})
+
+                with open(f"{metric}.json", 'w') as metric_json:
+                    metric_json.write(resp.text)
+
+                if resp.status_code != 200:
+                    print(f"bench: unable to extract prometheus metric {metric}: {resp.text}")
+                else:
+                    metric_data = resp.json()
+                    values = metric_data['data']['result'][0]['values']
+                    timestamps, metric_values = zip(*values)
+                    metric_values = [float(value) for value in metric_values]
+                    timestamps_dt = [datetime.fromtimestamp(int(ts)) for ts in timestamps]
+                    plt.figure(figsize=(16, 10), dpi=80)
+                    plt.plot(timestamps_dt, metric_values, label=metric)
+                    plt.xticks(rotation=0, fontsize=14, horizontalalignment='center', alpha=.7)
+                    plt.yticks(fontsize=12, alpha=.7)
+
+                    title = (f"llama.cpp {args.name} on {args.runner_label}\n "
+                             f"duration={args.duration} {iterations} iterations")
+                    ylabel = f"llamacpp:{metric}"
+                    xlabel = (f"{args.hf_repo}/{args.hf_file}\n"
+                              f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size} pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n"
+                              f"branch={args.branch} commit={args.commit}")
+                    plt.title(title,
+                              fontsize=14, wrap=True)
+                    plt.grid(axis='both', alpha=.3)
+                    plt.ylabel(ylabel, fontsize=22)
+                    plt.xlabel(xlabel, fontsize=14, wrap=True)
+                    plt.gca().xaxis.set_major_locator(matplotlib.dates.MinuteLocator())
+                    plt.gca().xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y%m%d %H:%M:%S"))
+                    plt.gcf().autofmt_xdate()
+
+                    # Remove borders
+                    plt.gca().spines["top"].set_alpha(0.0)
+                    plt.gca().spines["bottom"].set_alpha(0.3)
+                    plt.gca().spines["right"].set_alpha(0.0)
+                    plt.gca().spines["left"].set_alpha(0.3)
+
+                    # Save the plot as a PNG image
+                    plt.savefig(f'{metric}.png')
+                    plt.close()
+
+                    # Mermaid format in case image failed
+                    mermaid = f"""```mermaid
+                    xychart-beta
+                        title "{title}"
+                        x-axis "{xlabel}" ["{'", "'.join([datetime.fromtimestamp(int(ts)).strftime("%Y%m%d %H:%M:%S") for ts in timestamps])}"]
+                        y-axis "{ylabel}"
+                        line [{', '.join([str(round(float(value))) for value in metric_values])}]
+                    ```
+                    """
+                    mermaid = mermaid.replace('\n', "<br>")
+                    github_env.write(f"{metric.upper()}={mermaid}\n")
 
     # 140 chars max for commit status description
     bench_results = {
@@ -170,6 +184,11 @@ def main(args_in: list[str] | None = None) -> None:
         github_env.write(f"BENCH_RESULTS={json.dumps(bench_results, indent=None, separators=(',', ':') )}\n")
         github_env.write(f"BENCH_ITERATIONS={iterations}\n")
 
+        title = title.replace('\n', '<br>')
+        ylabel = ylabel.replace('\n', '<br>')
+        github_env.write(f"BENCH_GRAPH_TITLE={title}\n")
+        github_env.write(f"BENCH_GRAPH_YLABEL={ylabel}\n")
+
 
 def start_benchmark(args):
     k6_path = 'k6'