ci: bench: add mermaid in case the image cannot be uploaded

phymbert · phymbert · commit 93434fdc7e98 · 2024-03-26T01:08:59.000+01:00
diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
@@ -117,6 +117,18 @@ jobs:
 
           cat results.github.env >> $GITHUB_ENV
 
+          # Remove dataset as we do not want it in the artefact
+          rm ShareGPT_V3_unfiltered_cleaned_split.json
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-results
+          compression-level: 9
+          path: |
+            examples/server/bench/*.png
+            examples/server/bench/*.json
+            examples/server/bench/*.log
+
       - name: Commit status
         uses: Sibz/github-status-action@v1
         with:
@@ -128,6 +140,7 @@ jobs:
 
       - name: Upload benchmark images
         uses: devicons/public-upload-to-imgur@v2.2.2
+        continue-on-error: true # Important: the imgur upload looks unstable (503 errors)
         id: imgur_step
         with:
           client_id: ${{secrets.IMGUR_CLIENT_ID}}
@@ -136,44 +149,95 @@ jobs:
             examples/server/bench/predicted_tokens_seconds.png
             examples/server/bench/kv_cache_usage_ratio.png
             examples/server/bench/requests_processing.png
-            examples/server/bench/requests_deferred.png
+
+      - name: Extract mermaid
+        id: set_mermaid
+        run: |
+          set -eux
+
+          cd examples/server/bench
+          PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
+          echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
+          echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
+          echo "EOF" >> $GITHUB_ENV
+
+          PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid)
+          echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
+          echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV
+          echo "EOF" >> $GITHUB_ENV
+
+          KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid)
+          echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV
+          echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV
+          echo "EOF" >> $GITHUB_ENV
+
+          REQUESTS_PROCESSING=$(cat requests_processing.mermaid)
+          echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV
+          echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
+          echo "EOF" >> $GITHUB_ENV
 
       - name: Comment PR
         uses: mshick/add-pr-comment@v2
         id: comment_pr
         if: ${{ github.event.pull_request != '' }}
+        continue-on-error: true
         with:
           message-id: bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
           message: |
             📈 **llama.cpp server** benchmark for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
+
+            - ${{ env.BENCH_GRAPH_XLABEL }}
+            - req_avg=${{ env.HTTP_REQ_DURATION_AVG }} pp_avg=${{ env.LLAMACPP_PROMPT_TOKENS_AVG }} tks_avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}
+            
+            
             <p align="center">
-                <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" alt="prompt_tokens_seconds" />
-                <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" alt="predicted_tokens_seconds"/>
+            <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" alt="prompt_tokens_seconds" />
+
+            <details>
+                <summary>More</summary>
+
+            ```mermaid
+            ${{ env.PROMPT_TOKENS_SECONDS }}
+            ```
+
+            </details>
+
+            <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" alt="predicted_tokens_seconds"/>
+
+            <details>
+                <summary>More</summary>
+
+            ```mermaid
+            ${{ env.PREDICTED_TOKENS_SECONDS }}
+            ```
+
+            </details>
+
             </p>
             <details>
                 <summary>Details</summary>
                 <p align="center">
-                    <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" alt="kv_cache_usage_ratio" />
-                    <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" alt="requests_processing"/>
-                    <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[4] }}" alt="requests_deferred"/>
-                </p>
-            </detail>
-
-      - name: Upload results
-        if: ${{ github.event.pull_request }}
-        uses: edunad/actions-image@v2.0.0
-        with:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          path: 'examples/server/bench/*.png'
-          title: |
-            llama.cpp server benchmark results for ${{ github.job }} on ${{ env.RUNNER_LABEL }}: ${{ env.LLAMACPP_TOKENS_SECOND_AVG}}tk/s
-          annotationLevel: 'success'
+            <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" alt="kv_cache_usage_ratio" />
 
-      - uses: actions/upload-artifact@v4
-        with:
-          name: benchmark-results
-          compression-level: 9
-          path: |
-            examples/server/bench/*.png
-            examples/server/bench/*.json
-            examples/server/bench/*.log
+            <details>
+                <summary>More</summary>
+
+            ```mermaid
+            ${{ env.KV_CACHE_USAGE_RATIO }}
+            ```
+
+            </details>
+
+            <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" alt="requests_processing"/>
+
+            <details>
+                <summary>More</summary>
+
+            ```mermaid
+            ${{ env.REQUESTS_PROCESSING }}
+            ```
+
+            </details>
+            
+            </p>
+            </details>
diff --git a/examples/server/bench/bench.py b/examples/server/bench/bench.py
@@ -101,6 +101,12 @@ def main(args_in: list[str] | None = None) -> None:
         while is_server_listening(args.host, args.port):
             time.sleep(0.1)
 
+    title = (f"llama.cpp {args.name} on {args.runner_label}\n "
+             f"duration={args.duration} {iterations} iterations")
+    xlabel = (f"{args.hf_repo}/{args.hf_file}\n"
+              f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size} pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n"
+              f"branch={args.branch} commit={args.commit}")
+
     # Prometheus
     end_time = time.time()
     if is_server_listening("0.0.0.0", 9090):
@@ -121,23 +127,20 @@ def main(args_in: list[str] | None = None) -> None:
                 values = metric_data['data']['result'][0]['values']
                 timestamps, metric_values = zip(*values)
                 metric_values = [float(value) for value in metric_values]
-                timestamps = [datetime.fromtimestamp(int(ts)) for ts in timestamps]
+                timestamps_dt = [datetime.fromtimestamp(int(ts)) for ts in timestamps]
                 plt.figure(figsize=(16, 10), dpi=80)
-                plt.plot(timestamps, metric_values, label=metric)
+                plt.plot(timestamps_dt, metric_values, label=metric)
                 plt.xticks(rotation=0, fontsize=14, horizontalalignment='center', alpha=.7)
                 plt.yticks(fontsize=12, alpha=.7)
 
-                plt.title(f"llama.cpp {args.name} on {args.runner_label}\n"
-                          f"duration={args.duration} {iterations} iterations",
+                ylabel = f"llamacpp:{metric}"
+                plt.title(title,
                           fontsize=14, wrap=True)
                 plt.grid(axis='both', alpha=.3)
-                plt.ylabel(f"llamacpp:{metric}", fontsize=22)
-                plt.xlabel(f"{args.hf_repo}/{args.hf_file}\n"
-                           f"parallel={args.parallel} ctx-size={args.ctx_size} ngl={args.n_gpu_layers} batch-size={args.batch_size} ubatch-size={args.ubatch_size}\n"
-                           f"pp={args.max_prompt_tokens} pp+tg={args.max_tokens}\n"
-                           f"branch={args.branch} commit={args.commit}", fontsize=14, wrap=True)
+                plt.ylabel(ylabel, fontsize=22)
+                plt.xlabel(xlabel, fontsize=14, wrap=True)
                 plt.gca().xaxis.set_major_locator(matplotlib.dates.MinuteLocator())
-                plt.gca().xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y%m%d %H:%M:%S"))
+                plt.gca().xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y-%m-%d %H:%M:%S"))
                 plt.gcf().autofmt_xdate()
 
                 # Remove borders
@@ -150,6 +153,27 @@ def main(args_in: list[str] | None = None) -> None:
                 plt.savefig(f'{metric}.png')
                 plt.close()
 
+                # Also write the chart in Mermaid format, as a fallback in case the image upload fails
+                with (open(f"{metric}.mermaid", 'w') as mermaid_f):
+                    mermaid = (
+                    f"""---
+config:
+    xyChart:
+        titleFontSize: 12
+        width: 900
+        height: 600
+    themeVariables:
+        xyChart:
+            titleColor: "#000000"
+---
+xychart-beta
+    title "{title}"
+    y-axis "llamacpp:{metric}"
+    x-axis "llamacpp:{metric}" {int(min(timestamps))} --> {int(max(timestamps))}
+    line [{', '.join([str(round(float(value))) for value in metric_values])}]
+                    """)
+                    mermaid_f.write(mermaid)
+
     # 140 chars max for commit status description
     bench_results = {
         "req": {
@@ -169,6 +193,11 @@ def main(args_in: list[str] | None = None) -> None:
         github_env.write(f"BENCH_RESULTS={json.dumps(bench_results, indent=None, separators=(',', ':') )}\n")
         github_env.write(f"BENCH_ITERATIONS={iterations}\n")
 
+        title = title.replace('\n', ' ')
+        xlabel = xlabel.replace('\n', ' ')
+        github_env.write(f"BENCH_GRAPH_TITLE={title}\n")
+        github_env.write(f"BENCH_GRAPH_XLABEL={xlabel}\n")
+
 
 def start_benchmark(args):
     k6_path = 'k6'