@@ -117,6 +117,18 @@ jobs:
117
117
118
118
cat results.github.env >> $GITHUB_ENV
119
119
120
+ # Remove dataset as we do not want it in the artefact
121
+ rm ShareGPT_V3_unfiltered_cleaned_split.json
122
+
123
+ - uses : actions/upload-artifact@v4
124
+ with :
125
+ name : benchmark-results
126
+ compression-level : 9
127
+ path : |
128
+ examples/server/bench/*.png
129
+ examples/server/bench/*.json
130
+ examples/server/bench/*.log
131
+
120
132
- name : Commit status
121
133
uses : Sibz/github-status-action@v1
122
134
with :
@@ -128,6 +140,7 @@ jobs:
128
140
129
141
- name : Upload benchmark images
130
142
143
+ continue-on-error : true # Important as it looks unstable: 503
131
144
id : imgur_step
132
145
with :
133
146
client_id : ${{secrets.IMGUR_CLIENT_ID}}
@@ -136,44 +149,95 @@ jobs:
136
149
examples/server/bench/predicted_tokens_seconds.png
137
150
examples/server/bench/kv_cache_usage_ratio.png
138
151
examples/server/bench/requests_processing.png
139
- examples/server/bench/requests_deferred.png
152
+
153
+ - name : Extract mermaid
154
+ id : set_mermaid
155
+ run : |
156
+ set -eux
157
+
158
+ cd examples/server/bench
159
+ PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
160
+ echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
161
+ echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
162
+ echo "EOF" >> $GITHUB_ENV
163
+
164
+ PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid)
165
+ echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
166
+ echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV
167
+ echo "EOF" >> $GITHUB_ENV
168
+
169
+ KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid)
170
+ echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV
171
+ echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV
172
+ echo "EOF" >> $GITHUB_ENV
173
+
174
+ REQUESTS_PROCESSING=$(cat requests_processing.mermaid)
175
+ echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV
176
+ echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
177
+ echo "EOF" >> $GITHUB_ENV
140
178
141
179
- name : Comment PR
142
180
uses : mshick/add-pr-comment@v2
143
181
id : comment_pr
144
182
if : ${{ github.event.pull_request != '' }}
183
+ continue-on-error : true
145
184
with :
146
185
message-id : bench-${{ github.job }}-${{ env.RUNNER_LABEL }}
147
186
message : |
148
187
📈 **llama.cpp server** benchmark for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
188
+
189
+ - ${{ env.BENCH_GRAPH_XLABEL }}
190
+ - req_avg=${{ env.HTTP_REQ_DURATION_AVG }} pp_avg=${{ env.LLAMACPP_PROMPT_TOKENS_AVG }} tks_avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}
191
+
192
+
149
193
<p align="center">
150
- <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" alt="prompt_tokens_seconds" />
151
- <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" alt="predicted_tokens_seconds"/>
194
+ <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" alt="prompt_tokens_seconds" />
195
+
196
+ <details>
197
+ <summary>More</summary>
198
+
199
+ ```mermaid
200
+ ${{ env.PROMPT_TOKENS_SECONDS }}
201
+ ```
202
+
203
+ </details>
204
+
205
+ <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" alt="predicted_tokens_seconds"/>
206
+
207
+ <details>
208
+ <summary>More</summary>
209
+
210
+ ```mermaid
211
+ ${{ env.PREDICTED_TOKENS_SECONDS }}
212
+ ```
213
+
214
+ </details>
215
+
152
216
</p>
153
217
<details>
154
218
<summary>Details</summary>
155
219
<p align="center">
156
- <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" alt="kv_cache_usage_ratio" />
157
- <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" alt="requests_processing"/>
158
- <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[4] }}" alt="requests_deferred"/>
159
- </p>
160
- </detail>
161
-
162
- - name : Upload results
163
- if : ${{ github.event.pull_request }}
164
-
165
- with :
166
- GITHUB_TOKEN : ${{ secrets.GITHUB_TOKEN }}
167
- path : ' examples/server/bench/*.png'
168
- title : |
169
- llama.cpp server benchmark results for ${{ github.job }} on ${{ env.RUNNER_LABEL }}: ${{ env.LLAMACPP_TOKENS_SECOND_AVG}}tk/s
170
- annotationLevel : ' success'
220
+ <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" alt="kv_cache_usage_ratio" />
171
221
172
- - uses : actions/upload-artifact@v4
173
- with :
174
- name : benchmark-results
175
- compression-level : 9
176
- path : |
177
- examples/server/bench/*.png
178
- examples/server/bench/*.json
179
- examples/server/bench/*.log
222
+ <details>
223
+ <summary>More</summary>
224
+
225
+ ```mermaid
226
+ ${{ env.KV_CACHE_USAGE_RATIO }}
227
+ ```
228
+
229
+ </details>
230
+
231
+ <img width="100%" height="100%" src="${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" alt="requests_processing"/>
232
+
233
+ <details>
234
+ <summary>More</summary>
235
+
236
+ ```mermaid
237
+ ${{ env.REQUESTS_PROCESSING }}
238
+ ```
239
+
240
+ </details>
241
+
242
+ </p>
243
+ </details>
0 commit comments