@@ -107,49 +107,63 @@ def main(args_in: list[str] | None = None) -> None:
107
107
metrics = ['prompt_tokens_seconds' , 'predicted_tokens_seconds' ,
108
108
'kv_cache_usage_ratio' , 'requests_processing' , 'requests_deferred' ]
109
109
110
- for metric in metrics :
111
- resp = requests .get (f"http://localhost:9090/api/v1/query_range" ,
112
- params = {'query' : 'llamacpp:' + metric , 'start' : start_time , 'end' : end_time , 'step' : 2 })
113
-
114
- with open (f"{ metric } .json" , 'w' ) as metric_json :
115
- metric_json .write (resp .text )
116
-
117
- if resp .status_code != 200 :
118
- print (f"bench: unable to extract prometheus metric { metric } : { resp .text } " )
119
- else :
120
- metric_data = resp .json ()
121
- values = metric_data ['data' ]['result' ][0 ]['values' ]
122
- timestamps , metric_values = zip (* values )
123
- metric_values = [float (value ) for value in metric_values ]
124
- timestamps = [datetime .fromtimestamp (int (ts )) for ts in timestamps ]
125
- plt .figure (figsize = (16 , 10 ), dpi = 80 )
126
- plt .plot (timestamps , metric_values , label = metric )
127
- plt .xticks (rotation = 0 , fontsize = 14 , horizontalalignment = 'center' , alpha = .7 )
128
- plt .yticks (fontsize = 12 , alpha = .7 )
129
-
130
-
131
- plt .title (f"llama.cpp { args .name } on { args .runner_label } \n "
132
- f"duration={ args .duration } { iterations } iterations" ,
133
- fontsize = 14 , wrap = True )
134
- plt .grid (axis = 'both' , alpha = .3 )
135
- plt .ylabel (f"llamacpp:{ metric } " , fontsize = 22 )
136
- plt .xlabel (f"{ args .hf_repo } /{ args .hf_file } \n "
137
- f"parallel={ args .parallel } ctx-size={ args .ctx_size } ngl={ args .n_gpu_layers } batch-size={ args .batch_size } ubatch-size={ args .ubatch_size } \n "
138
- f"pp={ args .max_prompt_tokens } pp+tg={ args .max_tokens } \n "
139
- f"branch={ args .branch } commit={ args .commit } " , fontsize = 14 , wrap = True )
140
- plt .gca ().xaxis .set_major_locator (matplotlib .dates .MinuteLocator ())
141
- plt .gca ().xaxis .set_major_formatter (matplotlib .dates .DateFormatter ("%Y%m%d %H:%M:%S" ))
142
- plt .gcf ().autofmt_xdate ()
143
-
144
- # Remove borders
145
- plt .gca ().spines ["top" ].set_alpha (0.0 )
146
- plt .gca ().spines ["bottom" ].set_alpha (0.3 )
147
- plt .gca ().spines ["right" ].set_alpha (0.0 )
148
- plt .gca ().spines ["left" ].set_alpha (0.3 )
149
-
150
- # Save the plot as a PNG image
151
- plt .savefig (f'{ metric } .png' )
152
- plt .close ()
110
+ with open ("results.github.env" , 'a' ) as github_env :
111
+ for metric in metrics :
112
+ resp = requests .get (f"http://localhost:9090/api/v1/query_range" ,
113
+ params = {'query' : 'llamacpp:' + metric , 'start' : start_time , 'end' : end_time , 'step' : 2 })
114
+
115
+ with open (f"{ metric } .json" , 'w' ) as metric_json :
116
+ metric_json .write (resp .text )
117
+
118
+ if resp .status_code != 200 :
119
+ print (f"bench: unable to extract prometheus metric { metric } : { resp .text } " )
120
+ else :
121
+ metric_data = resp .json ()
122
+ values = metric_data ['data' ]['result' ][0 ]['values' ]
123
+ timestamps , metric_values = zip (* values )
124
+ metric_values = [float (value ) for value in metric_values ]
125
+ timestamps_dt = [datetime .fromtimestamp (int (ts )) for ts in timestamps ]
126
+ plt .figure (figsize = (16 , 10 ), dpi = 80 )
127
+ plt .plot (timestamps_dt , metric_values , label = metric )
128
+ plt .xticks (rotation = 0 , fontsize = 14 , horizontalalignment = 'center' , alpha = .7 )
129
+ plt .yticks (fontsize = 12 , alpha = .7 )
130
+
131
+ title = (f"llama.cpp { args .name } on { args .runner_label } \n "
132
+ f"duration={ args .duration } { iterations } iterations" )
133
+ ylabel = f"llamacpp:{ metric } "
134
+ xlabel = (f"{ args .hf_repo } /{ args .hf_file } \n "
135
+ f"parallel={ args .parallel } ctx-size={ args .ctx_size } ngl={ args .n_gpu_layers } batch-size={ args .batch_size } ubatch-size={ args .ubatch_size } pp={ args .max_prompt_tokens } pp+tg={ args .max_tokens } \n "
136
+ f"branch={ args .branch } commit={ args .commit } " )
137
+ plt .title (title ,
138
+ fontsize = 14 , wrap = True )
139
+ plt .grid (axis = 'both' , alpha = .3 )
140
+ plt .ylabel (ylabel , fontsize = 22 )
141
+ plt .xlabel (xlabel , fontsize = 14 , wrap = True )
142
+ plt .gca ().xaxis .set_major_locator (matplotlib .dates .MinuteLocator ())
143
+ plt .gca ().xaxis .set_major_formatter (matplotlib .dates .DateFormatter ("%Y%m%d %H:%M:%S" ))
144
+ plt .gcf ().autofmt_xdate ()
145
+
146
+ # Remove borders
147
+ plt .gca ().spines ["top" ].set_alpha (0.0 )
148
+ plt .gca ().spines ["bottom" ].set_alpha (0.3 )
149
+ plt .gca ().spines ["right" ].set_alpha (0.0 )
150
+ plt .gca ().spines ["left" ].set_alpha (0.3 )
151
+
152
+ # Save the plot as a PNG image
153
+ plt .savefig (f'{ metric } .png' )
154
+ plt .close ()
155
+
156
+ # Mermaid format in case image failed
157
+ mermaid = f"""```mermaid
158
+ xychart-beta
159
+ title "{ title } "
160
+ x-axis "{ xlabel } " ["{ '", "' .join ([datetime .fromtimestamp (int (ts )).strftime ("%Y%m%d %H:%M:%S" ) for ts in timestamps ])} "]
161
+ y-axis "{ ylabel } "
162
+ line [{ ', ' .join ([str (round (float (value ))) for value in metric_values ])} ]
163
+ ```
164
+ """
165
+ mermaid = mermaid .replace ('\n ' , "<br>" )
166
+ github_env .write (f"{ metric .upper ()} ={ mermaid } \n " )
153
167
154
168
# 140 chars max for commit status description
155
169
bench_results = {
@@ -170,6 +184,11 @@ def main(args_in: list[str] | None = None) -> None:
170
184
github_env .write (f"BENCH_RESULTS={ json .dumps (bench_results , indent = None , separators = (',' , ':' ) )} \n " )
171
185
github_env .write (f"BENCH_ITERATIONS={ iterations } \n " )
172
186
187
+ title = title .replace ('\n ' , '<br>' )
188
+ ylabel = ylabel .replace ('\n ' , '<br>' )
189
+ github_env .write (f"BENCH_GRAPH_TITLE={ title } \n " )
190
+ github_env .write (f"BENCH_GRAPH_YLABEL={ ylabel } \n " )
191
+
173
192
174
193
def start_benchmark (args ):
175
194
k6_path = 'k6'
0 commit comments