@@ -819,8 +819,6 @@ struct whisper_state {
819
819
820
820
whisper_decoder decoders[WHISPER_MAX_DECODERS];
821
821
822
- ggml_backend_t backend = nullptr ;
823
-
824
822
// ggml-alloc:
825
823
// - stores meta info about the intermediate tensors into the `meta` buffers
826
824
// - stores the actual tensor data into the `data` buffers
@@ -2240,7 +2238,7 @@ static bool whisper_encode_internal(
2240
2238
}
2241
2239
2242
2240
if (!whisper_encode_external (wstate)) {
2243
- if (!ggml_graph_compute_helper (wstate .backend , gf, n_threads)) {
2241
+ if (!ggml_graph_compute_helper (wctx .backend , gf, n_threads)) {
2244
2242
return false ;
2245
2243
}
2246
2244
} else {
@@ -2263,7 +2261,7 @@ static bool whisper_encode_internal(
2263
2261
return false ;
2264
2262
}
2265
2263
2266
- if (!ggml_graph_compute_helper (wstate .backend , gf, n_threads)) {
2264
+ if (!ggml_graph_compute_helper (wctx .backend , gf, n_threads)) {
2267
2265
return false ;
2268
2266
}
2269
2267
}
@@ -2279,7 +2277,7 @@ static bool whisper_encode_internal(
2279
2277
return false ;
2280
2278
}
2281
2279
2282
- if (!ggml_graph_compute_helper (wstate .backend , gf, n_threads)) {
2280
+ if (!ggml_graph_compute_helper (wctx .backend , gf, n_threads)) {
2283
2281
return false ;
2284
2282
}
2285
2283
}
@@ -2744,7 +2742,7 @@ static bool whisper_decode_internal(
2744
2742
2745
2743
logits = gf->nodes [gf->n_nodes - 1 ];
2746
2744
2747
- if (!ggml_graph_compute_helper (wstate .backend , gf, n_threads)) {
2745
+ if (!ggml_graph_compute_helper (wctx .backend , gf, n_threads)) {
2748
2746
return false ;
2749
2747
}
2750
2748
}
@@ -3191,13 +3189,6 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
3191
3189
3192
3190
whisper_state * state = new whisper_state;
3193
3191
3194
- state->backend = whisper_backend_init (ctx->params );
3195
- if (!state->backend ) {
3196
- WHISPER_LOG_ERROR (" %s: whisper_backend_init() failed\n " , __func__);
3197
- whisper_free_state (state);
3198
- return nullptr ;
3199
- }
3200
-
3201
3192
// at this point, we don't know yet how many decoders will be used, so we overallocate 3x ctx
3202
3193
// in theory, there can be a case where this is not enough, but in practice it should always be enough
3203
3194
const int factor = 3 ;
@@ -3623,8 +3614,6 @@ void whisper_free_state(struct whisper_state * state) {
3623
3614
ggml_gallocr_free (state->alloc_cross .alloc );
3624
3615
ggml_gallocr_free (state->alloc_decode .alloc );
3625
3616
3626
- ggml_backend_free (state->backend );
3627
-
3628
3617
// [EXPERIMENTAL] Token-level timestamps with DTW
3629
3618
aheads_masks_free (state->aheads_masks );
3630
3619
0 commit comments