whisper : remove extra backend instance from whisper_state, use the context's backend instead (huh?)

ggerganov · ggerganov · commit 4caa64b73ed4 · 2024-05-14T19:17:07.000+03:00
diff --git a/whisper.cpp b/whisper.cpp
@@ -819,8 +819,6 @@ struct whisper_state {
 
     whisper_decoder decoders[WHISPER_MAX_DECODERS];
 
-    ggml_backend_t backend = nullptr;
-
     // ggml-alloc:
     // - stores meta info about the intermediate tensors into the `meta` buffers
     // - stores the actual tensor data into the `data` buffers
@@ -2240,7 +2238,7 @@ static bool whisper_encode_internal(
         }
 
         if (!whisper_encode_external(wstate)) {
-            if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+            if (!ggml_graph_compute_helper(wctx.backend, gf, n_threads)) {
                 return false;
             }
         } else {
@@ -2263,7 +2261,7 @@ static bool whisper_encode_internal(
             return false;
         }
 
-        if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+        if (!ggml_graph_compute_helper(wctx.backend, gf, n_threads)) {
             return false;
         }
     }
@@ -2279,7 +2277,7 @@ static bool whisper_encode_internal(
             return false;
         }
 
-        if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+        if (!ggml_graph_compute_helper(wctx.backend, gf, n_threads)) {
             return false;
         }
     }
@@ -2744,7 +2742,7 @@ static bool whisper_decode_internal(
 
         logits = gf->nodes[gf->n_nodes - 1];
 
-        if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
+        if (!ggml_graph_compute_helper(wctx.backend, gf, n_threads)) {
             return false;
         }
     }
@@ -3191,13 +3189,6 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {
 
     whisper_state * state = new whisper_state;
 
-    state->backend = whisper_backend_init(ctx->params);
-    if (!state->backend) {
-        WHISPER_LOG_ERROR("%s: whisper_backend_init() failed\n", __func__);
-        whisper_free_state(state);
-        return nullptr;
-    }
-
     // at this point, we don't know yet how many decoders will be used, so we overallocate 3x ctx
     // in theory, there can be a case where this is not enough, but in practice it should always be enough
     const int factor = 3;
@@ -3623,8 +3614,6 @@ void whisper_free_state(struct whisper_state * state) {
         ggml_gallocr_free(state->alloc_cross.alloc);
         ggml_gallocr_free(state->alloc_decode.alloc);
 
-        ggml_backend_free(state->backend);
-
         // [EXPERIMENTAL] Token-level timestamps with DTW
         aheads_masks_free(state->aheads_masks);
 

Original file line number	Diff line number	Diff line change
`@@ -819,8 +819,6 @@ struct whisper_state {`
`819`	`819`
`820`	`820`	`whisper_decoder decoders[WHISPER_MAX_DECODERS];`
`821`	`821`
`822`		`- ggml_backend_t backend = nullptr;`
`823`		`-`
`824`	`822`	`// ggml-alloc:`
`825`	`823`	// - stores meta info about the intermediate tensors into the `meta` buffers
`826`	`824`	// - stores the actual tensor data into the `data` buffers
`@@ -2240,7 +2238,7 @@ static bool whisper_encode_internal(`
`2240`	`2238`	`}`
`2241`	`2239`
`2242`	`2240`	`if (!whisper_encode_external(wstate)) {`
`2243`		`- if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {`
	`2241`	`+ if (!ggml_graph_compute_helper(wctx.backend, gf, n_threads)) {`
`2244`	`2242`	`return false;`
`2245`	`2243`	`}`
`2246`	`2244`	`} else {`
`@@ -2263,7 +2261,7 @@ static bool whisper_encode_internal(`
`2263`	`2261`	`return false;`
`2264`	`2262`	`}`
`2265`	`2263`
`2266`		`- if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {`
	`2264`	`+ if (!ggml_graph_compute_helper(wctx.backend, gf, n_threads)) {`
`2267`	`2265`	`return false;`
`2268`	`2266`	`}`
`2269`	`2267`	`}`
`@@ -2279,7 +2277,7 @@ static bool whisper_encode_internal(`
`2279`	`2277`	`return false;`
`2280`	`2278`	`}`
`2281`	`2279`
`2282`		`- if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {`
	`2280`	`+ if (!ggml_graph_compute_helper(wctx.backend, gf, n_threads)) {`
`2283`	`2281`	`return false;`
`2284`	`2282`	`}`
`2285`	`2283`	`}`
`@@ -2744,7 +2742,7 @@ static bool whisper_decode_internal(`
`2744`	`2742`
`2745`	`2743`	`logits = gf->nodes[gf->n_nodes - 1];`
`2746`	`2744`
`2747`		`- if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {`
	`2745`	`+ if (!ggml_graph_compute_helper(wctx.backend, gf, n_threads)) {`
`2748`	`2746`	`return false;`
`2749`	`2747`	`}`
`2750`	`2748`	`}`
`@@ -3191,13 +3189,6 @@ struct whisper_state * whisper_init_state(whisper_context * ctx) {`
`3191`	`3189`
`3192`	`3190`	`whisper_state * state = new whisper_state;`
`3193`	`3191`
`3194`		`- state->backend = whisper_backend_init(ctx->params);`
`3195`		`- if (!state->backend) {`
`3196`		`- WHISPER_LOG_ERROR("%s: whisper_backend_init() failed\n", __func__);`
`3197`		`- whisper_free_state(state);`
`3198`		`- return nullptr;`
`3199`		`- }`
`3200`		`-`
`3201`	`3192`	`// at this point, we don't know yet how many decoders will be used, so we overallocate 3x ctx`
`3202`	`3193`	`// in theory, there can be a case where this is not enough, but in practice it should always be enough`
`3203`	`3194`	`const int factor = 3;`
`@@ -3623,8 +3614,6 @@ void whisper_free_state(struct whisper_state * state) {`
`3623`	`3614`	`ggml_gallocr_free(state->alloc_cross.alloc);`
`3624`	`3615`	`ggml_gallocr_free(state->alloc_decode.alloc);`
`3625`	`3616`
`3626`		`- ggml_backend_free(state->backend);`
`3627`		`-`
`3628`	`3617`	`// [EXPERIMENTAL] Token-level timestamps with DTW`
`3629`	`3618`	`aheads_masks_free(state->aheads_masks);`
`3630`	`3619`