Skip to content

Commit 50e3c89

Browse files
ggerganov authored and iThalay committed
whisper : fix external encoder (ggml-org#1860)
1 parent e8a0cd2 commit 50e3c89

File tree

1 file changed

+9
-32
lines changed

1 file changed

+9
-32
lines changed

whisper.cpp

Lines changed: 9 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1659,22 +1659,9 @@ static struct ggml_cgraph * whisper_build_graph_conv(
16591659
ggml_set_name(cur, "embd_conv");
16601660
wstate.embd_conv = cur;
16611661
} else {
1662-
#ifdef WHISPER_USE_COREML
1663-
cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx);
1664-
ggml_allocr_alloc(alloc, cur);
1662+
ggml_build_forward_expand(gf, mel);
16651663

1666-
if (!ggml_allocr_is_measure(alloc)) {
1667-
whisper_coreml_encode(wstate.ctx_coreml, mel->ne[0], mel->ne[1], (float *) mel->data, (float *) cur->data);
1668-
}
1669-
#endif
1670-
#ifdef WHISPER_USE_OPENVINO
16711664
cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx);
1672-
ggml_allocr_alloc(alloc, cur);
1673-
1674-
if (!ggml_allocr_is_measure(alloc)) {
1675-
whisper_openvino_encode(wstate.ctx_openvino, mel, cur);
1676-
}
1677-
#endif
16781665

16791666
ggml_set_name(cur, "embd_enc");
16801667
wstate.embd_enc = cur;
@@ -1708,14 +1695,6 @@ static struct ggml_cgraph * whisper_build_graph_encoder(
17081695

17091696
ggml_cgraph * gf = ggml_new_graph_custom(ctx0, WHISPER_MAX_NODES, false);
17101697

1711-
//ggml_allocr * alloc = wstate.alloc_encode.alloc;
1712-
1713-
//struct ggml_tensor * cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_ctx, n_state);
1714-
//ggml_allocr_alloc(alloc, cur);
1715-
1716-
//if (!ggml_allocr_is_measure(alloc)) {
1717-
// ggml_backend_tensor_copy(wstate.embd_conv, cur);
1718-
//}
17191698
struct ggml_tensor * cur = ggml_view_tensor(ctx0, wstate.embd_conv);
17201699

17211700
const float KQscale = 1.0f/sqrtf(float(n_state)/n_head);
@@ -1957,14 +1936,6 @@ static struct ggml_cgraph * whisper_build_graph_cross(
19571936

19581937
ggml_cgraph * gf = ggml_new_graph(ctx0);
19591938

1960-
//ggml_allocr * alloc = wstate.alloc_cross.alloc;
1961-
1962-
//struct ggml_tensor * cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx);
1963-
//ggml_allocr_alloc(alloc, cur);
1964-
1965-
//if (!ggml_allocr_is_measure(alloc)) {
1966-
// ggml_backend_tensor_copy(wstate.embd_enc, cur);
1967-
//}
19681939
struct ggml_tensor * cur = ggml_view_tensor(ctx0, wstate.embd_enc);
19691940

19701941
const float Kscale = pow(float(n_state) / n_head, -0.25);
@@ -2037,13 +2008,13 @@ static bool whisper_encode_internal(
20372008
return false;
20382009
}
20392010

2011+
struct ggml_tensor * mel = ggml_graph_get_tensor(gf, "mel");
2012+
20402013
// set the input
20412014
{
20422015
const auto & mel_inp = wstate.mel;
20432016
const int n_ctx = wstate.exp_n_audio_ctx > 0 ? wstate.exp_n_audio_ctx : wctx.model.hparams.n_audio_ctx;
20442017

2045-
struct ggml_tensor * mel = ggml_graph_get_tensor(gf, "mel");
2046-
20472018
assert(mel->type == GGML_TYPE_F32);
20482019
assert(mel_inp.n_mel == wctx.model.hparams.n_mels);
20492020

@@ -2068,6 +2039,12 @@ static bool whisper_encode_internal(
20682039
if (!ggml_graph_compute_helper(wstate.backend, gf, n_threads)) {
20692040
return false;
20702041
}
2042+
} else {
2043+
#if defined(WHISPER_USE_COREML)
2044+
whisper_coreml_encode(wstate.ctx_coreml, mel->ne[0], mel->ne[1], (float *) mel->data, (float *) wstate.embd_enc->data);
2045+
#elif defined(WHISPER_USE_OPENVINO)
2046+
whisper_openvino_encode(wstate.ctx_openvino, mel, wstate.embd_enc);
2047+
#endif
20712048
}
20722049
}
20732050

0 commit comments

Comments
 (0)