
Commit 60d0f9d

whisper : fix FF + remove it from README

1 parent: fd54eff

2 files changed (+3, -2 lines): README.md, whisper.cpp

README.md (1 addition, 1 deletion)

```diff
@@ -13,7 +13,7 @@ High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisp
 - AVX intrinsics support for x86 architectures
 - VSX intrinsics support for POWER architectures
 - Mixed F16 / F32 precision
-- Low memory usage (Flash Attention + Flash Forward)
+- Low memory usage (Flash Attention)
 - Zero memory allocations at runtime
 - Runs on the CPU
 - [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/whisper.h)
```

whisper.cpp (2 additions, 1 deletion)

```diff
@@ -619,6 +619,7 @@ struct whisper_context {
         buf_last = i;
 #else
         (void) i;
+        (void) ctx;
 #endif
     }
 
```
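The added `(void) ctx;` uses the standard C/C++ idiom for marking a parameter as intentionally unused: in the build where the scratch-buffer body of `use_buf` is compiled out, both parameters would otherwise trigger `-Wunused-parameter` warnings under `-Wall -Wextra`. A minimal sketch of the pattern; the `USE_SCRATCH` guard name here is a hypothetical stand-in for the actual build flag:

```c
#include <stddef.h>

struct ggml_context;  // opaque stand-in for the real ggml type

// When the guard is undefined, the body is compiled out; the (void) casts
// mark ctx and i as intentionally unused, keeping -Wall -Wextra quiet.
static void use_buf(struct ggml_context * ctx, int i) {
#if defined(USE_SCRATCH)  // hypothetical stand-in for the real guard macro
    // ... switch to scratch buffer i within ctx ...
#else
    (void) ctx;
    (void) i;
#endif
}

int main(void) {
    use_buf(NULL, 0);
    return 0;
}
```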
```diff
@@ -1631,7 +1632,7 @@ static bool whisper_encode(
         wctx.use_buf(ctx0, 0);
 
         cur = ggml_flash_ff(ctx0,
-                ggml_cpy(ctx0, cur, ggml_new_tensor_2d(ctx0, wctx.wtype, n_state, N)),
+                ggml_cpy(ctx0, cur, ggml_new_tensor_2d(ctx0, wctx.wtype, n_state, n_ctx)),
                 layer.mlp_0_w, layer.mlp_0_b, layer.mlp_1_w, layer.mlp_1_b);
 #else
         wctx.use_buf(ctx0, 0);
```
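At this point in the encoder, `cur` has shape `[n_state, n_ctx]`, so the `ggml_cpy` destination feeding `ggml_flash_ff` must be sized `n_state × n_ctx`; the removed line sized it with `N`, presumably a stale name from an earlier revision of `whisper_encode`, which broke builds with the flash-FF path enabled. Below is a minimal standalone sketch of the fixed call, assuming the ggml API of this era (`ggml_build_forward`/`ggml_graph_compute` were reworked in later ggml releases) and using illustrative tiny-model dimensions:

```c
#include "ggml.h"

#include <stddef.h>

int main(void) {
    const int n_state = 384;   // illustrative tiny-model width (assumption)
    const int n_ctx   = 1500;  // Whisper encoder audio context

    struct ggml_init_params params = {
        /*.mem_size   =*/ 128*1024*1024,
        /*.mem_buffer =*/ NULL,
    };
    struct ggml_context * ctx0 = ggml_init(params);

    // Encoder activation: ne0 = n_state, ne1 = n_ctx.
    struct ggml_tensor * cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_state, n_ctx);

    // MLP weights shaped as in whisper.cpp (hidden size 4*n_state);
    // F16 weights stand in for the wctx.wtype used in the diff.
    struct ggml_tensor * mlp_0_w = ggml_new_tensor_2d(ctx0, GGML_TYPE_F16, n_state, 4*n_state);
    struct ggml_tensor * mlp_0_b = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 4*n_state);
    struct ggml_tensor * mlp_1_w = ggml_new_tensor_2d(ctx0, GGML_TYPE_F16, 4*n_state, n_state);
    struct ggml_tensor * mlp_1_b = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, n_state);

    // The fix: the ggml_cpy destination must match cur's actual shape
    // [n_state, n_ctx] -- sizing it with a stale N mis-sizes the copy.
    struct ggml_tensor * ff = ggml_flash_ff(ctx0,
            ggml_cpy(ctx0, cur, ggml_new_tensor_2d(ctx0, GGML_TYPE_F16, n_state, n_ctx)),
            mlp_0_w, mlp_0_b, mlp_1_w, mlp_1_b);

    struct ggml_cgraph gf = ggml_build_forward(ff);
    ggml_graph_compute(ctx0, &gf);  // era API; later ggml builds graphs differently

    ggml_free(ctx0);
    return 0;
}
```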
