ggml : move ggml_flash_attn_ext_get_prec to ggml-impl.h

ggerganov · ggerganov · commit 1888c1fe2d26 · 2024-11-08T10:39:22.000+02:00
diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
@@ -1746,9 +1746,6 @@ extern "C" {
             struct ggml_tensor * a,
             enum ggml_prec       prec);
 
-    GGML_API enum ggml_prec ggml_flash_attn_ext_get_prec(
-            const struct ggml_tensor * a);
-
     // TODO: needs to be adapted to ggml_flash_attn_ext
     GGML_API struct ggml_tensor * ggml_flash_attn_back(
            struct ggml_context * ctx,
diff --git a/ggml/src/ggml-cuda/fattn.cu b/ggml/src/ggml-cuda/fattn.cu
@@ -7,6 +7,8 @@
 #include "fattn-wmma-f16.cuh"
 #include "fattn.cuh"
 
+#include "ggml-impl.h"
+
 #include <cstdint>
 
 static void ggml_cuda_flash_attn_ext_wmma_f16(ggml_backend_cuda_context & ctx, ggml_tensor * dst) {
diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h
@@ -114,6 +114,8 @@ static void ggml_set_op_params_f32(struct ggml_tensor * tensor, uint32_t i, floa
     ((float *)(tensor->op_params))[i] = value;
 }
 
+static enum ggml_prec ggml_flash_attn_ext_get_prec(const struct ggml_tensor * a);
+
 struct ggml_map_custom1_op_params {
     ggml_custom1_op_t  fun;
     int                n_tasks;

Original file line number	Diff line number	Diff line change
`@@ -114,6 +114,8 @@ static void ggml_set_op_params_f32(struct ggml_tensor * tensor, uint32_t i, floa`
`114`	`114`	`((float *)(tensor->op_params))[i] = value;`
`115`	`115`	`}`
`116`	`116`
	`117`	`+static enum ggml_prec ggml_flash_attn_ext_get_prec(const struct ggml_tensor * a);`
	`118`	`+`
`117`	`119`	`struct ggml_map_custom1_op_params {`
`118`	`120`	`ggml_custom1_op_t fun;`
`119`	`121`	`int n_tasks;`