Skip to content

Commit 6560bed

Browse files
authored
server : support llava 1.6 (#5553)
* server: init working 1.6
* move clip_image to header
* remove commented code
* remove c++ style from header
* remove todo
* expose llava_image_embed_make_with_clip_img
* fix zig build
1 parent 06bf2cf commit 6560bed

File tree

5 files changed

+9
-36
lines changed

5 files changed

+9
-36
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -719,7 +719,7 @@ save-load-state: examples/save-load-state/save-load-state.cpp ggml.o llama.o $(C
719719
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
720720
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
721721

722-
server: examples/server/server.cpp examples/server/oai.hpp examples/server/utils.hpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
722+
server: examples/server/server.cpp examples/server/oai.hpp examples/server/utils.hpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp examples/llava/clip.cpp examples/llava/clip.h examples/llava/llava.h examples/llava/llava.cpp common/stb_image.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
723723
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
724724
$(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
725725
$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h %.hpp $< examples/llava/clip.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) -o $@ $(LDFLAGS) $(LWINSOCK2)

build.zig

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ pub fn build(b: *std.build.Builder) !void {
123123
const grammar_parser = make.obj("grammar-parser", "common/grammar-parser.cpp");
124124
const train = make.obj("train", "common/train.cpp");
125125
const clip = make.obj("clip", "examples/llava/clip.cpp");
126+
const llava = make.obj("llava", "examples/llava/llava.cpp");
126127

127128
_ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, sampling, console, grammar_parser });
128129
_ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo });
@@ -131,7 +132,7 @@ pub fn build(b: *std.build.Builder) !void {
131132
_ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, train });
132133
_ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, train });
133134

134-
const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, sampling, grammar_parser, clip });
135+
const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, sampling, grammar_parser, clip, llava });
135136
if (server.target.isWindows()) {
136137
server.linkSystemLibrary("ws2_32");
137138
}

examples/llava/llava.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx *
311311
return true;
312312
}
313313

314-
static bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out) {
314+
bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out) {
315315
float * image_embd = (float *)malloc(clip_embd_nbytes(ctx_clip)*6); // TODO: base on gridsize/llava model
316316
if (!image_embd) {
317317
fprintf(stderr, "Unable to allocate memory for image embeddings\n");

examples/llava/llava.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ struct llava_image_embed {
3131
/** sanity check for clip <-> llava embed size match */
3232
LLAVA_API bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx * ctx_clip);
3333

34+
LLAVA_API bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out);
35+
3436
/** build an image embed from image file bytes */
3537
LLAVA_API struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length);
3638
/** build an image embed from a path to an image filename */

examples/server/server.cpp

Lines changed: 3 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "oai.hpp"
66

77
#include "../llava/clip.h"
8+
#include "../llava/llava.h"
89

910
#include "stb_image.h"
1011

@@ -997,43 +998,12 @@ struct llama_server_context
997998
{
998999
continue;
9991000
}
1000-
clip_image_f32_batch img_res_v;
1001-
img_res_v.size = 0;
1002-
img_res_v.data = nullptr;
1003-
if (!clip_image_preprocess(clp_ctx, img.img_data, img_res_v))
1004-
{
1005-
LOG_TEE("Error processing the given image");
1006-
clip_free(clp_ctx);
1007-
clip_image_f32_batch_free(img_res_v);
1008-
return false;
1009-
}
1010-
if (img_res_v.size == 0)
1011-
{
1012-
LOG_TEE("Error processing the given image");
1013-
return false;
1014-
}
1015-
1016-
// note: assumes only one image was returned by clip_image_preprocess
1017-
clip_image_f32 * img_res = img_res_v.data;
10181001

1019-
img.image_tokens = clip_n_patches(clp_ctx);
1020-
img.image_embedding = (float *)malloc(clip_embd_nbytes(clp_ctx));
1021-
if (!img.image_embedding)
1022-
{
1023-
LOG_TEE("Unable to allocate memory for image embeddings\n");
1024-
clip_image_f32_batch_free(img_res_v);
1025-
clip_free(clp_ctx);
1026-
return false;
1027-
}
1028-
LOG_TEE("slot %i - encoding image [id: %i]\n", slot.id, img.id);
1029-
if (!clip_image_encode(clp_ctx, params.n_threads, img_res, img.image_embedding))
1030-
{
1031-
LOG_TEE("Unable to encode image\n");
1032-
clip_image_f32_batch_free(img_res_v);
1002+
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
1003+
LOG_TEE("Error processing the given image");
10331004
return false;
10341005
}
10351006

1036-
clip_image_f32_batch_free(img_res_v);
10371007

10381008
img.request_encode_image = false;
10391009
}

0 commit comments

Comments (0)