Skip to content

Commit a0f6e56

Browse files
tinglou (Lou Ting)
authored and committed
llava : change API to pure C style for Rust FFI bindgen (ggml-org#6079)
Co-authored-by: Lou Ting <[email protected]>
1 parent 4cc0e05 commit a0f6e56

File tree

4 files changed

+24
-24
lines changed

4 files changed

+24
-24
lines changed

examples/llava/clip.cpp

Lines changed: 18 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1235,16 +1235,16 @@ struct clip_image_f32 * clip_image_f32_init() {
12351235

12361236
void clip_image_u8_free(struct clip_image_u8 * img) { delete img; }
12371237
void clip_image_f32_free(struct clip_image_f32 * img) { delete img; }
1238-
void clip_image_u8_batch_free(struct clip_image_u8_batch & batch) {
1239-
if (batch.size > 0) {
1240-
delete[] batch.data;
1241-
batch.size = 0;
1238+
void clip_image_u8_batch_free(struct clip_image_u8_batch * batch) {
1239+
if (batch->size > 0) {
1240+
delete[] batch->data;
1241+
batch->size = 0;
12421242
}
12431243
}
1244-
void clip_image_f32_batch_free(struct clip_image_f32_batch & batch) {
1245-
if (batch.size > 0) {
1246-
delete[] batch.data;
1247-
batch.size = 0;
1244+
void clip_image_f32_batch_free(struct clip_image_f32_batch * batch) {
1245+
if (batch->size > 0) {
1246+
delete[] batch->data;
1247+
batch->size = 0;
12481248
}
12491249
}
12501250

@@ -1497,7 +1497,7 @@ static std::vector<clip_image_u8*> divide_to_patches_u8(const clip_image_u8 & im
14971497

14981498
// returns the normalized float tensor for llava-1.5, for spatial_unpad with anyres processing for llava-1.6 it returns the normalized image patch tensors as a vector
14991499
// res_imgs memory is being allocated here, previous allocations will be freed if found
1500-
bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch & res_imgs) {
1500+
bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch * res_imgs) {
15011501
bool pad_to_square = true;
15021502
if (!ctx->has_vision_encoder) {
15031503
printf("This gguf file seems to have no vision encoder\n");
@@ -1509,11 +1509,11 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
15091509
pad_to_square = false;
15101510
}
15111511
// free the previous res_imgs if any set
1512-
if (res_imgs.size > 0) {
1512+
if (res_imgs->size > 0) {
15131513
clip_image_f32_batch_free(res_imgs);
15141514
}
1515-
res_imgs.data = nullptr;
1516-
res_imgs.size = 0;
1515+
res_imgs->data = nullptr;
1516+
res_imgs->size = 0;
15171517

15181518
// the logic below is to pad the shorter side to the longer side with a background color: rgb(122, 116, 104)
15191519
// see https://github.com/haotian-liu/LLaVA/blob/e854a2bf85118c504f6f16bf5c3c7c92f8fa8c6b/llava/conversation.py#L113-L156
@@ -1568,11 +1568,11 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
15681568
bicubic_resize(*img, *image_original_resize, params.image_size, params.image_size); // in python this is "shortest_edge", but all CLIP are square
15691569
patches.insert(patches.begin(), image_original_resize);
15701570
// clip_image_f32_batch_init(patches.size());
1571-
res_imgs.size = patches.size();
1572-
res_imgs.data = new clip_image_f32[res_imgs.size];
1571+
res_imgs->size = patches.size();
1572+
res_imgs->data = new clip_image_f32[res_imgs->size];
15731573
int num=0;
15741574
for (auto& patch : patches) {
1575-
normalize_image_u8_to_f32(patch, &res_imgs.data[num], ctx->image_mean, ctx->image_std);
1575+
normalize_image_u8_to_f32(patch, &res_imgs->data[num], ctx->image_mean, ctx->image_std);
15761576
num++;
15771577
}
15781578

@@ -1660,9 +1660,9 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
16601660
// }
16611661
// res_imgs.push_back(res);
16621662

1663-
res_imgs.size = 1;
1664-
res_imgs.data = new clip_image_f32[res_imgs.size];
1665-
res_imgs.data[0] = *res;
1663+
res_imgs->size = 1;
1664+
res_imgs->data = new clip_image_f32[res_imgs->size];
1665+
res_imgs->data[0] = *res;
16661666
clip_image_f32_free(res);
16671667

16681668
return true;

examples/llava/clip.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -60,16 +60,16 @@ CLIP_API struct clip_image_f32 * clip_image_f32_init();
6060

6161
CLIP_API void clip_image_u8_free (struct clip_image_u8 * img);
6262
CLIP_API void clip_image_f32_free(struct clip_image_f32 * img);
63-
CLIP_API void clip_image_u8_batch_free (struct clip_image_u8_batch & batch);
64-
CLIP_API void clip_image_f32_batch_free(struct clip_image_f32_batch & batch);
63+
CLIP_API void clip_image_u8_batch_free (struct clip_image_u8_batch * batch);
64+
CLIP_API void clip_image_f32_batch_free(struct clip_image_f32_batch * batch);
6565

6666
CLIP_API bool clip_image_load_from_file(const char * fname, struct clip_image_u8 * img);
6767

6868
/** interpret bytes as an image file with length bytes_length, and use the result to populate img */
6969
CLIP_API bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img);
7070

7171
/** preprocess img and store the result in res_imgs, pad_to_square may be overridden to false depending on model configuration */
72-
CLIP_API bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch & res_imgs );
72+
CLIP_API bool clip_image_preprocess(struct clip_ctx * ctx, const struct clip_image_u8 * img, struct clip_image_f32_batch * res_imgs );
7373

7474
CLIP_API struct ggml_tensor * clip_get_newline_tensor(const struct clip_ctx * ctx);
7575

examples/llava/llava.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -223,7 +223,7 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
223223
clip_image_f32_batch img_res_v;
224224
img_res_v.size = 0;
225225
img_res_v.data = nullptr;
226-
if (!clip_image_preprocess(ctx_clip, img, img_res_v)) {
226+
if (!clip_image_preprocess(ctx_clip, img, &img_res_v)) {
227227
fprintf(stderr, "%s: unable to preprocess image\n", __func__);
228228
delete[] img_res_v.data;
229229
return false;

examples/llava/llava.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,9 +29,9 @@ struct llava_image_embed {
2929
};
3030

3131
/** sanity check for clip <-> llava embed size match */
32-
LLAVA_API bool llava_validate_embed_size(const llama_context * ctx_llama, const clip_ctx * ctx_clip);
32+
LLAVA_API bool llava_validate_embed_size(const struct llama_context * ctx_llama, const struct clip_ctx * ctx_clip);
3333

34-
LLAVA_API bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, const clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out);
34+
LLAVA_API bool llava_image_embed_make_with_clip_img(struct clip_ctx * ctx_clip, int n_threads, const struct clip_image_u8 * img, float ** image_embd_out, int * n_img_pos_out);
3535

3636
/** build an image embed from image file bytes */
3737
LLAVA_API struct llava_image_embed * llava_image_embed_make_with_bytes(struct clip_ctx * ctx_clip, int n_threads, const unsigned char * image_bytes, int image_bytes_length);

0 commit comments

Comments
 (0)