@@ -518,10 +518,6 @@ struct whisper_kv_cache {
     int n; // number of tokens currently in the cache
 };
 
-struct whisper_model_data {
-    ggml_backend_buffer_t buffer_main;
-};
-
 struct whisper_model {
     e_model type = MODEL_UNKNOWN;
@@ -556,11 +552,11 @@ struct whisper_model {
     std::vector<whisper_layer_encoder> layers_encoder;
     std::vector<whisper_layer_decoder> layers_decoder;
 
-    // context
+    // ggml context that contains all the meta information about the model tensors
     struct ggml_context * ctx;
 
     // the model backend data is read-only and can be shared between processors
-    struct whisper_model_data * data;
+    struct ggml_backend_buffer * buffer;
 
     // tensors
     int n_loaded;
@@ -1283,8 +1279,6 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
 
     // init backends
     {
-        model.data = new whisper_model_data;
-
         ggml_backend_t backend_gpu = NULL;
 
         // initialize the backends
@@ -1323,17 +1317,17 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
             size_main += ggml_nbytes(t.second) + ggml_tensor_overhead();
         }
 
-        model.data->buffer_main = ggml_backend_alloc_buffer(wctx.backend, size_main);
+        model.buffer = ggml_backend_alloc_buffer(wctx.backend, size_main);
 
         WHISPER_LOG_INFO("%s: %8s buffer size = %8.2f MB\n", __func__, ggml_backend_name(wctx.backend), size_main / 1024.0 / 1024.0);
     }
 
-    ggml_allocr * alloc_main = ggml_allocr_new_from_buffer(model.data->buffer_main);
+    ggml_allocr * alloc = ggml_allocr_new_from_buffer(model.buffer);
 
     // allocate tensors in the backend buffers
     {
         for (const auto & t : model.tensors) {
-            ggml_allocr_alloc(alloc_main, t.second);
+            ggml_allocr_alloc(alloc, t.second);
         }
     }
@@ -1455,7 +1449,7 @@ static bool whisper_model_load(struct whisper_model_loader * loader, whisper_con
         }
     }
 
-    ggml_allocr_free(alloc_main);
+    ggml_allocr_free(alloc);
 
     wctx.t_load_us = ggml_time_us() - t_start_us;
@@ -3198,10 +3192,9 @@ void whisper_free(struct whisper_context * ctx) {
     if (ctx->model.ctx) {
         ggml_free(ctx->model.ctx);
     }
-    if (ctx->model.data) {
-        ggml_backend_buffer_free(ctx->model.data->buffer_main);
-
-        delete ctx->model.data;
+    if (ctx->model.buffer) {
+        ggml_backend_buffer_free(ctx->model.buffer);
     }
 
     whisper_free_state(ctx->state);