fix typo

ngxson · ngxson · commit 544f4f10a431 · 2025-05-21T23:37:59.000+02:00
diff --git a/docs/multimodal.md b/docs/multimodal.md
@@ -6,7 +6,7 @@ llama.cpp supports multimodal input via `libmtmd`. Currently, there are 2 tools
 
 Currently, we support **image** and **audio** input. Audio is highly experimental and may have reduced quality.
 
-To enable it, can use use one of the 2 methods below:
+To enable it, you can use one of the 2 methods below:
 
 - Use `-hf` option with a supported model (see a list of pre-quantized model below)
     - To load a model using `-hf` while disabling multimodal, use `--no-mmproj`
diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp
@@ -2206,6 +2206,8 @@ struct clip_model_loader {
         auto & hparams = ctx_clip.vision_model.hparams;
         std::map<std::string, size_t> tensor_offset;
         std::vector<ggml_tensor *> tensors_to_load;
+
+        // TODO @ngxson : support both audio and video in the future
         const char * prefix = hparams.has_audio ? "a" : "v";
 
         // get offsets