Skip to content

Commit b82577d

Browse files
authored
fix(llama.cpp): consider also native builds (#3839)
This is in order to also identify builds which do not use capability-based alternatives. For instance, there are cases when we build the backend only natively on the host. Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent 97cf028 commit b82577d

File tree

1 file changed

+33
-16
lines changed

1 file changed

+33
-16
lines changed

pkg/model/initializers.go

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -251,8 +251,22 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
251251

252252
// No GPU found or no specific binaries found, try to load the CPU variant(s)
253253

254-
// Select the Fallback by default
255-
selectedProcess := backendPath(assetDir, LLamaCPPFallback)
254+
// Select a binary based on availability/capability
255+
selectedProcess := ""
256+
257+
// Check if we have the fallback build (llama-cpp-fallback) and use that as a baseline
258+
if _, err := os.Stat(backendPath(assetDir, LLamaCPPFallback)); err == nil {
259+
log.Debug().Msgf("[%s] %s variant available", LLamaCPPFallback, backend)
260+
selectedProcess = backendPath(assetDir, LLamaCPPFallback)
261+
}
262+
263+
// Check if we have a native build (llama-cpp) and prefer that instead.
264+
// As a reminder, we ultimately attempt again with the fallback variant
265+
// if things fail with what we select here.
266+
if _, err := os.Stat(backendPath(assetDir, LLamaCPP)); err == nil {
267+
log.Debug().Msgf("[%s] attempting to load with native variant", backend)
268+
selectedProcess = backendPath(assetDir, LLamaCPP)
269+
}
256270

257271
// IF we find any optimized binary, we use that
258272
if xsysinfo.HasCPUCaps(cpuid.AVX2) {
@@ -269,14 +283,29 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
269283
}
270284
}
271285

272-
// Check if the binary exists!
286+
// Safety measure: check if the binary exists otherwise return empty string
273287
if _, err := os.Stat(selectedProcess); err == nil {
274288
return selectedProcess
275289
}
276290

277291
return ""
278292
}
279293

294+
func attemptLoadingOnFailure(backend string, ml *ModelLoader, o *Options, err error) (*Model, error) {
295+
// XXX: This is too backend specific(llama-cpp), remove this bit or generalize further
296+
// We failed somehow starting the binary. For instance, could be that we are missing
297+
// some libraries if running in binary-only mode.
298+
// In this case, we attempt to load the model with the fallback variant.
299+
300+
// If not llama-cpp backend, return the error immediately
301+
if backend != LLamaCPP {
302+
return nil, err
303+
}
304+
305+
log.Error().Msgf("[%s] Failed loading model, trying with fallback '%s', error: %s", backend, LLamaCPPFallback, err.Error())
306+
return ml.LoadModel(o.modelID, o.model, ml.grpcModel(LLamaCPPFallback, false, o))
307+
}
308+
280309
// starts the grpcModelProcess for the backend, and returns a grpc client
281310
// It also loads the model
282311
func (ml *ModelLoader) grpcModel(backend string, autodetect bool, o *Options) func(string, string, string) (*Model, error) {
@@ -450,19 +479,7 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e
450479

451480
model, err := ml.LoadModel(o.modelID, o.model, ml.grpcModel(backendToConsume, AutoDetect, o))
452481
if err != nil {
453-
// XXX: This is too backend specific(llama-cpp), remove this bit or generalize further
454-
// We failed somehow starting the binary. For instance, could be that we are missing
455-
// some libraries if running in binary-only mode.
456-
// In this case, we attempt to load the model with the fallback variant.
457-
458-
// If not llama-cpp backend, return error immediately
459-
if backend != LLamaCPP {
460-
return nil, err
461-
}
462-
463-
// Otherwise attempt with fallback
464-
log.Error().Msgf("[%s] Failed loading model, trying with fallback '%s'", backend, LLamaCPPFallback)
465-
model, err = ml.LoadModel(o.modelID, o.model, ml.grpcModel(LLamaCPPFallback, false, o))
482+
model, err = attemptLoadingOnFailure(backend, ml, o, err)
466483
if err != nil {
467484
return nil, err
468485
}

0 commit comments

Comments
 (0)