@@ -251,8 +251,22 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
251251
252252 // No GPU found or no specific binaries found, try to load the CPU variant(s)
253253
254- // Select the Fallback by default
255- selectedProcess := backendPath (assetDir , LLamaCPPFallback )
254+ // Select a binary based on availability/capability
255+ selectedProcess := ""
256+
257+ // Check if we have the fallback build (llama-cpp-fallback) and use that
258+ if _ , err := os .Stat (backendPath (assetDir , LLamaCPPFallback )); err == nil {
259+ log .Debug ().Msgf ("[%s] %s variant available" , LLamaCPPFallback , backend )
260+ selectedProcess = backendPath (assetDir , LLamaCPPFallback )
261+ }
262+
263+ // Check if we have a native build (llama-cpp) and use that instead.
264+ // As a reminder, we do ultimately attempt again with the fallback variant
265+ // if things fail with what we select here.
266+ if _ , err := os .Stat (backendPath (assetDir , LLamaCPP )); err == nil {
267+ log .Debug ().Msgf ("[%s] attempting to load with native variant" , backend )
268+ selectedProcess = backendPath (assetDir , LLamaCPP )
269+ }
256270
257271 // IF we find any optimized binary, we use that
258272 if xsysinfo .HasCPUCaps (cpuid .AVX2 ) {
@@ -269,14 +283,29 @@ func selectGRPCProcessByHostCapabilities(backend, assetDir string, f16 bool) str
269283 }
270284 }
271285
272- // Check if the binary exists!
286+ // Safety measure: check if the binary exists otherwise return empty string
273287 if _ , err := os .Stat (selectedProcess ); err == nil {
274288 return selectedProcess
275289 }
276290
277291 return ""
278292}
279293
294+ func attemptLoadingOnFailure (backend string , ml * ModelLoader , o * Options , err error ) (* Model , error ) {
295+ // XXX: This is too backend specific(llama-cpp), remove this bit or generalize further
296+ // We failed somehow starting the binary. For instance, could be that we are missing
297+ // some libraries if running in binary-only mode.
298+ // In this case, we attempt to load the model with the fallback variant.
299+
300+ // If not llama-cpp backend, return the error immediately
301+ if backend != LLamaCPP {
302+ return nil , err
303+ }
304+
305+ log .Error ().Msgf ("[%s] Failed loading model, trying with fallback '%s', error: %s" , backend , LLamaCPPFallback , err .Error ())
306+ return ml .LoadModel (o .modelID , o .model , ml .grpcModel (LLamaCPPFallback , false , o ))
307+ }
308+
280309// starts the grpcModelProcess for the backend, and returns a grpc client
281310// It also loads the model
282311func (ml * ModelLoader ) grpcModel (backend string , autodetect bool , o * Options ) func (string , string , string ) (* Model , error ) {
@@ -450,19 +479,7 @@ func (ml *ModelLoader) BackendLoader(opts ...Option) (client grpc.Backend, err e
450479
451480 model , err := ml .LoadModel (o .modelID , o .model , ml .grpcModel (backendToConsume , AutoDetect , o ))
452481 if err != nil {
453- // XXX: This is too backend specific(llama-cpp), remove this bit or generalize further
454- // We failed somehow starting the binary. For instance, could be that we are missing
455- // some libraries if running in binary-only mode.
456- // In this case, we attempt to load the model with the fallback variant.
457-
458- // If not llama-cpp backend, return error immediately
459- if backend != LLamaCPP {
460- return nil , err
461- }
462-
463- // Otherwise attempt with fallback
464- log .Error ().Msgf ("[%s] Failed loading model, trying with fallback '%s'" , backend , LLamaCPPFallback )
465- model , err = ml .LoadModel (o .modelID , o .model , ml .grpcModel (LLamaCPPFallback , false , o ))
482+ model , err = attemptLoadingOnFailure (backend , ml , o , err )
466483 if err != nil {
467484 return nil , err
468485 }
0 commit comments