@@ -29,13 +29,14 @@ var Aliases map[string]string = map[string]string{
 const (
 	LlamaGGML = "llama-ggml"
 
-	LLamaCPP = "llama-cpp"
+	LLamaCPP = "llama-cpp"
 
 	LLamaCPPCUDA12 = "llama-cpp-cuda12"
 	LLamaCPPAVX2 = "llama-cpp-avx2"
 	LLamaCPPAVX = "llama-cpp-avx"
 	LLamaCPPFallback = "llama-cpp-fallback"
 	LLamaCPPCUDA = "llama-cpp-cuda"
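+	// llama-cpp-grpc: variant selected when LLAMACPP_GRPC_SERVERS is set (see selectGRPCProcess below)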
+	LLamaCPPGRPC = "llama-cpp-grpc"
 
 	Gpt4AllLlamaBackend = "gpt4all-llama"
 	Gpt4AllMptBackend = "gpt4all-mpt"
 		}
 	}
 
-	foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback := false, false, false
+	// if we find the llama.cpp variants, expose them as a single backend (llama-cpp)
+	foundLCPPAVX, foundLCPPAVX2, foundLCPPFallback, foundLCPPGRPC := false, false, false, false
 	if _, ok := backends[LLamaCPP]; !ok {
 		for _, e := range entry {
 			if strings.Contains(e.Name(), LLamaCPPAVX2) && !foundLCPPAVX2 {
@@ -96,16 +98,23 @@ ENTRY:
 				backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPFallback)
 				foundLCPPFallback = true
 			}
+			if strings.Contains(e.Name(), LLamaCPPGRPC) && !foundLCPPGRPC {
+				backends[LLamaCPP] = append(backends[LLamaCPP], LLamaCPPGRPC)
+				foundLCPPGRPC = true
+			}
 		}
 	}
 
 	// order backends from the asset directory.
 	// as we scan for backends, we want to keep some order in which backends are tried.
 	// for example, llama.cpp should be tried first, and we want to keep the huggingface backend last.
-	// sets a priority list
-	// First has more priority
+
+	// sets a priority list - first has more priority
 	priorityList := []string{
-		// First llama.cpp and llama-ggml
+
+		// First llama.cpp (and its variants), then llama-ggml.
+		// We keep the fallback in the list so that, if a llama.cpp variant
+		// that depends on shared libraries breaks, we still have a safety net.
 		LLamaCPP, LlamaGGML, Gpt4All, LLamaCPPFallback,
 	}
 
@@ -142,6 +151,50 @@ ENTRY:
 	return orderedBackends, nil
 }
 
+// selectGRPCProcess selects the GRPC process to start based on system capabilities
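+// Priority: the llama-cpp-grpc variant when LLAMACPP_GRPC_SERVERS is set, then the CUDA
+// build when an NVIDIA GPU is detected and the binary exists, then AVX2/AVX/fallback
+// based on CPU flags.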
+func selectGRPCProcess(backend, assetDir string) string {
+	foundCUDA := false
+	var grpcProcess string
+
+	// variant selection currently applies only to llama.cpp
+	if backend != LLamaCPP {
+		return ""
+	}
+
+	// Note: this environment variable is read by LocalAI's llama.cpp grpc-server
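+	// e.g. LLAMACPP_GRPC_SERVERS="host1:port,host2:port" (assumed format: a comma-separated list of workers)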
+	if os.Getenv("LLAMACPP_GRPC_SERVERS") != "" {
+		return backendPath(assetDir, LLamaCPPGRPC)
+	}
+
+	gpus, err := xsysinfo.GPUs()
+	if err == nil {
+		for _, gpu := range gpus {
+			if strings.Contains(gpu.String(), "nvidia") {
+				log.Info().Msgf("[%s] attempting to load with CUDA variant", backend)
+				grpcProcess = backendPath(assetDir, LLamaCPPCUDA)
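+				// only keep the CUDA variant if its binary actually exists in the asset dir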
+				if _, err := os.Stat(grpcProcess); err == nil {
+					foundCUDA = true
+				}
+			}
+		}
+	}
+
+	if !foundCUDA {
+		if cpu.X86.HasAVX2 {
+			log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
+			grpcProcess = backendPath(assetDir, LLamaCPPAVX2)
+		} else if cpu.X86.HasAVX {
+			log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
+			grpcProcess = backendPath(assetDir, LLamaCPPAVX)
+		} else {
+			log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
+			grpcProcess = backendPath(assetDir, LLamaCPPFallback)
+		}
+	}
+
+	return grpcProcess
+}
+
 // starts the grpcModelProcess for the backend, and returns a grpc client
 // It also loads the model
 func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string) (ModelAddress, error) {
@@ -192,33 +245,10 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
 		} else {
 			grpcProcess := backendPath(o.assetDir, backend)
 
-			foundCUDA := false
-			// for llama-cpp, check CPU capabilities and load the appropriate variant
-			if backend == LLamaCPP {
-				gpus, err := xsysinfo.GPUs()
-				if err == nil {
-					for _, gpu := range gpus {
-						if strings.Contains(gpu.String(), "nvidia") {
-							log.Info().Msgf("[%s] attempting to load with CUDA variant", backend)
-							grpcProcess = backendPath(o.assetDir, LLamaCPPCUDA)
-							if _, err := os.Stat(grpcProcess); err == nil {
-								foundCUDA = true
-							}
-						}
-					}
-				}
-
-				if !foundCUDA {
-					if cpu.X86.HasAVX2 {
-						log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
-						grpcProcess = backendPath(o.assetDir, LLamaCPPAVX2)
-					} else if cpu.X86.HasAVX {
-						log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
-						grpcProcess = backendPath(o.assetDir, LLamaCPPAVX)
-					} else {
-						log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
-						grpcProcess = backendPath(o.assetDir, LLamaCPPFallback)
-					}
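+			// DISABLE_AUTODETECT=true skips capability detection and keeps the default backend binary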
+			if os.Getenv("DISABLE_AUTODETECT") != "true" {
+				// autodetect the gRPC process to start based on system capabilities
+				if selectedProcess := selectGRPCProcess(backend, o.assetDir); selectedProcess != "" {
+					grpcProcess = selectedProcess
 				}
 			}
 