41 changes: 31 additions & 10 deletions Makefile
@@ -152,9 +152,11 @@ ifeq ($(findstring tts,$(GO_TAGS)),tts)
OPTIONAL_GRPC+=backend-assets/grpc/piper
endif

ALL_GRPC_BACKENDS=backend-assets/grpc/langchain-huggingface
ALL_GRPC_BACKENDS=backend-assets/grpc/huggingface
ALL_GRPC_BACKENDS+=backend-assets/grpc/bert-embeddings
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-noavx
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-cpp-fallback
ALL_GRPC_BACKENDS+=backend-assets/grpc/llama-ggml
ALL_GRPC_BACKENDS+=backend-assets/grpc/gpt4all
ALL_GRPC_BACKENDS+=backend-assets/grpc/rwkv
@@ -293,6 +295,7 @@ clean: ## Remove build related file
rm -rf backend-assets/*
$(MAKE) -C backend/cpp/grpc clean
$(MAKE) -C backend/cpp/llama clean
rm -rf backend/cpp/llama-* || true
$(MAKE) dropreplace
$(MAKE) protogen-clean
rmdir pkg/grpc/proto || true
@@ -311,7 +314,7 @@ build: prepare backend-assets grpcs ## Build the project
CGO_LDFLAGS="$(CGO_LDFLAGS)" $(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o $(BINARY_NAME) ./

build-minimal:
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS=backend-assets/grpc/llama-cpp GO_TAGS=none $(MAKE) build
BUILD_GRPC_FOR_BACKEND_LLAMA=true GRPC_BACKENDS="backend-assets/grpc/llama-cpp" GO_TAGS=none $(MAKE) build

build-api:
BUILD_GRPC_FOR_BACKEND_LLAMA=true BUILD_API_ONLY=true GO_TAGS=none $(MAKE) build
@@ -616,8 +619,8 @@ backend-assets/grpc/gpt4all: sources/gpt4all sources/gpt4all/gpt4all-bindings/go
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ LIBRARY_PATH=$(CURDIR)/sources/gpt4all/gpt4all-bindings/golang/ \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/gpt4all ./backend/go/llm/gpt4all/

backend-assets/grpc/langchain-huggingface: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/langchain-huggingface ./backend/go/llm/langchain/
backend-assets/grpc/huggingface: backend-assets/grpc
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/huggingface ./backend/go/llm/langchain/

backend/cpp/llama/llama.cpp:
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
@@ -629,7 +632,7 @@ ADDED_CMAKE_ARGS=-Dabsl_DIR=${INSTALLED_LIB_CMAKE}/absl \
-Dutf8_range_DIR=${INSTALLED_LIB_CMAKE}/utf8_range \
-DgRPC_DIR=${INSTALLED_LIB_CMAKE}/grpc \
-DCMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES=${INSTALLED_PACKAGES}/include
backend/cpp/llama/grpc-server:
build-llama-cpp-grpc-server:
# Conditionally build grpc for the llama backend to use if needed
ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
$(MAKE) -C backend/cpp/grpc build
@@ -638,19 +641,37 @@ ifdef BUILD_GRPC_FOR_BACKEND_LLAMA
PATH="${INSTALLED_PACKAGES}/bin:${PATH}" \
CMAKE_ARGS="${CMAKE_ARGS} ${ADDED_CMAKE_ARGS}" \
LLAMA_VERSION=$(CPPLLAMA_VERSION) \
$(MAKE) -C backend/cpp/llama grpc-server
$(MAKE) -C backend/cpp/${VARIANT} grpc-server
else
echo "BUILD_GRPC_FOR_BACKEND_LLAMA is not defined."
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama grpc-server
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/${VARIANT} grpc-server
endif

backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/grpc-server
cp -rfv backend/cpp/llama/grpc-server backend-assets/grpc/llama-cpp
backend-assets/grpc/llama-cpp: backend-assets/grpc
$(info ${GREEN}I llama-cpp build info:standard${RESET})
cp -rf backend/cpp/llama backend/cpp/llama-default
$(MAKE) -C backend/cpp/llama-default purge
$(MAKE) VARIANT="llama-default" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-default/grpc-server backend-assets/grpc/llama-cpp
# TODO: every binary should have its own folder instead, so each can have a different metal implementation
ifeq ($(BUILD_TYPE),metal)
cp backend/cpp/llama/llama.cpp/build/bin/default.metallib backend-assets/grpc/
cp backend/cpp/llama-default/llama.cpp/build/bin/default.metallib backend-assets/grpc/
endif

backend-assets/grpc/llama-cpp-noavx: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-noavx
$(MAKE) -C backend/cpp/llama-noavx purge
$(info ${GREEN}I llama-cpp build info:noavx${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF" $(MAKE) VARIANT="llama-noavx" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-noavx/grpc-server backend-assets/grpc/llama-cpp-noavx

backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc
cp -rf backend/cpp/llama backend/cpp/llama-fallback
$(MAKE) -C backend/cpp/llama-fallback purge
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
CMAKE_ARGS="$(CMAKE_ARGS) -DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
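
The three targets above build the same grpc-server from separate copies of backend/cpp/llama, differing only in CMake feature flags: the default build keeps all optimizations, llama-cpp-noavx disables AVX2/AVX512, and llama-cpp-fallback additionally disables F16C and FMA. As a minimal sketch (not part of this change), a caller could pick among the resulting binaries by probing the host CPU; the function name and selection policy below are illustrative assumptions only.

// Illustrative sketch: choose a llama-cpp variant from host CPU features.
// The binary names match the Makefile targets above; the selection policy
// itself is an assumption, not something this diff defines.
package main

import (
	"fmt"

	"golang.org/x/sys/cpu"
)

func pickLlamaVariant() string {
	switch {
	case cpu.X86.HasAVX2:
		return "llama-cpp" // full-featured default build
	case cpu.X86.HasAVX && cpu.X86.HasFMA:
		return "llama-cpp-noavx" // built with -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF
	default:
		return "llama-cpp-fallback" // also -DLLAMA_F16C=OFF -DLLAMA_FMA=OFF
	}
}

func main() {
	fmt.Println("selected backend:", pickLlamaVariant())
}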

backend-assets/grpc/llama-ggml: sources/go-llama.cpp sources/go-llama.cpp/libbinding.a backend-assets/grpc
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/sources/go-llama.cpp LIBRARY_PATH=$(CURDIR)/sources/go-llama.cpp \
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/llama-ggml ./backend/go/llm/llama-ggml/
26 changes: 9 additions & 17 deletions backend/cpp/llama/Makefile
@@ -43,31 +43,23 @@ llama.cpp:

llama.cpp/examples/grpc-server: llama.cpp
mkdir -p llama.cpp/examples/grpc-server
cp -r $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
cp -r $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
cp -rfv $(abspath ./)/utils.hpp llama.cpp/examples/grpc-server/
echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
## XXX: In some versions of CMake clip wasn't being built before llama.
## This is an hack for now, but it should be fixed in the future.
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
bash prepare.sh

rebuild:
cp -rfv $(abspath ./)/CMakeLists.txt llama.cpp/examples/grpc-server/
cp -rfv $(abspath ./)/grpc-server.cpp llama.cpp/examples/grpc-server/
cp -rfv $(abspath ./)/json.hpp llama.cpp/examples/grpc-server/
bash prepare.sh
rm -rf grpc-server
$(MAKE) grpc-server

clean:
rm -rf llama.cpp
purge:
rm -rf llama.cpp/build
rm -rf llama.cpp/examples/grpc-server
rm -rf grpc-server

clean: purge
rm -rf llama.cpp

grpc-server: llama.cpp llama.cpp/examples/grpc-server
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
bash -c "source $(ONEAPI_VARS); \
cd llama.cpp && mkdir -p build && cd build && cmake .. $(CMAKE_ARGS) && cmake --build . --config Release"
20 changes: 20 additions & 0 deletions backend/cpp/llama/prepare.sh
@@ -0,0 +1,20 @@
#!/bin/bash

cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
cp -rfv json.hpp llama.cpp/examples/grpc-server/
cp -rfv utils.hpp llama.cpp/examples/grpc-server/

if grep -q "grpc-server" llama.cpp/examples/CMakeLists.txt; then
echo "grpc-server already added"
else
echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
fi

## XXX: In some versions of CMake clip wasn't being built before llama.
## This is a hack for now, but it should be fixed in the future.
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
10 changes: 8 additions & 2 deletions backend/go/llm/langchain/langchain.go
@@ -4,6 +4,7 @@ package main
// It is meant to be used by the main executable that is the server for the specific backend type (falcon, gpt3, etc)
import (
"fmt"
"os"

"github.com/go-skynet/LocalAI/pkg/grpc/base"
pb "github.com/go-skynet/LocalAI/pkg/grpc/proto"
@@ -18,9 +19,14 @@ type LLM struct {
}

func (llm *LLM) Load(opts *pb.ModelOptions) error {
llm.langchain, _ = langchain.NewHuggingFace(opts.Model)
var err error
hfToken := os.Getenv("HUGGINGFACEHUB_API_TOKEN")
if hfToken == "" {
return fmt.Errorf("no huggingface token provided")
}
llm.langchain, err = langchain.NewHuggingFace(opts.Model, hfToken)
llm.model = opts.Model
return nil
return err
}

func (llm *LLM) Predict(opts *pb.PredictOptions) (string, error) {
4 changes: 2 additions & 2 deletions core/http/app_test.go
@@ -787,11 +787,11 @@ var _ = Describe("API test", func() {
})

It("returns errors", func() {
backends := len(model.AutoLoadBackends) + 1 // +1 for huggingface
_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: testPrompt})
Expect(err).To(HaveOccurred())
Expect(err.Error()).To(ContainSubstring(fmt.Sprintf("error, status code: 500, message: could not load model - all backends returned error: %d errors occurred:", backends)))
Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error:"))
})

It("transcribes audio", func() {
if runtime.GOOS != "linux" {
Skip("test supported only on linux")
10 changes: 8 additions & 2 deletions pkg/langchain/huggingface.go
@@ -2,26 +2,32 @@ package langchain

import (
"context"
"fmt"

"github.com/tmc/langchaingo/llms"
"github.com/tmc/langchaingo/llms/huggingface"
)

type HuggingFace struct {
modelPath string
token string
}

func NewHuggingFace(repoId string) (*HuggingFace, error) {
func NewHuggingFace(repoId, token string) (*HuggingFace, error) {
if token == "" {
return nil, fmt.Errorf("no huggingface token provided")
}
return &HuggingFace{
modelPath: repoId,
token: token,
}, nil
}
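
A minimal usage sketch of the new constructor: the token is read from HUGGINGFACEHUB_API_TOKEN, mirroring what the langchain backend's Load does above; the model name "gpt2" and the error handling are illustrative only.

package main

import (
	"fmt"
	"os"

	"github.com/go-skynet/LocalAI/pkg/langchain"
)

func main() {
	hf, err := langchain.NewHuggingFace("gpt2", os.Getenv("HUGGINGFACEHUB_API_TOKEN"))
	if err != nil {
		// NewHuggingFace returns an error when the token is empty
		fmt.Println("cannot create client:", err)
		return
	}
	_ = hf // ready for hf.PredictHuggingFace(...)
}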

func (s *HuggingFace) PredictHuggingFace(text string, opts ...PredictOption) (*Predict, error) {
po := NewPredictOptions(opts...)

// Init client
llm, err := huggingface.New()
llm, err := huggingface.New(huggingface.WithToken(s.token))
if err != nil {
return nil, err
}
104 changes: 83 additions & 21 deletions pkg/model/initializers.go
@@ -2,27 +2,32 @@ package model

import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
"slices"
"strings"
"time"

grpc "github.com/go-skynet/LocalAI/pkg/grpc"
"github.com/hashicorp/go-multierror"
"github.com/phayes/freeport"
"github.com/rs/zerolog/log"
)

var Aliases map[string]string = map[string]string{
"go-llama": LLamaCPP,
"llama": LLamaCPP,
"embedded-store": LocalStoreBackend,
"go-llama": LLamaCPP,
"llama": LLamaCPP,
"embedded-store": LocalStoreBackend,
"langchain-huggingface": LCHuggingFaceBackend,
}

const (
LlamaGGML = "llama-ggml"
LLamaCPP = "llama-cpp"
LlamaGGML = "llama-ggml"
LLamaCPP = "llama-cpp"

LLamaCPPFallback = "llama-cpp-fallback"

Gpt4AllLlamaBackend = "gpt4all-llama"
Gpt4AllMptBackend = "gpt4all-mpt"
Gpt4AllJBackend = "gpt4all-j"
@@ -34,21 +39,73 @@ const (
StableDiffusionBackend = "stablediffusion"
TinyDreamBackend = "tinydream"
PiperBackend = "piper"
LCHuggingFaceBackend = "langchain-huggingface"
LCHuggingFaceBackend = "huggingface"

LocalStoreBackend = "local-store"
)

var AutoLoadBackends []string = []string{
LLamaCPP,
LlamaGGML,
Gpt4All,
BertEmbeddingsBackend,
RwkvBackend,
WhisperBackend,
StableDiffusionBackend,
TinyDreamBackend,
PiperBackend,
func backendPath(assetDir, backend string) string {
return filepath.Join(assetDir, "backend-assets", "grpc", backend)
}

func backendsInAssetDir(assetDir string) ([]string, error) {
excludeBackends := []string{"local-store"}
entry, err := os.ReadDir(backendPath(assetDir, ""))
if err != nil {
return nil, err
}
var backends []string
ENTRY:
for _, e := range entry {
for _, exclude := range excludeBackends {
if e.Name() == exclude {
continue ENTRY
}
}
if !e.IsDir() {
backends = append(backends, e.Name())
}
}

// Order the backends from the asset directory.
// As we scan for backends we want to keep some order in which they are tried,
// for example llama.cpp should be tried first and the huggingface backend should be kept last.

// set a priority list: entries that come first have higher priority
priorityList := []string{
// First llama.cpp and llama-ggml
LLamaCPP, LLamaCPPFallback, LlamaGGML, Gpt4All,
}
toTheEnd := []string{
// last has to be huggingface
LCHuggingFaceBackend,
// then bert embeddings
BertEmbeddingsBackend,
}
slices.Reverse(priorityList)
slices.Reverse(toTheEnd)

// order certain backends first
for _, b := range priorityList {
for i, be := range backends {
if be == b {
backends = append([]string{be}, append(backends[:i], backends[i+1:]...)...)
break
}
}
}
// make sure that some others are pushed to the end
for _, b := range toTheEnd {
for i, be := range backends {
if be == b {
backends = append(append(backends[:i], backends[i+1:]...), be)
break
}
}
}

return backends, nil
}
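
A small self-contained sketch of the reordering that backendsInAssetDir performs; the sample directory listing is hypothetical, while the two reordering passes are copied from the function above.

package main

import (
	"fmt"
	"slices"
)

func main() {
	// what a scan of backend-assets/grpc might return (alphabetical order)
	backends := []string{"bert-embeddings", "gpt4all", "huggingface", "llama-cpp", "llama-cpp-fallback", "whisper"}

	priorityList := []string{"llama-cpp", "llama-cpp-fallback", "llama-ggml", "gpt4all"}
	toTheEnd := []string{"huggingface", "bert-embeddings"}
	slices.Reverse(priorityList)
	slices.Reverse(toTheEnd)

	// move priority backends to the front, preserving their relative order
	for _, b := range priorityList {
		for i, be := range backends {
			if be == b {
				backends = append([]string{be}, append(backends[:i], backends[i+1:]...)...)
				break
			}
		}
	}
	// push the "last resort" backends to the end
	for _, b := range toTheEnd {
		for i, be := range backends {
			if be == b {
				backends = append(append(backends[:i], backends[i+1:]...), be)
				break
			}
		}
	}

	fmt.Println(backends)
	// [llama-cpp llama-cpp-fallback gpt4all whisper bert-embeddings huggingface]
}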

// starts the grpcModelProcess for the backend, and returns a grpc client
@@ -99,7 +156,7 @@ func (ml *ModelLoader) grpcModel(backend string, o *Options) func(string, string
client = ModelAddress(uri)
}
} else {
grpcProcess := filepath.Join(o.assetDir, "backend-assets", "grpc", backend)
grpcProcess := backendPath(o.assetDir, backend)
// Check if the file exists
if _, err := os.Stat(grpcProcess); os.IsNotExist(err) {
return "", fmt.Errorf("grpc process not found: %s. some backends(stablediffusion, tts) require LocalAI compiled with GO_TAGS", grpcProcess)
@@ -243,7 +300,12 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {

// autoload also external backends
allBackendsToAutoLoad := []string{}
allBackendsToAutoLoad = append(allBackendsToAutoLoad, AutoLoadBackends...)
autoLoadBackends, err := backendsInAssetDir(o.assetDir)
if err != nil {
return nil, err
}
log.Debug().Msgf("Loading from the following backends (in order): %+v", autoLoadBackends)
allBackendsToAutoLoad = append(allBackendsToAutoLoad, autoLoadBackends...)
for _, b := range o.externalBackends {
allBackendsToAutoLoad = append(allBackendsToAutoLoad, b)
}
@@ -271,10 +333,10 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
log.Info().Msgf("[%s] Loads OK", b)
return model, nil
} else if modelerr != nil {
err = multierror.Append(err, modelerr)
err = errors.Join(err, modelerr)
log.Info().Msgf("[%s] Fails: %s", b, modelerr.Error())
} else if model == nil {
err = multierror.Append(err, fmt.Errorf("backend returned no usable model"))
err = errors.Join(err, fmt.Errorf("backend returned no usable model"))
log.Info().Msgf("[%s] Fails: %s", b, "backend returned no usable model")
}
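
errors.Join (standard library, Go 1.20+) replaces multierror.Append here: the joined error keeps every backend failure, printed one message per line, and errors.Is/errors.As still match the wrapped errors. A minimal sketch with made-up messages:

package main

import (
	"errors"
	"fmt"
)

func main() {
	var err error
	err = errors.Join(err, fmt.Errorf("[llama-cpp] could not load model"))
	err = errors.Join(err, fmt.Errorf("[gpt4all] could not load model"))
	fmt.Println(err)
	// [llama-cpp] could not load model
	// [gpt4all] could not load model
}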
}
Expand Down