Skip to content

Commit ceafde2

Browse files
committed
feat: add support for cublas/openblas on the llama.cpp backend
1 parent e533b00 commit ceafde2

File tree

3 files changed

+32
-29
lines changed

3 files changed

+32
-29
lines changed

Makefile

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ GOTEST=$(GOCMD) test
33
GOVET=$(GOCMD) vet
44
BINARY_NAME=local-ai
55

6-
GOLLAMA_VERSION?=eb99b5438787cbd687682da445e879e02bfeaa07
6+
GOLLAMA_VERSION?=7f9ae4246088f0c08ed322acbae21d69cd2eb547
77
GPT4ALL_REPO?=https://github.com/go-skynet/gpt4all
88
GPT4ALL_VERSION?=a330bfe26e9e35ca402e16df18973a3b162fb4db
99
GOGPT2_VERSION?=92421a8cf61ed6e03babd9067af292b094cb1307
@@ -12,7 +12,9 @@ RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47
1212
WHISPER_CPP_VERSION?=1d17cd5bb37a3212679d6055ad69ba5a8d58eb71
1313
BERT_VERSION?=33118e0da50318101408986b86a331daeb4a6658
1414
BLOOMZ_VERSION?=e9366e82abdfe70565644fbfae9651976714efd1
15-
15+
BUILD_TYPE?=
16+
CGO_LDFLAGS?=
17+
CUDA_LIBPATH?=/usr/local/cuda/lib64/
1618

1719
GREEN := $(shell tput -Txterm setaf 2)
1820
YELLOW := $(shell tput -Txterm setaf 3)
@@ -23,15 +25,12 @@ RESET := $(shell tput -Txterm sgr0)
2325
C_INCLUDE_PATH=$(shell pwd)/go-llama:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
2426
LIBRARY_PATH=$(shell pwd)/go-llama:$(shell pwd)/gpt4all/gpt4all-bindings/golang/:$(shell pwd)/go-gpt2:$(shell pwd)/go-rwkv:$(shell pwd)/whisper.cpp:$(shell pwd)/go-bert:$(shell pwd)/bloomz
2527

26-
# Use this if you want to set the default behavior
27-
ifndef BUILD_TYPE
28-
BUILD_TYPE:=default
28+
ifeq ($(BUILD_TYPE),openblas)
29+
CGO_LDFLAGS+="-lopenblas"
2930
endif
3031

31-
ifeq ($(BUILD_TYPE), "generic")
32-
GENERIC_PREFIX:=generic-
33-
else
34-
GENERIC_PREFIX:=
32+
ifeq ($(BUILD_TYPE),cublas)
33+
CGO_LDFLAGS+="-lcublas -lcudart -L$(CUDA_LIBPATH)"
3534
endif
3635

3736
.PHONY: all test build vendor
@@ -94,7 +93,7 @@ go-bert/libgobert.a: go-bert
9493
$(MAKE) -C go-bert libgobert.a
9594

9695
gpt4all/gpt4all-bindings/golang/libgpt4all.a: gpt4all
97-
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ $(GENERIC_PREFIX)libgpt4all.a
96+
$(MAKE) -C gpt4all/gpt4all-bindings/golang/ libgpt4all.a
9897

9998
## CEREBRAS GPT
10099
go-gpt2:
@@ -113,7 +112,7 @@ go-gpt2:
113112
@find ./go-gpt2 -type f -name "*.cpp" -exec sed -i'' -e 's/json_/json_gpt2_/g' {} +
114113

115114
go-gpt2/libgpt2.a: go-gpt2
116-
$(MAKE) -C go-gpt2 $(GENERIC_PREFIX)libgpt2.a
115+
$(MAKE) -C go-gpt2 libgpt2.a
117116

118117
whisper.cpp:
119118
git clone https://github.com/ggerganov/whisper.cpp.git
@@ -127,7 +126,7 @@ go-llama:
127126
cd go-llama && git checkout -b build $(GOLLAMA_VERSION) && git submodule update --init --recursive --depth 1
128127

129128
go-llama/libbinding.a: go-llama
130-
$(MAKE) -C go-llama $(GENERIC_PREFIX)libbinding.a
129+
$(MAKE) -C go-llama BUILD_TYPE=$(BUILD_TYPE) libbinding.a
131130

132131
replace:
133132
$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama

api/config.go

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -15,23 +15,23 @@ import (
1515
)
1616

1717
type Config struct {
18-
OpenAIRequest `yaml:"parameters"`
19-
Name string `yaml:"name"`
20-
StopWords []string `yaml:"stopwords"`
21-
Cutstrings []string `yaml:"cutstrings"`
22-
TrimSpace []string `yaml:"trimspace"`
23-
ContextSize int `yaml:"context_size"`
24-
F16 bool `yaml:"f16"`
25-
Threads int `yaml:"threads"`
26-
Debug bool `yaml:"debug"`
27-
Roles map[string]string `yaml:"roles"`
28-
Embeddings bool `yaml:"embeddings"`
29-
Backend string `yaml:"backend"`
30-
TemplateConfig TemplateConfig `yaml:"template"`
31-
MirostatETA float64 `yaml:"mirostat_eta"`
32-
MirostatTAU float64 `yaml:"mirostat_tau"`
33-
Mirostat int `yaml:"mirostat"`
34-
18+
OpenAIRequest `yaml:"parameters"`
19+
Name string `yaml:"name"`
20+
StopWords []string `yaml:"stopwords"`
21+
Cutstrings []string `yaml:"cutstrings"`
22+
TrimSpace []string `yaml:"trimspace"`
23+
ContextSize int `yaml:"context_size"`
24+
F16 bool `yaml:"f16"`
25+
Threads int `yaml:"threads"`
26+
Debug bool `yaml:"debug"`
27+
Roles map[string]string `yaml:"roles"`
28+
Embeddings bool `yaml:"embeddings"`
29+
Backend string `yaml:"backend"`
30+
TemplateConfig TemplateConfig `yaml:"template"`
31+
MirostatETA float64 `yaml:"mirostat_eta"`
32+
MirostatTAU float64 `yaml:"mirostat_tau"`
33+
Mirostat int `yaml:"mirostat"`
34+
NGPULayers int `yaml:"gpu_layers"`
3535
PromptStrings, InputStrings []string
3636
InputToken [][]int
3737
}

api/prediction.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@ func defaultLLamaOpts(c Config) []llama.ModelOption {
3131
llamaOpts = append(llamaOpts, llama.EnableEmbeddings)
3232
}
3333

34+
if c.NGPULayers != 0 {
35+
llamaOpts = append(llamaOpts, llama.SetGPULayers(c.NGPULayers))
36+
}
37+
3438
return llamaOpts
3539
}
3640

0 commit comments

Comments (0)