
Commit fe0606c
Merge branch 'master' into layla-build
2 parents: 9dc9e5a + b8fe4b5


56 files changed: +4191 / -976 lines

.github/workflows/build.yml (7 additions, 2 deletions)

@@ -10,10 +10,10 @@ on:
   push:
     branches:
       - master
-    paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift']
+    paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m']
   pull_request:
     types: [opened, synchronize, reopened]
-    paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift']
+    paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m']

 env:
   BRANCH_NAME: ${{ github.head_ref || github.ref_name }}

@@ -276,6 +276,11 @@ jobs:
         run: |
           xcodebuild -scheme llama -destination "${{ matrix.destination }}"

+      - name: Build Swift Example
+        id: make_build_swift_example
+        run: |
+          make swift
+
   windows-latest-cmake:
     runs-on: windows-latest

.github/workflows/gguf-publish.yml (2 additions, 1 deletion)

@@ -36,8 +36,9 @@ jobs:
           poetry install

       - name: Build package
-        run: poetry build
+        run: cd gguf-py && poetry build
       - name: Publish package
         uses: pypa/gh-action-pypi-publish@release/v1
         with:
           password: ${{ secrets.PYPI_API_TOKEN }}
+          packages-dir: gguf-py/dist
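
With this change the package is built from the gguf-py subdirectory and published from gguf-py/dist. For reference, roughly the same steps can be run locally; a minimal sketch, assuming Poetry is installed and the commands are run from the repository root:

    cd gguf-py
    poetry install   # install the package's dependencies
    poetry build     # writes the sdist and wheel into gguf-py/dist/
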

.github/workflows/zig-build.yml (new file, 25 additions)

@@ -0,0 +1,25 @@
+name: Zig CI
+
+on:
+  pull_request:
+  push:
+    branches:
+      - master
+
+jobs:
+  build:
+    strategy:
+      fail-fast: false
+      matrix:
+        runs-on: [ubuntu-latest, macos-latest, windows-latest]
+    runs-on: ${{ matrix.runs-on }}
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          submodules: recursive
+          fetch-depth: 0
+      - uses: goto-bus-stop/setup-zig@v2
+        with:
+          version: 0.11.0
+      - name: Build Summary
+        run: zig build --summary all -freference-trace
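
The workflow only runs the Zig build on each platform, so the same check can be reproduced locally. A minimal sketch, assuming Zig 0.11.0 is on PATH and the repository is cloned with its submodules (clone URL shown for illustration):

    git clone --recursive https://github.com/ggerganov/llama.cpp
    cd llama.cpp
    zig build --summary all -freference-trace   # same invocation as the CI step
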

.gitignore (1 addition)

@@ -56,6 +56,7 @@ models-mnt
 /server
 /simple
 /batched
+/batched-bench
 /export-lora
 /finetune
 /speculative

CMakeLists.txt (2 additions)

@@ -602,6 +602,8 @@ add_library(ggml OBJECT
             ggml.h
             ggml-alloc.c
             ggml-alloc.h
+            ggml-backend.c
+            ggml-backend.h
             ${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
             ${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
             ${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
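
Since ggml-backend.c/.h are now part of the ggml object library, they are compiled automatically by the usual CMake flow; a minimal sketch of such a build, assuming CMake is installed:

    mkdir build
    cd build
    cmake ..
    cmake --build . --config Release   # ggml-backend.c is built into the ggml object library
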

Makefile (77 additions, 36 deletions)

@@ -1,8 +1,14 @@
 # Define the default target now so that it is always the first target
-BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml simple batched save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative infill benchmark-matmult parallel finetune export-lora tests/test-c.o
+BUILD_TARGETS = \
+	main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
+	simple batched batched-bench save-load-state server embd-input-test gguf llama-bench baby-llama beam-search \
+	speculative infill benchmark-matmult parallel finetune export-lora tests/test-c.o

 # Binaries only useful for tests
-TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe
+TEST_TARGETS = \
+	tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
+	tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
+	tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe

 # Code coverage output files
 COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report

@@ -172,6 +178,24 @@ else
 MK_CPPFLAGS += -DNDEBUG
 endif

+ifdef LLAMA_SANITIZE_THREAD
+	MK_CFLAGS   += -fsanitize=thread -g
+	MK_CXXFLAGS += -fsanitize=thread -g
+	MK_LDFLAGS  += -fsanitize=thread -g
+endif
+
+ifdef LLAMA_SANITIZE_ADDRESS
+	MK_CFLAGS   += -fsanitize=address -fno-omit-frame-pointer -g
+	MK_CXXFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
+	MK_LDFLAGS  += -fsanitize=address -fno-omit-frame-pointer -g
+endif
+
+ifdef LLAMA_SANITIZE_UNDEFINED
+	MK_CFLAGS   += -fsanitize=undefined -g
+	MK_CXXFLAGS += -fsanitize=undefined -g
+	MK_LDFLAGS  += -fsanitize=undefined -g
+endif
+
 ifdef LLAMA_SERVER_VERBOSE
 MK_CPPFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE)
 endif

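The new sanitizer switches are opt-in make variables; a hedged usage sketch (a clean rebuild is needed so every object is compiled with the sanitizer flags):

    make clean
    make LLAMA_SANITIZE_ADDRESS=1 main      # AddressSanitizer build of the main example
    # alternatively: LLAMA_SANITIZE_THREAD=1 or LLAMA_SANITIZE_UNDEFINED=1
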
@@ -512,12 +536,21 @@ ggml.o: ggml.c ggml.h ggml-cuda.h
 ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
 	$(CC) $(CFLAGS) -c $< -o $@

-OBJS += ggml-alloc.o
+ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
+	$(CC) $(CFLAGS) -c $< -o $@
+
+OBJS += ggml-alloc.o ggml-backend.o

-llama.o: llama.cpp ggml.h ggml-alloc.h ggml-cuda.h ggml-metal.h llama.h
+llama.o: llama.cpp ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@

-common.o: common/common.cpp common/common.h build-info.h common/log.h
+COMMON_H_DEPS = common/common.h common/sampling.h build-info.h common/log.h
+COMMON_DEPS   = $(COMMON_H_DEPS) common.o sampling.o
+
+common.o: common/common.cpp $(COMMON_H_DEPS)
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
+sampling.o: common/sampling.cpp $(COMMON_H_DEPS)
 	$(CXX) $(CXXFLAGS) -c $< -o $@

 console.o: common/console.cpp common/console.h

@@ -539,19 +572,22 @@ clean:
 # Examples
 #

-main: examples/main/main.cpp build-info.h ggml.o llama.o common.o console.o grammar-parser.o $(OBJS)
+main: examples/main/main.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 	@echo
 	@echo '==== Run ./main -h for help. ===='
 	@echo

-infill: examples/infill/infill.cpp build-info.h ggml.o llama.o common.o console.o grammar-parser.o $(OBJS)
+infill: examples/infill/infill.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) console.o grammar-parser.o $(OBJS)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+
+simple: examples/simple/simple.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-simple: examples/simple/simple.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+batched: examples/batched/batched.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-batched: examples/batched/batched.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+batched-bench: examples/batched-bench/batched-bench.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

 quantize: examples/quantize/quantize.cpp build-info.h ggml.o llama.o $(OBJS)

@@ -560,60 +596,65 @@ quantize: examples/quantize/quantize.cpp build-info.h ggml.
 quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.h ggml.o llama.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-perplexity: examples/perplexity/perplexity.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+perplexity: examples/perplexity/perplexity.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-embedding: examples/embedding/embedding.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+embedding: examples/embedding/embedding.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp build-info.h ggml.o llama.o common.o grammar-parser.o $(OBJS)
+server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp build-info.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
 	$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS) $(LWINSOCK2)

-$(LIB_PRE)embdinput$(DSO_EXT): examples/embd-input/embd-input.h examples/embd-input/embd-input-lib.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+$(LIB_PRE)embdinput$(DSO_EXT): examples/embd-input/embd-input.h examples/embd-input/embd-input-lib.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) --shared $(CXXFLAGS) $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS)


-embd-input-test: $(LIB_PRE)embdinput$(DSO_EXT) examples/embd-input/embd-input-test.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+embd-input-test: $(LIB_PRE)embdinput$(DSO_EXT) examples/embd-input/embd-input-test.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %$(DSO_EXT),$(filter-out %.h,$(filter-out %.hpp,$^))) -o $@ $(LDFLAGS) -L. -lembdinput

 gguf: examples/gguf/gguf.cpp ggml.o llama.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o common.o train.o $(OBJS)
+train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

 convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp ggml.o llama.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-llama-bench: examples/llama-bench/llama-bench.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+llama-bench: examples/llama-bench/llama-bench.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o common.o train.o $(OBJS)
+baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-beam-search: examples/beam-search/beam-search.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+beam-search: examples/beam-search/beam-search.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-finetune: examples/finetune/finetune.cpp build-info.h ggml.o llama.o common.o train.o $(OBJS)
+finetune: examples/finetune/finetune.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-export-lora: examples/export-lora/export-lora.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+export-lora: examples/export-lora/export-lora.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-speculative: examples/speculative/speculative.cpp build-info.h ggml.o llama.o common.o grammar-parser.o $(OBJS)
+speculative: examples/speculative/speculative.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-parallel: examples/parallel/parallel.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+parallel: examples/parallel/parallel.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

 ifdef LLAMA_METAL
 metal: examples/metal/metal.cpp ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
 endif

+ifeq ($(UNAME_S),Darwin)
+swift: examples/batched.swift
+	(cd examples/batched.swift; make build)
+endif
+
 build-info.h: $(wildcard .git/index) scripts/build-info.sh
 	@sh scripts/build-info.sh $(CC) > $@.tmp
 	@if ! cmp -s $@.tmp $@; then \

@@ -634,48 +675,48 @@ benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o
 run-benchmark-matmult: benchmark-matmult
 	./$@

-.PHONY: run-benchmark-matmult
+.PHONY: run-benchmark-matmult swift

 vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)

 q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)

-tests/test-llama-grammar: tests/test-llama-grammar.cpp build-info.h ggml.o common.o grammar-parser.o $(OBJS)
+tests/test-llama-grammar: tests/test-llama-grammar.cpp build-info.h ggml.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-tests/test-grammar-parser: tests/test-grammar-parser.cpp build-info.h ggml.o llama.o common.o grammar-parser.o $(OBJS)
+tests/test-grammar-parser: tests/test-grammar-parser.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-tests/test-double-float: tests/test-double-float.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-double-float: tests/test-double-float.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-tests/test-grad0: tests/test-grad0.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-grad0: tests/test-grad0.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-tests/test-opt: tests/test-opt.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-opt: tests/test-opt.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-tests/test-quantize-fns: tests/test-quantize-fns.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-quantize-fns: tests/test-quantize-fns.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-tests/test-quantize-perf: tests/test-quantize-perf.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-quantize-perf: tests/test-quantize-perf.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-tests/test-sampling: tests/test-sampling.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-sampling: tests/test-sampling.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-tests/test-tokenizer-0-llama: tests/test-tokenizer-0-llama.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-tokenizer-0-llama: tests/test-tokenizer-0-llama.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

-tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp build-info.h ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

 tests/test-c.o: tests/test-c.c llama.h
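
The new swift target is defined only on Darwin and simply delegates to the example's own Makefile; a hedged usage sketch on macOS, matching the CI step added in build.yml:

    make swift    # runs (cd examples/batched.swift; make build)
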

Package.swift (4 additions, 3 deletions)

@@ -1,10 +1,10 @@
-// swift-tools-version:5.3
+// swift-tools-version:5.5

 import PackageDescription

 #if arch(arm) || arch(arm64)
 let platforms: [SupportedPlatform]? = [
-    .macOS(.v11),
+    .macOS(.v12),
     .iOS(.v14),
     .watchOS(.v4),
     .tvOS(.v14)
@@ -41,12 +41,13 @@ let package = Package(
             "ggml.c",
             "llama.cpp",
             "ggml-alloc.c",
+            "ggml-backend.c",
             "k_quants.c",
         ] + additionalSources,
         resources: resources,
         publicHeadersPath: "spm-headers",
         cSettings: [
-            .unsafeFlags(["-Wno-shorten-64-to-32"]),
+            .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
             .define("GGML_USE_K_QUANTS"),
             .define("GGML_USE_ACCELERATE")
             // NOTE: NEW_LAPACK will required iOS version 16.4+
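
Bumping swift-tools-version to 5.5 and the macOS deployment target to 12 means a reasonably recent toolchain is needed to consume the package; a minimal sketch of building it with SwiftPM from the repository root (assuming a Swift 5.5+ toolchain is installed):

    swift build -c release   # compiles the llama target, now including ggml-backend.c
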

README.md (2 additions)

@@ -96,6 +96,8 @@ as the main playground for developing new features for the [ggml](https://github
 - [X] [Starcoder models](https://github.com/ggerganov/llama.cpp/pull/3187)
 - [X] [Mistral AI v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
 - [X] [Refact](https://huggingface.co/smallcloudai/Refact-1_6B-fim)
+- [X] [Bloom](https://github.com/ggerganov/llama.cpp/pull/3553)
+- [X] [MPT](https://github.com/ggerganov/llama.cpp/pull/3417)

 **Bindings:**
