Skip to content

Commit 66a8dd3

Browse files
committed
Merge branch 'master' into cuda-cublas-opts
2 parents c830a05 + 5aa365d commit 66a8dd3

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

55 files changed

+2420
-513
lines changed

.devops/tools.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
1313
./quantize "$@"
1414
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
1515
./main "$@"
16+
elif [[ "$arg1" == '--finetune' || "$arg1" == '-f' ]]; then
17+
./finetune "$@"
1618
elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
1719
echo "Converting PTH to GGML..."
1820
for i in `ls $1/$2/ggml-model-f16.bin*`; do
@@ -34,6 +36,8 @@ else
3436
echo " ex: --outtype f16 \"/models/7B/\" "
3537
echo " --quantize (-q): Optimize with quantization process ggml"
3638
echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
39+
echo " --finetune (-f): Run finetune command to create a lora finetune of the model"
40+
echo " See documentation for finetune for command-line parameters"
3741
echo " --all-in-one (-a): Execute --convert & --quantize"
3842
echo " ex: \"/models/\" 7B"
3943
echo " --server (-s): Run a model on the server"

.github/workflows/build.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,17 @@ jobs:
498498
path: |
499499
cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
500500
501+
ios-xcode-build:
502+
runs-on: macos-latest
503+
504+
steps:
505+
- name: Checkout code
506+
uses: actions/checkout@v3
507+
508+
- name: Build Xcode project
509+
run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
510+
511+
501512
# freeBSD-latest:
502513
# runs-on: macos-12
503514
# steps:

.gitignore

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -88,15 +88,16 @@ poetry.lock
8888
poetry.toml
8989

9090
# Test binaries
91-
tests/test-grammar-parser
92-
tests/test-llama-grammar
93-
tests/test-double-float
94-
tests/test-grad0
95-
tests/test-opt
96-
tests/test-quantize-fns
97-
tests/test-quantize-perf
98-
tests/test-sampling
99-
tests/test-tokenizer-0-llama
100-
tests/test-tokenizer-0-falcon
101-
tests/test-tokenizer-1-llama
102-
tests/test-tokenizer-1-bpe
91+
/tests/test-grammar-parser
92+
/tests/test-llama-grammar
93+
/tests/test-double-float
94+
/tests/test-grad0
95+
/tests/test-opt
96+
/tests/test-quantize-fns
97+
/tests/test-quantize-perf
98+
/tests/test-sampling
99+
/tests/test-tokenizer-0-llama
100+
/tests/test-tokenizer-0-falcon
101+
/tests/test-tokenizer-1-llama
102+
/tests/test-tokenizer-1-bpe
103+
/tests/test-rope

CMakeLists.txt

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ else()
4343
endif()
4444

4545
# general
46+
option(BUILD_SHARED_LIBS "build shared libraries" OFF)
4647
option(LLAMA_STATIC "llama: static link libraries" OFF)
4748
option(LLAMA_NATIVE "llama: enable -march=native flag" ON)
4849
option(LLAMA_LTO "llama: enable link time optimization" OFF)
@@ -100,6 +101,9 @@ option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALO
100101
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
101102
option(LLAMA_BUILD_SERVER "llama: build server example" ON)
102103

104+
# Required for relocatable CMake package
105+
include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
106+
103107
#
104108
# Compile flags
105109
#
@@ -112,6 +116,11 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)
112116
find_package(Threads REQUIRED)
113117
include(CheckCXXCompilerFlag)
114118

119+
# enable libstdc++ assertions for debug builds
120+
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
121+
add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>)
122+
endif()
123+
115124
if (NOT MSVC)
116125
if (LLAMA_SANITIZE_THREAD)
117126
add_compile_options(-fsanitize=thread)
@@ -161,7 +170,7 @@ if (LLAMA_METAL)
161170
#add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")
162171

163172
# copy ggml-metal.metal to bin directory
164-
configure_file(ggml-metal.metal bin/ggml-metal.metal COPYONLY)
173+
configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY)
165174

166175
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS}
167176
${FOUNDATION_LIBRARY}

Makefile

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ BUILD_TARGETS = \
88
TEST_TARGETS = \
99
tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
1010
tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
11-
tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe
11+
tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope
1212

1313
# Code coverage output files
1414
COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
@@ -30,7 +30,7 @@ ifeq '' '$(findstring clang,$(shell $(CC) --version))'
3030
CC_VER := $(shell $(CC) -dumpfullversion -dumpversion | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
3131
else
3232
CC_IS_CLANG=1
33-
ifeq '' '$(findstring Apple LLVM,$(shell $(CC) --version))'
33+
ifeq '' '$(findstring Apple,$(shell $(CC) --version))'
3434
CC_IS_LLVM_CLANG=1
3535
else
3636
CC_IS_APPLE_CLANG=1
@@ -174,6 +174,10 @@ ifdef LLAMA_DEBUG
174174
MK_CFLAGS += -O0 -g
175175
MK_CXXFLAGS += -O0 -g
176176
MK_LDFLAGS += -g
177+
178+
ifeq ($(UNAME_S),Linux)
179+
MK_CXXFLAGS += -Wp,-D_GLIBCXX_ASSERTIONS
180+
endif
177181
else
178182
MK_CPPFLAGS += -DNDEBUG
179183
endif
@@ -648,7 +652,7 @@ beam-search: examples/beam-search/beam-search.cpp ggml.o llama.o $(COMMON_DEPS)
648652
finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
649653
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
650654

651-
export-lora: examples/export-lora/export-lora.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
655+
export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
652656
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
653657

654658
speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
@@ -701,28 +705,28 @@ vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
701705
q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
702706
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
703707

704-
tests/test-llama-grammar: tests/test-llama-grammar.cpp ggml.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
708+
tests/test-llama-grammar: tests/test-llama-grammar.cpp ggml.o grammar-parser.o $(OBJS)
705709
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
706710

707-
tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
711+
tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o grammar-parser.o $(OBJS)
708712
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
709713

710-
tests/test-double-float: tests/test-double-float.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
714+
tests/test-double-float: tests/test-double-float.cpp ggml.o $(OBJS)
711715
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
712716

713-
tests/test-grad0: tests/test-grad0.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
717+
tests/test-grad0: tests/test-grad0.cpp ggml.o $(OBJS)
714718
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
715719

716-
tests/test-opt: tests/test-opt.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
720+
tests/test-opt: tests/test-opt.cpp ggml.o $(OBJS)
717721
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
718722

719-
tests/test-quantize-fns: tests/test-quantize-fns.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
723+
tests/test-quantize-fns: tests/test-quantize-fns.cpp ggml.o $(OBJS)
720724
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
721725

722-
tests/test-quantize-perf: tests/test-quantize-perf.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
726+
tests/test-quantize-perf: tests/test-quantize-perf.cpp ggml.o $(OBJS)
723727
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
724728

725-
tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
729+
tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(OBJS)
726730
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
727731

728732
tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
@@ -737,5 +741,8 @@ tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMM
737741
tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
738742
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
739743

744+
tests/test-rope: tests/test-rope.cpp ggml.o $(OBJS)
745+
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
746+
740747
tests/test-c.o: tests/test-c.c llama.h
741748
$(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@

Package.swift

Lines changed: 16 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,59 +2,45 @@
22

33
import PackageDescription
44

5-
#if arch(arm) || arch(arm64)
6-
let platforms: [SupportedPlatform]? = [
7-
.macOS(.v12),
8-
.iOS(.v14),
9-
.watchOS(.v4),
10-
.tvOS(.v14)
11-
]
12-
let exclude: [String] = []
13-
let resources: [Resource] = [
14-
.process("ggml-metal.metal")
15-
]
16-
let additionalSources: [String] = ["ggml-metal.m"]
17-
let additionalSettings: [CSetting] = [
18-
.unsafeFlags(["-fno-objc-arc"]),
19-
.define("GGML_USE_METAL")
20-
]
21-
#else
22-
let platforms: [SupportedPlatform]? = nil
23-
let exclude: [String] = ["ggml-metal.metal"]
24-
let resources: [Resource] = []
25-
let additionalSources: [String] = []
26-
let additionalSettings: [CSetting] = []
27-
#endif
28-
295
let package = Package(
306
name: "llama",
31-
platforms: platforms,
7+
platforms: [
8+
.macOS(.v12),
9+
.iOS(.v14),
10+
.watchOS(.v4),
11+
.tvOS(.v14)
12+
],
3213
products: [
3314
.library(name: "llama", targets: ["llama"]),
3415
],
3516
targets: [
3617
.target(
3718
name: "llama",
3819
path: ".",
39-
exclude: exclude,
20+
exclude: [],
4021
sources: [
4122
"ggml.c",
4223
"llama.cpp",
4324
"ggml-alloc.c",
4425
"ggml-backend.c",
4526
"ggml-quants.c",
46-
] + additionalSources,
47-
resources: resources,
27+
"ggml-metal.m",
28+
],
29+
resources: [
30+
.process("ggml-metal.metal")
31+
],
4832
publicHeadersPath: "spm-headers",
4933
cSettings: [
5034
.unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
51-
.define("GGML_USE_ACCELERATE")
35+
.define("GGML_USE_ACCELERATE"),
36+
.unsafeFlags(["-fno-objc-arc"]),
37+
.define("GGML_USE_METAL"),
5238
// NOTE: NEW_LAPACK will required iOS version 16.4+
5339
// We should consider add this in the future when we drop support for iOS 14
5440
// (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
5541
// .define("ACCELERATE_NEW_LAPACK"),
5642
// .define("ACCELERATE_LAPACK_ILP64")
57-
] + additionalSettings,
43+
],
5844
linkerSettings: [
5945
.linkedFramework("Accelerate")
6046
]

README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@ as the main playground for developing new features for the [ggml](https://github
116116
- [nat/openplayground](https://github.com/nat/openplayground)
117117
- [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui)
118118
- [withcatai/catai](https://github.com/withcatai/catai)
119+
- [semperai/amica](https://github.com/semperai/amica)
120+
- [psugihara/FreeChat](https://github.com/psugihara/FreeChat)
119121

120122
---
121123

@@ -322,7 +324,7 @@ mpirun -hostfile hostfile -n 3 ./main -m ./models/7B/ggml-model-q4_0.gguf -n 128
322324
323325
### BLAS Build
324326
325-
Building the program with BLAS support may lead to some performance improvements in prompt processing using batch sizes higher than 32 (the default is 512). BLAS doesn't affect the normal generation performance. There are currently three different implementations of it:
327+
Building the program with BLAS support may lead to some performance improvements in prompt processing using batch sizes higher than 32 (the default is 512). Support with CPU-only BLAS implementations doesn't affect the normal generation performance. We may see generation performance improvements with GPU-involved BLAS implementations, e.g. cuBLAS, hipBLAS and CLBlast. There are currently several different BLAS implementations available for build and use:
326328

327329
- #### Accelerate Framework:
328330

@@ -894,7 +896,7 @@ Additionally, there the following images, similar to the above:
894896
- `ghcr.io/ggerganov/llama.cpp:full-rocm`: Same as `full` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
895897
- `ghcr.io/ggerganov/llama.cpp:light-rocm`: Same as `light` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
896898

897-
The GPU enabled images are not currently tested by CI beyond being built. They are not built with any variation from the ones in the Dockerfiles defined in [.devops/](.devops/) and the Gitlab Action defined in [.github/workflows/docker.yml](.github/workflows/docker.yml). If you need different settings (for example, a different CUDA or ROCm library, you'll need to build the images locally for now).
899+
The GPU enabled images are not currently tested by CI beyond being built. They are not built with any variation from the ones in the Dockerfiles defined in [.devops/](.devops/) and the GitHub Action defined in [.github/workflows/docker.yml](.github/workflows/docker.yml). If you need different settings (for example, a different CUDA or ROCm library), you'll need to build the images locally for now.
898900
899901
#### Usage
900902

common/CMakeLists.txt

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,12 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../.git")
1111
if(NOT IS_DIRECTORY "${GIT_DIR}")
1212
file(READ ${GIT_DIR} REAL_GIT_DIR_LINK)
1313
string(REGEX REPLACE "gitdir: (.*)\n$" "\\1" REAL_GIT_DIR ${REAL_GIT_DIR_LINK})
14-
set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${REAL_GIT_DIR}")
14+
string(FIND "${REAL_GIT_DIR}" "/" SLASH_POS)
15+
if (SLASH_POS EQUAL 0)
16+
set(GIT_DIR "${REAL_GIT_DIR}")
17+
else()
18+
set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${REAL_GIT_DIR}")
19+
endif()
1520
endif()
1621

1722
set(GIT_INDEX "${GIT_DIR}/index")
@@ -26,7 +31,7 @@ add_custom_command(
2631
COMMENT "Generating build details from Git"
2732
COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION}
2833
-DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME}
29-
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/../scripts/build-info.cmake"
34+
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/../scripts/gen-build-info-cpp.cmake"
3035
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.."
3136
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in" ${GIT_INDEX}
3237
VERBATIM

0 commit comments

Comments
 (0)