
Commit 3e1e86d

Merge branch 'master' into server-probs

2 parents: 48bea64 + 3323112

30 files changed: +4500 −3775 lines

CMakeLists.txt

Lines changed: 2 additions & 2 deletions
@@ -280,8 +280,8 @@ if (LLAMA_CUBLAS)
         # 52 == lowest CUDA 12 standard
         # 60 == f16 CUDA intrinsics
         # 61 == integer CUDA intrinsics
-        # 70 == (assumed) compute capability at which unrolling a loop in mul_mat_q kernels is faster
-        if (LLAMA_CUDA_DMMV_F16)
+        # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
+        if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
             set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
         else()
             set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics
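A quick way to exercise the new option (a sketch; only LLAMA_CUBLAS, LLAMA_CUDA_F16, and LLAMA_CUDA_DMMV_F16 come from this diff — the build directory name is arbitrary):

    # configure with cuBLAS; either f16 flag now selects the "60;61;70"
    # architecture list needed for the f16 CUDA intrinsics
    cmake -B build -DLLAMA_CUBLAS=ON -DLLAMA_CUDA_F16=ON
    cmake --build build --config Release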

Makefile

Lines changed: 7 additions & 4 deletions
@@ -340,6 +340,9 @@ llama.o: llama.cpp ggml.h ggml-alloc.h ggml-cuda.h ggml-metal.h llama.h llama-ut
 common.o: examples/common.cpp examples/common.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
+console.o: examples/console.cpp examples/console.h
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
 grammar-parser.o: examples/grammar-parser.cpp examples/grammar-parser.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
@@ -353,7 +356,7 @@ clean:
 # Examples
 #
 
-main: examples/main/main.cpp build-info.h ggml.o llama.o common.o grammar-parser.o $(OBJS)
+main: examples/main/main.cpp build-info.h ggml.o llama.o common.o console.o grammar-parser.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 	@echo
 	@echo '==== Run ./main -h for help. ===='
@@ -411,13 +414,13 @@ benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o
 vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
 
-tests/test-double-float: tests/test-double-float.c build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-double-float: tests/test-double-float.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
 
-tests/test-grad0: tests/test-grad0.c build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-grad0: tests/test-grad0.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
 
-tests/test-opt: tests/test-opt.c build-info.h ggml.o llama.o common.o $(OBJS)
+tests/test-opt: tests/test-opt.cpp build-info.h ggml.o llama.o common.o $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
 
 tests/test-quantize-fns: tests/test-quantize-fns.cpp build-info.h ggml.o llama.o common.o $(OBJS)
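A smoke-test sketch for the new prerequisites (the targets are exactly the ones defined above; nothing else is assumed):

    # main now lists console.o, so make builds examples/console.cpp first
    make main
    ./main -h
    # the renamed tests compile as C++ via $(CXX)
    make tests/test-double-float tests/test-grad0 tests/test-opt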

README.md

Lines changed: 7 additions & 0 deletions
@@ -88,6 +88,7 @@ as the main playground for developing new features for the [ggml](https://github
 - [X] [Pygmalion 7B / Metharme 7B](#using-pygmalion-7b--metharme-7b)
 - [X] [WizardLM](https://github.com/nlpxucan/WizardLM)
 - [X] [Baichuan-7B](https://huggingface.co/baichuan-inc/baichuan-7B) and its derivations (such as [baichuan-7b-sft](https://huggingface.co/hiyouga/baichuan-7b-sft))
+- [X] [Aquila-7B](https://huggingface.co/BAAI/Aquila-7B) / [AquilaChat-7B](https://huggingface.co/BAAI/AquilaChat-7B)
 
 **Bindings:**
 
@@ -492,13 +493,19 @@ Building the program with BLAS support may lead to some performance improvements
 # obtain the original LLaMA model weights and place them in ./models
 ls ./models
 65B 30B 13B 7B tokenizer_checklist.chk tokenizer.model
+  # [Optional] for models using BPE tokenizers
+  ls ./models
+  65B 30B 13B 7B vocab.json
 
 # install Python dependencies
 python3 -m pip install -r requirements.txt
 
 # convert the 7B model to ggml FP16 format
 python3 convert.py models/7B/
 
+# [Optional] for models using BPE tokenizers
+python convert.py models/7B/ --vocabtype bpe
+
 # quantize the model to 4-bits (using q4_0 method)
 ./quantize ./models/7B/ggml-model-f16.bin ./models/7B/ggml-model-q4_0.bin q4_0
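Once quantized, the model can be run with the main binary; a usage sketch (the -m, -p, and -n flags are the usual main options and the prompt text is arbitrary — neither is part of this diff):

    # run inference with the 4-bit model produced by ./quantize above
    ./main -m ./models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 128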

examples/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -13,6 +13,8 @@ set(TARGET common)
 add_library(${TARGET} OBJECT
     common.h
     common.cpp
+    console.h
+    console.cpp
     grammar-parser.h
     grammar-parser.cpp
     )
