
Commit d911cd1
Merge branch 'master' into compilade/bitnet-ternary
2 parents: 96b3d41 + 4134999

138 files changed: +6990, -1862 lines


.devops/llama-server.Dockerfile
Lines changed: 2 additions & 2 deletions

@@ -3,7 +3,7 @@ ARG UBUNTU_VERSION=22.04
 FROM ubuntu:$UBUNTU_VERSION AS build
 
 RUN apt-get update && \
-    apt-get install -y build-essential git libcurl4-openssl-dev curl
+    apt-get install -y build-essential git libcurl4-openssl-dev
 
 WORKDIR /app
 
@@ -16,7 +16,7 @@ RUN make -j$(nproc) llama-server
 FROM ubuntu:$UBUNTU_VERSION AS runtime
 
 RUN apt-get update && \
-    apt-get install -y libcurl4-openssl-dev libgomp1
+    apt-get install -y libcurl4-openssl-dev libgomp1 curl
 
 COPY --from=build /app/llama-server /llama-server

.devops/nix/package.nix
Lines changed: 3 additions & 10 deletions

@@ -126,16 +126,9 @@ let
       ++ optionals useMetalKit [ MetalKit ];
 
     cudaBuildInputs = with cudaPackages; [
-      cuda_cccl.dev # <nv/target>
-
-      # A temporary hack for reducing the closure size, remove once cudaPackages
-      # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
-      cuda_cudart.dev
-      cuda_cudart.lib
-      cuda_cudart.static
-      libcublas.dev
-      libcublas.lib
-      libcublas.static
+      cuda_cudart
+      cuda_cccl # <nv/target>
+      libcublas
     ];
 
     rocmBuildInputs = with rocmPackages; [

.gitignore
Lines changed: 0 additions & 1 deletion

@@ -79,7 +79,6 @@ models-mnt
 !models/ggml-vocab-*.gguf*
 
 # Zig
-
 zig-out/
 zig-cache/

CMakeLists.txt
Lines changed: 2 additions & 1 deletion

@@ -139,7 +139,8 @@ set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location o
 # determining _precisely_ which defines are necessary for the llama-config
 # package.
 #
-get_directory_property(GGML_DIR_DEFINES DIRECTORY ggml/src COMPILE_DEFINITIONS)
+get_target_property(GGML_DIRECTORY ggml SOURCE_DIR)
+get_directory_property(GGML_DIR_DEFINES DIRECTORY ${GGML_DIRECTORY} COMPILE_DEFINITIONS)
 get_target_property(GGML_TARGET_DEFINES ggml COMPILE_DEFINITIONS)
 set(GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES} ${GGML_DIR_DEFINES})
 get_target_property(GGML_LINK_LIBRARIES ggml LINK_LIBRARIES)

CONTRIBUTING.md
Lines changed: 1 addition & 0 deletions

@@ -5,6 +5,7 @@
 - Execute [the full CI locally on your machine](ci/README.md) before publishing
 - Please rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs.
 - The PR template has a series of review complexity checkboxes `[ ]` that [you can mark as](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-task-lists) `[X]` for your convenience
+- Consider allowing write access to your branch for faster review
 - If your PR becomes stale, don't hesitate to ping the maintainers in the comments
 
 # Pull requests (for collaborators)

Makefile
Lines changed: 37 additions & 30 deletions

@@ -19,6 +19,7 @@ BUILD_TARGETS = \
 	llama-imatrix \
 	llama-infill \
 	llama-llava-cli \
+	llama-minicpmv-cli\
 	llama-lookahead \
 	llama-lookup \
 	llama-lookup-create \

@@ -888,15 +889,16 @@ ggml/src/ggml-metal-embed.o: \
 	ggml/src/ggml-common.h
 	@echo "Embedding Metal library"
 	@sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal.metal > ggml/src/ggml-metal-embed.metal
-	$(eval TEMP_ASSEMBLY=$(shell mktemp))
-	@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)
-	@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)
-	@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)
-	@echo ".incbin \"ggml/src/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)
-	@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)
-	@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)
-	@$(AS) $(TEMP_ASSEMBLY) -o $@
-	@rm -f ${TEMP_ASSEMBLY}
+	$(eval TEMP_ASSEMBLY=$(shell mktemp -d))
+	@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+	@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+	@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+	@echo ".incbin \"ggml/src/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+	@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+	@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
+	$(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@
+	@rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s
+	@rmdir ${TEMP_ASSEMBLY}
 endif
 endif # GGML_METAL
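
Aside: the hunk above keeps the same `.incbin` embedding technique but writes the generated assembly to a named file inside a `mktemp -d` directory and assembles it through the compiler driver (`$(CC) -c`) instead of calling `$(AS)` directly. A minimal standalone sketch of the pattern, with hypothetical file and symbol names (Mach-O section syntax, matching the Darwin/Metal path above):

	# Sketch: wrap an arbitrary data file in start/end symbols via .incbin.
	# payload.bin, embed.o and the _payload_* symbols are illustrative only.
	embed.o: payload.bin
		$(eval TMP_DIR=$(shell mktemp -d))
		@echo ".section __DATA, __payload" >  $(TMP_DIR)/embed.s
		@echo ".globl _payload_start"      >> $(TMP_DIR)/embed.s
		@echo "_payload_start:"            >> $(TMP_DIR)/embed.s
		@echo ".incbin \"payload.bin\""    >> $(TMP_DIR)/embed.s
		@echo ".globl _payload_end"        >> $(TMP_DIR)/embed.s
		@echo "_payload_end:"              >> $(TMP_DIR)/embed.s
		$(CC) -c $(TMP_DIR)/embed.s -o $@
		@rm -rf $(TMP_DIR)

The start/end symbols let C code locate the embedded bytes at link time, which is how the _ggml_metallib_start and _ggml_metallib_end symbols above are consumed.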

@@ -1205,6 +1207,7 @@ clean:
 	rm -rvf ggml/*.dll
 	rm -rvf ggml/*.so
 	rm -vrf ggml/src/*.o
+	rm -rvf ggml/src/llamafile/*.o
 	rm -rvf common/build-info.cpp
 	rm -vrf ggml/src/ggml-metal-embed.metal
 	rm -vrf ggml/src/ggml-cuda/*.o

@@ -1451,15 +1454,20 @@ libllava.a: examples/llava/llava.cpp \
 	$(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
 
 llama-llava-cli: examples/llava/llava-cli.cpp \
-	examples/llava/clip.h \
-	examples/llava/clip.cpp \
+	examples/llava/llava.cpp \
 	examples/llava/llava.h \
+	examples/llava/clip.cpp \
+	examples/llava/clip.h \
+	$(OBJ_ALL)
+	$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
+
+llama-minicpmv-cli: examples/llava/minicpmv-cli.cpp \
 	examples/llava/llava.cpp \
+	examples/llava/llava.h \
+	examples/llava/clip.cpp \
+	examples/llava/clip.h \
 	$(OBJ_ALL)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
-	$(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
 
 ifeq ($(UNAME_S),Darwin)
 swift: examples/batched.swift
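
The rebuilt llava rules above collapse four compile-and-link commands into a single link step. The idiom `$< $(filter-out %.h $<,$^)` passes the first prerequisite plus every remaining non-header prerequisite to the compiler, so headers stay dependency-only and no source file is named twice. A toy illustration with hypothetical file names:

	# $^ is "tool.cpp helper.cpp helper.h"; filter-out drops helper.h (%.h)
	# and tool.cpp (already passed as $<), leaving helper.cpp.
	tool: tool.cpp helper.cpp helper.h
		$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS)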
@@ -1605,42 +1613,41 @@ llama-q8dot: pocs/vdot/q8dot.cpp ggml/src/ggml.o \
 # Mark legacy binary targets as .PHONY so that they are always checked.
 .PHONY: main quantize perplexity embedding server
 
+# Define the object file target
+examples/deprecation-warning/deprecation-warning.o: examples/deprecation-warning/deprecation-warning.cpp
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
 # NOTE: We currently will always build the deprecation-warning `main` and `server` binaries to help users migrate.
 # Eventually we will want to remove these target from building all the time.
-main: examples/deprecation-warning/deprecation-warning.cpp
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+main: examples/deprecation-warning/deprecation-warning.o
+	$(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
 	@echo "NOTICE: The 'main' binary is deprecated. Please use 'llama-cli' instead."
 
-server: examples/deprecation-warning/deprecation-warning.cpp
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+server: examples/deprecation-warning/deprecation-warning.o
+	$(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
 	@echo "NOTICE: The 'server' binary is deprecated. Please use 'llama-server' instead."
 
-quantize: examples/deprecation-warning/deprecation-warning.cpp
+quantize: examples/deprecation-warning/deprecation-warning.o
 ifneq (,$(wildcard quantize))
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
 	@echo "#########"
 	@echo "WARNING: The 'quantize' binary is deprecated. Please use 'llama-quantize' instead."
 	@echo " Remove the 'quantize' binary to remove this warning."
 	@echo "#########"
 endif
 
-perplexity: examples/deprecation-warning/deprecation-warning.cpp
+perplexity: examples/deprecation-warning/deprecation-warning.o
 ifneq (,$(wildcard perplexity))
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
 	@echo "#########"
 	@echo "WARNING: The 'perplexity' binary is deprecated. Please use 'llama-perplexity' instead."
 	@echo " Remove the 'perplexity' binary to remove this warning."
 	@echo "#########"
 endif
 
-embedding: examples/deprecation-warning/deprecation-warning.cpp
+embedding: examples/deprecation-warning/deprecation-warning.o
 ifneq (,$(wildcard embedding))
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
 	@echo "#########"
 	@echo "WARNING: The 'embedding' binary is deprecated. Please use 'llama-embedding' instead."
 	@echo " Remove the 'embedding' binary to remove this warning."
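
The deprecation-warning refactor above compiles the shared warning source once into an explicit object file and lets every legacy target link that object, rather than recompiling the same .cpp in each rule. Reduced to a standalone sketch with hypothetical names:

	# Compile the shared source a single time.
	warn.o: warn.cpp
		$(CXX) $(CXXFLAGS) -c $< -o $@

	# Each legacy alias then only links the prebuilt object.
	main server: warn.o
		$(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS)
		@echo "NOTICE: this binary name is deprecated."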

README.md
Lines changed: 9 additions & 0 deletions

@@ -95,8 +95,16 @@ Typically finetunes of the base models below are supported as well.
 - [x] [SEA-LION](https://huggingface.co/models?search=sea-lion)
 - [x] [GritLM-7B](https://huggingface.co/GritLM/GritLM-7B) + [GritLM-8x7B](https://huggingface.co/GritLM/GritLM-8x7B)
 - [x] [OLMo](https://allenai.org/olmo)
+- [x] [Granite models](https://huggingface.co/collections/ibm-granite/granite-code-models-6624c5cec322e4c148c8b330)
 - [x] [GPT-NeoX](https://github.com/EleutherAI/gpt-neox) + [Pythia](https://github.com/EleutherAI/pythia)
+- [x] [Snowflake-Arctic MoE](https://huggingface.co/collections/Snowflake/arctic-66290090abe542894a5ac520)
+- [x] [Smaug](https://huggingface.co/models?search=Smaug)
+- [x] [Poro 34B](https://huggingface.co/LumiOpen/Poro-34B)
+- [x] [Bitnet b1.58 models](https://huggingface.co/1bitLLM)
+- [x] [Flan T5](https://huggingface.co/models?search=flan-t5)
+- [x] [Open Elm models](https://huggingface.co/collections/apple/openelm-instruct-models-6619ad295d7ae9f868b759ca)
 - [x] [ChatGLM3-6b](https://huggingface.co/THUDM/chatglm3-6b) + [ChatGLM4-9b](https://huggingface.co/THUDM/glm-4-9b)
+- [x] [SmolLM](https://huggingface.co/collections/HuggingFaceTB/smollm-6695016cad7167254ce15966)
 
 (instructions for supporting more models: [HOWTO-add-model.md](./docs/development/HOWTO-add-model.md))

@@ -145,6 +153,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:
 - [Faraday](https://faraday.dev/) (proprietary)
 - [LMStudio](https://lmstudio.ai/) (proprietary)
 - [Layla](https://play.google.com/store/apps/details?id=com.laylalite) (proprietary)
+- [ramalama](https://github.com/containers/ramalama) (MIT)
 - [LocalAI](https://github.com/mudler/LocalAI) (MIT)
 - [LostRuins/koboldcpp](https://github.com/LostRuins/koboldcpp) (AGPL)
 - [Mozilla-Ocho/llamafile](https://github.com/Mozilla-Ocho/llamafile)
