Temp #19

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged: 19 commits, Nov 15, 2024

Commits (19)
54ef9cf  vulkan: Throttle the number of shader compiles during the build step.… (jeffbolznv, Nov 11, 2024)
80dd7ff  vulkan: Optimize contiguous copies (#10254) (jeffbolznv, Nov 13, 2024)
2e82ffa  sycl : Fixes to broken builds and test-backend-ops (#10257) (Alcpz, Nov 13, 2024)
a0ec17b  metadata: Detailed Dataset Authorship Metadata (#8875) (mofosyne, Nov 13, 2024)
0e712a5  server : fix incorrect res in validate_model_chat_template (#10272) (jhen0409, Nov 13, 2024)
ff7fb67  server : add missing docs (#10269) (z80maniac, Nov 13, 2024)
1ee9eea  docs : update bindings list (#10261) (xuegao-tzx, Nov 13, 2024)
5ea926d  sync : ggml (ggerganov, Nov 13, 2024)
fb4a0ec  llama : propagate the results of `graph_compute` (#9525) (Xarbirus, Nov 13, 2024)
66798e4  vulkan: Use macros to make the mat mul pipeline creation more concise… (jeffbolznv, Nov 13, 2024)
af148c9  vulkan: Optimize binary ops (#10270) (jeffbolznv, Nov 14, 2024)
2a82891  speculative : fix out-of-bounds access (#10289) (ggerganov, Nov 14, 2024)
4a8ccb3  CUDA: no -sm row for very small matrices (#10185) (JohannesGaessler, Nov 14, 2024)
ae8de6d  ggml : build backends as libraries (#10256) (slaren, Nov 14, 2024)
1607a5e  backend cpu: add online flow for aarch64 Q4_0 GEMV/GEMM kernels (#9921) (chaxu01, Nov 15, 2024)
5a54af4  sycl: Use syclcompat::dp4a (#10267) (Rbiessy, Nov 15, 2024)
4802ad3  scripts : fix regex in sync [no ci] (ggerganov, Nov 15, 2024)
231f936  cann: dockerfile and doc adjustment (#10302) (noemotiovon, Nov 15, 2024)
729d133  Merge branch 'master' into temp (apicalshark, Nov 15, 2024)
4 changes: 2 additions & 2 deletions .devops/llama-cli-cann.Dockerfile
@@ -1,6 +1,6 @@
 ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8
 
-FROM cosdt/cann:$ASCEND_VERSION AS build
+FROM ascendai/cann:$ASCEND_VERSION AS build
 
 WORKDIR /app
 
@@ -26,7 +26,7 @@ RUN echo "Building with static libs" && \
     cmake --build build --config Release --target llama-cli
 
 # TODO: use image with NNRT
-FROM cosdt/cann:$ASCEND_VERSION AS runtime
+FROM ascendai/cann:$ASCEND_VERSION AS runtime
 COPY --from=build /app/build/bin/llama-cli /llama-cli
 
 ENV LC_ALL=C.utf8
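For reference, a minimal sketch of building this image against the new ascendai/cann base from the repository root; the image tag is illustrative and not part of this PR, and the --build-arg simply restates the Dockerfile default.

```sh
# Build the CANN CLI image; the tag is a placeholder
docker build -t llama-cli-cann -f .devops/llama-cli-cann.Dockerfile \
    --build-arg ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8 .
```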
9 changes: 5 additions & 4 deletions .devops/llama-cli-cuda.Dockerfile
@@ -23,15 +23,16 @@ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
     export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
     fi && \
     cmake -B build -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
-    cmake --build build --config Release --target llama-cli -j$(nproc)
+    cmake --build build --config Release --target llama-cli -j$(nproc) && \
+    mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib \;
 
 FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
 
 RUN apt-get update && \
     apt-get install -y libgomp1
 
-COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
-COPY --from=build /app/build/src/libllama.so /libllama.so
-COPY --from=build /app/build/bin/llama-cli /llama-cli
+COPY --from=build /app/lib/ /
+COPY --from=build /app/build/bin/llama-cli /
 
 ENTRYPOINT [ "/llama-cli" ]
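With this change the build stage collects every backend shared library under /app/lib and the runtime stage copies that directory into the image root next to the binary, which follows from the "ggml : build backends as libraries" commit above. A minimal usage sketch, assuming an NVIDIA container runtime and a local GGUF model; the tag and paths are placeholders, not part of this diff.

```sh
# Build the CUDA CLI image from the repository root (tag is illustrative)
docker build -t llama-cli-cuda -f .devops/llama-cli-cuda.Dockerfile .

# Run it with GPU access; the model path is a placeholder
docker run --gpus all -v /path/to/models:/models llama-cli-cuda \
    -m /models/model.gguf -p "Hello"
```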
7 changes: 4 additions & 3 deletions .devops/llama-cli-musa.Dockerfile
@@ -16,15 +16,16 @@ WORKDIR /app
 COPY . .
 
 RUN cmake -B build -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
-    cmake --build build --config Release --target llama-cli -j$(nproc)
+    cmake --build build --config Release --target llama-cli -j$(nproc) && \
+    mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib \;
 
 FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
 
 RUN apt-get update && \
     apt-get install -y libgomp1
 
-COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
-COPY --from=build /app/build/src/libllama.so /libllama.so
+COPY --from=build /app/lib/ /
 COPY --from=build /app/build/bin/llama-cli /llama-cli
 
 ENTRYPOINT [ "/llama-cli" ]
7 changes: 4 additions & 3 deletions .devops/llama-server-cuda.Dockerfile
@@ -23,15 +23,16 @@ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
     export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
     fi && \
     cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
-    cmake --build build --config Release --target llama-server -j$(nproc)
+    cmake --build build --config Release --target llama-server -j$(nproc) && \
+    mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib \;
 
 FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
 
 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev libgomp1 curl
 
-COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
-COPY --from=build /app/build/src/libllama.so /libllama.so
+COPY --from=build /app/lib/ /
 COPY --from=build /app/build/bin/llama-server /llama-server
 
 # Must be set to 0.0.0.0 so it can listen to requests from host machine
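A hedged sketch of running the resulting server image and checking it from the host; the tag, port, model path, and the /health probe are illustrative assumptions rather than part of this diff.

```sh
# Publish the server port so the host can reach it (all names are placeholders)
docker run --gpus all -p 8080:8080 -v /path/to/models:/models llama-server-cuda \
    -m /models/model.gguf --host 0.0.0.0 --port 8080

# Basic reachability check from the host
curl http://localhost:8080/health
```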
7 changes: 4 additions & 3 deletions .devops/llama-server-musa.Dockerfile
@@ -16,15 +16,16 @@ WORKDIR /app
 COPY . .
 
 RUN cmake -B build -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
-    cmake --build build --config Release --target llama-server -j$(nproc)
+    cmake --build build --config Release --target llama-server -j$(nproc) && \
+    mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib \;
 
 FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
 
 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev libgomp1 curl
 
-COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
-COPY --from=build /app/build/src/libllama.so /libllama.so
+COPY --from=build /app/lib/ /
 COPY --from=build /app/build/bin/llama-server /llama-server
 
 # Must be set to 0.0.0.0 so it can listen to requests from host machine
6 changes: 3 additions & 3 deletions .devops/nix/package.nix
@@ -126,9 +126,9 @@ effectiveStdenv.mkDerivation (finalAttrs: {
   };
 
   postPatch = ''
-    substituteInPlace ./ggml/src/ggml-metal.m \
+    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
       --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
-    substituteInPlace ./ggml/src/ggml-metal.m \
+    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
       --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
   '';
 
@@ -173,7 +173,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
       (cmakeBool "GGML_NATIVE" false)
       (cmakeBool "GGML_BLAS" useBlas)
      (cmakeBool "GGML_CUDA" useCuda)
-      (cmakeBool "GGML_HIPBLAS" useRocm)
+      (cmakeBool "GGML_HIP" useRocm)
      (cmakeBool "GGML_METAL" useMetalKit)
      (cmakeBool "GGML_VULKAN" useVulkan)
      (cmakeBool "GGML_STATIC" enableStatic)
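The cmakeBool change tracks the rename of the ROCm option from GGML_HIPBLAS to GGML_HIP. For out-of-tree CMake builds this corresponds to passing the new flag, sketched below under the assumption that a ROCm toolchain is installed; other flags are omitted.

```sh
# ROCm backend flag after the rename; was -DGGML_HIPBLAS=ON
cmake -B build -DGGML_HIP=ON
cmake --build build --config Release
```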
2 changes: 1 addition & 1 deletion .gitmodules
@@ -1,3 +1,3 @@
 [submodule "kompute"]
-	path = ggml/src/kompute
+	path = ggml/src/ggml-kompute/kompute
 	url = https://github.com/nomic-ai/kompute.git
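Because the kompute submodule path moved, existing checkouts may need to be re-synced; a sketch of the usual sequence (cleanup of the stale ggml/src/kompute directory, if any, is left out).

```sh
# Re-read .gitmodules and update the working tree after the path change
git submodule sync
git submodule update --init --recursive
```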
1 change: 0 additions & 1 deletion CMakeLists.txt
@@ -155,7 +155,6 @@ set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location o
 set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
 set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
 
-
 # At the moment some compile definitions are placed within the ggml/src
 # directory but not exported on the `ggml` target. This could be improved by
 # determining _precisely_ which defines are necessary for the llama-config