Skip to content

Commit 28029c5

Browse files
authored
Merge branch 'ggerganov:master' into master
2 parents c42f7cb + a8bd14d commit 28029c5

File tree

246 files changed

+81938
-44187
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the searchbox below for content that may be hidden.

246 files changed

+81938
-44187
lines changed

.clang-tidy

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ Checks: >
1212
-readability-implicit-bool-conversion,
1313
-readability-magic-numbers,
1414
-readability-uppercase-literal-suffix,
15+
-readability-simplify-boolean-expr,
1516
clang-analyzer-*,
1617
-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
1718
performance-*,

.devops/full-cuda.Dockerfile

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ FROM ${BASE_CUDA_DEV_CONTAINER} as build
1212
ARG CUDA_DOCKER_ARCH=all
1313

1414
RUN apt-get update && \
15-
apt-get install -y build-essential python3 python3-pip git
15+
apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev
1616

1717
COPY requirements.txt requirements.txt
1818
COPY requirements requirements
@@ -26,8 +26,10 @@ COPY . .
2626

2727
# Set nvcc architecture
2828
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
29-
# Enable cuBLAS
30-
ENV LLAMA_CUBLAS=1
29+
# Enable CUDA
30+
ENV LLAMA_CUDA=1
31+
# Enable cURL
32+
ENV LLAMA_CURL=1
3133

3234
RUN make
3335

.devops/full-rocm.Dockerfile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ ENV LLAMA_HIPBLAS=1
4040
ENV CC=/opt/rocm/llvm/bin/clang
4141
ENV CXX=/opt/rocm/llvm/bin/clang++
4242

43+
# Enable cURL
44+
ENV LLAMA_CURL=1
45+
RUN apt-get update && \
46+
apt-get install -y libcurl4-openssl-dev
47+
4348
RUN make
4449

4550
ENTRYPOINT ["/app/.devops/tools.sh"]

.devops/full.Dockerfile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ ARG UBUNTU_VERSION=22.04
33
FROM ubuntu:$UBUNTU_VERSION as build
44

55
RUN apt-get update && \
6-
apt-get install -y build-essential python3 python3-pip git
6+
apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev
77

88
COPY requirements.txt requirements.txt
99
COPY requirements requirements
@@ -15,6 +15,9 @@ WORKDIR /app
1515

1616
COPY . .
1717

18+
ENV LLAMA_CURL=1
19+
20+
1821
RUN make
1922

2023
ENV LC_ALL=C.utf8

.devops/llama-cpp-clblast.srpm.spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# SRPM for building from source and packaging an RPM for RPM-based distros.
2-
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
2+
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
33
# Built and maintained by John Boero - [email protected]
44
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
55

.devops/llama-cpp-cublas.srpm.spec renamed to .devops/llama-cpp-cuda.srpm.spec

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# SRPM for building from source and packaging an RPM for RPM-based distros.
2-
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
2+
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
33
# Built and maintained by John Boero - [email protected]
44
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
55

@@ -12,7 +12,7 @@
1212
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
1313
# It is up to the user to install the correct vendor-specific support.
1414

15-
Name: llama.cpp-cublas
15+
Name: llama.cpp-cuda
1616
Version: %( date "+%%Y%%m%%d" )
1717
Release: 1%{?dist}
1818
Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
@@ -32,24 +32,24 @@ CPU inference for Meta's Llama2 models using default options.
3232
%setup -n llama.cpp-master
3333

3434
%build
35-
make -j LLAMA_CUBLAS=1
35+
make -j LLAMA_CUDA=1
3636

3737
%install
3838
mkdir -p %{buildroot}%{_bindir}/
39-
cp -p main %{buildroot}%{_bindir}/llamacppcublas
40-
cp -p server %{buildroot}%{_bindir}/llamacppcublasserver
41-
cp -p simple %{buildroot}%{_bindir}/llamacppcublassimple
39+
cp -p main %{buildroot}%{_bindir}/llamacppcuda
40+
cp -p server %{buildroot}%{_bindir}/llamacppcudaserver
41+
cp -p simple %{buildroot}%{_bindir}/llamacppcudasimple
4242

4343
mkdir -p %{buildroot}/usr/lib/systemd/system
44-
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacublas.service
44+
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
4545
[Unit]
4646
Description=Llama.cpp server, CPU only (no GPU support in this build).
4747
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
4848

4949
[Service]
5050
Type=simple
5151
EnvironmentFile=/etc/sysconfig/llama
52-
ExecStart=/usr/bin/llamacppcublasserver $LLAMA_ARGS
52+
ExecStart=/usr/bin/llamacppcudaserver $LLAMA_ARGS
5353
ExecReload=/bin/kill -s HUP $MAINPID
5454
Restart=never
5555

@@ -67,10 +67,10 @@ rm -rf %{buildroot}
6767
rm -rf %{_builddir}/*
6868

6969
%files
70-
%{_bindir}/llamacppcublas
71-
%{_bindir}/llamacppcublasserver
72-
%{_bindir}/llamacppcublassimple
73-
/usr/lib/systemd/system/llamacublas.service
70+
%{_bindir}/llamacppcuda
71+
%{_bindir}/llamacppcudaserver
72+
%{_bindir}/llamacppcudasimple
73+
/usr/lib/systemd/system/llamacuda.service
7474
%config /etc/sysconfig/llama
7575

7676
%pre

.devops/llama-cpp.srpm.spec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# SRPM for building from source and packaging an RPM for RPM-based distros.
2-
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
2+
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
33
# Built and maintained by John Boero - [email protected]
44
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
55

.devops/main-cuda.Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ COPY . .
2020

2121
# Set nvcc architecture
2222
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
23-
# Enable cuBLAS
24-
ENV LLAMA_CUBLAS=1
23+
# Enable CUDA
24+
ENV LLAMA_CUDA=1
2525

2626
RUN make
2727

.devops/nix/package.nix

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,14 @@
44
config,
55
stdenv,
66
mkShell,
7+
runCommand,
78
cmake,
89
ninja,
910
pkg-config,
1011
git,
1112
python3,
1213
mpi,
13-
openblas, # TODO: Use the generic `blas` so users could switch between alternative implementations
14+
blas,
1415
cudaPackages,
1516
darwin,
1617
rocmPackages,
@@ -23,7 +24,7 @@
2324
useOpenCL
2425
useRocm
2526
useVulkan
26-
],
27+
] && blas.meta.available,
2728
useCuda ? config.cudaSupport,
2829
useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
2930
useMpi ? false, # Increases the runtime closure size by ~700M
@@ -35,7 +36,8 @@
3536
# It's necessary to consistently use backendStdenv when building with CUDA support,
3637
# otherwise we get libstdc++ errors downstream.
3738
effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
38-
enableStatic ? effectiveStdenv.hostPlatform.isStatic
39+
enableStatic ? effectiveStdenv.hostPlatform.isStatic,
40+
precompileMetalShaders ? false
3941
}@inputs:
4042

4143
let
@@ -65,10 +67,15 @@ let
6567
strings.optionalString (suffices != [ ])
6668
", accelerated with ${strings.concatStringsSep ", " suffices}";
6769

70+
executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;
71+
6872
# TODO: package the Python in this repository in a Nix-like way.
6973
# It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
7074
# is PEP 517-compatible, and ensure the correct .dist-info is generated.
7175
# https://peps.python.org/pep-0517/
76+
#
77+
# TODO: Package up each Python script or service appropriately, by making
78+
# them into "entrypoints"
7279
llama-python = python3.withPackages (
7380
ps: [
7481
ps.numpy
@@ -87,6 +94,11 @@ let
8794
]
8895
);
8996

97+
xcrunHost = runCommand "xcrunHost" {} ''
98+
mkdir -p $out/bin
99+
ln -s /usr/bin/xcrun $out/bin
100+
'';
101+
90102
# apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
91103
# separately
92104
darwinBuildInputs =
@@ -150,13 +162,18 @@ effectiveStdenv.mkDerivation (
150162
postPatch = ''
151163
substituteInPlace ./ggml-metal.m \
152164
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
153-
154-
# TODO: Package up each Python script or service appropriately.
155-
# If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
156-
# we could make those *.py into setuptools' entrypoints
157-
substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
165+
substituteInPlace ./ggml-metal.m \
166+
--replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
158167
'';
159168

169+
# With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
170+
# `default.metallib` may be compiled with Metal compiler from XCode
171+
# and we need to escape sandbox on MacOS to access Metal compiler.
172+
# `xcrun` is used to find the path of the Metal compiler, which is variable
173+
# and not on $PATH
174+
# see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
175+
__noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
176+
160177
nativeBuildInputs =
161178
[
162179
cmake
@@ -173,6 +190,8 @@ effectiveStdenv.mkDerivation (
173190
]
174191
++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [
175192
glibc.static
193+
] ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [
194+
xcrunHost
176195
];
177196

178197
buildInputs =
@@ -181,6 +200,7 @@ effectiveStdenv.mkDerivation (
181200
++ optionals useMpi [ mpi ]
182201
++ optionals useOpenCL [ clblast ]
183202
++ optionals useRocm rocmBuildInputs
203+
++ optionals useBlas [ blas ]
184204
++ optionals useVulkan vulkanBuildInputs;
185205

186206
cmakeFlags =
@@ -191,7 +211,7 @@ effectiveStdenv.mkDerivation (
191211
(cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
192212
(cmakeBool "LLAMA_BLAS" useBlas)
193213
(cmakeBool "LLAMA_CLBLAST" useOpenCL)
194-
(cmakeBool "LLAMA_CUBLAS" useCuda)
214+
(cmakeBool "LLAMA_CUDA" useCuda)
195215
(cmakeBool "LLAMA_HIPBLAS" useRocm)
196216
(cmakeBool "LLAMA_METAL" useMetalKit)
197217
(cmakeBool "LLAMA_MPI" useMpi)
@@ -216,14 +236,16 @@ effectiveStdenv.mkDerivation (
216236
# Should likely use `rocmPackages.clr.gpuTargets`.
217237
"-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
218238
]
219-
++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ]
220-
++ optionals useBlas [ (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ];
239+
++ optionals useMetalKit [
240+
(lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
241+
(cmakeBool "LLAMA_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
242+
];
221243

222244
# TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
223245
# if they haven't been added yet.
224246
postInstall = ''
225-
mv $out/bin/main $out/bin/llama
226-
mv $out/bin/server $out/bin/llama-server
247+
mv $out/bin/main${executableSuffix} $out/bin/llama${executableSuffix}
248+
mv $out/bin/server${executableSuffix} $out/bin/llama-server${executableSuffix}
227249
mkdir -p $out/include
228250
cp $src/llama.h $out/include/
229251
'';

.devops/server-cuda.Dockerfile

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,21 +12,26 @@ FROM ${BASE_CUDA_DEV_CONTAINER} as build
1212
ARG CUDA_DOCKER_ARCH=all
1313

1414
RUN apt-get update && \
15-
apt-get install -y build-essential git
15+
apt-get install -y build-essential git libcurl4-openssl-dev
1616

1717
WORKDIR /app
1818

1919
COPY . .
2020

2121
# Set nvcc architecture
2222
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
23-
# Enable cuBLAS
24-
ENV LLAMA_CUBLAS=1
23+
# Enable CUDA
24+
ENV LLAMA_CUDA=1
25+
# Enable cURL
26+
ENV LLAMA_CURL=1
2527

2628
RUN make
2729

2830
FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
2931

32+
RUN apt-get update && \
33+
apt-get install -y libcurl4-openssl-dev
34+
3035
COPY --from=build /app/server /server
3136

3237
ENTRYPOINT [ "/server" ]

.devops/server-intel.Dockerfile

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ FROM intel/oneapi-basekit:$ONEAPI_VERSION as build
44

55
ARG LLAMA_SYCL_F16=OFF
66
RUN apt-get update && \
7-
apt-get install -y git
7+
apt-get install -y git libcurl4-openssl-dev
88

99
WORKDIR /app
1010

@@ -16,11 +16,14 @@ RUN mkdir build && \
1616
echo "LLAMA_SYCL_F16 is set" && \
1717
export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
1818
fi && \
19-
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
19+
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
2020
cmake --build . --config Release --target server
2121

2222
FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
2323

24+
RUN apt-get update && \
25+
apt-get install -y libcurl4-openssl-dev
26+
2427
COPY --from=build /app/build/bin/server /server
2528

2629
ENV LC_ALL=C.utf8

.devops/server-rocm.Dockerfile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ ENV LLAMA_HIPBLAS=1
4040
ENV CC=/opt/rocm/llvm/bin/clang
4141
ENV CXX=/opt/rocm/llvm/bin/clang++
4242

43+
# Enable cURL
44+
ENV LLAMA_CURL=1
45+
RUN apt-get update && \
46+
apt-get install -y libcurl4-openssl-dev
47+
4348
RUN make
4449

4550
ENTRYPOINT [ "/app/server" ]

.devops/server-vulkan.Dockerfile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,16 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key
1111
apt update -y && \
1212
apt-get install -y vulkan-sdk
1313

14+
# Install cURL
15+
RUN apt-get update && \
16+
apt-get install -y libcurl4-openssl-dev
17+
1418
# Build it
1519
WORKDIR /app
1620
COPY . .
1721
RUN mkdir build && \
1822
cd build && \
19-
cmake .. -DLLAMA_VULKAN=1 && \
23+
cmake .. -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
2024
cmake --build . --config Release --target server
2125

2226
# Clean up

.devops/server.Dockerfile

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,21 @@ ARG UBUNTU_VERSION=22.04
33
FROM ubuntu:$UBUNTU_VERSION as build
44

55
RUN apt-get update && \
6-
apt-get install -y build-essential git
6+
apt-get install -y build-essential git libcurl4-openssl-dev
77

88
WORKDIR /app
99

1010
COPY . .
1111

12+
ENV LLAMA_CURL=1
13+
1214
RUN make
1315

1416
FROM ubuntu:$UBUNTU_VERSION as runtime
1517

18+
RUN apt-get update && \
19+
apt-get install -y libcurl4-openssl-dev
20+
1621
COPY --from=build /app/server /server
1722

1823
ENV LC_ALL=C.utf8

0 commit comments

Comments
 (0)