Commit c42f7cb

Merge commit: 2 parents dc5d269 + 652ca2b

230 files changed: +106,197 additions, −8,766 deletions


.devops/main-intel.Dockerfile

Lines changed: 9 additions & 7 deletions
@@ -1,25 +1,27 @@
 ARG ONEAPI_VERSION=2024.0.1-devel-ubuntu22.04
-ARG UBUNTU_VERSION=22.04
 
-FROM intel/hpckit:$ONEAPI_VERSION as build
+FROM intel/oneapi-basekit:$ONEAPI_VERSION as build
 
+ARG LLAMA_SYCL_F16=OFF
 RUN apt-get update && \
     apt-get install -y git
 
 WORKDIR /app
 
 COPY . .
 
-# for some reasons, "-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DLLAMA_NATIVE=ON" give worse performance
 RUN mkdir build && \
     cd build && \
-    cmake .. -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx && \
-    cmake --build . --config Release --target main server
+    if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
+        echo "LLAMA_SYCL_F16 is set" && \
+        export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
+    fi && \
+    cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
+    cmake --build . --config Release --target main
 
-FROM ubuntu:$UBUNTU_VERSION as runtime
+FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
 
 COPY --from=build /app/build/bin/main /main
-COPY --from=build /app/build/bin/server /server
 
 ENV LC_ALL=C.utf8
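
For reference, a build-and-run sketch for this image. The tag, model path, and GPU mapping are illustrative (not part of the commit); --device /dev/dri is one common way to expose an Intel GPU to a container:

    # Build, optionally enabling SYCL FP16 via the build arg defined above:
    docker build -f .devops/main-intel.Dockerfile \
        --build-arg LLAMA_SYCL_F16=ON -t llama-cpp-sycl .

    # Run the binary copied to /main in the runtime stage:
    docker run --rm -it --device /dev/dri \
        -v /path/to/models:/models \
        llama-cpp-sycl /main -m /models/model.gguf -p "Hello"
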
.devops/main-vulkan.Dockerfile

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+ARG UBUNTU_VERSION=jammy
+
+FROM ubuntu:$UBUNTU_VERSION as build
+
+# Install build tools
+RUN apt update && apt install -y git build-essential cmake wget
+
+# Install Vulkan SDK
+RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
+    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
+    apt update -y && \
+    apt-get install -y vulkan-sdk
+
+# Build it
+WORKDIR /app
+COPY . .
+RUN mkdir build && \
+    cd build && \
+    cmake .. -DLLAMA_VULKAN=1 && \
+    cmake --build . --config Release --target main
+
+# Clean up
+WORKDIR /
+RUN cp /app/build/bin/main /main && \
+    rm -rf /app
+
+ENV LC_ALL=C.utf8
+
+ENTRYPOINT [ "/main" ]
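
A usage sketch (tag and device mapping are illustrative; Vulkan inside a container generally needs the host's GPU device nodes and driver ICDs available):

    docker build -f .devops/main-vulkan.Dockerfile -t llama-cpp-vulkan .
    # The ENTRYPOINT is /main, so arguments pass straight through:
    docker run --rm -it --device /dev/dri \
        -v /path/to/models:/models \
        llama-cpp-vulkan -m /models/model.gguf -p "Hello"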

.devops/nix/docker.nix

Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+{
+  lib,
+  dockerTools,
+  buildEnv,
+  llama-cpp,
+  interactive ? true,
+  coreutils,
+}:
+
+# A tar that can be fed into `docker load`:
+#
+# $ nix build .#llamaPackages.docker
+# $ docker load < result
+
+# For details and variations cf.
+# - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
+# - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
+# - https://nixery.dev/
+
+# Approximate (compressed) sizes, at the time of writing, are:
+#
+# .#llamaPackages.docker: 125M;
+# .#llamaPackagesCuda.docker: 537M;
+# .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.
+
+dockerTools.buildLayeredImage {
+  name = llama-cpp.pname;
+  tag = "latest";
+
+  contents =
+    [ llama-cpp ]
+    ++ lib.optionals interactive [
+      coreutils
+      dockerTools.binSh
+      dockerTools.caCertificates
+    ];
+}
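
Following the comments above, a usage sketch. The image name comes from llama-cpp.pname ("llama-cpp", plus a backend suffix when one is enabled), tagged latest; /bin/sh is only present in the interactive variant, via dockerTools.binSh:

    nix build .#llamaPackages.docker
    docker load < result
    # The interactive variant ships coreutils, /bin/sh, and CA certificates:
    docker run --rm -it llama-cpp:latest /bin/sh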

.devops/nix/package.nix

Lines changed: 29 additions & 6 deletions
@@ -1,5 +1,6 @@
 {
   lib,
+  glibc,
   config,
   stdenv,
   mkShell,
@@ -13,19 +14,28 @@
   cudaPackages,
   darwin,
   rocmPackages,
+  vulkan-headers,
+  vulkan-loader,
   clblast,
   useBlas ? builtins.all (x: !x) [
     useCuda
     useMetalKit
     useOpenCL
     useRocm
+    useVulkan
   ],
   useCuda ? config.cudaSupport,
   useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
   useMpi ? false, # Increases the runtime closure size by ~700M
   useOpenCL ? false,
   useRocm ? config.rocmSupport,
+  useVulkan ? false,
   llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
+
+  # It's necessary to consistently use backendStdenv when building with CUDA support,
+  # otherwise we get libstdc++ errors downstream.
+  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
+  enableStatic ? effectiveStdenv.hostPlatform.isStatic
 }@inputs:
 
 let
@@ -37,18 +47,16 @@ let
     versionOlder
     ;
 
-  # It's necessary to consistently use backendStdenv when building with CUDA support,
-  # otherwise we get libstdc++ errors downstream.
   stdenv = throw "Use effectiveStdenv instead";
-  effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv;
 
   suffices =
     lib.optionals useBlas [ "BLAS" ]
     ++ lib.optionals useCuda [ "CUDA" ]
     ++ lib.optionals useMetalKit [ "MetalKit" ]
     ++ lib.optionals useMpi [ "MPI" ]
     ++ lib.optionals useOpenCL [ "OpenCL" ]
-    ++ lib.optionals useRocm [ "ROCm" ];
+    ++ lib.optionals useRocm [ "ROCm" ]
+    ++ lib.optionals useVulkan [ "Vulkan" ];
 
   pnameSuffix =
     strings.optionalString (suffices != [ ])
@@ -108,6 +116,11 @@ let
     hipblas
     rocblas
   ];
+
+  vulkanBuildInputs = [
+    vulkan-headers
+    vulkan-loader
+  ];
 in
 
 effectiveStdenv.mkDerivation (
@@ -157,27 +170,33 @@ effectiveStdenv.mkDerivation (
         # TODO: Replace with autoAddDriverRunpath
         # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
         cudaPackages.autoAddOpenGLRunpathHook
+      ]
+      ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [
+        glibc.static
       ];
 
     buildInputs =
       optionals effectiveStdenv.isDarwin darwinBuildInputs
       ++ optionals useCuda cudaBuildInputs
       ++ optionals useMpi [ mpi ]
       ++ optionals useOpenCL [ clblast ]
-      ++ optionals useRocm rocmBuildInputs;
+      ++ optionals useRocm rocmBuildInputs
+      ++ optionals useVulkan vulkanBuildInputs;
 
     cmakeFlags =
       [
        (cmakeBool "LLAMA_NATIVE" false)
        (cmakeBool "LLAMA_BUILD_SERVER" true)
-       (cmakeBool "BUILD_SHARED_LIBS" true)
+       (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
        (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
        (cmakeBool "LLAMA_BLAS" useBlas)
        (cmakeBool "LLAMA_CLBLAST" useOpenCL)
        (cmakeBool "LLAMA_CUBLAS" useCuda)
        (cmakeBool "LLAMA_HIPBLAS" useRocm)
        (cmakeBool "LLAMA_METAL" useMetalKit)
        (cmakeBool "LLAMA_MPI" useMpi)
+       (cmakeBool "LLAMA_VULKAN" useVulkan)
+       (cmakeBool "LLAMA_STATIC" enableStatic)
      ]
      ++ optionals useCuda [
        (
@@ -218,13 +237,17 @@ effectiveStdenv.mkDerivation (
          useMpi
          useOpenCL
          useRocm
+         useVulkan
          ;
 
      shell = mkShell {
        name = "shell-${finalAttrs.finalPackage.name}";
        description = "contains numpy and sentencepiece";
        buildInputs = [ llama-python ];
        inputsFrom = [ finalAttrs.finalPackage ];
+       shellHook = ''
+         addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
+       '';
      };
 
      shell-extra = mkShell {
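
Because the package is instantiated with callPackage (see scope.nix below), the new flags can be toggled through the standard override mechanism. A hypothetical sketch, assuming the llamaPackages attribute path that the comments in docker.nix reference:

    # Default build:
    nix build .#llamaPackages.llama-cpp

    # Hypothetical Vulkan-enabled variant via the callPackage override:
    nix build --impure --expr \
      '((builtins.getFlake (toString ./.)).legacyPackages.${builtins.currentSystem}.llamaPackages.llama-cpp).override { useVulkan = true; }'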

.devops/nix/scope.nix

Lines changed: 3 additions & 0 deletions
@@ -12,5 +12,8 @@ lib.makeScope newScope (
   self: {
     inherit llamaVersion;
     llama-cpp = self.callPackage ./package.nix { };
+    docker = self.callPackage ./docker.nix { };
+    docker-min = self.callPackage ./docker.nix { interactive = false; };
+    sif = self.callPackage ./sif.nix { };
   }
 )
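
With these attributes in the scope, the container images become buildable directly — attribute paths as already referenced in docker.nix:

    nix build .#llamaPackages.docker       # interactive layered Docker image
    nix build .#llamaPackages.docker-min   # same image without shell/coreutils
    nix build .#llamaPackages.sif          # Singularity/Apptainer image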

.devops/nix/sif.nix

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
+{
+  lib,
+  singularity-tools,
+  llama-cpp,
+  bashInteractive,
+  interactive ? false,
+}:
+
+let
+  optionalInt = cond: x: if cond then x else 0;
+in
+singularity-tools.buildImage rec {
+  inherit (llama-cpp) name;
+  contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ];
+
+  # These are excessive (but safe) for most variants. Building singularity
+  # images requires superuser privileges, so we build them inside a VM in a
+  # writable image of pre-determined size.
+  #
+  # ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846
+  #
+  # Expected image sizes:
+  # - cpu/blas: 150M,
+  # - cuda, all gencodes: 560M,
+  diskSize = 4096 + optionalInt llama-cpp.useRocm 16384;
+  memSize = diskSize;
+}
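
A run sketch — hypothetical: the binary name and runner invocation depend on the llama-cpp package layout and on your Singularity/Apptainer installation:

    nix build .#llamaPackages.sif
    # Binaries from the llama-cpp package land on the image's /bin (assumed):
    apptainer exec result main -m /path/to/model.gguf -p "Hello"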

.devops/server-cuda.Dockerfile

Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG CUDA_VERSION=11.7.1
+# Target the CUDA build image
+ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+# Target the CUDA runtime image
+ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_CUDA_DEV_CONTAINER} as build
+
+# Unless otherwise specified, we make a fat build.
+ARG CUDA_DOCKER_ARCH=all
+
+RUN apt-get update && \
+    apt-get install -y build-essential git
+
+WORKDIR /app
+
+COPY . .
+
+# Set nvcc architecture
+ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
+# Enable cuBLAS
+ENV LLAMA_CUBLAS=1
+
+RUN make
+
+FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
+
+COPY --from=build /app/server /server
+
+ENTRYPOINT [ "/server" ]
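
A run sketch — requires the NVIDIA Container Toolkit on the host; tag, port, and model path are illustrative:

    docker build -f .devops/server-cuda.Dockerfile -t llama-cpp-server-cuda .
    docker run --rm --gpus all -p 8080:8080 \
        -v /path/to/models:/models \
        llama-cpp-server-cuda \
        -m /models/model.gguf --host 0.0.0.0 --port 8080 -ngl 99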

.devops/server-intel.Dockerfile

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+ARG ONEAPI_VERSION=2024.0.1-devel-ubuntu22.04
+
+FROM intel/oneapi-basekit:$ONEAPI_VERSION as build
+
+ARG LLAMA_SYCL_F16=OFF
+RUN apt-get update && \
+    apt-get install -y git
+
+WORKDIR /app
+
+COPY . .
+
+RUN mkdir build && \
+    cd build && \
+    if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
+        echo "LLAMA_SYCL_F16 is set" && \
+        export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
+    fi && \
+    cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
+    cmake --build . --config Release --target server
+
+FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
+
+COPY --from=build /app/build/bin/server /server
+
+ENV LC_ALL=C.utf8
+
+ENTRYPOINT [ "/server" ]
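
A run sketch along the same lines (illustrative tag; /dev/dri is one common way to expose the Intel GPU to the SYCL runtime):

    docker build -f .devops/server-intel.Dockerfile \
        --build-arg LLAMA_SYCL_F16=ON -t llama-cpp-server-intel .
    docker run --rm --device /dev/dri -p 8080:8080 \
        -v /path/to/models:/models \
        llama-cpp-server-intel \
        -m /models/model.gguf --host 0.0.0.0 --port 8080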

.devops/server-rocm.Dockerfile

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
+ARG UBUNTU_VERSION=22.04
+
+# This needs to generally match the container host's environment.
+ARG ROCM_VERSION=5.6
+
+# Target the CUDA build image
+ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
+
+FROM ${BASE_ROCM_DEV_CONTAINER} as build
+
+# Unless otherwise specified, we make a fat build.
+# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
+# This is mostly tied to rocBLAS supported archs.
+ARG ROCM_DOCKER_ARCH=\
+    gfx803 \
+    gfx900 \
+    gfx906 \
+    gfx908 \
+    gfx90a \
+    gfx1010 \
+    gfx1030 \
+    gfx1100 \
+    gfx1101 \
+    gfx1102
+
+COPY requirements.txt requirements.txt
+COPY requirements requirements
+
+RUN pip install --upgrade pip setuptools wheel \
+    && pip install -r requirements.txt
+
+WORKDIR /app
+
+COPY . .
+
+# Set nvcc architecture
+ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
+# Enable ROCm
+ENV LLAMA_HIPBLAS=1
+ENV CC=/opt/rocm/llvm/bin/clang
+ENV CXX=/opt/rocm/llvm/bin/clang++
+
+RUN make
+
+ENTRYPOINT [ "/app/server" ]
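
A run sketch (illustrative tag; /dev/kfd and /dev/dri are the device nodes ROCm containers conventionally need):

    docker build -f .devops/server-rocm.Dockerfile -t llama-cpp-server-rocm .
    docker run --rm --device /dev/kfd --device /dev/dri -p 8080:8080 \
        -v /path/to/models:/models \
        llama-cpp-server-rocm \
        -m /models/model.gguf --host 0.0.0.0 --port 8080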
