Commit c42f7cb

Merge commit: 2 parents dc5d269 + 652ca2b

230 files changed: +106,197 additions, −8,766 deletions


.devops/main-intel.Dockerfile

Lines changed: 9 additions & 7 deletions
@@ -1,25 +1,27 @@
 ARG ONEAPI_VERSION=2024.0.1-devel-ubuntu22.04
-ARG UBUNTU_VERSION=22.04
 
-FROM intel/hpckit:$ONEAPI_VERSION as build
+FROM intel/oneapi-basekit:$ONEAPI_VERSION as build
 
+ARG LLAMA_SYCL_F16=OFF
 RUN apt-get update && \
     apt-get install -y git
 
 WORKDIR /app
 
 COPY . .
 
-# for some reasons, "-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DLLAMA_NATIVE=ON" give worse performance
 RUN mkdir build && \
     cd build && \
-    cmake .. -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx && \
-    cmake --build . --config Release --target main server
+    if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
+        echo "LLAMA_SYCL_F16 is set" && \
+        export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
+    fi && \
+    cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
+    cmake --build . --config Release --target main
 
-FROM ubuntu:$UBUNTU_VERSION as runtime
+FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
 
 COPY --from=build /app/build/bin/main /main
-COPY --from=build /app/build/bin/server /server
 
 ENV LC_ALL=C.utf8
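
For reference, a build-and-run sketch for this image. The tag, model path, and GPU mapping are illustrative (not part of the commit); --device /dev/dri is one common way to expose an Intel GPU to a container:

    # Build, optionally enabling SYCL FP16 via the build arg defined above:
    docker build -f .devops/main-intel.Dockerfile \
        --build-arg LLAMA_SYCL_F16=ON -t llama-cpp-sycl .

    # Run the binary copied to /main in the runtime stage:
    docker run --rm -it --device /dev/dri \
        -v /path/to/models:/models \
        llama-cpp-sycl /main -m /models/model.gguf -p "Hello"
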
.devops/main-vulkan.Dockerfile

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+ARG UBUNTU_VERSION=jammy
+
+FROM ubuntu:$UBUNTU_VERSION as build
+
+# Install build tools
+RUN apt update && apt install -y git build-essential cmake wget
+
+# Install Vulkan SDK
+RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
+    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
+    apt update -y && \
+    apt-get install -y vulkan-sdk
+
+# Build it
+WORKDIR /app
+COPY . .
+RUN mkdir build && \
+    cd build && \
+    cmake .. -DLLAMA_VULKAN=1 && \
+    cmake --build . --config Release --target main
+
+# Clean up
+WORKDIR /
+RUN cp /app/build/bin/main /main && \
+    rm -rf /app
+
+ENV LC_ALL=C.utf8
+
+ENTRYPOINT [ "/main" ]
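
A usage sketch (tag and device mapping are illustrative; Vulkan inside a container generally needs the host's GPU device nodes and driver ICDs available):

    docker build -f .devops/main-vulkan.Dockerfile -t llama-cpp-vulkan .
    # The ENTRYPOINT is /main, so arguments pass straight through:
    docker run --rm -it --device /dev/dri \
        -v /path/to/models:/models \
        llama-cpp-vulkan -m /models/model.gguf -p "Hello"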

.devops/nix/docker.nix

Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+{
+  lib,
+  dockerTools,
+  buildEnv,
+  llama-cpp,
+  interactive ? true,
+  coreutils,
+}:
+
+# A tar that can be fed into `docker load`:
+#
+# $ nix build .#llamaPackages.docker
+# $ docker load < result
+
+# For details and variations cf.
+# - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
+# - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
+# - https://nixery.dev/
+
+# Approximate (compressed) sizes, at the time of writing, are:
+#
+# .#llamaPackages.docker: 125M;
+# .#llamaPackagesCuda.docker: 537M;
+# .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.
+
+dockerTools.buildLayeredImage {
+  name = llama-cpp.pname;
+  tag = "latest";
+
+  contents =
+    [ llama-cpp ]
+    ++ lib.optionals interactive [
+      coreutils
+      dockerTools.binSh
+      dockerTools.caCertificates
+    ];
+}
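
Following the comments above, a usage sketch. The image name comes from llama-cpp.pname ("llama-cpp", plus a backend suffix when one is enabled), tagged latest; /bin/sh is only present in the interactive variant, via dockerTools.binSh:

    nix build .#llamaPackages.docker
    docker load < result
    # The interactive variant ships coreutils, /bin/sh, and CA certificates:
    docker run --rm -it llama-cpp:latest /bin/sh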

.devops/nix/package.nix

Lines changed: 29 additions & 6 deletions
@@ -1,5 +1,6 @@
 {
   lib,
+  glibc,
   config,
   stdenv,
   mkShell,
@@ -13,19 +14,28 @@
   cudaPackages,
   darwin,
   rocmPackages,
+  vulkan-headers,
+  vulkan-loader,
   clblast,
   useBlas ? builtins.all (x: !x) [
     useCuda
     useMetalKit
     useOpenCL
     useRocm
+    useVulkan
   ],
   useCuda ? config.cudaSupport,
   useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
   useMpi ? false, # Increases the runtime closure size by ~700M
   useOpenCL ? false,
   useRocm ? config.rocmSupport,
+  useVulkan ? false,
   llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
+
+  # It's necessary to consistently use backendStdenv when building with CUDA support,
+  # otherwise we get libstdc++ errors downstream.
+  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
+  enableStatic ? effectiveStdenv.hostPlatform.isStatic
 }@inputs:
 
 let
@@ -37,18 +47,16 @@ let
     versionOlder
     ;
 
-  # It's necessary to consistently use backendStdenv when building with CUDA support,
-  # otherwise we get libstdc++ errors downstream.
   stdenv = throw "Use effectiveStdenv instead";
-  effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv;
 
   suffices =
     lib.optionals useBlas [ "BLAS" ]
     ++ lib.optionals useCuda [ "CUDA" ]
     ++ lib.optionals useMetalKit [ "MetalKit" ]
     ++ lib.optionals useMpi [ "MPI" ]
     ++ lib.optionals useOpenCL [ "OpenCL" ]
-    ++ lib.optionals useRocm [ "ROCm" ];
+    ++ lib.optionals useRocm [ "ROCm" ]
+    ++ lib.optionals useVulkan [ "Vulkan" ];
 
   pnameSuffix =
     strings.optionalString (suffices != [ ])
@@ -108,6 +116,11 @@ let
     hipblas
     rocblas
   ];
+
+  vulkanBuildInputs = [
+    vulkan-headers
+    vulkan-loader
+  ];
 in
 
 effectiveStdenv.mkDerivation (
@@ -157,27 +170,33 @@ effectiveStdenv.mkDerivation (
         # TODO: Replace with autoAddDriverRunpath
         # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
         cudaPackages.autoAddOpenGLRunpathHook
+      ]
+      ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [
+        glibc.static
       ];
 
     buildInputs =
       optionals effectiveStdenv.isDarwin darwinBuildInputs
       ++ optionals useCuda cudaBuildInputs
       ++ optionals useMpi [ mpi ]
       ++ optionals useOpenCL [ clblast ]
-      ++ optionals useRocm rocmBuildInputs;
+      ++ optionals useRocm rocmBuildInputs
+      ++ optionals useVulkan vulkanBuildInputs;
 
     cmakeFlags =
       [
        (cmakeBool "LLAMA_NATIVE" false)
        (cmakeBool "LLAMA_BUILD_SERVER" true)
-       (cmakeBool "BUILD_SHARED_LIBS" true)
+       (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
        (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
        (cmakeBool "LLAMA_BLAS" useBlas)
        (cmakeBool "LLAMA_CLBLAST" useOpenCL)
        (cmakeBool "LLAMA_CUBLAS" useCuda)
        (cmakeBool "LLAMA_HIPBLAS" useRocm)
        (cmakeBool "LLAMA_METAL" useMetalKit)
        (cmakeBool "LLAMA_MPI" useMpi)
+       (cmakeBool "LLAMA_VULKAN" useVulkan)
+       (cmakeBool "LLAMA_STATIC" enableStatic)
      ]
      ++ optionals useCuda [
        (
@@ -218,13 +237,17 @@ effectiveStdenv.mkDerivation (
          useMpi
          useOpenCL
          useRocm
+         useVulkan
          ;
 
      shell = mkShell {
        name = "shell-${finalAttrs.finalPackage.name}";
        description = "contains numpy and sentencepiece";
        buildInputs = [ llama-python ];
        inputsFrom = [ finalAttrs.finalPackage ];
+       shellHook = ''
+         addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
+       '';
      };
 
      shell-extra = mkShell {
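
Because the package is instantiated with callPackage (see scope.nix below), the new flags can be toggled through the standard override mechanism. A hypothetical sketch, assuming the llamaPackages attribute path that the comments in docker.nix reference:

    # Default build:
    nix build .#llamaPackages.llama-cpp

    # Hypothetical Vulkan-enabled variant via the callPackage override:
    nix build --impure --expr \
      '((builtins.getFlake (toString ./.)).legacyPackages.${builtins.currentSystem}.llamaPackages.llama-cpp).override { useVulkan = true; }'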

.devops/nix/scope.nix

Lines changed: 3 additions & 0 deletions
@@ -12,5 +12,8 @@ lib.makeScope newScope (
   self: {
     inherit llamaVersion;
     llama-cpp = self.callPackage ./package.nix { };
+    docker = self.callPackage ./docker.nix { };
+    docker-min = self.callPackage ./docker.nix { interactive = false; };
+    sif = self.callPackage ./sif.nix { };
   }
 )
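
With these attributes in the scope, the container images become buildable directly — attribute paths as already referenced in docker.nix:

    nix build .#llamaPackages.docker       # interactive layered Docker image
    nix build .#llamaPackages.docker-min   # same image without shell/coreutils
    nix build .#llamaPackages.sif          # Singularity/Apptainer image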

.devops/nix/sif.nix

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
+{
+  lib,
+  singularity-tools,
+  llama-cpp,
+  bashInteractive,
+  interactive ? false,
+}:
+
+let
+  optionalInt = cond: x: if cond then x else 0;
+in
+singularity-tools.buildImage rec {
+  inherit (llama-cpp) name;
+  contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ];
+
+  # These are excessive (but safe) for most variants. Building singularity
+  # images requires superuser privileges, so we build them inside a VM in a
+  # writable image of pre-determined size.
+  #
+  # ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846
+  #
+  # Expected image sizes:
+  # - cpu/blas: 150M,
+  # - cuda, all gencodes: 560M,
+  diskSize = 4096 + optionalInt llama-cpp.useRocm 16384;
+  memSize = diskSize;
+}
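
A run sketch — hypothetical: the binary name and runner invocation depend on the llama-cpp package layout and on your Singularity/Apptainer installation:

    nix build .#llamaPackages.sif
    # Binaries from the llama-cpp package land on the image's /bin (assumed):
    apptainer exec result main -m /path/to/model.gguf -p "Hello"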

.devops/server-cuda.Dockerfile

Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
+ARG UBUNTU_VERSION=22.04
+# This needs to generally match the container host's environment.
+ARG CUDA_VERSION=11.7.1
+# Target the CUDA build image
+ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
+# Target the CUDA runtime image
+ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
+
+FROM ${BASE_CUDA_DEV_CONTAINER} as build
+
+# Unless otherwise specified, we make a fat build.
+ARG CUDA_DOCKER_ARCH=all
+
+RUN apt-get update && \
+    apt-get install -y build-essential git
+
+WORKDIR /app
+
+COPY . .
+
+# Set nvcc architecture
+ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
+# Enable cuBLAS
+ENV LLAMA_CUBLAS=1
+
+RUN make
+
+FROM ${BASE_CUDA_RUN_CONTAINER} as runtime
+
+COPY --from=build /app/server /server
+
+ENTRYPOINT [ "/server" ]
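
A run sketch — requires the NVIDIA Container Toolkit on the host; tag, port, and model path are illustrative:

    docker build -f .devops/server-cuda.Dockerfile -t llama-cpp-server-cuda .
    docker run --rm --gpus all -p 8080:8080 \
        -v /path/to/models:/models \
        llama-cpp-server-cuda \
        -m /models/model.gguf --host 0.0.0.0 --port 8080 -ngl 99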

.devops/server-intel.Dockerfile

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+ARG ONEAPI_VERSION=2024.0.1-devel-ubuntu22.04
+
+FROM intel/oneapi-basekit:$ONEAPI_VERSION as build
+
+ARG LLAMA_SYCL_F16=OFF
+RUN apt-get update && \
+    apt-get install -y git
+
+WORKDIR /app
+
+COPY . .
+
+RUN mkdir build && \
+    cd build && \
+    if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
+        echo "LLAMA_SYCL_F16 is set" && \
+        export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
+    fi && \
+    cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
+    cmake --build . --config Release --target server
+
+FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
+
+COPY --from=build /app/build/bin/server /server
+
+ENV LC_ALL=C.utf8
+
+ENTRYPOINT [ "/server" ]
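
A run sketch along the same lines (illustrative tag; /dev/dri is one common way to expose the Intel GPU to the SYCL runtime):

    docker build -f .devops/server-intel.Dockerfile \
        --build-arg LLAMA_SYCL_F16=ON -t llama-cpp-server-intel .
    docker run --rm --device /dev/dri -p 8080:8080 \
        -v /path/to/models:/models \
        llama-cpp-server-intel \
        -m /models/model.gguf --host 0.0.0.0 --port 8080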

.devops/server-rocm.Dockerfile

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
+ARG UBUNTU_VERSION=22.04
+
+# This needs to generally match the container host's environment.
+ARG ROCM_VERSION=5.6
+
+# Target the CUDA build image
+ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
+
+FROM ${BASE_ROCM_DEV_CONTAINER} as build
+
+# Unless otherwise specified, we make a fat build.
+# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
+# This is mostly tied to rocBLAS supported archs.
+ARG ROCM_DOCKER_ARCH=\
+    gfx803 \
+    gfx900 \
+    gfx906 \
+    gfx908 \
+    gfx90a \
+    gfx1010 \
+    gfx1030 \
+    gfx1100 \
+    gfx1101 \
+    gfx1102
+
+COPY requirements.txt requirements.txt
+COPY requirements requirements
+
+RUN pip install --upgrade pip setuptools wheel \
+    && pip install -r requirements.txt
+
+WORKDIR /app
+
+COPY . .
+
+# Set nvcc architecture
+ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
+# Enable ROCm
+ENV LLAMA_HIPBLAS=1
+ENV CC=/opt/rocm/llvm/bin/clang
+ENV CXX=/opt/rocm/llvm/bin/clang++
+
+RUN make
+
+ENTRYPOINT [ "/app/server" ]
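
A run sketch (illustrative tag; /dev/kfd and /dev/dri are the device nodes ROCm containers conventionally need):

    docker build -f .devops/server-rocm.Dockerfile -t llama-cpp-server-rocm .
    docker run --rm --device /dev/kfd --device /dev/dri -p 8080:8080 \
        -v /path/to/models:/models \
        llama-cpp-server-rocm \
        -m /models/model.gguf --host 0.0.0.0 --port 8080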
