
Commit 7f3a619

Merge branch 'master' into compilade/bitnet-ternary
2 parents: cb6d996 + 581c305

94 files changed: 12535 additions, 8090 deletions


.devops/full-cuda.Dockerfile

Lines changed: 11 additions & 14 deletions
@@ -1,18 +1,16 @@
 ARG UBUNTU_VERSION=22.04
-
 # This needs to generally match the container host's environment.
-ARG CUDA_VERSION=11.7.1
-
+ARG CUDA_VERSION=12.6.0
 # Target the CUDA build image
 ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
 
 FROM ${BASE_CUDA_DEV_CONTAINER} AS build
 
-# Unless otherwise specified, we make a fat build.
-ARG CUDA_DOCKER_ARCH=all
+# CUDA architecture to build for (defaults to all supported archs)
+ARG CUDA_DOCKER_ARCH=default
 
 RUN apt-get update && \
-    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
+    apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
 
 COPY requirements.txt requirements.txt
 COPY requirements requirements
@@ -24,13 +22,12 @@ WORKDIR /app
 
 COPY . .
 
-# Set nvcc architecture
-ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
-# Enable CUDA
-ENV GGML_CUDA=1
-# Enable cURL
-ENV LLAMA_CURL=1
-
-RUN make -j$(nproc)
+# Use the default CUDA archs if not specified
+RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
+        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
+    fi && \
+    cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake --build build --config Release -j$(nproc) && \
+    cp build/bin/* .
 
 ENTRYPOINT ["/app/.devops/tools.sh"]
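With this change, architecture selection moves from a make-time ENV to a CMake cache variable fed by the CUDA_DOCKER_ARCH build argument. A minimal build sketch (image tags and the arch value are illustrative, not from the commit):

# Default: a fat image covering all supported CUDA archs
docker build -t local/llama.cpp:full-cuda -f .devops/full-cuda.Dockerfile .

# Pin one compute capability (e.g. 8.6) to cut build time; the value
# is forwarded to CMAKE_CUDA_ARCHITECTURES
docker build -t local/llama.cpp:full-cuda \
    --build-arg CUDA_DOCKER_ARCH=86 \
    -f .devops/full-cuda.Dockerfile .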

.devops/llama-cli-cuda.Dockerfile

Lines changed: 13 additions & 11 deletions
@@ -1,35 +1,37 @@
 ARG UBUNTU_VERSION=22.04
 # This needs to generally match the container host's environment.
-ARG CUDA_VERSION=11.7.1
+ARG CUDA_VERSION=12.6.0
 # Target the CUDA build image
 ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
 # Target the CUDA runtime image
 ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
 
 FROM ${BASE_CUDA_DEV_CONTAINER} AS build
 
-# Unless otherwise specified, we make a fat build.
-ARG CUDA_DOCKER_ARCH=all
+# CUDA architecture to build for (defaults to all supported archs)
+ARG CUDA_DOCKER_ARCH=default
 
 RUN apt-get update && \
-    apt-get install -y build-essential git
+    apt-get install -y build-essential git cmake
 
 WORKDIR /app
 
 COPY . .
 
-# Set nvcc architecture
-ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
-# Enable CUDA
-ENV GGML_CUDA=1
-
-RUN make -j$(nproc) llama-cli
+# Use the default CUDA archs if not specified
+RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
+        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
+    fi && \
+    cmake -B build -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake --build build --config Release --target llama-cli -j$(nproc)
 
 FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
 
 RUN apt-get update && \
     apt-get install -y libgomp1
 
-COPY --from=build /app/llama-cli /llama-cli
+COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
+COPY --from=build /app/build/src/libllama.so /libllama.so
+COPY --from=build /app/build/bin/llama-cli /llama-cli
 
 ENTRYPOINT [ "/llama-cli" ]
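Because the CMake build links llama-cli against shared libggml.so and libllama.so, the runtime stage now copies those libraries next to the binary. A run sketch (tag, model path, and flag values are illustrative; --gpus requires the NVIDIA Container Toolkit):

docker run --gpus all -v /path/to/models:/models \
    local/llama.cpp:llama-cli-cuda \
    -m /models/model.gguf -p "Hello" -n 64 -ngl 99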

.devops/llama-server-cuda.Dockerfile

Lines changed: 16 additions & 13 deletions
@@ -1,38 +1,41 @@
 ARG UBUNTU_VERSION=22.04
 # This needs to generally match the container host's environment.
-ARG CUDA_VERSION=11.7.1
+ARG CUDA_VERSION=12.6.0
 # Target the CUDA build image
 ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
 # Target the CUDA runtime image
 ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
 
 FROM ${BASE_CUDA_DEV_CONTAINER} AS build
 
-# Unless otherwise specified, we make a fat build.
-ARG CUDA_DOCKER_ARCH=all
+# CUDA architecture to build for (defaults to all supported archs)
+ARG CUDA_DOCKER_ARCH=default
 
 RUN apt-get update && \
-    apt-get install -y build-essential git libcurl4-openssl-dev
+    apt-get install -y build-essential git cmake libcurl4-openssl-dev
 
 WORKDIR /app
 
 COPY . .
 
-# Set nvcc architecture
-ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
-# Enable CUDA
-ENV GGML_CUDA=1
-# Enable cURL
-ENV LLAMA_CURL=1
-
-RUN make -j$(nproc) llama-server
+# Use the default CUDA archs if not specified
+RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
+        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
+    fi && \
+    cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
+    cmake --build build --config Release --target llama-server -j$(nproc)
 
 FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
 
 RUN apt-get update && \
     apt-get install -y libcurl4-openssl-dev libgomp1 curl
 
-COPY --from=build /app/llama-server /llama-server
+COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
+COPY --from=build /app/build/src/libllama.so /libllama.so
+COPY --from=build /app/build/bin/llama-server /llama-server
+
+# Must be set to 0.0.0.0 so it can listen to requests from host machine
+ENV LLAMA_ARG_HOST=0.0.0.0
 
 HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
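With LLAMA_ARG_HOST=0.0.0.0 baked in, the server binds all interfaces inside the container, so publishing the port is all that is needed from the host. A run sketch (tag and model path are illustrative):

docker run --gpus all -p 8080:8080 -v /path/to/models:/models \
    local/llama.cpp:llama-server-cuda -m /models/model.gguf

# Same endpoint the image's HEALTHCHECK polls
curl -f http://localhost:8080/health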

.devops/llama-server-intel.Dockerfile

Lines changed: 2 additions & 0 deletions
@@ -26,6 +26,8 @@ RUN apt-get update && \
 COPY --from=build /app/build/bin/llama-server /llama-server
 
 ENV LC_ALL=C.utf8
+# Must be set to 0.0.0.0 so it can listen to requests from host machine
+ENV LLAMA_ARG_HOST=0.0.0.0
 
 HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

.devops/llama-server-rocm.Dockerfile

Lines changed: 2 additions & 0 deletions
@@ -39,6 +39,8 @@ ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
 ENV GGML_HIPBLAS=1
 ENV CC=/opt/rocm/llvm/bin/clang
 ENV CXX=/opt/rocm/llvm/bin/clang++
+# Must be set to 0.0.0.0 so it can listen to requests from host machine
+ENV LLAMA_ARG_HOST=0.0.0.0
 
 # Enable cURL
 ENV LLAMA_CURL=1

.devops/llama-server-vulkan.Dockerfile

Lines changed: 2 additions & 0 deletions
@@ -23,6 +23,8 @@ RUN cp /app/build/bin/llama-server /llama-server && \
     rm -rf /app
 
 ENV LC_ALL=C.utf8
+# Must be set to 0.0.0.0 so it can listen to requests from host machine
+ENV LLAMA_ARG_HOST=0.0.0.0
 
 HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

.devops/llama-server.Dockerfile

Lines changed: 2 additions & 0 deletions
@@ -21,6 +21,8 @@ RUN apt-get update && \
 COPY --from=build /app/llama-server /llama-server
 
 ENV LC_ALL=C.utf8
+# Must be set to 0.0.0.0 so it can listen to requests from host machine
+ENV LLAMA_ARG_HOST=0.0.0.0
 
 HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

.devops/nix/devshells.nix

Lines changed: 46 additions & 7 deletions
@@ -1,13 +1,52 @@
+{ inputs, ... }:
+
 {
   perSystem =
-    { config, lib, ... }:
+    {
+      config,
+      lib,
+      system,
+      ...
+    }:
     {
       devShells =
-        lib.concatMapAttrs
-          (name: package: {
-            ${name} = package.passthru.shell;
-            ${name + "-extra"} = package.passthru.shell-extra;
-          })
-          config.packages;
+        let
+          pkgs = import inputs.nixpkgs { inherit system; };
+          stdenv = pkgs.stdenv;
+          scripts = config.packages.python-scripts;
+        in
+        lib.pipe (config.packages) [
+          (lib.concatMapAttrs (
+            name: package: {
+              ${name} = pkgs.mkShell {
+                name = "${name}";
+                inputsFrom = [ package ];
+                shellHook = ''
+                  echo "Entering ${name} devShell"
+                '';
+              };
+              "${name}-extra" =
+                if (name == "python-scripts") then
+                  null
+                else
+                  pkgs.mkShell {
+                    name = "${name}-extra";
+                    inputsFrom = [
+                      package
+                      scripts
+                    ];
+                    # Extra packages that *may* be used by some scripts
+                    packages = [
+                      pkgs.python3Packages.tiktoken
+                    ];
+                    shellHook = ''
+                      echo "Entering ${name} devShell"
+                      addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
+                    '';
+                  };
+            }
+          ))
+          (lib.filterAttrs (name: value: value != null))
+        ];
     };
 }
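The reworked devShells wrap each flake package in a plain pkgs.mkShell, with a matching "-extra" shell that also pulls in the python-scripts package. A usage sketch (attribute names mirror the flake's package names; `default` is an assumption about what the flake exposes):

# Shell for hacking on a given package
nix develop .#default

# The -extra variant adds the gguf Python scripts plus optional
# packages such as tiktoken
nix develop .#default-extra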

.devops/nix/nixpkgs-instances.nix

Lines changed: 8 additions & 10 deletions
@@ -26,16 +26,14 @@
       config.cudaSupport = true;
       config.allowUnfreePredicate =
         p:
-        builtins.all
-          (
-            license:
-            license.free
-            || builtins.elem license.shortName [
-              "CUDA EULA"
-              "cuDNN EULA"
-            ]
-          )
-          (p.meta.licenses or [ p.meta.license ]);
+        builtins.all (
+          license:
+          license.free
+          || builtins.elem license.shortName [
+            "CUDA EULA"
+            "cuDNN EULA"
+          ]
+        ) (p.meta.licenses or [ p.meta.license ]);
     };
     # Ensure dependencies use ROCm consistently
     pkgsRocm = import inputs.nixpkgs {

.devops/nix/package-gguf-py.nix

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+{
+  lib,
+  llamaVersion,
+  numpy,
+  tqdm,
+  sentencepiece,
+  pyyaml,
+  poetry-core,
+  buildPythonPackage,
+  pytestCheckHook,
+}:
+
+buildPythonPackage {
+  pname = "gguf";
+  version = llamaVersion;
+  pyproject = true;
+  nativeBuildInputs = [ poetry-core ];
+  propagatedBuildInputs = [
+    numpy
+    tqdm
+    sentencepiece
+    pyyaml
+  ];
+  src = lib.cleanSource ../../gguf-py;
+  pythonImportsCheck = [
+    "numpy"
+    "gguf"
+  ];
+  nativeCheckInputs = [ pytestCheckHook ];
+  doCheck = true;
+  meta = with lib; {
+    description = "Python package for writing binary files in the GGUF format";
+    license = licenses.mit;
+    maintainers = [ maintainers.ditsuke ];
+  };
+}
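This new expression packages the gguf-py tree with poetry-core and runs its pytest suite at build time (doCheck = true). A hedged sketch of consuming it (the flake attribute name is an assumption, not shown in this commit):

# Build the gguf Python package; with doCheck = true the build
# fails if its pytest suite fails (attribute name is an assumption)
nix build .#gguf-py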
