Add special token modification capability #6778
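For orientation: the capability this PR adds is exposed through the gguf-py metadata script. Below is a minimal sketch of how it might be invoked; the script path, flag name, and token values are assumptions drawn from the commit messages ("Add special token modification capability", "improve help text"), not confirmed by this page:

    # Hypothetical invocation -- script path and flags are assumptions, not verified:
    python3 gguf-py/scripts/gguf-new-metadata.py input.gguf output.gguf \
        --special-token eos '<|im_end|>'

The idea is to rewrite a GGUF file's special-token metadata in place of conversion, without requantizing or otherwise touching the tensor data.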

Closed
wants to merge 142 commits into from
Changes from all commits
142 commits
9e4968c
Add special token modification capability
CISC Apr 20, 2024
8d36967
improve help text
CISC Apr 20, 2024
c4e6f6f
flake--
CISC Apr 20, 2024
a2410b6
fix multiple tokens warning
CISC Apr 20, 2024
aed82f6
common : try to fix Android CI (#6780)
ggerganov Apr 20, 2024
b8109bc
doc : server tests require llama to be built with curl enabled (#6788)
kaetemi Apr 20, 2024
e5956f5
make script executable
CISC Apr 21, 2024
ff5d21e
switch to namedtuple, no need to dataclass
CISC Apr 21, 2024
b97bc39
llama : support Llama 3 HF conversion (#6745)
pcuenca Apr 21, 2024
89b0bf0
llava : use logger in llava-cli (#6797)
jart Apr 21, 2024
2cca09d
readme : add Fedora instructions (#6783)
Man2Dev Apr 21, 2024
e8d35f4
doc : add link to falcon (#6789)
kaetemi Apr 21, 2024
c1386c9
gguf-py : add IQ1_M to GGML_QUANT_SIZES (#6761)
pmysl Apr 21, 2024
7dbdba5
llama : add llama-3 chat template (#6751)
DifferentialityDevelopment Apr 21, 2024
b9cc76d
ggml : fix ggml_backend_cpu_supports_op() for CPY (#0)
ggerganov Apr 21, 2024
40f74e4
llama : add option to render special/control tokens (#6807)
ggerganov Apr 21, 2024
5cf5e7d
`build`: generate hex dump of server assets during build (#6661)
ochafik Apr 21, 2024
e9b4a1b
flake.lock: Update
github-actions[bot] Apr 21, 2024
c0956b0
ci: fix job are cancelling each other (#6781)
phymbert Apr 22, 2024
8960fe8
llama : fix typo in <|im_end|> token text (#6745)
ggerganov Apr 22, 2024
e931888
ggml : fix calloc argument ordering. (#6820)
airlied Apr 22, 2024
192090b
llamafile : improve sgemm.cpp (#6796)
jart Apr 22, 2024
4e96a81
[SYCL] Windows default build instructions without -DLLAMA_SYCL_F16 fl…
aahouzi Apr 23, 2024
c8297c6
llama : add phi3 support (#6852)
liuwei-git Apr 24, 2024
3fec68b
convert : add support of codeqwen due to tokenizer (#6707)
JustinLin610 Apr 24, 2024
abd3314
llama : add phi 3 chat template (#6857)
tristandruyen Apr 24, 2024
c0d1b3e
ggml : move 32-bit arm compat in ggml-impl.h (#6865)
ggerganov Apr 24, 2024
28103f4
Server: fix seed for multiple slots (#6835)
JohannesGaessler Apr 24, 2024
37246b1
common : revert showing control tokens by default for server (#6860)
K-Mistele Apr 24, 2024
3fe847b
server : do not apply Markdown formatting in code sections (#6850)
mgroeber9110 Apr 24, 2024
b4e4b8a
llama : add llama_get_pooling_type function (#6862)
iamlemec Apr 24, 2024
784e11d
README: add graphic for matrix multiplication (#6881)
JohannesGaessler Apr 24, 2024
1966eb2
quantize : add '--keep-split' to quantize model into shards (#6688)
zj040045 Apr 25, 2024
aa750c1
tests : minor bash stuff (#6902)
ggerganov Apr 25, 2024
5477041
ggml : fix MIN / MAX macros (#6904)
ggerganov Apr 25, 2024
4ab99d8
clip : rename lerp function to avoid conflict (#6894)
danbev Apr 25, 2024
5154372
ggml : fix redefinition of vaddvq_f32 for 32-bit ARM (#6906)
ggerganov Apr 25, 2024
0ead1f1
llama : check that all the tensor data is in the model file (#6885)
slaren Apr 25, 2024
3fe0596
readme : update model list (#6908)
BarfingLemurs Apr 25, 2024
853d06f
ci : tmp disable slow tests
ggerganov Apr 25, 2024
d6e1d44
llama : synchronize before get/set session data (#6911)
slaren Apr 25, 2024
fa0b4ad
cmake : remove obsolete ANDROID check
ggerganov Apr 25, 2024
dba497e
cmake : restore LLAMA_LLAMAFILE_DEFAULT
ggerganov Apr 25, 2024
46e12c4
llava : add support for moondream vision language model (#6899)
vikhyat Apr 25, 2024
5790c8d
bench: server add stop word for PHI-2 (#6916)
phymbert Apr 26, 2024
7d641c2
ci: fix concurrency for pull_request_target (#6917)
phymbert Apr 26, 2024
d4a9afc
ci: server: fix python installation (#6918)
phymbert Apr 26, 2024
83b72cb
Merge pull request from GHSA-p5mv-gjc5-mwqv
ggerganov Apr 26, 2024
9e4e077
ci: server: fix python installation (#6922)
phymbert Apr 26, 2024
7f5ff55
server: stop generation at `n_ctx_train` if `n_predict` is not set (#…
phymbert Apr 26, 2024
bbe3c6e
ci: server: fix python installation (#6925)
phymbert Apr 26, 2024
4b1c3c9
llamafile : use 64-bit integers in sgemm (#6928)
jart Apr 26, 2024
e2764cd
gguf : fix mismatch between alloc and free functions (#6929)
slaren Apr 26, 2024
017e699
add basic tensor data validation function (#6884)
slaren Apr 26, 2024
0c4d489
quantize: add imatrix and dataset metadata in GGUF (#6658)
phymbert Apr 26, 2024
928e0b7
Reset schedule earlier to allow overlap with ggml graph computation o…
agray3 Apr 26, 2024
b736833
ci: server: tests python env on github container ubuntu latest / fix …
phymbert Apr 27, 2024
4dba7e8
Replace "alternative" boolean operator in conditional compilation dir…
mgroeber9110 Apr 27, 2024
6e472f5
flake.lock: Update
github-actions[bot] Apr 28, 2024
ce023f6
add device version in device list (#6959)
arthw Apr 28, 2024
7bb36cc
gguf : enforce that tensor names are unique (#6905)
ngxson Apr 28, 2024
e00b4a8
Fix more int overflow during quant (PPL/CUDA). (#6563)
dranger003 Apr 28, 2024
c4f708a
llama : fix typo LAMMAFILE -> LLAMAFILE (#6974)
JohannesGaessler Apr 29, 2024
ca7f29f
ci : add building in MSYS2 environments (Windows) (#6967)
przemoc Apr 29, 2024
577277f
make : change GNU make default CXX from g++ to c++ (#6966)
przemoc Apr 29, 2024
3055a41
convert : fix conversion of some BERT embedding models (#6937)
christianazinn Apr 29, 2024
3f16747
sampling : use std::random_device{}() for default random seed (#6962)
dwrensha Apr 29, 2024
f4ab2a4
llama : fix BPE pre-tokenization (#6920)
ggerganov Apr 29, 2024
24affa7
readme : update hot topics
ggerganov Apr 29, 2024
ffe6665
llava-cli : multiple images (#6969)
cpumaxx Apr 29, 2024
544f1f1
ggml : fix __MSC_VER -> _MSC_VER (#6977)
ggerganov Apr 29, 2024
d2c898f
ci : tmp disable gguf-split (#6983)
ggerganov Apr 29, 2024
b8a7a5a
build(cmake): simplify instructions (`cmake -B build && cmake --build…
ochafik Apr 29, 2024
5539e6f
main : fix typo in comment in main.cpp (#6985)
danbev Apr 29, 2024
b8c1476
Extending grammar integration tests (#6644)
HanClinto Apr 29, 2024
8843a98
Improve usability of --model-url & related flags (#6930)
ochafik Apr 29, 2024
952d03d
convert : use utf8 encoding (#7000)
ggerganov Apr 30, 2024
9c67c27
ggml : add Flash Attention (#5021)
ggerganov Apr 30, 2024
a68a1e7
metal : log more info on error (#6987)
bakkot Apr 30, 2024
77e15be
metal : remove deprecated error code (#7008)
ggerganov Apr 30, 2024
f364eb6
switch to using localizedDescription (#7010)
bakkot Apr 30, 2024
a8f9b07
perplexity: more statistics, added documentation (#6936)
JohannesGaessler Apr 30, 2024
c4ec9c0
ci : exempt confirmed bugs from being tagged as stale (#7014)
slaren May 1, 2024
1613ef8
CUDA: CUDART < 11.7 workaround for __hmax, __hmax2 (#7019)
JohannesGaessler May 1, 2024
3ea0d36
Server: add tests for batch size, different seeds (#6950)
JohannesGaessler May 1, 2024
8d608a8
main : fix off by one error for context shift (#6921)
l3utterfly May 1, 2024
b0d943d
Update LOG_IMPL and LOG_TEE_IMPL (#7029)
a-downing May 1, 2024
6ecf318
chore: fix typo in llama.cpp (#7032)
alwqx May 2, 2024
60325fa
Remove .attention from skipped tensors to match more accurately (#7051)
bartowski1182 May 2, 2024
433def2
llama : rename ctx to user_data in progress_callback (#7045)
danbev May 3, 2024
a2ac89d
convert.py : add python logging instead of print() (#6511)
mofosyne May 3, 2024
92139b9
tests : add test-tokenizer-0.sh + fix some tokenizers (#7036)
ggerganov May 4, 2024
03fb8a0
If first token generated from the server is the stop word the server …
maor-ps May 4, 2024
fcd84a0
Fix Linux /sys cpu path to guess number of cores (#7064)
viric May 4, 2024
cf768b7
Tidy Android Instructions README.md (#7016)
Jeximo May 4, 2024
8425001
gguf-split: add --no-tensor-first-split (#7072)
ngxson May 4, 2024
d39f203
typing++
CISC May 4, 2024
158215c
add progress bar
CISC May 4, 2024
6fbd432
py : logging and flake8 suppression refactoring (#7081)
mofosyne May 5, 2024
889bdd7
command-r : add BPE pre-tokenization (#7063)
dranger003 May 5, 2024
ca36326
readme : add note that LLaMA 3 is not supported with convert.py (#7065)
lyledean1 May 5, 2024
8f8acc8
Disable benchmark on forked repo (#7034)
CISC May 5, 2024
628b299
Adding support for the --numa argument for llama-bench. (#7080)
kunnis May 5, 2024
bcdee0d
minor : fix trailing whitespace
ggerganov May 6, 2024
b3a995b
flake.lock: Update (#7079)
ggerganov May 6, 2024
858f6b7
Add an option to build without CUDA VMM (#7067)
WilliamTambellini May 6, 2024
947d3ad
ci : add GG_BUILD_EXTRA_TESTS_0 env (#7098)
ggerganov May 7, 2024
04976db
docs: fix typos (#7124)
omahs May 7, 2024
3af34c1
main : update log text (EOS to EOG) (#7104)
RhinoDevel May 7, 2024
53d6c52
readme : update hot topics
ggerganov May 7, 2024
260b7c6
server : update readme with undocumented options (#7013)
K-Mistele May 7, 2024
b6aa670
Fix OLMo HF to GGUF conversion (#6910)
nopperl May 7, 2024
af0a5b6
server: fix incorrectly reported token probabilities (#7125)
JohannesGaessler May 7, 2024
48b2f9c
Fixed save_imatrix to match old behaviour for MoE (#7099)
jukofyork May 8, 2024
c780e75
Further tidy on Android instructions README.md (#7077)
Jeximo May 8, 2024
c0e6fbf
metal : fix unused warning
ggerganov May 8, 2024
3855416
ggml : introduce bfloat16 support (#6412)
jart May 8, 2024
acdce3c
compare-llama-bench.py: add missing basicConfig (#7138)
mofosyne May 8, 2024
7e0b6a7
py : also print the normalizers
ggerganov May 8, 2024
4cd621c
convert : add BPE pre-tokenization for DBRX (#7132)
dranger003 May 8, 2024
1fd9c17
clean up json_value & server_log (#7142)
ngxson May 8, 2024
229ffff
llama : add BPE pre-tokenization for Qwen2 (#7114)
jklj077 May 8, 2024
ad211ed
convert.py : --vocab-only generates false but valid params (#7027)
20kdc May 8, 2024
911b390
server : add_special option for tokenize endpoint (#7059)
JohanAR May 8, 2024
465263d
sgemm : AVX Q4_0 and Q8_0 (#6891)
netrunnereve May 8, 2024
83330d8
main : add --conversation / -cnv flag (#7108)
May 8, 2024
26458af
metal : use `vm_allocate` instead of `posix_memalign` on macOS (#7078)
giladgd May 8, 2024
bd1871f
server : add themes + favicon (#6848)
jboero May 8, 2024
9da243b
Revert "llava : add support for moondream vision language model (#6899)"
ggerganov May 8, 2024
c12452c
JSON: [key] -> .at(key), assert() -> GGML_ASSERT (#7143)
JohannesGaessler May 8, 2024
bc4bba3
Introduction of CUDA Graphs to LLama.cpp (#6766)
agray3 May 8, 2024
f98eb31
convert-hf : save memory with lazy evaluation (#7075)
compilade May 8, 2024
4426e29
cmake : fix typo (#7151)
cebtenzzre May 8, 2024
ed72533
Add special token modification capability
CISC Apr 20, 2024
27caf19
improve help text
CISC Apr 20, 2024
8737ca1
flake--
CISC Apr 20, 2024
3e3e7c3
fix multiple tokens warning
CISC Apr 20, 2024
bc92f65
make script executable
CISC Apr 21, 2024
87e2d73
switch to namedtuple, no need to dataclass
CISC Apr 21, 2024
981bd44
typing++
CISC May 4, 2024
609df3c
add progress bar
CISC May 4, 2024
144d99a
Merge branch 'modify-special-tokens-metadata' of github.com:CISC/llam…
CISC May 9, 2024
8 changes: 3 additions & 5 deletions .devops/main-intel.Dockerfile
@@ -10,14 +10,12 @@ WORKDIR /app

 COPY . .

-RUN mkdir build && \
-    cd build && \
-    if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
+RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
         echo "LLAMA_SYCL_F16 is set" && \
         export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
     fi && \
-    cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
-    cmake --build . --config Release --target main
+    cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
+    cmake --build build --config Release --target main

 FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
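A recurring change across the .devops and CI hunks in this PR is the build invocation: the stateful mkdir build && cd build idiom is replaced by cmake -B. The two forms are equivalent; a minimal shell sketch of the before/after:

    # Old pattern: creates and enters build/ by hand; mkdir fails on re-run
    mkdir build && cd build
    cmake .. && cmake --build . --config Release --target main

    # New pattern: cmake manages build/ itself; no directory change, safe to re-run
    cmake -B build
    cmake --build build --config Release --target main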
6 changes: 2 additions & 4 deletions .devops/main-vulkan.Dockerfile
@@ -14,10 +14,8 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key
 # Build it
 WORKDIR /app
 COPY . .
-RUN mkdir build && \
-    cd build && \
-    cmake .. -DLLAMA_VULKAN=1 && \
-    cmake --build . --config Release --target main
+RUN cmake -B build -DLLAMA_VULKAN=1 && \
+    cmake --build build --config Release --target main

 # Clean up
 WORKDIR /
8 changes: 3 additions & 5 deletions .devops/server-intel.Dockerfile
@@ -10,14 +10,12 @@ WORKDIR /app

 COPY . .

-RUN mkdir build && \
-    cd build && \
-    if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
+RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
         echo "LLAMA_SYCL_F16 is set" && \
         export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
     fi && \
-    cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
-    cmake --build . --config Release --target server
+    cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
+    cmake --build build --config Release --target server

 FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime
6 changes: 2 additions & 4 deletions .devops/server-vulkan.Dockerfile
@@ -18,10 +18,8 @@ RUN apt-get update && \
 # Build it
 WORKDIR /app
 COPY . .
-RUN mkdir build && \
-    cd build && \
-    cmake .. -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
-    cmake --build . --config Release --target server
+RUN cmake -B build -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
+    cmake --build build --config Release --target server

 # Clean up
 WORKDIR /
16 changes: 15 additions & 1 deletion .flake8
@@ -1,3 +1,17 @@
 [flake8]
 max-line-length = 125
-ignore = W503
+ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
+exclude =
+    # Do not traverse examples
+    examples,
+    # Do not include package initializers
+    __init__.py,
+    # No need to traverse our git directory
+    .git,
+    # There's no value in checking cache directories
+    __pycache__,
+    # No need to include the build path
+    build,
+    # This contains builds that we don't want to check
+    dist  # This is generated with `python build .` for package releases
+# max-complexity = 10
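Because the ignore and exclude lists now live in .flake8 rather than in the workflow, local runs pick up the same rules automatically; for example (flake8-no-print is the plugin the CI and pre-commit configs below install):

    pip install flake8 flake8-no-print
    flake8    # reads max-line-length, ignore and exclude from .flake8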
22 changes: 16 additions & 6 deletions .github/workflows/bench.yml
@@ -32,7 +32,7 @@ on:
     - cron: '04 2 * * *'

 concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}-${{ github.event.inputs.sha }}
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }}
   cancel-in-progress: true

 jobs:
@@ -52,7 +52,19 @@ jobs:
       ftype: q4_0
       pr_comment_enabled: "true"

-    if: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request || github.head_ref == 'master' || github.ref_name == 'master' || github.event.push.ref == 'refs/heads/master' }}
+    if: |
+      inputs.gpu-series == 'Standard_NC4as_T4_v3'
+      || (
+        github.event_name == 'schedule'
+        && github.ref_name == 'master'
+        && github.repository_owner == 'ggerganov'
+      )
+      || github.event_name == 'pull_request_target'
+      || (
+        github.event_name == 'push'
+        && github.event.ref == 'refs/heads/master'
+        && github.repository_owner == 'ggerganov'
+      )
     steps:
       - name: Clone
         id: checkout
@@ -96,9 +108,7 @@ jobs:
         id: cmake_build
         run: |
           set -eux
-          mkdir build
-          cd build
-          cmake .. \
+          cmake -B build \
             -DLLAMA_NATIVE=OFF \
             -DLLAMA_BUILD_SERVER=ON \
             -DLLAMA_CURL=ON \
@@ -109,7 +119,7 @@ jobs:
             -DLLAMA_FATAL_WARNINGS=OFF \
             -DLLAMA_ALL_WARNINGS=OFF \
             -DCMAKE_BUILD_TYPE=Release;
-          cmake --build . --config Release -j $(nproc) --target server
+          cmake --build build --config Release -j $(nproc) --target server

       - name: Download the dataset
         id: download_dataset
57 changes: 57 additions & 0 deletions .github/workflows/build.yml
@@ -593,6 +593,63 @@ jobs:
       run: |
         make swift

+  windows-msys2:
+    runs-on: windows-latest
+
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - { sys: UCRT64,  env: ucrt-x86_64,  build: Release }
+          - { sys: CLANG64, env: clang-x86_64, build: Release }
+
+    steps:
+      - name: Clone
+        uses: actions/checkout@v4
+
+      - name: Setup ${{ matrix.sys }}
+        uses: msys2/setup-msys2@v2
+        with:
+          update: true
+          msystem: ${{matrix.sys}}
+          install: >-
+            base-devel
+            mingw-w64-${{matrix.env}}-toolchain
+            mingw-w64-${{matrix.env}}-cmake
+            mingw-w64-${{matrix.env}}-openblas
+
+      - name: Build using make
+        shell: msys2 {0}
+        run: |
+          make -j $(nproc)
+
+      - name: Clean after building using make
+        shell: msys2 {0}
+        run: |
+          make clean
+
+      - name: Build using make w/ OpenBLAS
+        shell: msys2 {0}
+        run: |
+          make LLAMA_OPENBLAS=1 -j $(nproc)
+
+      - name: Build using CMake
+        shell: msys2 {0}
+        run: |
+          cmake -B build
+          cmake --build build --config ${{ matrix.build }} -j $(nproc)
+
+      - name: Clean after building using CMake
+        shell: msys2 {0}
+        run: |
+          rm -rf build
+
+      - name: Build using CMake w/ OpenBLAS
+        shell: msys2 {0}
+        run: |
+          cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
+          cmake --build build --config ${{ matrix.build }} -j $(nproc)
+
   windows-latest-cmake:
     runs-on: windows-latest
2 changes: 1 addition & 1 deletion .github/workflows/close-issue.yml
@@ -12,7 +12,7 @@ jobs:
     steps:
       - uses: actions/stale@v5
         with:
-          exempt-issue-labels: "refactor,help wanted,good first issue,research"
+          exempt-issue-labels: "refactor,help wanted,good first issue,research,bug"
          days-before-issue-stale: 30
          days-before-issue-close: 14
          stale-issue-label: "stale"
3 changes: 1 addition & 2 deletions .github/workflows/python-lint.yml
@@ -20,5 +20,4 @@ jobs:
       - name: flake8 Lint
         uses: py-actions/flake8@v2
         with:
-          ignore: "E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503"
-          exclude: "examples/*,examples/*/**,*/**/__init__.py"
           plugins: "flake8-no-print"
41 changes: 19 additions & 22 deletions .github/workflows/server.yml
@@ -23,7 +23,7 @@ on:
     - cron: '2 4 * * *'

 concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
   cancel-in-progress: true

 jobs:
@@ -41,23 +41,16 @@ jobs:
           sanitizer: ""
       fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken

-    container:
-      image: ubuntu:latest
-      ports:
-        - 8888
-      options: --cpus 4
-
     steps:
       - name: Dependencies
        id: depends
        run: |
-          apt-get update
-          apt-get -y install \
+          sudo apt-get update
+          sudo apt-get -y install \
            build-essential \
            xxd \
            git \
            cmake \
-            python3-pip \
            curl \
            wget \
            language-pack-en \
@@ -70,6 +63,17 @@ jobs:
          fetch-depth: 0
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

+      - name: Python setup
+        id: setup_python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Tests dependencies
+        id: test_dependencies
+        run: |
+          pip install -r examples/server/tests/requirements.txt
+
       - name: Verify server deps
         id: verify_server_deps
         run: |
@@ -90,20 +94,14 @@ jobs:
       - name: Build
         id: cmake_build
         run: |
-          mkdir build
-          cd build
-          cmake .. \
+          cmake -B build \
            -DLLAMA_NATIVE=OFF \
            -DLLAMA_BUILD_SERVER=ON \
            -DLLAMA_CURL=ON \
            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
            -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
-          cmake --build . --config ${{ matrix.build_type }} -j $(nproc) --target server
+          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target server

-      - name: Tests dependencies
-        id: test_dependencies
-        run: |
-          pip install -r examples/server/tests/requirements.txt

       - name: Tests
         id: server_integration_tests
@@ -129,6 +127,7 @@
         uses: actions/checkout@v4
         with:
           fetch-depth: 0
+          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

       - name: libCURL
         id: get_libcurl
@@ -142,10 +141,8 @@
       - name: Build
         id: cmake_build
         run: |
-          mkdir build
-          cd build
-          cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
-          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
+          cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
+          cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server

       - name: Python setup
         id: setup_python
20 changes: 20 additions & 0 deletions .gitignore
@@ -2,6 +2,7 @@
 *.a
 *.so
 *.gguf
+*.gguf.json
 *.bin
 *.exe
 *.dll
@@ -34,6 +35,7 @@ lcov-report/
 gcovr-report/

 build*
+!build.zig
 cmake-build-*
 out/
 tmp/
@@ -100,7 +102,25 @@ qnt-*.txt
 perf-*.txt

 examples/jeopardy/results.txt
+examples/server/*.html.hpp
+examples/server/*.js.hpp
+examples/server/*.mjs.hpp

 poetry.lock
 poetry.toml
 nppBackup
+
+# Test binaries
+/tests/test-grammar-parser
+/tests/test-llama-grammar
+/tests/test-double-float
+/tests/test-grad0
+/tests/test-opt
+/tests/test-quantize-fns
+/tests/test-quantize-perf
+/tests/test-sampling
+/tests/test-tokenizer-0
+/tests/test-tokenizer-1-spm
+/tests/test-tokenizer-1-bpe
+/tests/test-rope
+/tests/test-backend-ops
5 changes: 3 additions & 2 deletions .pre-commit-config.yaml
@@ -3,13 +3,14 @@
 exclude: prompts/.*.txt
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v3.2.0
+  rev: v4.6.0
   hooks:
   - id: trailing-whitespace
   - id: end-of-file-fixer
   - id: check-yaml
   - id: check-added-large-files
 - repo: https://github.com/PyCQA/flake8
-  rev: 6.0.0
+  rev: 7.0.0
   hooks:
   - id: flake8
+    additional_dependencies: [flake8-no-print]
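With the hook revisions bumped and the flake8-no-print dependency declared, the same checks can be reproduced locally with the standard pre-commit CLI:

    pip install pre-commit
    pre-commit run --all-files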