ggml-org · slaren · May 2, 2025 · May 1, 2025 · May 2, 2025 · May 2, 2025
diff --git a/.editorconfig b/.editorconfig
@@ -21,23 +21,23 @@ indent_style = tab
 [prompts/*.txt]
 insert_final_newline = unset
 
-[examples/server/public/*]
+[tools/server/public/*]
 indent_size = 2
 
-[examples/server/public/deps_*]
+[tools/server/public/deps_*]
 trim_trailing_whitespace = unset
 indent_style = unset
 indent_size = unset
 
-[examples/server/deps_*]
+[tools/server/deps_*]
 trim_trailing_whitespace = unset
 indent_style = unset
 indent_size = unset
 
 [examples/llama.swiftui/llama.swiftui.xcodeproj/*]
 indent_style = tab
 
-[examples/cvector-generator/*.txt]
+[tools/cvector-generator/*.txt]
 trim_trailing_whitespace = unset
 insert_final_newline = unset
 

diff --git a/.flake8 b/.flake8
@@ -2,8 +2,9 @@
 max-line-length = 125
 ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
 exclude =
-    # Do not traverse examples
+    # Do not traverse examples and tools
     examples,
+    tools,
     # Do not include package initializers
     __init__.py,
     # No need to traverse our git directory

diff --git a/.github/labeler.yml b/.github/labeler.yml
@@ -45,7 +45,9 @@ build:
             - CMakePresets.json
 examples:
     - changed-files:
-        - any-glob-to-any-file: examples/**
+        - any-glob-to-any-file:
+            - examples/**
+            - tools/**
 devops:
     - changed-files:
         - any-glob-to-any-file:
@@ -70,7 +72,7 @@ android:
 server:
     - changed-files:
         - any-glob-to-any-file:
-            - examples/server/**
+            - tools/server/**
 ggml:
     - changed-files:
         - any-glob-to-any-file:

diff --git a/.github/workflows/bench.yml.disabled b/.github/workflows/bench.yml.disabled
@@ -27,10 +27,10 @@ on:
   push:
     branches:
       - master
-    paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
+    paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp']
   pull_request_target:
     types: [opened, synchronize, reopened]
-    paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
+    paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp']
   schedule:
     -  cron: '04 2 * * *'
 
@@ -69,7 +69,7 @@ jobs:
       - name: Install python env
         id: pipenv
         run: |
-          cd examples/server/bench
+          cd tools/server/bench
           python3 -m venv venv
           source venv/bin/activate
           pip install -r requirements.txt
@@ -79,7 +79,7 @@ jobs:
         run: |
           wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
           tar xzf prometheus*.tar.gz --strip-components=1
-          ./prometheus --config.file=examples/server/bench/prometheus.yml &
+          ./prometheus --config.file=tools/server/bench/prometheus.yml &
           while ! nc -z localhost 9090; do
             sleep 0.1
           done
@@ -92,7 +92,7 @@ jobs:
       - name: Install k6 and xk6-sse
         id: k6_installation
         run: |
-          cd examples/server/bench
+          cd tools/server/bench
           go install go.k6.io/xk6/cmd/xk6@latest
           xk6 build master \
               --with github.com/phymbert/xk6-sse
@@ -116,7 +116,7 @@ jobs:
       - name: Download the dataset
         id: download_dataset
         run: |
-          cd examples/server/bench
+          cd tools/server/bench
           wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
 
       - name: Server bench
@@ -126,7 +126,7 @@ jobs:
         run: |
           set -eux
 
-          cd examples/server/bench
+          cd tools/server/bench
           source venv/bin/activate
           python bench.py \
               --runner-label ${{ env.RUNNER_LABEL }} \
@@ -157,9 +157,9 @@ jobs:
           name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
           compression-level: 9
           path: |
-            examples/server/bench/*.jpg
-            examples/server/bench/*.json
-            examples/server/bench/*.log
+            tools/server/bench/*.jpg
+            tools/server/bench/*.json
+            tools/server/bench/*.log
 
       - name: Commit status
         uses: Sibz/github-status-action@v1
@@ -178,17 +178,17 @@ jobs:
         with:
           client_id: ${{secrets.IMGUR_CLIENT_ID}}
           path: |
-            examples/server/bench/prompt_tokens_seconds.jpg
-            examples/server/bench/predicted_tokens_seconds.jpg
-            examples/server/bench/kv_cache_usage_ratio.jpg
-            examples/server/bench/requests_processing.jpg
+            tools/server/bench/prompt_tokens_seconds.jpg
+            tools/server/bench/predicted_tokens_seconds.jpg
+            tools/server/bench/kv_cache_usage_ratio.jpg
+            tools/server/bench/requests_processing.jpg
 
       - name: Extract mermaid
         id: set_mermaid
         run: |
           set -eux
 
-          cd examples/server/bench
+          cd tools/server/bench
           PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
           echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
           echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV

diff --git a/.github/workflows/build-linux-cross.yml b/.github/workflows/build-linux-cross.yml
@@ -34,6 +34,7 @@ jobs:
           cmake -B build -DCMAKE_BUILD_TYPE=Release \
                          -DGGML_OPENMP=OFF \
                          -DLLAMA_BUILD_EXAMPLES=ON \
+                         -DLLAMA_BUILD_TOOLS=ON \
                          -DLLAMA_BUILD_TESTS=OFF \
                          -DCMAKE_SYSTEM_NAME=Linux \
                          -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
@@ -80,6 +81,7 @@ jobs:
                          -DGGML_VULKAN=ON \
                          -DGGML_OPENMP=OFF \
                          -DLLAMA_BUILD_EXAMPLES=ON \
+                         -DLLAMA_BUILD_TOOLS=ON \
                          -DLLAMA_BUILD_TESTS=OFF \
                          -DCMAKE_SYSTEM_NAME=Linux \
                          -DCMAKE_SYSTEM_PROCESSOR=riscv64 \
@@ -125,6 +127,7 @@ jobs:
                          -DGGML_VULKAN=ON \
                          -DGGML_OPENMP=OFF \
                          -DLLAMA_BUILD_EXAMPLES=ON \
+                         -DLLAMA_BUILD_TOOLS=ON \
                          -DLLAMA_BUILD_TESTS=OFF \
                          -DCMAKE_SYSTEM_NAME=Linux \
                          -DCMAKE_SYSTEM_PROCESSOR=aarch64 \

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -633,6 +633,7 @@ jobs:
             -DGGML_METAL_EMBED_LIBRARY=ON \
             -DLLAMA_BUILD_COMMON=OFF \
             -DLLAMA_BUILD_EXAMPLES=OFF \
+            -DLLAMA_BUILD_TOOLS=OFF \
             -DLLAMA_BUILD_TESTS=OFF \
             -DLLAMA_BUILD_SERVER=OFF \
             -DCMAKE_SYSTEM_NAME=iOS \
@@ -669,6 +670,7 @@ jobs:
             -DGGML_METAL_EMBED_LIBRARY=ON \
             -DLLAMA_BUILD_COMMON=OFF \
             -DLLAMA_BUILD_EXAMPLES=OFF \
+            -DLLAMA_BUILD_TOOLS=OFF \
             -DLLAMA_BUILD_TESTS=OFF \
             -DLLAMA_BUILD_SERVER=OFF \
             -DCMAKE_SYSTEM_NAME=tvOS \
@@ -699,6 +701,7 @@ jobs:
             -DGGML_METAL_EMBED_LIBRARY=ON \
             -DLLAMA_BUILD_COMMON=OFF \
             -DLLAMA_BUILD_EXAMPLES=OFF \
+            -DLLAMA_BUILD_TOOLS=OFF \
             -DLLAMA_BUILD_TESTS=OFF \
             -DLLAMA_BUILD_SERVER=OFF \
             -DCMAKE_SYSTEM_NAME=visionOS \
@@ -739,6 +742,7 @@ jobs:
             -DGGML_METAL_EMBED_LIBRARY=ON \
             -DLLAMA_CURL=OFF \
             -DLLAMA_BUILD_EXAMPLES=OFF \
+            -DLLAMA_BUILD_TOOLS=OFF \
             -DLLAMA_BUILD_TESTS=OFF \
             -DLLAMA_BUILD_SERVER=OFF \
             -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
@@ -1417,6 +1421,7 @@ jobs:
             -DGGML_METAL_EMBED_LIBRARY=ON \
             -DLLAMA_CURL=OFF \
             -DLLAMA_BUILD_EXAMPLES=OFF \
+            -DLLAMA_BUILD_TOOLS=OFF \
             -DLLAMA_BUILD_TESTS=OFF \
             -DLLAMA_BUILD_SERVER=OFF \
             -DCMAKE_SYSTEM_NAME=iOS \

diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml
@@ -15,10 +15,10 @@ on:
   push:
     branches:
       - master
-    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
+    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*']
   pull_request:
     types: [opened, synchronize, reopened]
-    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
+    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*']
 
 env:
   LLAMA_LOG_COLORS: 1
@@ -74,7 +74,7 @@ jobs:
       - name: Tests dependencies
         id: test_dependencies
         run: |
-          pip install -r examples/server/tests/requirements.txt
+          pip install -r tools/server/tests/requirements.txt
 
       # Setup nodejs (to be used for verifying bundled index.html)
       - uses: actions/setup-node@v4
@@ -84,14 +84,14 @@ jobs:
       - name: WebUI - Install dependencies
         id: webui_lint
         run: |
-          cd examples/server/webui
+          cd tools/server/webui
           npm ci
 
       - name: WebUI - Check code format
         id: webui_format
         run: |
           git config --global --add safe.directory $(realpath .)
-          cd examples/server/webui
+          cd tools/server/webui
           git status
 
           npm run format
@@ -108,7 +108,7 @@ jobs:
         id: verify_server_index_html
         run: |
           git config --global --add safe.directory $(realpath .)
-          cd examples/server/webui
+          cd tools/server/webui
           git status
 
           npm run build
@@ -161,21 +161,21 @@ jobs:
         env:
           GITHUB_ACTIONS: "true"
         run: |
-          cd examples/server/tests
+          cd tools/server/tests
           ./tests.sh
 
       - name: Tests (sanitizers)
         id: server_integration_tests_sanitizers
         if: ${{ matrix.sanitizer != '' }}
         run: |
-          cd examples/server/tests
+          cd tools/server/tests
           LLAMA_SANITIZE=1 ./tests.sh
 
       - name: Slow tests
         id: server_integration_tests_slow
         if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
         run: |
-          cd examples/server/tests
+          cd tools/server/tests
           SLOW_TESTS=1 ./tests.sh
 
 
@@ -211,7 +211,7 @@ jobs:
       - name: Tests dependencies
         id: test_dependencies
         run: |
-          pip install -r examples/server/tests/requirements.txt
+          pip install -r tools/server/tests/requirements.txt
 
       - name: Copy Libcurl
         id: prepare_libcurl
@@ -224,14 +224,14 @@ jobs:
         id: server_integration_tests
         if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
         run: |
-          cd examples/server/tests
+          cd tools/server/tests
           $env:PYTHONIOENCODING = ":replace"
           pytest -v -x -m "not slow"
 
       - name: Slow tests
         id: server_integration_tests_slow
         if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
         run: |
-          cd examples/server/tests
+          cd tools/server/tests
           $env:SLOW_TESTS = "1"
           pytest -v -x
diff --git a/.gitignore b/.gitignore
@@ -96,11 +96,11 @@ perf-*.txt
 # Examples
 
 examples/jeopardy/results.txt
-examples/server/*.css.hpp
-examples/server/*.html.hpp
-examples/server/*.js.hpp
-examples/server/*.mjs.hpp
-examples/server/*.gz.hpp
+tools/server/*.css.hpp
+tools/server/*.html.hpp
+tools/server/*.js.hpp
+tools/server/*.mjs.hpp
+tools/server/*.gz.hpp
 !build_64.sh
 !examples/*.bat
 !examples/*/*.kts
@@ -110,7 +110,7 @@ examples/server/*.gz.hpp
 
 # Server Web UI temporary files
 node_modules
-examples/server/webui/dist
+tools/server/webui/dist
 
 # Python
 

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -77,6 +77,7 @@ option(LLAMA_BUILD_COMMON "llama: build common utils library" ${LLAMA_STANDALONE
 
 # extra artifacts
 option(LLAMA_BUILD_TESTS    "llama: build tests"          ${LLAMA_STANDALONE})
+option(LLAMA_BUILD_TOOLS    "llama: build tools"          ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_EXAMPLES "llama: build examples"       ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_SERVER   "llama: build server example" ${LLAMA_STANDALONE})
 
@@ -187,6 +188,10 @@ if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
     add_subdirectory(pocs)
 endif()
 
+if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TOOLS)
+    add_subdirectory(tools)
+endif()
+
 #
 # install
 #

diff --git a/CODEOWNERS b/CODEOWNERS
@@ -2,7 +2,7 @@
 
 /ci/ @ggerganov
 /.devops/*.Dockerfile @ngxson
-/examples/server/ @ngxson
+/tools/server/ @ngxson
 /ggml/src/ggml-cuda/fattn* @JohannesGaessler
 /ggml/src/ggml-cuda/mmq.* @JohannesGaessler
 /ggml/src/ggml-cuda/mmv.* @JohannesGaessler