diff --git a/.editorconfig b/.editorconfig index 5d63d0a51e466..1eadda334ae71 100644 --- a/.editorconfig +++ b/.editorconfig @@ -21,15 +21,15 @@ indent_style = tab [prompts/*.txt] insert_final_newline = unset -[examples/server/public/*] +[tools/server/public/*] indent_size = 2 -[examples/server/public/deps_*] +[tools/server/public/deps_*] trim_trailing_whitespace = unset indent_style = unset indent_size = unset -[examples/server/deps_*] +[tools/server/deps_*] trim_trailing_whitespace = unset indent_style = unset indent_size = unset @@ -37,7 +37,7 @@ indent_size = unset [examples/llama.swiftui/llama.swiftui.xcodeproj/*] indent_style = tab -[examples/cvector-generator/*.txt] +[tools/cvector-generator/*.txt] trim_trailing_whitespace = unset insert_final_newline = unset diff --git a/.flake8 b/.flake8 index d64c2564aca8f..669d231f1f63b 100644 --- a/.flake8 +++ b/.flake8 @@ -2,8 +2,9 @@ max-line-length = 125 ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503 exclude = - # Do not traverse examples + # Do not traverse examples and tools examples, + tools, # Do not include package initializers __init__.py, # No need to traverse our git directory diff --git a/.github/labeler.yml b/.github/labeler.yml index 1b47bc96885c4..278032ef2e1a4 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -45,7 +45,9 @@ build: - CMakePresets.json examples: - changed-files: - - any-glob-to-any-file: examples/** + - any-glob-to-any-file: + - examples/** + - tools/** devops: - changed-files: - any-glob-to-any-file: @@ -70,7 +72,7 @@ android: server: - changed-files: - any-glob-to-any-file: - - examples/server/** + - tools/server/** ggml: - changed-files: - any-glob-to-any-file: diff --git a/.github/workflows/bench.yml.disabled b/.github/workflows/bench.yml.disabled index 75d2714792891..f2d7e16e981ac 100644 --- a/.github/workflows/bench.yml.disabled +++ b/.github/workflows/bench.yml.disabled @@ -27,10 +27,10 @@ on: push: branches: - master - paths: ['llama.cpp', 
'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp'] + paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp'] pull_request_target: types: [opened, synchronize, reopened] - paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp'] + paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp'] schedule: - cron: '04 2 * * *' @@ -69,7 +69,7 @@ jobs: - name: Install python env id: pipenv run: | - cd examples/server/bench + cd tools/server/bench python3 -m venv venv source venv/bin/activate pip install -r requirements.txt @@ -79,7 +79,7 @@ jobs: run: | wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz tar xzf prometheus*.tar.gz --strip-components=1 - ./prometheus --config.file=examples/server/bench/prometheus.yml & + ./prometheus --config.file=tools/server/bench/prometheus.yml & while ! 
nc -z localhost 9090; do sleep 0.1 done @@ -92,7 +92,7 @@ jobs: - name: Install k6 and xk6-sse id: k6_installation run: | - cd examples/server/bench + cd tools/server/bench go install go.k6.io/xk6/cmd/xk6@latest xk6 build master \ --with github.com/phymbert/xk6-sse @@ -116,7 +116,7 @@ jobs: - name: Download the dataset id: download_dataset run: | - cd examples/server/bench + cd tools/server/bench wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json - name: Server bench @@ -126,7 +126,7 @@ jobs: run: | set -eux - cd examples/server/bench + cd tools/server/bench source venv/bin/activate python bench.py \ --runner-label ${{ env.RUNNER_LABEL }} \ @@ -157,9 +157,9 @@ jobs: name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} compression-level: 9 path: | - examples/server/bench/*.jpg - examples/server/bench/*.json - examples/server/bench/*.log + tools/server/bench/*.jpg + tools/server/bench/*.json + tools/server/bench/*.log - name: Commit status uses: Sibz/github-status-action@v1 @@ -178,17 +178,17 @@ jobs: with: client_id: ${{secrets.IMGUR_CLIENT_ID}} path: | - examples/server/bench/prompt_tokens_seconds.jpg - examples/server/bench/predicted_tokens_seconds.jpg - examples/server/bench/kv_cache_usage_ratio.jpg - examples/server/bench/requests_processing.jpg + tools/server/bench/prompt_tokens_seconds.jpg + tools/server/bench/predicted_tokens_seconds.jpg + tools/server/bench/kv_cache_usage_ratio.jpg + tools/server/bench/requests_processing.jpg - name: Extract mermaid id: set_mermaid run: | set -eux - cd examples/server/bench + cd tools/server/bench PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid) echo "PROMPT_TOKENS_SECONDS<> $GITHUB_ENV echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV diff --git a/.github/workflows/build-linux-cross.yml b/.github/workflows/build-linux-cross.yml index d104b8b12432e..1c38d7e11da6b 100644 --- 
a/.github/workflows/build-linux-cross.yml +++ b/.github/workflows/build-linux-cross.yml @@ -34,6 +34,7 @@ jobs: cmake -B build -DCMAKE_BUILD_TYPE=Release \ -DGGML_OPENMP=OFF \ -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TOOLS=ON \ -DLLAMA_BUILD_TESTS=OFF \ -DCMAKE_SYSTEM_NAME=Linux \ -DCMAKE_SYSTEM_PROCESSOR=riscv64 \ @@ -80,6 +81,7 @@ jobs: -DGGML_VULKAN=ON \ -DGGML_OPENMP=OFF \ -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TOOLS=ON \ -DLLAMA_BUILD_TESTS=OFF \ -DCMAKE_SYSTEM_NAME=Linux \ -DCMAKE_SYSTEM_PROCESSOR=riscv64 \ @@ -125,6 +127,7 @@ jobs: -DGGML_VULKAN=ON \ -DGGML_OPENMP=OFF \ -DLLAMA_BUILD_EXAMPLES=ON \ + -DLLAMA_BUILD_TOOLS=ON \ -DLLAMA_BUILD_TESTS=OFF \ -DCMAKE_SYSTEM_NAME=Linux \ -DCMAKE_SYSTEM_PROCESSOR=aarch64 \ diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 34417985d2399..bcea1a8afcf47 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -633,6 +633,7 @@ jobs: -DGGML_METAL_EMBED_LIBRARY=ON \ -DLLAMA_BUILD_COMMON=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ -DLLAMA_BUILD_TESTS=OFF \ -DLLAMA_BUILD_SERVER=OFF \ -DCMAKE_SYSTEM_NAME=iOS \ @@ -669,6 +670,7 @@ jobs: -DGGML_METAL_EMBED_LIBRARY=ON \ -DLLAMA_BUILD_COMMON=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ -DLLAMA_BUILD_TESTS=OFF \ -DLLAMA_BUILD_SERVER=OFF \ -DCMAKE_SYSTEM_NAME=tvOS \ @@ -699,6 +701,7 @@ jobs: -DGGML_METAL_EMBED_LIBRARY=ON \ -DLLAMA_BUILD_COMMON=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ -DLLAMA_BUILD_TESTS=OFF \ -DLLAMA_BUILD_SERVER=OFF \ -DCMAKE_SYSTEM_NAME=visionOS \ @@ -739,6 +742,7 @@ jobs: -DGGML_METAL_EMBED_LIBRARY=ON \ -DLLAMA_CURL=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ -DLLAMA_BUILD_TESTS=OFF \ -DLLAMA_BUILD_SERVER=OFF \ -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" @@ -1417,6 +1421,7 @@ jobs: -DGGML_METAL_EMBED_LIBRARY=ON \ -DLLAMA_CURL=OFF \ -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_TOOLS=OFF \ -DLLAMA_BUILD_TESTS=OFF \ 
-DLLAMA_BUILD_SERVER=OFF \ -DCMAKE_SYSTEM_NAME=iOS \ diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml index 6c9b5132276fe..4baf6f6c755ee 100644 --- a/.github/workflows/server.yml +++ b/.github/workflows/server.yml @@ -15,10 +15,10 @@ on: push: branches: - master - paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*'] + paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*'] pull_request: types: [opened, synchronize, reopened] - paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*'] + paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*'] env: LLAMA_LOG_COLORS: 1 @@ -74,7 +74,7 @@ jobs: - name: Tests dependencies id: test_dependencies run: | - pip install -r examples/server/tests/requirements.txt + pip install -r tools/server/tests/requirements.txt # Setup nodejs (to be used for verifying bundled index.html) - uses: actions/setup-node@v4 @@ -84,14 +84,14 @@ jobs: - name: WebUI - Install dependencies id: webui_lint run: | - cd examples/server/webui + cd tools/server/webui npm ci - name: WebUI - Check code format id: webui_format run: | git config --global --add safe.directory $(realpath .) - cd examples/server/webui + cd tools/server/webui git status npm run format @@ -108,7 +108,7 @@ jobs: id: verify_server_index_html run: | git config --global --add safe.directory $(realpath .) 
- cd examples/server/webui + cd tools/server/webui git status npm run build @@ -161,21 +161,21 @@ jobs: env: GITHUB_ACTIONS: "true" run: | - cd examples/server/tests + cd tools/server/tests ./tests.sh - name: Tests (sanitizers) id: server_integration_tests_sanitizers if: ${{ matrix.sanitizer != '' }} run: | - cd examples/server/tests + cd tools/server/tests LLAMA_SANITIZE=1 ./tests.sh - name: Slow tests id: server_integration_tests_slow if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }} run: | - cd examples/server/tests + cd tools/server/tests SLOW_TESTS=1 ./tests.sh @@ -211,7 +211,7 @@ jobs: - name: Tests dependencies id: test_dependencies run: | - pip install -r examples/server/tests/requirements.txt + pip install -r tools/server/tests/requirements.txt - name: Copy Libcurl id: prepare_libcurl @@ -224,7 +224,7 @@ jobs: id: server_integration_tests if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }} run: | - cd examples/server/tests + cd tools/server/tests $env:PYTHONIOENCODING = ":replace" pytest -v -x -m "not slow" @@ -232,6 +232,6 @@ jobs: id: server_integration_tests_slow if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }} run: | - cd examples/server/tests + cd tools/server/tests $env:SLOW_TESTS = "1" pytest -v -x diff --git a/.gitignore b/.gitignore index 2c67ad7f7c609..f8ceb1560a1df 100644 --- a/.gitignore +++ b/.gitignore @@ -96,11 +96,11 @@ perf-*.txt # Examples examples/jeopardy/results.txt -examples/server/*.css.hpp -examples/server/*.html.hpp -examples/server/*.js.hpp -examples/server/*.mjs.hpp -examples/server/*.gz.hpp +tools/server/*.css.hpp +tools/server/*.html.hpp +tools/server/*.js.hpp +tools/server/*.mjs.hpp +tools/server/*.gz.hpp !build_64.sh !examples/*.bat !examples/*/*.kts @@ -110,7 +110,7 @@ examples/server/*.gz.hpp # Server Web UI temporary files node_modules -examples/server/webui/dist 
+tools/server/webui/dist # Python diff --git a/CMakeLists.txt b/CMakeLists.txt index de51c0a17b2f6..3f7e43b6e625f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -77,6 +77,7 @@ option(LLAMA_BUILD_COMMON "llama: build common utils library" ${LLAMA_STANDALONE # extra artifacts option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE}) +option(LLAMA_BUILD_TOOLS "llama: build tools" ${LLAMA_STANDALONE}) option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE}) @@ -187,6 +188,10 @@ if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES) add_subdirectory(pocs) endif() +if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TOOLS) + add_subdirectory(tools) +endif() + # # install # diff --git a/CODEOWNERS b/CODEOWNERS index 72d594b46e911..3186f8eb1c514 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -2,7 +2,7 @@ /ci/ @ggerganov /.devops/*.Dockerfile @ngxson -/examples/server/ @ngxson +/tools/server/ @ngxson /ggml/src/ggml-cuda/fattn* @JohannesGaessler /ggml/src/ggml-cuda/mmq.* @JohannesGaessler /ggml/src/ggml-cuda/mmv.* @JohannesGaessler diff --git a/Makefile b/Makefile index 772993ada2707..305037089b882 100644 --- a/Makefile +++ b/Makefile @@ -1156,10 +1156,10 @@ $(LIB_COMMON_S): $(OBJ_COMMON) # Clean generated server assets clean-server-assets: - find examples/server -type f -name "*.js.hpp" -delete - find examples/server -type f -name "*.mjs.hpp" -delete - find examples/server -type f -name "*.css.hpp" -delete - find examples/server -type f -name "*.html.hpp" -delete + find tools/server -type f -name "*.js.hpp" -delete + find tools/server -type f -name "*.mjs.hpp" -delete + find tools/server -type f -name "*.css.hpp" -delete + find tools/server -type f -name "*.html.hpp" -delete # Clean rule clean: clean-server-assets @@ -1179,7 +1179,7 @@ clean: clean-server-assets # Helper function that replaces .c, .cpp, and .cu file endings with .o: GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst 
%.cpp,%.o,$(patsubst %.cu,%.o,$(1)))) -llama-cli: examples/main/main.cpp \ +llama-cli: tools/main/main.cpp \ $(OBJ_ALL) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) @@ -1192,7 +1192,7 @@ llama-infill: examples/infill/infill.cpp \ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) -llama-run: examples/run/run.cpp \ +llama-run: tools/run/run.cpp \ $(OBJ_ALL) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) @@ -1207,7 +1207,7 @@ llama-simple-chat: examples/simple-chat/simple-chat.cpp \ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) -llama-tokenize: examples/tokenize/tokenize.cpp \ +llama-tokenize: tools/tokenize/tokenize.cpp \ $(OBJ_ALL) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) @@ -1217,27 +1217,27 @@ llama-batched: examples/batched/batched.cpp \ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) -llama-batched-bench: examples/batched-bench/batched-bench.cpp \ +llama-batched-bench: tools/batched-bench/batched-bench.cpp \ $(OBJ_ALL) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) -llama-quantize: examples/quantize/quantize.cpp \ +llama-quantize: tools/quantize/quantize.cpp \ $(OBJ_ALL) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) -llama-quantize-stats: examples/quantize-stats/quantize-stats.cpp \ +llama-quantize-stats: 
tools/quantize-stats/quantize-stats.cpp \ $(OBJ_ALL) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) -llama-perplexity: examples/perplexity/perplexity.cpp \ +llama-perplexity: tools/perplexity/perplexity.cpp \ $(OBJ_ALL) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) -llama-imatrix: examples/imatrix/imatrix.cpp \ +llama-imatrix: tools/imatrix/imatrix.cpp \ $(OBJ_ALL) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) @@ -1279,7 +1279,7 @@ llama-gguf-hash: examples/gguf-hash/gguf-hash.cpp examples/gguf-hash/deps/sha1/s $(CXX) $(CXXFLAGS) -Iexamples/gguf-hash/deps -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) -llama-gguf-split: examples/gguf-split/gguf-split.cpp \ +llama-gguf-split: tools/gguf-split/gguf-split.cpp \ $(OBJ_ALL) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) @@ -1289,7 +1289,7 @@ llama-eval-callback: examples/eval-callback/eval-callback.cpp \ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) -llama-cvector-generator: examples/cvector-generator/cvector-generator.cpp \ +llama-cvector-generator: tools/cvector-generator/cvector-generator.cpp \ $(OBJ_ALL) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) @@ -1299,12 +1299,12 @@ llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) -llama-bench: 
examples/llama-bench/llama-bench.cpp \ +llama-bench: tools/llama-bench/llama-bench.cpp \ $(OBJ_ALL) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) -llama-export-lora: examples/export-lora/export-lora.cpp \ +llama-export-lora: tools/export-lora/export-lora.cpp \ $(OBJ_ALL) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) @@ -1360,17 +1360,17 @@ llama-gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp \ $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) ifdef GGML_RPC -rpc-server: examples/rpc/rpc-server.cpp \ +rpc-server: tools/rpc/rpc-server.cpp \ $(OBJ_GGML) $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) endif # GGML_RPC llama-server: \ - examples/server/server.cpp \ - examples/server/utils.hpp \ - examples/server/httplib.h \ - examples/server/index.html.hpp \ - examples/server/loading.html.hpp \ + tools/server/server.cpp \ + tools/server/utils.hpp \ + tools/server/httplib.h \ + tools/server/index.html.hpp \ + tools/server/loading.html.hpp \ common/chat.cpp \ common/chat.h \ common/chat-template.hpp \ @@ -1378,10 +1378,10 @@ llama-server: \ common/minja.hpp \ $(OBJ_ALL) $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2) + $(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Itools/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2) -# Portable equivalent of `cd examples/server/public && xxd -i $(notdir $<) ../$(notdir $<).hpp`: -examples/server/%.hpp: examples/server/public/% FORCE Makefile +# Portable equivalent of `cd tools/server/public && xxd -i $(notdir $<) ../$(notdir $<).hpp`: +tools/server/%.hpp: tools/server/public/% FORCE Makefile @( export NAME=$(subst .,_,$(subst -,_,$(notdir $<))) && \ echo "unsigned char 
$${NAME}[] = {" && \ cat $< | od -v -t x1 -An | sed -E 's/([0-9a-fA-F]+)/0x\1, /g' && \ @@ -1394,36 +1394,36 @@ llama-gen-docs: examples/gen-docs/gen-docs.cpp \ $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) -libllava.a: examples/llava/llava.cpp \ - examples/llava/llava.h \ - examples/llava/clip.cpp \ - examples/llava/clip.h \ +libllava.a: tools/llava/llava.cpp \ + tools/llava/llava.h \ + tools/llava/clip.cpp \ + tools/llava/clip.h \ common/stb_image.h \ common/base64.hpp \ $(OBJ_ALL) $(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual -llama-llava-cli: examples/llava/llava-cli.cpp \ - examples/llava/llava.cpp \ - examples/llava/llava.h \ - examples/llava/clip.cpp \ - examples/llava/clip.h \ +llama-llava-cli: tools/llava/llava-cli.cpp \ + tools/llava/llava.cpp \ + tools/llava/llava.h \ + tools/llava/clip.cpp \ + tools/llava/clip.h \ $(OBJ_ALL) $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual -llama-minicpmv-cli: examples/llava/minicpmv-cli.cpp \ - examples/llava/llava.cpp \ - examples/llava/llava.h \ - examples/llava/clip.cpp \ - examples/llava/clip.h \ +llama-minicpmv-cli: tools/llava/minicpmv-cli.cpp \ + tools/llava/llava.cpp \ + tools/llava/llava.h \ + tools/llava/clip.cpp \ + tools/llava/clip.h \ $(OBJ_ALL) $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual -llama-qwen2vl-cli: examples/llava/qwen2vl-cli.cpp \ - examples/llava/llava.cpp \ - examples/llava/llava.h \ - examples/llava/clip.cpp \ - examples/llava/clip.h \ +llama-qwen2vl-cli: tools/llava/qwen2vl-cli.cpp \ + tools/llava/llava.cpp \ + tools/llava/llava.h \ + tools/llava/clip.cpp \ + tools/llava/clip.h \ $(OBJ_ALL) $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual @@ -1480,12 +1480,12 @@ tests/test-double-float: tests/test-double-float.cpp tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp \ $(OBJ_ALL) 
- $(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) -Itools/server -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) tests/test-chat: tests/test-chat.cpp \ $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<) + $(CXX) $(CXXFLAGS) -Itools/server -c $< -o $(call GET_OBJ_FILE, $<) $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) tests/test-opt: tests/test-opt.cpp \ diff --git a/README.md b/README.md index 42c0eb633ef5d..e0232478c75a2 100644 --- a/README.md +++ b/README.md @@ -242,7 +242,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo | [Vulkan](docs/build.md#vulkan) | GPU | | [CANN](docs/build.md#cann) | Ascend NPU | | [OpenCL](docs/backend/OPENCL.md) | Adreno GPU | -| [RPC](https://github.com/ggml-org/llama.cpp/tree/master/examples/rpc) | All | +| [RPC](https://github.com/ggml-org/llama.cpp/tree/master/tools/rpc) | All | ## Building the project @@ -276,9 +276,9 @@ The Hugging Face platform provides a variety of online tools for converting, qua - Use the [GGUF-editor space](https://huggingface.co/spaces/CISCai/gguf-editor) to edit GGUF meta data in the browser (more info: https://github.com/ggml-org/llama.cpp/discussions/9268) - Use the [Inference Endpoints](https://ui.endpoints.huggingface.co/) to directly host `llama.cpp` in the cloud (more info: https://github.com/ggml-org/llama.cpp/discussions/9669) -To learn more about model quantization, [read this documentation](examples/quantize/README.md) +To learn more about model quantization, [read this documentation](tools/quantize/README.md) -## [`llama-cli`](examples/main) +## [`llama-cli`](tools/main) #### A CLI tool for accessing and experimenting with most of `llama.cpp`'s functionality. 
@@ -341,7 +341,7 @@ To learn more about model quantization, [read this documentation](examples/quant -## [`llama-server`](examples/server) +## [`llama-server`](tools/server) #### A lightweight, [OpenAI API](https://github.com/openai/openai-openapi) compatible, HTTP server for serving LLMs. @@ -411,7 +411,7 @@ To learn more about model quantization, [read this documentation](examples/quant -## [`llama-perplexity`](examples/perplexity) +## [`llama-perplexity`](tools/perplexity) #### A tool for measuring the perplexity [^1][^2] (and other quality metrics) of a model over a given text. @@ -436,10 +436,10 @@ To learn more about model quantization, [read this documentation](examples/quant -[^1]: [examples/perplexity/README.md](./examples/perplexity/README.md) +[^1]: [tools/perplexity/README.md](./tools/perplexity/README.md) [^2]: [https://huggingface.co/docs/transformers/perplexity](https://huggingface.co/docs/transformers/perplexity) -## [`llama-bench`](examples/llama-bench) +## [`llama-bench`](tools/llama-bench) #### Benchmark the performance of the inference for various parameters. @@ -460,7 +460,7 @@ To learn more about model quantization, [read this documentation](examples/quant -## [`llama-run`](examples/run) +## [`llama-run`](tools/run) #### A comprehensive example for running `llama.cpp` models. Useful for inferencing. Used with RamaLama [^3]. 
@@ -504,8 +504,8 @@ To learn more about model quantization, [read this documentation](examples/quant ## Other documentation -- [main (cli)](examples/main/README.md) -- [server](examples/server/README.md) +- [main (cli)](tools/main/README.md) +- [server](tools/server/README.md) - [GBNF grammars](grammars/README.md) #### Development documentation diff --git a/SECURITY.md b/SECURITY.md index 9370fb1a88321..9749e95b715a7 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -40,7 +40,7 @@ To protect sensitive data from potential leaks or unauthorized access, it is cru ### Untrusted environments or networks If you can't run your models in a secure and isolated environment or if it must be exposed to an untrusted network, make sure to take the following security precautions: -* Do not use the RPC backend, [rpc-server](https://github.com/ggml-org/llama.cpp/tree/master/examples/rpc) and [llama-server](https://github.com/ggml-org/llama.cpp/tree/master/examples/server) functionality (see https://github.com/ggml-org/llama.cpp/pull/13061). +* Do not use the RPC backend, [rpc-server](https://github.com/ggml-org/llama.cpp/tree/master/tools/rpc) and [llama-server](https://github.com/ggml-org/llama.cpp/tree/master/tools/server) functionality (see https://github.com/ggml-org/llama.cpp/pull/13061). * Confirm the hash of any downloaded artifact (e.g. pre-trained model weights) matches a known-good value. * Encrypt your data if sending it over the network. 
diff --git a/build-xcframework.sh b/build-xcframework.sh index 97001b5f7ff85..3c2498b035b98 100755 --- a/build-xcframework.sh +++ b/build-xcframework.sh @@ -8,6 +8,7 @@ TVOS_MIN_OS_VERSION=16.4 BUILD_SHARED_LIBS=OFF LLAMA_BUILD_EXAMPLES=OFF +LLAMA_BUILD_TOOLS=OFF LLAMA_BUILD_TESTS=OFF LLAMA_BUILD_SERVER=OFF GGML_METAL=ON @@ -31,6 +32,7 @@ COMMON_CMAKE_ARGS=( -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml -DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS} -DLLAMA_BUILD_EXAMPLES=${LLAMA_BUILD_EXAMPLES} + -DLLAMA_BUILD_TOOLS=${LLAMA_BUILD_TOOLS} -DLLAMA_BUILD_TESTS=${LLAMA_BUILD_TESTS} -DLLAMA_BUILD_SERVER=${LLAMA_BUILD_SERVER} -DGGML_METAL_EMBED_LIBRARY=${GGML_METAL_EMBED_LIBRARY} diff --git a/ci/run.sh b/ci/run.sh index f463d7a8b2009..b49a3a5f82357 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -187,8 +187,8 @@ function gg_run_test_scripts_debug { set -e - (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log - (cd ./examples/quantize && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log + (cd ./tools/gguf-split && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log + (cd ./tools/quantize && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log set +e } @@ -211,8 +211,8 @@ function gg_run_test_scripts_release { set -e - (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log - (cd ./examples/quantize && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log + (cd ./tools/gguf-split && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log + (cd ./tools/quantize && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log set +e } diff --git a/common/arg.cpp b/common/arg.cpp index 
aface844c9319..5080aa2fcbffd 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2211,14 +2211,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONT_BATCHING")); add_opt(common_arg( {"--mmproj"}, "FILE", - "path to a multimodal projector file. see examples/llava/README.md", + "path to a multimodal projector file. see tools/llava/README.md", [](common_params & params, const std::string & value) { params.mmproj.path = value; } ).set_examples(mmproj_examples)); add_opt(common_arg( {"--mmproj-url"}, "URL", - "URL to a multimodal projector file. see examples/llava/README.md", + "URL to a multimodal projector file. see tools/llava/README.md", [](common_params & params, const std::string & value) { params.mmproj.url = value; } diff --git a/common/common.h b/common/common.h index 0a9dc0599f722..cfe1b72786795 100644 --- a/common/common.h +++ b/common/common.h @@ -340,7 +340,7 @@ struct common_params { common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO; - // multimodal models (see examples/llava) + // multimodal models (see tools/llava) struct common_params_model mmproj; bool mmproj_use_gpu = true; // use GPU for multimodal model bool no_mmproj = false; // explicitly disable multimodal model @@ -414,8 +414,8 @@ struct common_params { int n_pca_batch = 100; int n_pca_iterations = 1000; dimre_method cvector_dimre_method = DIMRE_METHOD_PCA; - std::string cvector_positive_file = "examples/cvector-generator/positive.txt"; - std::string cvector_negative_file = "examples/cvector-generator/negative.txt"; + std::string cvector_positive_file = "tools/cvector-generator/positive.txt"; + std::string cvector_negative_file = "tools/cvector-generator/negative.txt"; bool spm_infill = false; // suffix/prefix/middle pattern for infill diff --git a/docs/development/HOWTO-add-model.md b/docs/development/HOWTO-add-model.md index 78c6f76077a2b..7f71e0247ddc7 100644 --- 
a/docs/development/HOWTO-add-model.md +++ b/docs/development/HOWTO-add-model.md @@ -9,10 +9,10 @@ Adding a model requires few steps: After following these steps, you can open PR. Also, it is important to check that the examples and main ggml backends (CUDA, METAL, CPU) are working with the new architecture, especially: -- [main](/examples/main/) -- [imatrix](/examples/imatrix/) -- [quantize](/examples/quantize/) -- [server](/examples/server/) +- [main](/tools/main/) +- [imatrix](/tools/imatrix/) +- [quantize](/tools/quantize/) +- [server](/tools/server/) ### 1. Convert the model to GGUF diff --git a/docs/multimodal/MobileVLM.md b/docs/multimodal/MobileVLM.md index 20ac02f7a8dfc..a647d7d357ed6 100644 --- a/docs/multimodal/MobileVLM.md +++ b/docs/multimodal/MobileVLM.md @@ -33,13 +33,13 @@ git clone https://huggingface.co/openai/clip-vit-large-patch14-336 2. Use `llava_surgery.py` to split the LLaVA model to LLaMA and multimodel projector constituents: ```sh -python ./examples/llava/llava_surgery.py -m path/to/MobileVLM-1.7B +python ./tools/llava/llava_surgery.py -m path/to/MobileVLM-1.7B ``` 3. 
Use `convert_image_encoder_to_gguf.py` with `--projector-type ldp` (for **V2** please use `--projector-type ldpv2`) to convert the LLaVA image encoder to GGUF: ```sh -python ./examples/llava/convert_image_encoder_to_gguf.py \ +python ./tools/llava/convert_image_encoder_to_gguf.py \ -m path/to/clip-vit-large-patch14-336 \ --llava-projector path/to/MobileVLM-1.7B/llava.projector \ --output-dir path/to/MobileVLM-1.7B \ @@ -47,7 +47,7 @@ python ./examples/llava/convert_image_encoder_to_gguf.py \ ``` ```sh -python ./examples/llava/convert_image_encoder_to_gguf.py \ +python ./tools/llava/convert_image_encoder_to_gguf.py \ -m path/to/clip-vit-large-patch14-336 \ --llava-projector path/to/MobileVLM-1.7B_V2/llava.projector \ --output-dir path/to/MobileVLM-1.7B_V2 \ @@ -69,10 +69,10 @@ Now both the LLaMA part and the image encoder is in the `MobileVLM-1.7B` directo ## Android compile and run ### compile -refer to `examples/llava/android/build_64.sh` +refer to `tools/llava/android/build_64.sh` ```sh -mkdir examples/llava/android/build_64 -cd examples/llava/android/build_64 +mkdir tools/llava/android/build_64 +cd tools/llava/android/build_64 ../build_64.sh ``` ### run on Android diff --git a/docs/multimodal/glmedge.md b/docs/multimodal/glmedge.md index af6b696a8ad27..e7dfafdde1266 100644 --- a/docs/multimodal/glmedge.md +++ b/docs/multimodal/glmedge.md @@ -25,13 +25,13 @@ git clone https://huggingface.co/THUDM/glm-edge-v-5b or https://huggingface.co/T 2. Use `glmedge-surgery.py` to split the GLMV-EDGE model to LLM and multimodel projector constituents: ```sh -python ./examples/llava/glmedge-surgery.py -m ../model_path +python ./tools/llava/glmedge-surgery.py -m ../model_path ``` 4. 
Use `glmedge-convert-image-encoder-to-gguf.py` to convert the GLMV-EDGE image encoder to GGUF: ```sh -python ./examples/llava/glmedge-convert-image-encoder-to-gguf.py -m ../model_path --llava-projector ../model_path/glm.projector --output-dir ../model_path +python ./tools/llava/glmedge-convert-image-encoder-to-gguf.py -m ../model_path --llava-projector ../model_path/glm.projector --output-dir ../model_path ``` 5. Use `examples/convert_hf_to_gguf.py` to convert the LLM part of GLMV-EDGE to GGUF: diff --git a/docs/multimodal/llava.md b/docs/multimodal/llava.md index c5bdc82158ede..0087b10610654 100644 --- a/docs/multimodal/llava.md +++ b/docs/multimodal/llava.md @@ -37,19 +37,19 @@ git clone https://huggingface.co/openai/clip-vit-large-patch14-336 2. Install the required Python packages: ```sh -pip install -r examples/llava/requirements.txt +pip install -r tools/llava/requirements.txt ``` 3. Use `llava_surgery.py` to split the LLaVA model to LLaMA and multimodel projector constituents: ```sh -python ./examples/llava/llava_surgery.py -m ../llava-v1.5-7b +python ./tools/llava/llava_surgery.py -m ../llava-v1.5-7b ``` 4. Use `convert_image_encoder_to_gguf.py` to convert the LLaVA image encoder to GGUF: ```sh -python ./examples/llava/convert_image_encoder_to_gguf.py -m ../clip-vit-large-patch14-336 --llava-projector ../llava-v1.5-7b/llava.projector --output-dir ../llava-v1.5-7b +python ./tools/llava/convert_image_encoder_to_gguf.py -m ../clip-vit-large-patch14-336 --llava-projector ../llava-v1.5-7b/llava.projector --output-dir ../llava-v1.5-7b ``` 5. 
Use `examples/convert_legacy_llama.py` to convert the LLaMA part of LLaVA to GGUF: @@ -69,12 +69,12 @@ git clone https://huggingface.co/liuhaotian/llava-v1.6-vicuna-7b 2) Install the required Python packages: ```sh -pip install -r examples/llava/requirements.txt +pip install -r tools/llava/requirements.txt ``` 3) Use `llava_surgery_v2.py` which also supports llava-1.5 variants pytorch as well as safetensor models: ```console -python examples/llava/llava_surgery_v2.py -C -m ../llava-v1.6-vicuna-7b/ +python tools/llava/llava_surgery_v2.py -C -m ../llava-v1.6-vicuna-7b/ ``` - you will find a llava.projector and a llava.clip file in your model directory @@ -88,7 +88,7 @@ curl -s -q https://huggingface.co/cmp-nct/llava-1.6-gguf/raw/main/config_vit.jso 5) Create the visual gguf model: ```console -python ./examples/llava/convert_image_encoder_to_gguf.py -m vit --llava-projector vit/llava.projector --output-dir vit --clip-model-is-vision +python ./tools/llava/convert_image_encoder_to_gguf.py -m vit --llava-projector vit/llava.projector --output-dir vit --clip-model-is-vision ``` - This is similar to llava-1.5, the difference is that we tell the encoder that we are working with the pure vision model part of CLIP diff --git a/docs/multimodal/minicpmo2.6.md b/docs/multimodal/minicpmo2.6.md index de470d8a82cc6..c9aab8abc0d90 100644 --- a/docs/multimodal/minicpmo2.6.md +++ b/docs/multimodal/minicpmo2.6.md @@ -29,8 +29,8 @@ cmake --build build --config Release Convert PyTorch model to gguf files (You can also download the converted [gguf](https://huggingface.co/openbmb/MiniCPM-o-2_6-gguf) by us) ```bash -python ./examples/llava/minicpmv-surgery.py -m ../MiniCPM-o-2_6 -python ./examples/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-o-2_6 --minicpmv-projector ../MiniCPM-o-2_6/minicpmv.projector --output-dir ../MiniCPM-o-2_6/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 4 +python ./tools/llava/minicpmv-surgery.py -m ../MiniCPM-o-2_6 +python 
./tools/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-o-2_6 --minicpmv-projector ../MiniCPM-o-2_6/minicpmv.projector --output-dir ../MiniCPM-o-2_6/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 4 python ./convert_hf_to_gguf.py ../MiniCPM-o-2_6/model # quantize int4 version diff --git a/docs/multimodal/minicpmv2.5.md b/docs/multimodal/minicpmv2.5.md index 7a6879d3959ca..4603bd7c1c735 100644 --- a/docs/multimodal/minicpmv2.5.md +++ b/docs/multimodal/minicpmv2.5.md @@ -28,8 +28,8 @@ cmake --build build --config Release Convert PyTorch model to gguf files (You can also download the converted [gguf](https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf) by us) ```bash -python ./examples/llava/minicpmv-surgery.py -m ../MiniCPM-Llama3-V-2_5 -python ./examples/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-Llama3-V-2_5 --minicpmv-projector ../MiniCPM-Llama3-V-2_5/minicpmv.projector --output-dir ../MiniCPM-Llama3-V-2_5/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 2 +python ./tools/llava/minicpmv-surgery.py -m ../MiniCPM-Llama3-V-2_5 +python ./tools/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-Llama3-V-2_5 --minicpmv-projector ../MiniCPM-Llama3-V-2_5/minicpmv.projector --output-dir ../MiniCPM-Llama3-V-2_5/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 2 python ./convert_hf_to_gguf.py ../MiniCPM-Llama3-V-2_5/model # quantize int4 version diff --git a/docs/multimodal/minicpmv2.6.md b/docs/multimodal/minicpmv2.6.md index 410a5dd1771e4..69ebc12961399 100644 --- a/docs/multimodal/minicpmv2.6.md +++ b/docs/multimodal/minicpmv2.6.md @@ -28,8 +28,8 @@ cmake --build build --config Release Convert PyTorch model to gguf files (You can also download the converted [gguf](https://huggingface.co/openbmb/MiniCPM-V-2_6-gguf) by us) ```bash -python ./examples/llava/minicpmv-surgery.py -m ../MiniCPM-V-2_6 -python ./examples/llava/minicpmv-convert-image-encoder-to-gguf.py -m 
../MiniCPM-V-2_6 --minicpmv-projector ../MiniCPM-V-2_6/minicpmv.projector --output-dir ../MiniCPM-V-2_6/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 3 +python ./tools/llava/minicpmv-surgery.py -m ../MiniCPM-V-2_6 +python ./tools/llava/minicpmv-convert-image-encoder-to-gguf.py -m ../MiniCPM-V-2_6 --minicpmv-projector ../MiniCPM-V-2_6/minicpmv.projector --output-dir ../MiniCPM-V-2_6/ --image-mean 0.5 0.5 0.5 --image-std 0.5 0.5 0.5 --minicpmv_version 3 python ./convert_hf_to_gguf.py ../MiniCPM-V-2_6/model # quantize int4 version diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 37476f9043e78..eca0d0b097197 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -12,51 +12,30 @@ llama_add_compile_flags() # examples -include_directories(${CMAKE_CURRENT_SOURCE_DIR}) - if (EMSCRIPTEN) else() - add_subdirectory(batched-bench) add_subdirectory(batched) add_subdirectory(embedding) add_subdirectory(eval-callback) add_subdirectory(gguf-hash) - add_subdirectory(gguf-split) add_subdirectory(gguf) add_subdirectory(gritlm) - add_subdirectory(imatrix) add_subdirectory(infill) - add_subdirectory(llama-bench) add_subdirectory(lookahead) add_subdirectory(lookup) - add_subdirectory(main) add_subdirectory(parallel) add_subdirectory(passkey) - add_subdirectory(perplexity) - add_subdirectory(quantize) add_subdirectory(retrieval) - if (LLAMA_BUILD_SERVER) - add_subdirectory(server) - endif() add_subdirectory(save-load-state) - add_subdirectory(run) add_subdirectory(simple) add_subdirectory(simple-chat) add_subdirectory(speculative) add_subdirectory(speculative-simple) - add_subdirectory(tokenize) - add_subdirectory(tts) add_subdirectory(gen-docs) if (NOT GGML_BACKEND_DL) - # these examples use the backends directly and cannot be built with dynamic loading add_subdirectory(convert-llama2c-to-ggml) - add_subdirectory(cvector-generator) - add_subdirectory(export-lora) - add_subdirectory(llava) - if (GGML_RPC) - add_subdirectory(rpc) - 
endif() + # these examples use the backends directly and cannot be built with dynamic loading if (GGML_SYCL) add_subdirectory(sycl) endif() diff --git a/examples/pydantic_models_to_grammar_examples.py b/examples/pydantic_models_to_grammar_examples.py index f94b82ca47570..6dadb7f3fa48d 100755 --- a/examples/pydantic_models_to_grammar_examples.py +++ b/examples/pydantic_models_to_grammar_examples.py @@ -23,7 +23,7 @@ def create_completion(host, prompt, gbnf_grammar): """Calls the /completion API on llama-server. See - https://github.com/ggml-org/llama.cpp/tree/HEAD/examples/server#api-endpoints + https://github.com/ggml-org/llama.cpp/tree/HEAD/tools/server#api-endpoints """ print(f" Request:\n Grammar:\n{textwrap.indent(gbnf_grammar, ' ')}\n Prompt:\n{textwrap.indent(prompt.rstrip(), ' ')}") headers = {"Content-Type": "application/json"} diff --git a/examples/server/public/index.html.gz b/examples/server/public/index.html.gz deleted file mode 100644 index 674e227571e2d..0000000000000 Binary files a/examples/server/public/index.html.gz and /dev/null differ diff --git a/grammars/README.md b/grammars/README.md index 5aa12acc1bff3..a63198b5aeb8e 100644 --- a/grammars/README.md +++ b/grammars/README.md @@ -1,6 +1,6 @@ # GBNF Guide -GBNF (GGML BNF) is a format for defining [formal grammars](https://en.wikipedia.org/wiki/Formal_grammar) to constrain model outputs in `llama.cpp`. For example, you can use it to force the model to generate valid JSON, or speak only in emojis. GBNF grammars are supported in various ways in `examples/main` and `examples/server`. +GBNF (GGML BNF) is a format for defining [formal grammars](https://en.wikipedia.org/wiki/Formal_grammar) to constrain model outputs in `llama.cpp`. For example, you can use it to force the model to generate valid JSON, or speak only in emojis. GBNF grammars are supported in various ways in `tools/main` and `tools/server`. ## Background @@ -110,21 +110,21 @@ While semantically correct, the syntax `x? x? x?.... 
x?` (with N repetitions) ma You can use GBNF grammars: -- In [llama-server](../examples/server)'s completion endpoints, passed as the `grammar` body field -- In [llama-cli](../examples/main), passed as the `--grammar` & `--grammar-file` flags +- In [llama-server](../tools/server)'s completion endpoints, passed as the `grammar` body field +- In [llama-cli](../tools/main), passed as the `--grammar` & `--grammar-file` flags - With [test-gbnf-validator](../tests/test-gbnf-validator.cpp), to test them against strings. ## JSON Schemas → GBNF `llama.cpp` supports converting a subset of https://json-schema.org/ to GBNF grammars: -- In [llama-server](../examples/server): +- In [llama-server](../tools/server): - For any completion endpoints, passed as the `json_schema` body field - For the `/chat/completions` endpoint, passed inside the `response_format` body field (e.g. `{"type", "json_object", "schema": {"items": {}}}` or `{ type: "json_schema", json_schema: {"schema": ...} }`) -- In [llama-cli](../examples/main), passed as the `--json` / `-j` flag +- In [llama-cli](../tools/main), passed as the `--json` / `-j` flag - To convert to a grammar ahead of time: - in CLI, with [examples/json_schema_to_grammar.py](../examples/json_schema_to_grammar.py) - - in JavaScript with [json-schema-to-grammar.mjs](../examples/server/public_legacy/json-schema-to-grammar.mjs) (this is used by the [server](../examples/server)'s Web UI) + - in JavaScript with [json-schema-to-grammar.mjs](../tools/server/public_legacy/json-schema-to-grammar.mjs) (this is used by the [server](../tools/server)'s Web UI) Take a look at [tests](../tests/test-json-schema-to-grammar.cpp) to see which features are likely supported (you'll also find usage examples in https://github.com/ggml-org/llama.cpp/pull/5978, https://github.com/ggml-org/llama.cpp/pull/6659 & https://github.com/ggml-org/llama.cpp/pull/6555). 
diff --git a/pyrightconfig.json b/pyrightconfig.json index 9acbbeb78a2ed..5320fe5864a8e 100644 --- a/pyrightconfig.json +++ b/pyrightconfig.json @@ -15,7 +15,7 @@ }, { // uses match expressions in steps.py - "root": "examples/server/tests", + "root": "tools/server/tests", "pythonVersion": "3.10", }, ], diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index eba0a59f62fe3..4b6d4b7d7cc9f 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -1,6 +1,6 @@ --r ../examples/llava/requirements.txt --r ../examples/server/bench/requirements.txt --r ../examples/server/tests/requirements.txt +-r ../tools/llava/requirements.txt +-r ../tools/server/bench/requirements.txt +-r ../tools/server/tests/requirements.txt -r ./requirements-compare-llama-bench.txt -r ./requirements-pydantic.txt diff --git a/scripts/fetch_server_test_models.py b/scripts/fetch_server_test_models.py index e6775bfc5867c..ac483ef5d7dce 100755 --- a/scripts/fetch_server_test_models.py +++ b/scripts/fetch_server_test_models.py @@ -8,7 +8,7 @@ Example: python scripts/fetch_server_test_models.py - ( cd examples/server/tests && ./tests.sh -v -x -m slow ) + ( cd tools/server/tests && ./tests.sh -v -x -m slow ) ''' import ast import glob @@ -66,7 +66,7 @@ def collect_hf_model_test_parameters(test_file) -> Generator[HuggingFaceModel, N models = sorted(list(set([ model - for test_file in glob.glob('examples/server/tests/unit/test_*.py') + for test_file in glob.glob('tools/server/tests/unit/test_*.py') for model in collect_hf_model_test_parameters(test_file) ])), key=lambda m: (m.hf_repo, m.hf_file)) diff --git a/scripts/tool_bench.py b/scripts/tool_bench.py index 0f406bc42ac77..a2f2a2eb02004 100755 --- a/scripts/tool_bench.py +++ b/scripts/tool_bench.py @@ -2,7 +2,7 @@ ''' Simplistic tool call benchmarks for llama-server and ollama. 
- Essentially runs the tests at server/examples/server/tests/unit/test_tool_call.py N times, at different temperatures and on different backends (current llama-server, baseline llama-server and ollama), + Essentially runs the tests at tools/server/tests/unit/test_tool_call.py N times, at different temperatures and on different backends (current llama-server, baseline llama-server and ollama), and plots the results of multiple runs (from same .jsonl file or multiple ones) as a success rate heatmap. Simple usage example: @@ -51,8 +51,8 @@ sys.path.insert(0, Path(__file__).parent.parent.as_posix()) if True: - from examples.server.tests.utils import ServerProcess - from examples.server.tests.unit.test_tool_call import TIMEOUT_SERVER_START, do_test_calc_result, do_test_hello_world, do_test_weather + from tools.server.tests.utils import ServerProcess + from tools.server.tests.unit.test_tool_call import TIMEOUT_SERVER_START, do_test_calc_result, do_test_hello_world, do_test_weather @contextmanager diff --git a/scripts/xxd.cmake b/scripts/xxd.cmake index f5ad6ab9b1a79..14d2753808a8e 100644 --- a/scripts/xxd.cmake +++ b/scripts/xxd.cmake @@ -1,5 +1,5 @@ # CMake equivalent of `xxd -i ${INPUT} ${OUTPUT}` -# Usage: cmake -DINPUT=examples/server/public/index.html -DOUTPUT=examples/server/index.html.hpp -P scripts/xxd.cmake +# Usage: cmake -DINPUT=tools/server/public/index.html -DOUTPUT=tools/server/index.html.hpp -P scripts/xxd.cmake SET(INPUT "" CACHE STRING "Input File") SET(OUTPUT "" CACHE STRING "Output File") diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ae68275251d01..8acbe689a4b0b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -111,7 +111,7 @@ if (NOT WIN32) # TODO: disabled on loongarch64 because the ggml-ci node lacks Python 3.8 if (NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64") llama_build_and_test(test-json-schema-to-grammar.cpp WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..)
- target_include_directories(test-json-schema-to-grammar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../examples/server) + target_include_directories(test-json-schema-to-grammar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../tools/server) endif() llama_build(test-quantize-stats.cpp) diff --git a/tests/run-json-schema-to-grammar.mjs b/tests/run-json-schema-to-grammar.mjs index b20ac1d6b5f2a..450c3dde0abad 100644 --- a/tests/run-json-schema-to-grammar.mjs +++ b/tests/run-json-schema-to-grammar.mjs @@ -1,5 +1,5 @@ import { readFileSync } from "fs" -import { SchemaConverter } from "../examples/server/public_legacy/json-schema-to-grammar.mjs" +import { SchemaConverter } from "../tools/server/public_legacy/json-schema-to-grammar.mjs" const [, , file] = process.argv const url = `file://${file}` diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt new file mode 100644 index 0000000000000..119d7c50a0c41 --- /dev/null +++ b/tools/CMakeLists.txt @@ -0,0 +1,39 @@ +# dependencies + +find_package(Threads REQUIRED) + +# third-party + +# ... 
+ +# flags + +llama_add_compile_flags() + +# tools + +if (EMSCRIPTEN) +else() + add_subdirectory(batched-bench) + add_subdirectory(gguf-split) + add_subdirectory(imatrix) + add_subdirectory(llama-bench) + add_subdirectory(main) + add_subdirectory(perplexity) + add_subdirectory(quantize) + if (LLAMA_BUILD_SERVER) + add_subdirectory(server) + endif() + add_subdirectory(run) + add_subdirectory(tokenize) + add_subdirectory(tts) + if (NOT GGML_BACKEND_DL) + # these examples use the backends directly and cannot be built with dynamic loading + add_subdirectory(cvector-generator) + add_subdirectory(export-lora) + add_subdirectory(llava) + if (GGML_RPC) + add_subdirectory(rpc) + endif() + endif() +endif() diff --git a/examples/batched-bench/CMakeLists.txt b/tools/batched-bench/CMakeLists.txt similarity index 100% rename from examples/batched-bench/CMakeLists.txt rename to tools/batched-bench/CMakeLists.txt diff --git a/examples/batched-bench/README.md b/tools/batched-bench/README.md similarity index 100% rename from examples/batched-bench/README.md rename to tools/batched-bench/README.md diff --git a/examples/batched-bench/batched-bench.cpp b/tools/batched-bench/batched-bench.cpp similarity index 100% rename from examples/batched-bench/batched-bench.cpp rename to tools/batched-bench/batched-bench.cpp diff --git a/examples/cvector-generator/CMakeLists.txt b/tools/cvector-generator/CMakeLists.txt similarity index 100% rename from examples/cvector-generator/CMakeLists.txt rename to tools/cvector-generator/CMakeLists.txt diff --git a/examples/cvector-generator/README.md b/tools/cvector-generator/README.md similarity index 100% rename from examples/cvector-generator/README.md rename to tools/cvector-generator/README.md diff --git a/examples/cvector-generator/completions.txt b/tools/cvector-generator/completions.txt similarity index 100% rename from examples/cvector-generator/completions.txt rename to tools/cvector-generator/completions.txt diff --git 
a/examples/cvector-generator/cvector-generator.cpp b/tools/cvector-generator/cvector-generator.cpp similarity index 100% rename from examples/cvector-generator/cvector-generator.cpp rename to tools/cvector-generator/cvector-generator.cpp diff --git a/examples/cvector-generator/mean.hpp b/tools/cvector-generator/mean.hpp similarity index 100% rename from examples/cvector-generator/mean.hpp rename to tools/cvector-generator/mean.hpp diff --git a/examples/cvector-generator/negative.txt b/tools/cvector-generator/negative.txt similarity index 100% rename from examples/cvector-generator/negative.txt rename to tools/cvector-generator/negative.txt diff --git a/examples/cvector-generator/pca.hpp b/tools/cvector-generator/pca.hpp similarity index 100% rename from examples/cvector-generator/pca.hpp rename to tools/cvector-generator/pca.hpp diff --git a/examples/cvector-generator/positive.txt b/tools/cvector-generator/positive.txt similarity index 100% rename from examples/cvector-generator/positive.txt rename to tools/cvector-generator/positive.txt diff --git a/examples/export-lora/CMakeLists.txt b/tools/export-lora/CMakeLists.txt similarity index 100% rename from examples/export-lora/CMakeLists.txt rename to tools/export-lora/CMakeLists.txt diff --git a/examples/export-lora/README.md b/tools/export-lora/README.md similarity index 100% rename from examples/export-lora/README.md rename to tools/export-lora/README.md diff --git a/examples/export-lora/export-lora.cpp b/tools/export-lora/export-lora.cpp similarity index 100% rename from examples/export-lora/export-lora.cpp rename to tools/export-lora/export-lora.cpp diff --git a/examples/gguf-split/CMakeLists.txt b/tools/gguf-split/CMakeLists.txt similarity index 100% rename from examples/gguf-split/CMakeLists.txt rename to tools/gguf-split/CMakeLists.txt diff --git a/examples/gguf-split/README.md b/tools/gguf-split/README.md similarity index 100% rename from examples/gguf-split/README.md rename to tools/gguf-split/README.md diff 
--git a/examples/gguf-split/gguf-split.cpp b/tools/gguf-split/gguf-split.cpp similarity index 100% rename from examples/gguf-split/gguf-split.cpp rename to tools/gguf-split/gguf-split.cpp diff --git a/examples/gguf-split/tests.sh b/tools/gguf-split/tests.sh similarity index 100% rename from examples/gguf-split/tests.sh rename to tools/gguf-split/tests.sh diff --git a/examples/imatrix/CMakeLists.txt b/tools/imatrix/CMakeLists.txt similarity index 100% rename from examples/imatrix/CMakeLists.txt rename to tools/imatrix/CMakeLists.txt diff --git a/examples/imatrix/README.md b/tools/imatrix/README.md similarity index 98% rename from examples/imatrix/README.md rename to tools/imatrix/README.md index 9aa2b20347927..6d8897d98bb61 100644 --- a/examples/imatrix/README.md +++ b/tools/imatrix/README.md @@ -1,4 +1,4 @@ -# llama.cpp/examples/imatrix +# llama.cpp/tools/imatrix Compute an importance matrix for a model and given text dataset. Can be used during quantization to enhance the quality of the quantized models. More information is available here: https://github.com/ggml-org/llama.cpp/pull/4861 diff --git a/examples/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp similarity index 100% rename from examples/imatrix/imatrix.cpp rename to tools/imatrix/imatrix.cpp diff --git a/examples/llama-bench/CMakeLists.txt b/tools/llama-bench/CMakeLists.txt similarity index 100% rename from examples/llama-bench/CMakeLists.txt rename to tools/llama-bench/CMakeLists.txt diff --git a/examples/llama-bench/README.md b/tools/llama-bench/README.md similarity index 99% rename from examples/llama-bench/README.md rename to tools/llama-bench/README.md index 1f5e2f66200a6..d6fc77df880bd 100644 --- a/examples/llama-bench/README.md +++ b/tools/llama-bench/README.md @@ -1,4 +1,4 @@ -# llama.cpp/examples/llama-bench +# llama.cpp/tools/llama-bench Performance testing tool for llama.cpp. 
diff --git a/examples/llama-bench/llama-bench.cpp b/tools/llama-bench/llama-bench.cpp similarity index 100% rename from examples/llama-bench/llama-bench.cpp rename to tools/llama-bench/llama-bench.cpp diff --git a/examples/llava/CMakeLists.txt b/tools/llava/CMakeLists.txt similarity index 100% rename from examples/llava/CMakeLists.txt rename to tools/llava/CMakeLists.txt diff --git a/examples/llava/README-quantize.md b/tools/llava/README-quantize.md similarity index 100% rename from examples/llava/README-quantize.md rename to tools/llava/README-quantize.md diff --git a/examples/llava/README.md b/tools/llava/README.md similarity index 100% rename from examples/llava/README.md rename to tools/llava/README.md diff --git a/examples/llava/android/adb_run.sh b/tools/llava/android/adb_run.sh similarity index 100% rename from examples/llava/android/adb_run.sh rename to tools/llava/android/adb_run.sh diff --git a/examples/llava/android/build_64.sh b/tools/llava/android/build_64.sh similarity index 100% rename from examples/llava/android/build_64.sh rename to tools/llava/android/build_64.sh diff --git a/examples/llava/clip-impl.h b/tools/llava/clip-impl.h similarity index 100% rename from examples/llava/clip-impl.h rename to tools/llava/clip-impl.h diff --git a/examples/llava/clip-quantize-cli.cpp b/tools/llava/clip-quantize-cli.cpp similarity index 100% rename from examples/llava/clip-quantize-cli.cpp rename to tools/llava/clip-quantize-cli.cpp diff --git a/examples/llava/clip.cpp b/tools/llava/clip.cpp similarity index 100% rename from examples/llava/clip.cpp rename to tools/llava/clip.cpp diff --git a/examples/llava/clip.h b/tools/llava/clip.h similarity index 100% rename from examples/llava/clip.h rename to tools/llava/clip.h diff --git a/examples/llava/convert_image_encoder_to_gguf.py b/tools/llava/convert_image_encoder_to_gguf.py similarity index 100% rename from examples/llava/convert_image_encoder_to_gguf.py rename to tools/llava/convert_image_encoder_to_gguf.py diff 
--git a/examples/llava/deprecation-warning.cpp b/tools/llava/deprecation-warning.cpp similarity index 100% rename from examples/llava/deprecation-warning.cpp rename to tools/llava/deprecation-warning.cpp diff --git a/examples/llava/glmedge-convert-image-encoder-to-gguf.py b/tools/llava/glmedge-convert-image-encoder-to-gguf.py similarity index 100% rename from examples/llava/glmedge-convert-image-encoder-to-gguf.py rename to tools/llava/glmedge-convert-image-encoder-to-gguf.py diff --git a/examples/llava/glmedge-surgery.py b/tools/llava/glmedge-surgery.py similarity index 100% rename from examples/llava/glmedge-surgery.py rename to tools/llava/glmedge-surgery.py diff --git a/examples/llava/llava.cpp b/tools/llava/llava.cpp similarity index 100% rename from examples/llava/llava.cpp rename to tools/llava/llava.cpp diff --git a/examples/llava/llava.h b/tools/llava/llava.h similarity index 100% rename from examples/llava/llava.h rename to tools/llava/llava.h diff --git a/examples/llava/llava_surgery.py b/tools/llava/llava_surgery.py similarity index 100% rename from examples/llava/llava_surgery.py rename to tools/llava/llava_surgery.py diff --git a/examples/llava/llava_surgery_v2.py b/tools/llava/llava_surgery_v2.py similarity index 100% rename from examples/llava/llava_surgery_v2.py rename to tools/llava/llava_surgery_v2.py diff --git a/examples/llava/minicpmv-convert-image-encoder-to-gguf.py b/tools/llava/minicpmv-convert-image-encoder-to-gguf.py similarity index 100% rename from examples/llava/minicpmv-convert-image-encoder-to-gguf.py rename to tools/llava/minicpmv-convert-image-encoder-to-gguf.py diff --git a/examples/llava/minicpmv-surgery.py b/tools/llava/minicpmv-surgery.py similarity index 100% rename from examples/llava/minicpmv-surgery.py rename to tools/llava/minicpmv-surgery.py diff --git a/examples/llava/mtmd-cli.cpp b/tools/llava/mtmd-cli.cpp similarity index 100% rename from examples/llava/mtmd-cli.cpp rename to tools/llava/mtmd-cli.cpp diff --git 
a/examples/llava/mtmd.cpp b/tools/llava/mtmd.cpp similarity index 100% rename from examples/llava/mtmd.cpp rename to tools/llava/mtmd.cpp diff --git a/examples/llava/mtmd.h b/tools/llava/mtmd.h similarity index 100% rename from examples/llava/mtmd.h rename to tools/llava/mtmd.h diff --git a/examples/llava/qwen2vl-test.cpp b/tools/llava/qwen2vl-test.cpp similarity index 100% rename from examples/llava/qwen2vl-test.cpp rename to tools/llava/qwen2vl-test.cpp diff --git a/examples/llava/requirements.txt b/tools/llava/requirements.txt similarity index 100% rename from examples/llava/requirements.txt rename to tools/llava/requirements.txt diff --git a/examples/llava/test-1.jpeg b/tools/llava/test-1.jpeg similarity index 100% rename from examples/llava/test-1.jpeg rename to tools/llava/test-1.jpeg diff --git a/examples/llava/tests.sh b/tools/llava/tests.sh similarity index 100% rename from examples/llava/tests.sh rename to tools/llava/tests.sh diff --git a/examples/main/CMakeLists.txt b/tools/main/CMakeLists.txt similarity index 100% rename from examples/main/CMakeLists.txt rename to tools/main/CMakeLists.txt diff --git a/examples/main/README.md b/tools/main/README.md similarity index 99% rename from examples/main/README.md rename to tools/main/README.md index e4b3590b5d15e..4f16ad6b2b10e 100644 --- a/examples/main/README.md +++ b/tools/main/README.md @@ -1,4 +1,4 @@ -# llama.cpp/examples/main +# llama.cpp/tools/main This example program allows you to use various LLaMA language models easily and efficiently. It is specifically designed to work with the [llama.cpp](https://github.com/ggml-org/llama.cpp) project, which provides a plain C/C++ implementation with optional 4-bit quantization support for faster, lower memory inference, and is optimized for desktop CPUs. This program can be used to perform various inference tasks with LLaMA models, including generating text based on user-provided prompts and chat-like interactions with reverse prompts. 
diff --git a/examples/main/main.cpp b/tools/main/main.cpp similarity index 100% rename from examples/main/main.cpp rename to tools/main/main.cpp diff --git a/examples/perplexity/CMakeLists.txt b/tools/perplexity/CMakeLists.txt similarity index 100% rename from examples/perplexity/CMakeLists.txt rename to tools/perplexity/CMakeLists.txt diff --git a/examples/perplexity/README.md b/tools/perplexity/README.md similarity index 100% rename from examples/perplexity/README.md rename to tools/perplexity/README.md diff --git a/examples/perplexity/perplexity.cpp b/tools/perplexity/perplexity.cpp similarity index 100% rename from examples/perplexity/perplexity.cpp rename to tools/perplexity/perplexity.cpp diff --git a/examples/quantize/CMakeLists.txt b/tools/quantize/CMakeLists.txt similarity index 100% rename from examples/quantize/CMakeLists.txt rename to tools/quantize/CMakeLists.txt diff --git a/examples/quantize/README.md b/tools/quantize/README.md similarity index 100% rename from examples/quantize/README.md rename to tools/quantize/README.md diff --git a/examples/quantize/quantize.cpp b/tools/quantize/quantize.cpp similarity index 100% rename from examples/quantize/quantize.cpp rename to tools/quantize/quantize.cpp diff --git a/examples/quantize/tests.sh b/tools/quantize/tests.sh similarity index 100% rename from examples/quantize/tests.sh rename to tools/quantize/tests.sh diff --git a/examples/rpc/CMakeLists.txt b/tools/rpc/CMakeLists.txt similarity index 100% rename from examples/rpc/CMakeLists.txt rename to tools/rpc/CMakeLists.txt diff --git a/examples/rpc/README.md b/tools/rpc/README.md similarity index 100% rename from examples/rpc/README.md rename to tools/rpc/README.md diff --git a/examples/rpc/rpc-server.cpp b/tools/rpc/rpc-server.cpp similarity index 100% rename from examples/rpc/rpc-server.cpp rename to tools/rpc/rpc-server.cpp diff --git a/examples/run/CMakeLists.txt b/tools/run/CMakeLists.txt similarity index 100% rename from examples/run/CMakeLists.txt 
rename to tools/run/CMakeLists.txt diff --git a/examples/run/README.md b/tools/run/README.md similarity index 100% rename from examples/run/README.md rename to tools/run/README.md diff --git a/examples/run/linenoise.cpp/linenoise.cpp b/tools/run/linenoise.cpp/linenoise.cpp similarity index 100% rename from examples/run/linenoise.cpp/linenoise.cpp rename to tools/run/linenoise.cpp/linenoise.cpp diff --git a/examples/run/linenoise.cpp/linenoise.h b/tools/run/linenoise.cpp/linenoise.h similarity index 100% rename from examples/run/linenoise.cpp/linenoise.h rename to tools/run/linenoise.cpp/linenoise.h diff --git a/examples/run/run.cpp b/tools/run/run.cpp similarity index 100% rename from examples/run/run.cpp rename to tools/run/run.cpp diff --git a/examples/server/CMakeLists.txt b/tools/server/CMakeLists.txt similarity index 100% rename from examples/server/CMakeLists.txt rename to tools/server/CMakeLists.txt diff --git a/examples/server/README.md b/tools/server/README.md similarity index 99% rename from examples/server/README.md rename to tools/server/README.md index 61446a0ba2a07..0ec786ea76f7a 100644 --- a/examples/server/README.md +++ b/tools/server/README.md @@ -232,7 +232,7 @@ To build or to run the dev server (with hot reload): ```sh # make sure you have nodejs installed -cd examples/server/webui +cd tools/server/webui npm i # to run the dev server @@ -242,7 +242,7 @@ npm run dev npm run build ``` After `public/index.html.gz` has been generated we need to generate the c++ -headers (like build/examples/server/index.html.gz.hpp) that will be included +headers (like build/tools/server/index.html.gz.hpp) that will be included by server.cpp. This is done by building `llama-server` as described in the [build](#build) section above. 
@@ -1228,12 +1228,12 @@ Apart from error types supported by OAI, we also have custom types that are spec ### Legacy completion web UI -A new chat-based UI has replaced the old completion-based since [this PR](https://github.com/ggml-org/llama.cpp/pull/10175). If you want to use the old completion, start the server with `--path ./examples/server/public_legacy` +A new chat-based UI has replaced the old completion-based since [this PR](https://github.com/ggml-org/llama.cpp/pull/10175). If you want to use the old completion, start the server with `--path ./tools/server/public_legacy` For example: ```sh -./llama-server -m my_model.gguf -c 8192 --path ./examples/server/public_legacy +./llama-server -m my_model.gguf -c 8192 --path ./tools/server/public_legacy ``` ### Extending or building alternative Web Front End diff --git a/examples/server/bench/README.md b/tools/server/bench/README.md similarity index 100% rename from examples/server/bench/README.md rename to tools/server/bench/README.md diff --git a/examples/server/bench/bench.py b/tools/server/bench/bench.py similarity index 100% rename from examples/server/bench/bench.py rename to tools/server/bench/bench.py diff --git a/examples/server/bench/prometheus.yml b/tools/server/bench/prometheus.yml similarity index 100% rename from examples/server/bench/prometheus.yml rename to tools/server/bench/prometheus.yml diff --git a/examples/server/bench/requirements.txt b/tools/server/bench/requirements.txt similarity index 100% rename from examples/server/bench/requirements.txt rename to tools/server/bench/requirements.txt diff --git a/examples/server/bench/script.js b/tools/server/bench/script.js similarity index 100% rename from examples/server/bench/script.js rename to tools/server/bench/script.js diff --git a/examples/server/chat-llama2.sh b/tools/server/chat-llama2.sh similarity index 100% rename from examples/server/chat-llama2.sh rename to tools/server/chat-llama2.sh diff --git a/examples/server/chat.mjs 
b/tools/server/chat.mjs similarity index 100% rename from examples/server/chat.mjs rename to tools/server/chat.mjs diff --git a/examples/server/chat.sh b/tools/server/chat.sh similarity index 100% rename from examples/server/chat.sh rename to tools/server/chat.sh diff --git a/examples/server/httplib.h b/tools/server/httplib.h similarity index 100% rename from examples/server/httplib.h rename to tools/server/httplib.h diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz new file mode 100644 index 0000000000000..6f760c39f44c0 Binary files /dev/null and b/tools/server/public/index.html.gz differ diff --git a/examples/server/public/loading.html b/tools/server/public/loading.html similarity index 100% rename from examples/server/public/loading.html rename to tools/server/public/loading.html diff --git a/examples/server/public_legacy/colorthemes.css b/tools/server/public_legacy/colorthemes.css similarity index 100% rename from examples/server/public_legacy/colorthemes.css rename to tools/server/public_legacy/colorthemes.css diff --git a/examples/server/public_legacy/completion.js b/tools/server/public_legacy/completion.js similarity index 100% rename from examples/server/public_legacy/completion.js rename to tools/server/public_legacy/completion.js diff --git a/examples/server/public_legacy/favicon.ico b/tools/server/public_legacy/favicon.ico similarity index 100% rename from examples/server/public_legacy/favicon.ico rename to tools/server/public_legacy/favicon.ico diff --git a/examples/server/public_legacy/index-new.html b/tools/server/public_legacy/index-new.html similarity index 100% rename from examples/server/public_legacy/index-new.html rename to tools/server/public_legacy/index-new.html diff --git a/examples/server/public_legacy/index.html b/tools/server/public_legacy/index.html similarity index 100% rename from examples/server/public_legacy/index.html rename to tools/server/public_legacy/index.html diff --git 
a/examples/server/public_legacy/index.js b/tools/server/public_legacy/index.js similarity index 100% rename from examples/server/public_legacy/index.js rename to tools/server/public_legacy/index.js diff --git a/examples/server/public_legacy/json-schema-to-grammar.mjs b/tools/server/public_legacy/json-schema-to-grammar.mjs similarity index 100% rename from examples/server/public_legacy/json-schema-to-grammar.mjs rename to tools/server/public_legacy/json-schema-to-grammar.mjs diff --git a/examples/server/public_legacy/loading.html b/tools/server/public_legacy/loading.html similarity index 100% rename from examples/server/public_legacy/loading.html rename to tools/server/public_legacy/loading.html diff --git a/examples/server/public_legacy/prompt-formats.js b/tools/server/public_legacy/prompt-formats.js similarity index 100% rename from examples/server/public_legacy/prompt-formats.js rename to tools/server/public_legacy/prompt-formats.js diff --git a/examples/server/public_legacy/style.css b/tools/server/public_legacy/style.css similarity index 100% rename from examples/server/public_legacy/style.css rename to tools/server/public_legacy/style.css diff --git a/examples/server/public_legacy/system-prompts.js b/tools/server/public_legacy/system-prompts.js similarity index 100% rename from examples/server/public_legacy/system-prompts.js rename to tools/server/public_legacy/system-prompts.js diff --git a/examples/server/public_legacy/theme-beeninorder.css b/tools/server/public_legacy/theme-beeninorder.css similarity index 100% rename from examples/server/public_legacy/theme-beeninorder.css rename to tools/server/public_legacy/theme-beeninorder.css diff --git a/examples/server/public_legacy/theme-ketivah.css b/tools/server/public_legacy/theme-ketivah.css similarity index 100% rename from examples/server/public_legacy/theme-ketivah.css rename to tools/server/public_legacy/theme-ketivah.css diff --git a/examples/server/public_legacy/theme-mangotango.css 
b/tools/server/public_legacy/theme-mangotango.css similarity index 100% rename from examples/server/public_legacy/theme-mangotango.css rename to tools/server/public_legacy/theme-mangotango.css diff --git a/examples/server/public_legacy/theme-playground.css b/tools/server/public_legacy/theme-playground.css similarity index 100% rename from examples/server/public_legacy/theme-playground.css rename to tools/server/public_legacy/theme-playground.css diff --git a/examples/server/public_legacy/theme-polarnight.css b/tools/server/public_legacy/theme-polarnight.css similarity index 100% rename from examples/server/public_legacy/theme-polarnight.css rename to tools/server/public_legacy/theme-polarnight.css diff --git a/examples/server/public_legacy/theme-snowstorm.css b/tools/server/public_legacy/theme-snowstorm.css similarity index 100% rename from examples/server/public_legacy/theme-snowstorm.css rename to tools/server/public_legacy/theme-snowstorm.css diff --git a/examples/server/public_simplechat/datautils.mjs b/tools/server/public_simplechat/datautils.mjs similarity index 100% rename from examples/server/public_simplechat/datautils.mjs rename to tools/server/public_simplechat/datautils.mjs diff --git a/examples/server/public_simplechat/index.html b/tools/server/public_simplechat/index.html similarity index 100% rename from examples/server/public_simplechat/index.html rename to tools/server/public_simplechat/index.html diff --git a/examples/server/public_simplechat/readme.md b/tools/server/public_simplechat/readme.md similarity index 97% rename from examples/server/public_simplechat/readme.md rename to tools/server/public_simplechat/readme.md index 21410199f6016..24e026d455b03 100644 --- a/examples/server/public_simplechat/readme.md +++ b/tools/server/public_simplechat/readme.md @@ -7,7 +7,7 @@ by Humans for All. 
To run from the build dir -bin/llama-server -m path/model.gguf --path ../examples/server/public_simplechat +bin/llama-server -m path/model.gguf --path ../tools/server/public_simplechat Continue reading for the details. @@ -51,17 +51,17 @@ One could run this web frontend directly using server itself or if anyone is thi frontend to configure the server over http(s) or so, then run this web frontend using something like python's http module. -### running using examples/server +### running using tools/server -./llama-server -m path/model.gguf --path examples/server/public_simplechat [--port PORT] +./llama-server -m path/model.gguf --path tools/server/public_simplechat [--port PORT] ### running using python3's server module -first run examples/server +first run tools/server * ./llama-server -m path/model.gguf -next run this web front end in examples/server/public_simplechat -* cd ../examples/server/public_simplechat +next run this web front end in tools/server/public_simplechat +* cd ../tools/server/public_simplechat * python3 -m http.server PORT ### using the front end @@ -248,7 +248,7 @@ Set max_tokens to 1024, so that a relatively large previous reponse doesnt eat u available wrt next query-response. However dont forget that the server when started should also be started with a model context size of 1k or more, to be on safe side. - The /completions endpoint of examples/server doesnt take max_tokens, instead it takes the + The /completions endpoint of tools/server doesnt take max_tokens, instead it takes the internal n_predict, for now add the same here on the client side, maybe later add max_tokens to /completions endpoint handling code on server side. 
diff --git a/examples/server/public_simplechat/simplechat.css b/tools/server/public_simplechat/simplechat.css similarity index 100% rename from examples/server/public_simplechat/simplechat.css rename to tools/server/public_simplechat/simplechat.css diff --git a/examples/server/public_simplechat/simplechat.js b/tools/server/public_simplechat/simplechat.js similarity index 100% rename from examples/server/public_simplechat/simplechat.js rename to tools/server/public_simplechat/simplechat.js diff --git a/examples/server/public_simplechat/simplechat_screens.webp b/tools/server/public_simplechat/simplechat_screens.webp similarity index 100% rename from examples/server/public_simplechat/simplechat_screens.webp rename to tools/server/public_simplechat/simplechat_screens.webp diff --git a/examples/server/public_simplechat/ui.mjs b/tools/server/public_simplechat/ui.mjs similarity index 100% rename from examples/server/public_simplechat/ui.mjs rename to tools/server/public_simplechat/ui.mjs diff --git a/examples/server/server.cpp b/tools/server/server.cpp similarity index 100% rename from examples/server/server.cpp rename to tools/server/server.cpp diff --git a/examples/server/tests/.gitignore b/tools/server/tests/.gitignore similarity index 100% rename from examples/server/tests/.gitignore rename to tools/server/tests/.gitignore diff --git a/examples/server/tests/README.md b/tools/server/tests/README.md similarity index 96% rename from examples/server/tests/README.md rename to tools/server/tests/README.md index 652dea0382ce1..cb87db035e2d6 100644 --- a/examples/server/tests/README.md +++ b/tools/server/tests/README.md @@ -60,7 +60,7 @@ To run a single test: Hint: You can compile and run test in single command, useful for local developement: ```shell -cmake --build build -j --target llama-server && ./examples/server/tests/tests.sh +cmake --build build -j --target llama-server && ./tools/server/tests/tests.sh ``` To see all available arguments, please refer to [pytest 
documentation](https://docs.pytest.org/en/stable/how-to/usage.html) diff --git a/examples/server/tests/conftest.py b/tools/server/tests/conftest.py similarity index 100% rename from examples/server/tests/conftest.py rename to tools/server/tests/conftest.py diff --git a/examples/server/tests/pytest.ini b/tools/server/tests/pytest.ini similarity index 100% rename from examples/server/tests/pytest.ini rename to tools/server/tests/pytest.ini diff --git a/examples/server/tests/requirements.txt b/tools/server/tests/requirements.txt similarity index 100% rename from examples/server/tests/requirements.txt rename to tools/server/tests/requirements.txt diff --git a/examples/server/tests/tests.sh b/tools/server/tests/tests.sh similarity index 100% rename from examples/server/tests/tests.sh rename to tools/server/tests/tests.sh diff --git a/examples/server/tests/unit/test_basic.py b/tools/server/tests/unit/test_basic.py similarity index 100% rename from examples/server/tests/unit/test_basic.py rename to tools/server/tests/unit/test_basic.py diff --git a/examples/server/tests/unit/test_chat_completion.py b/tools/server/tests/unit/test_chat_completion.py similarity index 100% rename from examples/server/tests/unit/test_chat_completion.py rename to tools/server/tests/unit/test_chat_completion.py diff --git a/examples/server/tests/unit/test_completion.py b/tools/server/tests/unit/test_completion.py similarity index 100% rename from examples/server/tests/unit/test_completion.py rename to tools/server/tests/unit/test_completion.py diff --git a/examples/server/tests/unit/test_ctx_shift.py b/tools/server/tests/unit/test_ctx_shift.py similarity index 100% rename from examples/server/tests/unit/test_ctx_shift.py rename to tools/server/tests/unit/test_ctx_shift.py diff --git a/examples/server/tests/unit/test_embedding.py b/tools/server/tests/unit/test_embedding.py similarity index 100% rename from examples/server/tests/unit/test_embedding.py rename to 
tools/server/tests/unit/test_embedding.py diff --git a/examples/server/tests/unit/test_infill.py b/tools/server/tests/unit/test_infill.py similarity index 100% rename from examples/server/tests/unit/test_infill.py rename to tools/server/tests/unit/test_infill.py diff --git a/examples/server/tests/unit/test_lora.py b/tools/server/tests/unit/test_lora.py similarity index 100% rename from examples/server/tests/unit/test_lora.py rename to tools/server/tests/unit/test_lora.py diff --git a/examples/server/tests/unit/test_rerank.py b/tools/server/tests/unit/test_rerank.py similarity index 100% rename from examples/server/tests/unit/test_rerank.py rename to tools/server/tests/unit/test_rerank.py diff --git a/examples/server/tests/unit/test_security.py b/tools/server/tests/unit/test_security.py similarity index 100% rename from examples/server/tests/unit/test_security.py rename to tools/server/tests/unit/test_security.py diff --git a/examples/server/tests/unit/test_slot_save.py b/tools/server/tests/unit/test_slot_save.py similarity index 100% rename from examples/server/tests/unit/test_slot_save.py rename to tools/server/tests/unit/test_slot_save.py diff --git a/examples/server/tests/unit/test_speculative.py b/tools/server/tests/unit/test_speculative.py similarity index 100% rename from examples/server/tests/unit/test_speculative.py rename to tools/server/tests/unit/test_speculative.py diff --git a/examples/server/tests/unit/test_tokenize.py b/tools/server/tests/unit/test_tokenize.py similarity index 100% rename from examples/server/tests/unit/test_tokenize.py rename to tools/server/tests/unit/test_tokenize.py diff --git a/examples/server/tests/unit/test_tool_call.py b/tools/server/tests/unit/test_tool_call.py similarity index 100% rename from examples/server/tests/unit/test_tool_call.py rename to tools/server/tests/unit/test_tool_call.py diff --git a/examples/server/tests/utils.py b/tools/server/tests/utils.py similarity index 100% rename from 
examples/server/tests/utils.py rename to tools/server/tests/utils.py diff --git a/examples/server/themes/README.md b/tools/server/themes/README.md similarity index 100% rename from examples/server/themes/README.md rename to tools/server/themes/README.md diff --git a/examples/server/themes/buttons-top/README.md b/tools/server/themes/buttons-top/README.md similarity index 100% rename from examples/server/themes/buttons-top/README.md rename to tools/server/themes/buttons-top/README.md diff --git a/examples/server/themes/buttons-top/buttons_top.png b/tools/server/themes/buttons-top/buttons_top.png similarity index 100% rename from examples/server/themes/buttons-top/buttons_top.png rename to tools/server/themes/buttons-top/buttons_top.png diff --git a/examples/server/themes/buttons-top/favicon.ico b/tools/server/themes/buttons-top/favicon.ico similarity index 100% rename from examples/server/themes/buttons-top/favicon.ico rename to tools/server/themes/buttons-top/favicon.ico diff --git a/examples/server/themes/buttons-top/index.html b/tools/server/themes/buttons-top/index.html similarity index 100% rename from examples/server/themes/buttons-top/index.html rename to tools/server/themes/buttons-top/index.html diff --git a/examples/server/themes/wild/README.md b/tools/server/themes/wild/README.md similarity index 100% rename from examples/server/themes/wild/README.md rename to tools/server/themes/wild/README.md diff --git a/examples/server/themes/wild/favicon.ico b/tools/server/themes/wild/favicon.ico similarity index 100% rename from examples/server/themes/wild/favicon.ico rename to tools/server/themes/wild/favicon.ico diff --git a/examples/server/themes/wild/index.html b/tools/server/themes/wild/index.html similarity index 100% rename from examples/server/themes/wild/index.html rename to tools/server/themes/wild/index.html diff --git a/examples/server/themes/wild/llama_cpp.png b/tools/server/themes/wild/llama_cpp.png similarity index 100% rename from 
examples/server/themes/wild/llama_cpp.png rename to tools/server/themes/wild/llama_cpp.png diff --git a/examples/server/themes/wild/llamapattern.png b/tools/server/themes/wild/llamapattern.png similarity index 100% rename from examples/server/themes/wild/llamapattern.png rename to tools/server/themes/wild/llamapattern.png diff --git a/examples/server/themes/wild/wild.png b/tools/server/themes/wild/wild.png similarity index 100% rename from examples/server/themes/wild/wild.png rename to tools/server/themes/wild/wild.png diff --git a/examples/server/utils.hpp b/tools/server/utils.hpp similarity index 100% rename from examples/server/utils.hpp rename to tools/server/utils.hpp diff --git a/examples/server/webui/.gitignore b/tools/server/webui/.gitignore similarity index 100% rename from examples/server/webui/.gitignore rename to tools/server/webui/.gitignore diff --git a/examples/server/webui/.prettierignore b/tools/server/webui/.prettierignore similarity index 100% rename from examples/server/webui/.prettierignore rename to tools/server/webui/.prettierignore diff --git a/examples/server/webui/eslint.config.js b/tools/server/webui/eslint.config.js similarity index 100% rename from examples/server/webui/eslint.config.js rename to tools/server/webui/eslint.config.js diff --git a/examples/server/webui/index.html b/tools/server/webui/index.html similarity index 100% rename from examples/server/webui/index.html rename to tools/server/webui/index.html diff --git a/examples/server/webui/package-lock.json b/tools/server/webui/package-lock.json similarity index 100% rename from examples/server/webui/package-lock.json rename to tools/server/webui/package-lock.json diff --git a/examples/server/webui/package.json b/tools/server/webui/package.json similarity index 100% rename from examples/server/webui/package.json rename to tools/server/webui/package.json diff --git a/examples/server/webui/postcss.config.js b/tools/server/webui/postcss.config.js similarity index 100% rename from 
examples/server/webui/postcss.config.js rename to tools/server/webui/postcss.config.js diff --git a/examples/server/webui/public/demo-conversation.json b/tools/server/webui/public/demo-conversation.json similarity index 100% rename from examples/server/webui/public/demo-conversation.json rename to tools/server/webui/public/demo-conversation.json diff --git a/examples/server/webui/src/App.tsx b/tools/server/webui/src/App.tsx similarity index 100% rename from examples/server/webui/src/App.tsx rename to tools/server/webui/src/App.tsx diff --git a/examples/server/webui/src/Config.ts b/tools/server/webui/src/Config.ts similarity index 100% rename from examples/server/webui/src/Config.ts rename to tools/server/webui/src/Config.ts diff --git a/examples/server/webui/src/components/CanvasPyInterpreter.tsx b/tools/server/webui/src/components/CanvasPyInterpreter.tsx similarity index 100% rename from examples/server/webui/src/components/CanvasPyInterpreter.tsx rename to tools/server/webui/src/components/CanvasPyInterpreter.tsx diff --git a/examples/server/webui/src/components/ChatMessage.tsx b/tools/server/webui/src/components/ChatMessage.tsx similarity index 100% rename from examples/server/webui/src/components/ChatMessage.tsx rename to tools/server/webui/src/components/ChatMessage.tsx diff --git a/examples/server/webui/src/components/ChatScreen.tsx b/tools/server/webui/src/components/ChatScreen.tsx similarity index 100% rename from examples/server/webui/src/components/ChatScreen.tsx rename to tools/server/webui/src/components/ChatScreen.tsx diff --git a/examples/server/webui/src/components/Header.tsx b/tools/server/webui/src/components/Header.tsx similarity index 100% rename from examples/server/webui/src/components/Header.tsx rename to tools/server/webui/src/components/Header.tsx diff --git a/examples/server/webui/src/components/MarkdownDisplay.tsx b/tools/server/webui/src/components/MarkdownDisplay.tsx similarity index 100% rename from 
examples/server/webui/src/components/MarkdownDisplay.tsx rename to tools/server/webui/src/components/MarkdownDisplay.tsx diff --git a/examples/server/webui/src/components/SettingDialog.tsx b/tools/server/webui/src/components/SettingDialog.tsx similarity index 99% rename from examples/server/webui/src/components/SettingDialog.tsx rename to tools/server/webui/src/components/SettingDialog.tsx index b65e73ae16926..b0044d25403b5 100644 --- a/examples/server/webui/src/components/SettingDialog.tsx +++ b/tools/server/webui/src/components/SettingDialog.tsx @@ -196,7 +196,7 @@ const SETTING_SECTIONS: SettingSection[] = [ label: ( <> Custom JSON config (For more info, refer to{' '} - + server documentation ) diff --git a/examples/server/webui/src/components/Sidebar.tsx b/tools/server/webui/src/components/Sidebar.tsx similarity index 100% rename from examples/server/webui/src/components/Sidebar.tsx rename to tools/server/webui/src/components/Sidebar.tsx diff --git a/examples/server/webui/src/components/useChatTextarea.ts b/tools/server/webui/src/components/useChatTextarea.ts similarity index 100% rename from examples/server/webui/src/components/useChatTextarea.ts rename to tools/server/webui/src/components/useChatTextarea.ts diff --git a/examples/server/webui/src/index.scss b/tools/server/webui/src/index.scss similarity index 100% rename from examples/server/webui/src/index.scss rename to tools/server/webui/src/index.scss diff --git a/examples/server/webui/src/main.tsx b/tools/server/webui/src/main.tsx similarity index 100% rename from examples/server/webui/src/main.tsx rename to tools/server/webui/src/main.tsx diff --git a/examples/server/webui/src/utils/app.context.tsx b/tools/server/webui/src/utils/app.context.tsx similarity index 100% rename from examples/server/webui/src/utils/app.context.tsx rename to tools/server/webui/src/utils/app.context.tsx diff --git a/examples/server/webui/src/utils/common.tsx b/tools/server/webui/src/utils/common.tsx similarity index 100% rename 
from examples/server/webui/src/utils/common.tsx rename to tools/server/webui/src/utils/common.tsx diff --git a/examples/server/webui/src/utils/llama-vscode.ts b/tools/server/webui/src/utils/llama-vscode.ts similarity index 100% rename from examples/server/webui/src/utils/llama-vscode.ts rename to tools/server/webui/src/utils/llama-vscode.ts diff --git a/examples/server/webui/src/utils/misc.ts b/tools/server/webui/src/utils/misc.ts similarity index 100% rename from examples/server/webui/src/utils/misc.ts rename to tools/server/webui/src/utils/misc.ts diff --git a/examples/server/webui/src/utils/storage.ts b/tools/server/webui/src/utils/storage.ts similarity index 100% rename from examples/server/webui/src/utils/storage.ts rename to tools/server/webui/src/utils/storage.ts diff --git a/examples/server/webui/src/utils/types.ts b/tools/server/webui/src/utils/types.ts similarity index 100% rename from examples/server/webui/src/utils/types.ts rename to tools/server/webui/src/utils/types.ts diff --git a/examples/server/webui/src/vite-env.d.ts b/tools/server/webui/src/vite-env.d.ts similarity index 100% rename from examples/server/webui/src/vite-env.d.ts rename to tools/server/webui/src/vite-env.d.ts diff --git a/examples/server/webui/tailwind.config.js b/tools/server/webui/tailwind.config.js similarity index 100% rename from examples/server/webui/tailwind.config.js rename to tools/server/webui/tailwind.config.js diff --git a/examples/server/webui/tsconfig.app.json b/tools/server/webui/tsconfig.app.json similarity index 100% rename from examples/server/webui/tsconfig.app.json rename to tools/server/webui/tsconfig.app.json diff --git a/examples/server/webui/tsconfig.json b/tools/server/webui/tsconfig.json similarity index 100% rename from examples/server/webui/tsconfig.json rename to tools/server/webui/tsconfig.json diff --git a/examples/server/webui/tsconfig.node.json b/tools/server/webui/tsconfig.node.json similarity index 100% rename from 
examples/server/webui/tsconfig.node.json rename to tools/server/webui/tsconfig.node.json diff --git a/examples/server/webui/vite.config.ts b/tools/server/webui/vite.config.ts similarity index 100% rename from examples/server/webui/vite.config.ts rename to tools/server/webui/vite.config.ts diff --git a/examples/tokenize/CMakeLists.txt b/tools/tokenize/CMakeLists.txt similarity index 100% rename from examples/tokenize/CMakeLists.txt rename to tools/tokenize/CMakeLists.txt diff --git a/examples/tokenize/tokenize.cpp b/tools/tokenize/tokenize.cpp similarity index 100% rename from examples/tokenize/tokenize.cpp rename to tools/tokenize/tokenize.cpp diff --git a/examples/tts/CMakeLists.txt b/tools/tts/CMakeLists.txt similarity index 100% rename from examples/tts/CMakeLists.txt rename to tools/tts/CMakeLists.txt diff --git a/examples/tts/README.md b/tools/tts/README.md similarity index 96% rename from examples/tts/README.md rename to tools/tts/README.md index 4509763c65019..557014aebb98a 100644 --- a/examples/tts/README.md +++ b/tools/tts/README.md @@ -45,7 +45,7 @@ $ popd This model file is PyTorch checkpoint (.ckpt) and we first need to convert it to huggingface format: ```console -(venv) python examples/tts/convert_pt_to_hf.py \ +(venv) python tools/tts/convert_pt_to_hf.py \ models/WavTokenizer-large-speech-75token/wavtokenizer_large_speech_320_24k.ckpt ... Model has been successfully converted and saved to models/WavTokenizer-large-speech-75token/model.safetensors @@ -105,7 +105,7 @@ $ source venv/bin/activate And then run the python script using: ```conole -(venv) python ./examples/tts/tts-outetts.py http://localhost:8020 http://localhost:8021 "Hello world" +(venv) python ./tools/tts/tts-outetts.py http://localhost:8020 http://localhost:8021 "Hello world" spectrogram generated: n_codes: 90, n_embd: 1282 converting to audio ... 
audio generated: 28800 samples diff --git a/examples/tts/convert_pt_to_hf.py b/tools/tts/convert_pt_to_hf.py similarity index 100% rename from examples/tts/convert_pt_to_hf.py rename to tools/tts/convert_pt_to_hf.py diff --git a/examples/tts/tts-outetts.py b/tools/tts/tts-outetts.py similarity index 100% rename from examples/tts/tts-outetts.py rename to tools/tts/tts-outetts.py diff --git a/examples/tts/tts.cpp b/tools/tts/tts.cpp similarity index 100% rename from examples/tts/tts.cpp rename to tools/tts/tts.cpp