Skip to content

llama : move end-user examples to tools directory #13249

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
May 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,23 @@ indent_style = tab
[prompts/*.txt]
insert_final_newline = unset

[examples/server/public/*]
[tools/server/public/*]
indent_size = 2

[examples/server/public/deps_*]
[tools/server/public/deps_*]
trim_trailing_whitespace = unset
indent_style = unset
indent_size = unset

[examples/server/deps_*]
[tools/server/deps_*]
trim_trailing_whitespace = unset
indent_style = unset
indent_size = unset

[examples/llama.swiftui/llama.swiftui.xcodeproj/*]
indent_style = tab

[examples/cvector-generator/*.txt]
[tools/cvector-generator/*.txt]
trim_trailing_whitespace = unset
insert_final_newline = unset

Expand Down
3 changes: 2 additions & 1 deletion .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
max-line-length = 125
ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
exclude =
# Do not traverse examples
# Do not traverse examples and tools
examples,
tools,
# Do not include package initializers
__init__.py,
# No need to traverse our git directory
Expand Down
6 changes: 4 additions & 2 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ build:
- CMakePresets.json
examples:
- changed-files:
- any-glob-to-any-file: examples/**
- any-glob-to-any-file:
- examples/**
- tools/**
devops:
- changed-files:
- any-glob-to-any-file:
Expand All @@ -70,7 +72,7 @@ android:
server:
- changed-files:
- any-glob-to-any-file:
- examples/server/**
- tools/server/**
ggml:
- changed-files:
- any-glob-to-any-file:
Expand Down
30 changes: 15 additions & 15 deletions .github/workflows/bench.yml.disabled
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ on:
push:
branches:
- master
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp']
pull_request_target:
types: [opened, synchronize, reopened]
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'tools/server/*.h*', 'tools/server/*.cpp']
schedule:
- cron: '04 2 * * *'

Expand Down Expand Up @@ -69,7 +69,7 @@ jobs:
- name: Install python env
id: pipenv
run: |
cd examples/server/bench
cd tools/server/bench
python3 -m venv venv
source venv/bin/activate
pip install -r requirements.txt
Expand All @@ -79,7 +79,7 @@ jobs:
run: |
wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
tar xzf prometheus*.tar.gz --strip-components=1
./prometheus --config.file=examples/server/bench/prometheus.yml &
./prometheus --config.file=tools/server/bench/prometheus.yml &
while ! nc -z localhost 9090; do
sleep 0.1
done
Expand All @@ -92,7 +92,7 @@ jobs:
- name: Install k6 and xk6-sse
id: k6_installation
run: |
cd examples/server/bench
cd tools/server/bench
go install go.k6.io/xk6/cmd/xk6@latest
xk6 build master \
--with github.com/phymbert/xk6-sse
Expand All @@ -116,7 +116,7 @@ jobs:
- name: Download the dataset
id: download_dataset
run: |
cd examples/server/bench
cd tools/server/bench
wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

- name: Server bench
Expand All @@ -126,7 +126,7 @@ jobs:
run: |
set -eux

cd examples/server/bench
cd tools/server/bench
source venv/bin/activate
python bench.py \
--runner-label ${{ env.RUNNER_LABEL }} \
Expand Down Expand Up @@ -157,9 +157,9 @@ jobs:
name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
compression-level: 9
path: |
examples/server/bench/*.jpg
examples/server/bench/*.json
examples/server/bench/*.log
tools/server/bench/*.jpg
tools/server/bench/*.json
tools/server/bench/*.log

- name: Commit status
uses: Sibz/github-status-action@v1
Expand All @@ -178,17 +178,17 @@ jobs:
with:
client_id: ${{secrets.IMGUR_CLIENT_ID}}
path: |
examples/server/bench/prompt_tokens_seconds.jpg
examples/server/bench/predicted_tokens_seconds.jpg
examples/server/bench/kv_cache_usage_ratio.jpg
examples/server/bench/requests_processing.jpg
tools/server/bench/prompt_tokens_seconds.jpg
tools/server/bench/predicted_tokens_seconds.jpg
tools/server/bench/kv_cache_usage_ratio.jpg
tools/server/bench/requests_processing.jpg

- name: Extract mermaid
id: set_mermaid
run: |
set -eux

cd examples/server/bench
cd tools/server/bench
PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/build-linux-cross.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ jobs:
cmake -B build -DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
-DLLAMA_BUILD_TOOLS=ON \
-DLLAMA_BUILD_TESTS=OFF \
-DCMAKE_SYSTEM_NAME=Linux \
-DCMAKE_SYSTEM_PROCESSOR=riscv64 \
Expand Down Expand Up @@ -80,6 +81,7 @@ jobs:
-DGGML_VULKAN=ON \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
-DLLAMA_BUILD_TOOLS=ON \
-DLLAMA_BUILD_TESTS=OFF \
-DCMAKE_SYSTEM_NAME=Linux \
-DCMAKE_SYSTEM_PROCESSOR=riscv64 \
Expand Down Expand Up @@ -125,6 +127,7 @@ jobs:
-DGGML_VULKAN=ON \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_EXAMPLES=ON \
-DLLAMA_BUILD_TOOLS=ON \
-DLLAMA_BUILD_TESTS=OFF \
-DCMAKE_SYSTEM_NAME=Linux \
-DCMAKE_SYSTEM_PROCESSOR=aarch64 \
Expand Down
5 changes: 5 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,7 @@ jobs:
-DGGML_METAL_EMBED_LIBRARY=ON \
-DLLAMA_BUILD_COMMON=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_SYSTEM_NAME=iOS \
Expand Down Expand Up @@ -669,6 +670,7 @@ jobs:
-DGGML_METAL_EMBED_LIBRARY=ON \
-DLLAMA_BUILD_COMMON=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_SYSTEM_NAME=tvOS \
Expand Down Expand Up @@ -699,6 +701,7 @@ jobs:
-DGGML_METAL_EMBED_LIBRARY=ON \
-DLLAMA_BUILD_COMMON=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_SYSTEM_NAME=visionOS \
Expand Down Expand Up @@ -739,6 +742,7 @@ jobs:
-DGGML_METAL_EMBED_LIBRARY=ON \
-DLLAMA_CURL=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
Expand Down Expand Up @@ -1417,6 +1421,7 @@ jobs:
-DGGML_METAL_EMBED_LIBRARY=ON \
-DLLAMA_CURL=OFF \
-DLLAMA_BUILD_EXAMPLES=OFF \
-DLLAMA_BUILD_TOOLS=OFF \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_SYSTEM_NAME=iOS \
Expand Down
24 changes: 12 additions & 12 deletions .github/workflows/server.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ on:
push:
branches:
- master
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*']
pull_request:
types: [opened, synchronize, reopened]
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'tools/server/**.*']

env:
LLAMA_LOG_COLORS: 1
Expand Down Expand Up @@ -74,7 +74,7 @@ jobs:
- name: Tests dependencies
id: test_dependencies
run: |
pip install -r examples/server/tests/requirements.txt
pip install -r tools/server/tests/requirements.txt

# Setup nodejs (to be used for verifying bundled index.html)
- uses: actions/setup-node@v4
Expand All @@ -84,14 +84,14 @@ jobs:
- name: WebUI - Install dependencies
id: webui_lint
run: |
cd examples/server/webui
cd tools/server/webui
npm ci

- name: WebUI - Check code format
id: webui_format
run: |
git config --global --add safe.directory $(realpath .)
cd examples/server/webui
cd tools/server/webui
git status

npm run format
Expand All @@ -108,7 +108,7 @@ jobs:
id: verify_server_index_html
run: |
git config --global --add safe.directory $(realpath .)
cd examples/server/webui
cd tools/server/webui
git status

npm run build
Expand Down Expand Up @@ -161,21 +161,21 @@ jobs:
env:
GITHUB_ACTIONS: "true"
run: |
cd examples/server/tests
cd tools/server/tests
./tests.sh

- name: Tests (sanitizers)
id: server_integration_tests_sanitizers
if: ${{ matrix.sanitizer != '' }}
run: |
cd examples/server/tests
cd tools/server/tests
LLAMA_SANITIZE=1 ./tests.sh

- name: Slow tests
id: server_integration_tests_slow
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
run: |
cd examples/server/tests
cd tools/server/tests
SLOW_TESTS=1 ./tests.sh


Expand Down Expand Up @@ -211,7 +211,7 @@ jobs:
- name: Tests dependencies
id: test_dependencies
run: |
pip install -r examples/server/tests/requirements.txt
pip install -r tools/server/tests/requirements.txt

- name: Copy Libcurl
id: prepare_libcurl
Expand All @@ -224,14 +224,14 @@ jobs:
id: server_integration_tests
if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
run: |
cd examples/server/tests
cd tools/server/tests
$env:PYTHONIOENCODING = ":replace"
pytest -v -x -m "not slow"

- name: Slow tests
id: server_integration_tests_slow
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
run: |
cd examples/server/tests
cd tools/server/tests
$env:SLOW_TESTS = "1"
pytest -v -x
12 changes: 6 additions & 6 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -96,11 +96,11 @@ perf-*.txt
# Examples

examples/jeopardy/results.txt
examples/server/*.css.hpp
examples/server/*.html.hpp
examples/server/*.js.hpp
examples/server/*.mjs.hpp
examples/server/*.gz.hpp
tools/server/*.css.hpp
tools/server/*.html.hpp
tools/server/*.js.hpp
tools/server/*.mjs.hpp
tools/server/*.gz.hpp
!build_64.sh
!examples/*.bat
!examples/*/*.kts
Expand All @@ -110,7 +110,7 @@ examples/server/*.gz.hpp

# Server Web UI temporary files
node_modules
examples/server/webui/dist
tools/server/webui/dist

# Python

Expand Down
5 changes: 5 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ option(LLAMA_BUILD_COMMON "llama: build common utils library" ${LLAMA_STANDALONE

# extra artifacts
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_TOOLS "llama: build tools" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})

Expand Down Expand Up @@ -187,6 +188,10 @@ if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
add_subdirectory(pocs)
endif()

if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TOOLS)
add_subdirectory(tools)
endif()

#
# install
#
Expand Down
2 changes: 1 addition & 1 deletion CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

/ci/ @ggerganov
/.devops/*.Dockerfile @ngxson
/examples/server/ @ngxson
/tools/server/ @ngxson
/ggml/src/ggml-cuda/fattn* @JohannesGaessler
/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
/ggml/src/ggml-cuda/mmv.* @JohannesGaessler
Expand Down
Loading