
Commit 1434c2c
Merge branch 'master' into xsn/private_batch_api
2 parents a9efdbb + 053b3f9

23 files changed: +249 −183 lines

ci/README.md
Lines changed: 39 additions & 0 deletions

````diff
@@ -26,4 +26,43 @@ GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 # with SYCL support
 source /opt/intel/oneapi/setvars.sh
 GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+
+# with MUSA support
+GG_BUILD_MUSA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+```
+
+## Running MUSA CI in a Docker Container
+
+Assuming `$PWD` is the root of the `llama.cpp` repository, follow these steps to set up and run MUSA CI in a Docker container:
+
+### 1. Create a local directory to store cached models, configuration files and venv:
+
+```bash
+mkdir -p $HOME/llama.cpp/ci-cache
+```
+
+### 2. Create a local directory to store CI run results:
+
+```bash
+mkdir -p $HOME/llama.cpp/ci-results
+```
+
+### 3. Start a Docker container and run the CI:
+
+```bash
+docker run --privileged -it \
+    -v $HOME/llama.cpp/ci-cache:/ci-cache \
+    -v $HOME/llama.cpp/ci-results:/ci-results \
+    -v $PWD:/ws -w /ws \
+    mthreads/musa:rc3.1.1-devel-ubuntu22.04
 ```
+
+Inside the container, execute the following commands:
+
+```bash
+apt update -y && apt install -y cmake git python3.10-venv wget
+git config --global --add safe.directory /ws
+GG_BUILD_MUSA=1 bash ./ci/run.sh /ci-results /ci-cache
+```
+
+This setup ensures that the CI runs within an isolated Docker environment while maintaining cached files and results across runs.
````
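The steps documented in this README can be collapsed into one non-interactive invocation. The following is a minimal sketch, not part of the repository, using only the directories, image tag, and commands from the diff above:

```bash
#!/usr/bin/env bash
# Sketch: run the MUSA CI steps above in one shot from a llama.cpp checkout.
set -euo pipefail

mkdir -p "$HOME/llama.cpp/ci-cache" "$HOME/llama.cpp/ci-results"

docker run --privileged -i --rm \
    -v "$HOME/llama.cpp/ci-cache:/ci-cache" \
    -v "$HOME/llama.cpp/ci-results:/ci-results" \
    -v "$PWD:/ws" -w /ws \
    mthreads/musa:rc3.1.1-devel-ubuntu22.04 /bin/bash -c '
        apt update -y && apt install -y cmake git python3.10-venv wget
        git config --global --add safe.directory /ws
        GG_BUILD_MUSA=1 bash ./ci/run.sh /ci-results /ci-cache
    '
```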

ci/run.sh
Lines changed: 24 additions & 6 deletions

```diff
@@ -16,6 +16,9 @@
 # # with VULKAN support
 # GG_BUILD_VULKAN=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
 #
+# # with MUSA support
+# GG_BUILD_MUSA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
+#
 
 if [ -z "$2" ]; then
     echo "usage: $0 <output-dir> <mnt-dir>"
@@ -52,13 +55,22 @@ if [ ! -z ${GG_BUILD_SYCL} ]; then
         echo "source /opt/intel/oneapi/setvars.sh"
         exit 1
     fi
-
+    # Use only main GPU
+    export ONEAPI_DEVICE_SELECTOR="level_zero:0"
+    # Enable sysman for correct memory reporting
+    export ZES_ENABLE_SYSMAN=1
     CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
 fi
 
 if [ ! -z ${GG_BUILD_VULKAN} ]; then
     CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=1"
 fi
+
+if [ ! -z ${GG_BUILD_MUSA} ]; then
+    # Use qy1 by default (MTT S80)
+    MUSA_ARCH=${MUSA_ARCH:-21}
+    CMAKE_EXTRA="-DGGML_MUSA=ON -DMUSA_ARCHITECTURES=${MUSA_ARCH}"
+fi
 
 ## helpers
 
 # download a file if it does not exist or if it is outdated
@@ -808,7 +820,7 @@ export LLAMA_LOG_PREFIX=1
 export LLAMA_LOG_TIMESTAMPS=1
 
 if [ -z ${GG_BUILD_LOW_PERF} ]; then
-    # Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt
+    # Create symlink: ./llama.cpp/models-mnt -> $MNT/models
     rm -rf ${SRC}/models-mnt
     mnt_models=${MNT}/models
     mkdir -p ${mnt_models}
@@ -826,16 +838,20 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
 fi
 
 ret=0
-
-test $ret -eq 0 && gg_run ctest_debug
+if [ -z ${GG_BUILD_SYCL} ]; then
+    # SYCL build breaks with debug build flags
+    test $ret -eq 0 && gg_run ctest_debug
+fi
 test $ret -eq 0 && gg_run ctest_release
 
 if [ -z ${GG_BUILD_LOW_PERF} ]; then
     test $ret -eq 0 && gg_run embd_bge_small
     test $ret -eq 0 && gg_run rerank_tiny
 
     if [ -z ${GG_BUILD_CLOUD} ] || [ ${GG_BUILD_EXTRA_TESTS_0} ]; then
-        test $ret -eq 0 && gg_run test_scripts_debug
+        if [ -z ${GG_BUILD_SYCL} ]; then
+            test $ret -eq 0 && gg_run test_scripts_debug
+        fi
         test $ret -eq 0 && gg_run test_scripts_release
     fi
 
@@ -846,7 +862,9 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
         test $ret -eq 0 && gg_run pythia_2_8b
         #test $ret -eq 0 && gg_run open_llama_7b_v2
     fi
-    test $ret -eq 0 && gg_run ctest_with_model_debug
+    if [ -z ${GG_BUILD_SYCL} ]; then
+        test $ret -eq 0 && gg_run ctest_with_model_debug
+    fi
     test $ret -eq 0 && gg_run ctest_with_model_release
 fi
 fi
```
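The `MUSA_ARCH=${MUSA_ARCH:-21}` line uses bash default-value expansion, so the target architecture can be overridden per run without editing the script. A minimal sketch; the value `22` below is only an illustrative placeholder for some other MUSA architecture, not something this commit defines:

```bash
# Default: qy1 (MTT S80), as hard-coded in the hunk above.
GG_BUILD_MUSA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt

# Hypothetical override: set MUSA_ARCH in the environment before the run.
MUSA_ARCH=22 GG_BUILD_MUSA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
```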

convert_hf_to_gguf.py
Lines changed: 3 additions & 0 deletions

```diff
@@ -705,6 +705,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "ccc2ef013c104be7bae2965776d611e1d7a8a2a9c547dd93a682c9a9fc80352e":
             # ref: https://huggingface.co/Xenova/gpt-4o
             res = "gpt-4o"
+        if chkhsh == "7dec86086fcc38b66b7bc1575a160ae21cf705be7718b9d5598190d7c12db76f":
+            # ref: https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k
+            res = "superbpe"
 
         if res is None:
             logger.warning("\n")
```

convert_hf_to_gguf_update.py
Lines changed: 1 addition & 0 deletions

```diff
@@ -110,6 +110,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "deepseek-v3",      "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-V3"},
     {"name": "deepseek-r1-qwen", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"},
     {"name": "gpt-4o",           "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Xenova/gpt-4o", },
+    {"name": "superbpe",         "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k", },
 ]
 
 
```
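These two hunks follow llama.cpp's usual recipe for registering a new BPE pre-tokenizer: add the model repo to the list in `convert_hf_to_gguf_update.py`, rerun it to download the tokenizer and compute the `chkhsh` digest, which regenerates the `get_vocab_base_pre` branch in `convert_hf_to_gguf.py`. A sketch of that workflow, assuming the update script still takes a Hugging Face token as its single argument:

```bash
# Regenerate the chkhsh table after adding a model to the list
# (downloads each tokenizer, so a HF token with access is needed).
python3 convert_hf_to_gguf_update.py <huggingface_token>

# Then convert the model; the new "superbpe" branch is selected
# automatically via the tokenizer hash.
python3 convert_hf_to_gguf.py /path/to/OLMo2-8B-SuperBPE-t180k
```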

docs/cuda-fedora.md renamed to docs/backend/CUDA-FEDORA.md
Lines changed: 39 additions & 26 deletions

````diff
@@ -14,9 +14,7 @@ In this guide we setup [Nvidia CUDA](https://docs.nvidia.com/cuda/) in a toolbox
 - [Creating a Fedora Toolbox Environment](#creating-a-fedora-toolbox-environment)
 - [Installing Essential Development Tools](#installing-essential-development-tools)
 - [Adding the CUDA Repository](#adding-the-cuda-repository)
-- [Installing `nvidia-driver-libs`](#installing-nvidia-driver-libs)
-  - [Manually Resolving Package Conflicts](#manually-resolving-package-conflicts)
-  - [Finalizing the Installation of `nvidia-driver-libs`](#finalizing-the-installation-of-nvidia-driver-libs)
+- [Installing Nvidia Driver Libraries](#installing-nvidia-driver-libraries)
 - [Installing the CUDA Meta-Package](#installing-the-cuda-meta-package)
 - [Configuring the Environment](#configuring-the-environment)
 - [Verifying the Installation](#verifying-the-installation)
@@ -67,7 +65,7 @@ This guide focuses on Fedora hosts, but with small adjustments, it can work for
    sudo dnf distro-sync
    ```
 
-2. **Install the Default Text Editor (Optional):**
+2. **Install **Vim**, the default text editor (Optional):**
 
    ```bash
    sudo dnf install vim-default-editor --allowerasing
@@ -97,60 +95,75 @@ After adding the repository, synchronize the package manager again:
 sudo dnf distro-sync
 ```
 
-## Installing `nvidia-driver-libs` and `nvidia-driver-cuda-libs`
+## Installing Nvidia Driver Libraries
 
-We need to detect if the host is supplying the [NVIDIA driver libraries into the toolbox](https://github.com/containers/toolbox/blob/main/src/pkg/nvidia/nvidia.go).
+First, we need to detect if the host is supplying the [NVIDIA driver libraries into the toolbox](https://github.com/containers/toolbox/blob/main/src/pkg/nvidia/nvidia.go):
 
 ```bash
 ls -la /usr/lib64/libcuda.so.1
 ```
 
-**Explanation:**
+### If *`libcuda.so.1`* is missing:
+
+```
+ls: cannot access '/usr/lib64/libcuda.so.1': No such file or directory
+```
 
-- `nvidia-driver-libs` and `nvidia-driver-cuda-libs` contains necessary NVIDIA driver libraries required by CUDA,
-  on hosts with NVIDIA drivers installed the Fedora Container will supply the host libraries.
+**Explanation:**
+The host does not supply the CUDA drivers, **install them now:**
 
-### Install Nvidia Driver Libraries on Guest (if `libcuda.so.1` was NOT found).
+#### Install the Nvidia Driver Libraries on Guest:
 
 ```bash
-sudo dnf install nvidia-driver-libs nvidia-driver-cuda-libs
+sudo dnf install nvidia-driver-cuda nvidia-driver-libs nvidia-driver-cuda-libs nvidia-persistenced
 ```
 
-### Manually Updating the RPM database for host-supplied NVIDIA drivers (if `libcuda.so.1` was found).
+### If *`libcuda.so.1`* exists:
+```
+lrwxrwxrwx. 1 root root 21 Mar 24 11:26 /usr/lib64/libcuda.so.1 -> libcuda.so.570.133.07
+```
+
+**Explanation:**
+The host is supplying the CUDA drivers, **we need to update the guest RPM Database accordingly:**
 
-If the installation fails due to conflicts, we'll manually download and install the required packages, excluding conflicting files.
+#### Update the Toolbox RPM Database to include the Host-Supplied Libraries:
 
-#### 1. Download `nvidia-driver-libs` and `nvidia-driver-cuda-libs` RPM's (with dependencies)
+Note: we do not actually install the libraries, we just update the DB so that the guest system knows they are supplied by the host.
+
+##### 1. Download the `nvidia-*` RPMs that are supplied by the host (with dependencies)
 
 ```bash
-sudo dnf download --destdir=/tmp/nvidia-driver-libs --resolve --arch x86_64 nvidia-driver-libs nvidia-driver-cuda-libs
+sudo dnf download --destdir=/tmp/nvidia-driver-libs --resolve --arch x86_64 nvidia-driver-cuda nvidia-driver-libs nvidia-driver-cuda-libs nvidia-persistenced
 ```
 
-#### 2. Update the RPM database to assume the installation of these packages.
+##### 2. Update the RPM database to assume the installation of these packages.
 
 ```bash
 sudo rpm --install --verbose --hash --justdb /tmp/nvidia-driver-libs/*
 ```
 
 **Note:**
 
-- The `--justdb` option only updates the RPM database, without touching the filesystem.
+- The `--justdb` option only updates the RPM database, without touching the filesystem elsewhere.
+
+##### Check that the RPM Database has been correctly updated:
 
-#### Finalizing the Installation of `nvidia-driver-libs` and `nvidia-driver-cuda-libs`
+**Note:** This is the same command as in the *"Install the Nvidia Driver Libraries on Guest"* section, for when *`libcuda.so.1`* was missing.
 
-After manually installing the dependencies, run:
 
 ```bash
-sudo dnf install nvidia-driver-libs nvidia-driver-cuda-libs
+sudo dnf install nvidia-driver-cuda nvidia-driver-libs nvidia-driver-cuda-libs nvidia-persistenced
 ```
 
-You should receive a message indicating the package is already installed:
+*(this time it will not install anything, as the database thinks that these packages are already installed)*
 
 ```
 Updating and loading repositories:
 Repositories loaded.
-Package "nvidia-driver-libs-3:570.86.10-1.fc41.x86_64" is already installed.
-Package "nvidia-driver-cuda-libs-3:570.86.10-1.fc41.x86_64" is already installed.
+Package "nvidia-driver-cuda-3:570.124.06-1.fc41.x86_64" is already installed.
+Package "nvidia-driver-libs-3:570.124.06-1.fc41.x86_64" is already installed.
+Package "nvidia-driver-cuda-libs-3:570.124.06-1.fc41.x86_64" is already installed.
+Package "nvidia-persistenced-3:570.124.06-1.fc41.x86_64" is already installed.
 
 Nothing to do.
 ```
@@ -207,9 +220,9 @@ You should see output similar to:
 ```
 nvcc: NVIDIA (R) Cuda compiler driver
 Copyright (c) 2005-2025 NVIDIA Corporation
-Built on Wed_Jan_15_19:20:09_PST_2025
-Cuda compilation tools, release 12.8, V12.8.61
-Build cuda_12.8.r12.8/compiler.35404655_0
+Built on Fri_Feb_21_20:23:50_PST_2025
+Cuda compilation tools, release 12.8, V12.8.93
+Build cuda_12.8.r12.8/compiler.35583870_0
 ```
 
 This output confirms that the CUDA compiler is accessible and indicates the installed version.
````
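The guide's two branches reduce to a single check on `libcuda.so.1`. A minimal sketch of that decision, using only the commands documented in the diff above:

```bash
#!/usr/bin/env bash
# Sketch of the host-driver detection described in the guide above.
set -euo pipefail

pkgs=(nvidia-driver-cuda nvidia-driver-libs nvidia-driver-cuda-libs nvidia-persistenced)

if [ -e /usr/lib64/libcuda.so.1 ]; then
    # Host supplies the driver: record the packages in the RPM DB only.
    sudo dnf download --destdir=/tmp/nvidia-driver-libs --resolve --arch x86_64 "${pkgs[@]}"
    sudo rpm --install --verbose --hash --justdb /tmp/nvidia-driver-libs/*
else
    # No host driver: install the driver libraries inside the toolbox.
    sudo dnf install "${pkgs[@]}"
fi
```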

docs/build.md
Lines changed: 26 additions & 4 deletions

````diff
@@ -132,12 +132,14 @@ You may find the official downloads here: [NVIDIA developer site](https://develo
 
 
 #### Compile and run inside a Fedora Toolbox Container
-We also have a [guide](./cuda-fedora.md) for setting up CUDA toolkit in a Fedora [toolbox container](https://containertoolbx.org/).
+We also have a [guide](./backend/CUDA-FEDORA.md) for setting up CUDA toolkit in a Fedora [toolbox container](https://containertoolbx.org/).
 
 **Recommended for:**
-
-- ***Particularly*** *convenient* for users of [Atomic Desktops for Fedora](https://fedoraproject.org/atomic-desktops/); such as: [Silverblue](https://fedoraproject.org/atomic-desktops/silverblue/) and [Kinoite](https://fedoraproject.org/atomic-desktops/kinoite/).
-- Toolbox is installed by default: [Fedora Workstation](https://fedoraproject.org/workstation/) or [Fedora KDE Plasma Desktop](https://fedoraproject.org/spins/kde).
+- ***Necessary*** for users of [Atomic Desktops for Fedora](https://fedoraproject.org/atomic-desktops/); such as: [Silverblue](https://fedoraproject.org/atomic-desktops/silverblue/) and [Kinoite](https://fedoraproject.org/atomic-desktops/kinoite/).
+  - (there are no supported CUDA packages for these systems)
+- ***Necessary*** for users that have a host that is not a: [Supported Nvidia CUDA Release Platform](https://developer.nvidia.com/cuda-downloads).
+  - (for example, you may have [Fedora 42 Beta](https://fedoramagazine.org/announcing-fedora-linux-42-beta/) as your host operating system)
+- ***Convenient*** for those running [Fedora Workstation](https://fedoraproject.org/workstation/) or [Fedora KDE Plasma Desktop](https://fedoraproject.org/spins/kde), and want to keep their host system clean.
 - *Optionally* toolbox packages are available: [Arch Linux](https://archlinux.org/), [Red Hat Enterprise Linux >= 8.5](https://www.redhat.com/en/technologies/linux-platforms/enterprise-linux), or [Ubuntu](https://ubuntu.com/download)
 
 
@@ -433,6 +435,26 @@ llama_new_context_with_model: CANN compute buffer size = 1260.81 MiB
 
 For detailed info, such as model/device supports, CANN install, please refer to [llama.cpp for CANN](./backend/CANN.md).
 
+## Arm® KleidiAI™
+KleidiAI is a library of optimized microkernels for AI workloads, specifically designed for Arm CPUs. These microkernels enhance performance and can be enabled for use by the CPU backend.
+
+To enable KleidiAI, go to the llama.cpp directory and build using CMake
+```bash
+cmake -B build -DGGML_CPU_KLEIDIAI=ON
+cmake --build build --config Release
+```
+You can verify that KleidiAI is being used by running
+```bash
+./build/bin/llama-cli -m PATH_TO_MODEL -p "What is a car?"
+```
+If KleidiAI is enabled, the output will contain a line similar to:
+```
+load_tensors: CPU_KLEIDIAI model buffer size = 3474.00 MiB
+```
+KleidiAI's microkernels implement optimized tensor operations using Arm CPU features such as dotprod, int8mm and SME. llama.cpp selects the most efficient kernel based on runtime CPU feature detection. However, on platforms that support SME, you must manually enable SME microkernels by setting the environment variable `GGML_KLEIDIAI_SME=1`.
+
+Depending on your build target, other higher priority backends may be enabled by default. To ensure the CPU backend is used, you must disable the higher priority backends either at compile time, e.g. -DGGML_METAL=OFF, or during run-time using the command line option `--device none`.
+
 ## Android
 
 To read documentation for how to build on Android, [click here](./android.md)
````
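Putting the KleidiAI instructions from the hunk above together: a minimal build-and-verify sketch for an Arm machine. The model path is a placeholder, and the grep pattern comes from the `load_tensors` log line quoted in the docs:

```bash
# Build the CPU backend with KleidiAI microkernels enabled.
cmake -B build -DGGML_CPU_KLEIDIAI=ON
cmake --build build --config Release

# On SME-capable platforms the SME microkernels must be opted into.
# Grep the load log to confirm the KleidiAI buffer type is in use.
GGML_KLEIDIAI_SME=1 ./build/bin/llama-cli -m ./models/model.gguf \
    -p "What is a car?" 2>&1 | grep CPU_KLEIDIAI
```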

examples/server/server.cpp
Lines changed: 5 additions & 0 deletions

```diff
@@ -830,6 +830,11 @@ struct server_task_result_cmpl_final : server_task_result {
             ret.push_back({"timings", timings.to_json()});
         }
 
+        // extra fields for debugging purposes
+        if (verbose) {
+            ret["__verbose"] = to_json_non_oaicompat();
+        }
+
         return ret;
     }
 };
```
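With this change, a final completion result produced while the task's `verbose` flag is set carries the full non-OAI-compatible payload under a `__verbose` key, next to the OAI-compatible fields. A hedged sketch of inspecting it against a running `llama-server` (the port is the common default, `jq` is assumed installed, and how `verbose` gets enabled — server flag or request field — is outside this hunk):

```bash
# Query the OAI-compatible endpoint and pull out the debug payload, if any.
curl -s http://localhost:8080/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{"messages": [{"role": "user", "content": "hi"}]}' \
    | jq '.__verbose'
```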

ggml/src/ggml-cpu/CMakeLists.txt
Lines changed: 2 additions & 2 deletions

```diff
@@ -359,9 +359,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
 
         # Fetch KleidiAI sources:
         include(FetchContent)
-        set(KLEIDIAI_COMMIT_TAG "v1.3.0")
+        set(KLEIDIAI_COMMIT_TAG "v1.5.0")
         set(KLEIDIAI_DOWNLOAD_URL "https://github.com/ARM-software/kleidiai/archive/refs/tags/${KLEIDIAI_COMMIT_TAG}.tar.gz")
-        set(KLEIDIAI_ARCHIVE_MD5  "060bd2dc64642b091f461cc8dd7426d9")
+        set(KLEIDIAI_ARCHIVE_MD5  "ea22e1aefb800e9bc8c74d91633cc58e")
 
         if (POLICY CMP0135)
             cmake_policy(SET CMP0135 NEW)
```
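When bumping `KLEIDIAI_COMMIT_TAG`, the matching `KLEIDIAI_ARCHIVE_MD5` has to be recomputed from the release tarball. A sketch of deriving it, using the download URL pattern from the hunk above:

```bash
# Compute the MD5 checksum for a given KleidiAI release tag so it can
# be pasted into KLEIDIAI_ARCHIVE_MD5 in ggml/src/ggml-cpu/CMakeLists.txt.
TAG=v1.5.0
curl -sL "https://github.com/ARM-software/kleidiai/archive/refs/tags/${TAG}.tar.gz" | md5sum
```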

ggml/src/ggml-cpu/kleidiai/kernels.cpp
Lines changed: 2 additions & 7 deletions

```diff
@@ -51,11 +51,10 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
         /* .run_kernel = */ kai_run_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot,
     },
     /* .lhs_info = */ {
-        /* .get_offset = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32,
-        /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
+        /* .get_offset = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32_neon,
+        /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32_neon,
         /* .packed_size = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32_neon,
         /* .pack_func = */ kai_run_lhs_quant_pack_qsi8d32p_f32_neon,
-        /* .require_aligned_m_idx = */ true,
     },
     /* .rhs_info = */ {
         /* .packed_size = */ kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32ps1s0scalef16_qsu4c32s16s0_neon,
@@ -100,7 +99,6 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
         /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
         /* .packed_size = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
         /* .pack_func = */ kai_run_lhs_quant_pack_qsi8d32p_f32,
-        /* .require_aligned_m_idx = */ false,
     },
     /* .rhs_info = */ {
         /* .packed_size = */ kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0,
@@ -144,7 +142,6 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
         /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
         /* .packed_size = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
         /* .pack_func = */ kai_run_lhs_quant_pack_qsi8d32p_f32,
-        /* .require_aligned_m_idx = */ false,
     },
     /* .rhs_info = */ {
         /* .packed_size = */ kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0,
@@ -189,7 +186,6 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
         /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
         /* .packed_size = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
         /* .pack_func = */ kai_run_lhs_quant_pack_qsi8d32p_f32,
-        /* .require_aligned_m_idx = */ false,
     },
     /* .rhs_info = */ {
         /* .packed_size = */ kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0,
@@ -233,7 +229,6 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
         /* .get_packed_offset = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
         /* .packed_size = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
         /* .pack_func = */ kai_run_lhs_quant_pack_qsi8d32p_f32,
-        /* .require_aligned_m_idx = */ false,
     },
     /* .rhs_info = */ {
         /* .packed_size = */ kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0,
```

ggml/src/ggml-cpu/kleidiai/kernels.h
Lines changed: 0 additions & 1 deletion

```diff
@@ -40,7 +40,6 @@ struct lhs_packing_info {
     size_t (*packed_size)(size_t m, size_t k, size_t bl, size_t mr, size_t kr, size_t sr);
     void (*pack_func)(size_t m, size_t k, size_t bl, size_t mr, size_t kr, size_t sr, size_t m_idx_start, const float* lhs,
                       size_t lhs_stride, void* lhs_packed);
-    bool require_aligned_m_idx;
 };
 
 struct rhs_packing_info {
```
