Skip to content

Commit 5e9d919

Browse files
committed
perf: Add FP16 GEMM MMUL Reshaped Only Rhs Support
This patch introduces a GEMM routine that is optimized for Arm(R) Mali(TM)-G1. Resolves: [COMPMID-8311], [COMPMID-8312] Signed-off-by: Omar Al Khatib <[email protected]> Change-Id: I84e685f0314da9af1c3fbb50d83e68b355727770
1 parent 020a3da commit 5e9d919

19 files changed

+950
-120
lines changed

arm_compute/core/CL/CLHelpers.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,14 @@ void set_unroll_with_pragma(CLBuildOptions &built_opts, std::initializer_list<in
276276
*/
277277
bool arm_matrix_multiply_supported(const cl::Device &device);
278278

279+
/** Helper function to check whether the cl_arm_matrix_multiply extension is supported with FP16 (FP16 accumulators) capability
280+
*
281+
* @param[in] device A CL device
282+
*
283+
* @return True if the extension is supported
284+
*/
285+
bool arm_matrix_multiply_fp16_supported(const cl::Device &device);
286+
279287
/** Check whether cl_khr_command_buffer extension is supported by the specified CL device.
280288
*
281289
* @param[in] device The CL device

arm_compute/core/CL/OpenCL.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@
4444
#pragma GCC diagnostic ignored "-Wunused-parameter"
4545
#if defined(__GNUG__) && __GNUG__ >= 8
4646
#pragma GCC diagnostic ignored "-Wcatch-value"
47-
#endif // defined(__GNUG__) && __GNUG__ >= 8
48-
#include <CL/opencl.hpp> // include new hpp header instead of cl2.hpp
47+
#endif // defined(__GNUG__) && __GNUG__ >= 8
48+
#include "arm_compute/core/CL/cl_definitions.h"
4949
#pragma GCC diagnostic pop
5050

5151
namespace cl

arm_compute/core/CL/cl_definitions.h

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/*
2+
* Copyright (c) 2025 Arm Limited.
3+
*
4+
* SPDX-License-Identifier: MIT
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to
8+
* deal in the Software without restriction, including without limitation the
9+
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10+
* sell copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*/
24+
25+
#ifndef ACL_ARM_COMPUTE_CORE_CL_CL_DEFINITIONS_H
26+
#define ACL_ARM_COMPUTE_CORE_CL_CL_DEFINITIONS_H
27+
28+
#include "include/CL/opencl.hpp"
29+
30+
// Local definitions used to query the matrix-multiply capabilities of a CL device.
// The FP16 macro is a single-bit mask (bit 0) that is tested against the cl_ulong
// value returned for CL_DEVICE_MATRIX_MULTIPLY_CAPABILITIES_ARM.
// NOTE(review): the numeric values presumably mirror the cl_arm_matrix_multiply
// extension specification - confirm against the vendor headers.
// NOTE(review): both macros are defined unconditionally here; if the included CL
// headers ever provide them, the definitions must stay token-identical.
#define CL_DEVICE_MATRIX_MULTIPLY_FP16_WITH_FP16_ACCUMULATORS_ARM (1ULL << 0)
31+
#define CL_DEVICE_MATRIX_MULTIPLY_CAPABILITIES_ARM 0x41F4
32+
33+
namespace cl
34+
{
35+
namespace detail
36+
{
37+
// This guard is always satisfied in this header, because the macro is defined
// unconditionally a few lines above.
#ifdef CL_DEVICE_MATRIX_MULTIPLY_CAPABILITIES_ARM
38+
// Associates the capabilities query with the cl_ulong result type.
// NOTE(review): presumably this is what enables cl::Device::getInfo<> for the
// parameter - confirm against the CL_HPP_DECLARE_PARAM_TRAITS_ definition in opencl.hpp.
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_MATRIX_MULTIPLY_CAPABILITIES_ARM, cl_ulong)
39+
#endif // CL_DEVICE_MATRIX_MULTIPLY_CAPABILITIES_ARM
40+
} // namespace detail
41+
} // namespace cl
42+
#endif // ACL_ARM_COMPUTE_CORE_CL_CL_DEFINITIONS_H

arm_compute/core/GPUTarget.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ enum class GPUTarget
6969
G615 = 0x351,
7070
G720 = 0x410,
7171
G620 = 0x411,
72+
G1 = 0x480
7273

7374
// When new models are added, watch out for heuristics.
7475
// The default/unrecognized Gpu will be the latest one and

src/core/CL/CLHelpers.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2016-2023 Arm Limited.
2+
* Copyright (c) 2016-2023, 2025 Arm Limited.
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -501,6 +501,21 @@ bool arm_matrix_multiply_supported(const cl::Device &device)
501501
return device_supports_extension(device, "cl_arm_matrix_multiply");
502502
}
503503

504+
// Checks whether the device supports cl_arm_matrix_multiply with the
// FP16-with-FP16-accumulators capability.
//
// device: the CL device to query.
//
// Returns true only when all of the following hold:
//   1. arm_matrix_multiply_supported(device) is true,
//   2. the clGetDeviceInfo query for CL_DEVICE_MATRIX_MULTIPLY_CAPABILITIES_ARM
//      returns CL_SUCCESS,
//   3. the returned bitfield has CL_DEVICE_MATRIX_MULTIPLY_FP16_WITH_FP16_ACCUMULATORS_ARM set.
// Returns false otherwise; a failed query is reported as "not supported" rather
// than as an error.
bool arm_matrix_multiply_fp16_supported(const cl::Device &device)
505+
{
506+
// Without the base extension there is nothing to query, so bail out early.
if (!arm_matrix_multiply_supported(device))
507+
{
508+
return false;
509+
}
510+
511+
// Zero-initialised so that no capability bit appears set if the query does not write it.
cl_ulong caps = 0;
512+
const auto err = clGetDeviceInfo( //
513+
device(), CL_DEVICE_MATRIX_MULTIPLY_CAPABILITIES_ARM, sizeof(caps), &caps, nullptr);
514+
515+
// Both conditions are required: the query must succeed and the FP16 bit must be set.
const auto supported = err == CL_SUCCESS && (caps & CL_DEVICE_MATRIX_MULTIPLY_FP16_WITH_FP16_ACCUMULATORS_ARM) != 0;
516+
return supported;
517+
}
518+
504519
bool command_buffer_supported(const cl::Device &device)
505520
{
506521
return device_supports_extension(device, "cl_khr_command_buffer");

0 commit comments

Comments
 (0)