Skip to content

Commit 5141a90

Browse files
authored
Fix ARMV9SME target in DYNAMIC_ARCH and add SME query code for MacOS (#5222)
* Fix ARMV9SME target and add support_sme1 code for MacOS
* make sgemm_direct unconditionally available on all arm64
* build a (dummy) sgemm_direct kernel on all arm64
* Update dynamic_arm64.c
1 parent 2320e0b commit 5141a90

File tree

6 files changed: 44 additions and 18 deletions

common_param.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,10 +224,8 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
224224
int (*sgemm_direct_performant) (BLASLONG M, BLASLONG N, BLASLONG K);
225225
#endif
226226
#ifdef ARCH_ARM64
227-
#ifdef HAVE_SME
228227
void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG);
229228
#endif
230-
#endif
231229

232230

233231
int (*sgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG);

driver/others/dynamic_arm64.c

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,14 @@
4343
#include <sys/auxv.h>
4444
#endif
4545

46+
#ifdef __APPLE__
47+
#include <sys/sysctl.h>
48+
int32_t value;
49+
size_t length=sizeof(value);
50+
int64_t value64;
51+
size_t length64=sizeof(value64);
52+
#endif
53+
4654
extern gotoblas_t gotoblas_ARMV8;
4755
#ifdef DYNAMIC_LIST
4856
#ifdef DYN_CORTEXA53
@@ -120,7 +128,7 @@ extern gotoblas_t gotoblas_ARMV9SME;
120128
#else
121129
#define gotoblas_ARMV9SME gotoblas_ARMV8
122130
#endif
123-
#ifdef DYN_CORTEX_A55
131+
#ifdef DYN_CORTEXA55
124132
extern gotoblas_t gotoblas_CORTEXA55;
125133
#else
126134
#define gotoblas_CORTEXA55 gotoblas_ARMV8
@@ -147,17 +155,17 @@ extern gotoblas_t gotoblas_NEOVERSEV1;
147155
extern gotoblas_t gotoblas_NEOVERSEN2;
148156
extern gotoblas_t gotoblas_ARMV8SVE;
149157
extern gotoblas_t gotoblas_A64FX;
158+
#ifndef NO_SME
159+
extern gotoblas_t gotoblas_ARMV9SME;
160+
#else
161+
#define gotoblas_ARMV9SME gotoblas_ARMV8SVE
162+
#endif
150163
#else
151164
#define gotoblas_NEOVERSEV1 gotoblas_ARMV8
152165
#define gotoblas_NEOVERSEN2 gotoblas_ARMV8
153166
#define gotoblas_ARMV8SVE gotoblas_ARMV8
154167
#define gotoblas_A64FX gotoblas_ARMV8
155-
#endif
156-
157-
#ifndef NO_SME
158-
extern gotoblas_t gotoblas_ARMV9SME;
159-
#else
160-
#define gotoblas_ARMV9SME gotoblas_ARMV8SVE
168+
#define gotoblas_ARMV9SME gotoblas_ARMV8
161169
#endif
162170

163171
extern gotoblas_t gotoblas_THUNDERX3T110;
@@ -168,7 +176,7 @@ extern void openblas_warning(int verbose, const char * msg);
168176
#define FALLBACK_VERBOSE 1
169177
#define NEOVERSEN1_FALLBACK "OpenBLAS : Your OS does not support SVE instructions. OpenBLAS is using Neoverse N1 kernels as a fallback, which may give poorer performance.\n"
170178

171-
#define NUM_CORETYPES 18
179+
#define NUM_CORETYPES 19
172180

173181
/*
174182
* In case asm/hwcap.h is outdated on the build system, make sure
@@ -207,6 +215,7 @@ static char *corename[] = {
207215
"cortexa55",
208216
"armv8sve",
209217
"a64fx",
218+
"armv9sme",
210219
"unknown"
211220
};
212221

@@ -229,6 +238,7 @@ char *gotoblas_corename(void) {
229238
if (gotoblas == &gotoblas_CORTEXA55) return corename[15];
230239
if (gotoblas == &gotoblas_ARMV8SVE) return corename[16];
231240
if (gotoblas == &gotoblas_A64FX) return corename[17];
241+
if (gotoblas == &gotoblas_ARMV9SME) return corename[18];
232242
return corename[NUM_CORETYPES];
233243
}
234244

@@ -266,6 +276,7 @@ static gotoblas_t *force_coretype(char *coretype) {
266276
case 15: return (&gotoblas_CORTEXA55);
267277
case 16: return (&gotoblas_ARMV8SVE);
268278
case 17: return (&gotoblas_A64FX);
279+
case 18: return (&gotoblas_ARMV9SME);
269280
}
270281
snprintf(message, 128, "Core not found: %s\n", coretype);
271282
openblas_warning(1, message);
@@ -277,6 +288,11 @@ static gotoblas_t *get_coretype(void) {
277288
char coremsg[128];
278289

279290
#if defined (OS_DARWIN)
291+
//future #if !defined(NO_SME)
292+
// if (support_sme1()) {
293+
// return &gotoblas_ARMV9SME;
294+
// }
295+
// #endif
280296
return &gotoblas_NEOVERSEN1;
281297
#endif
282298

@@ -439,15 +455,16 @@ static gotoblas_t *get_coretype(void) {
439455
}
440456
break;
441457
case 0x61: // Apple
458+
//future if (support_sme1()) return &gotoblas_ARMV9SME;
442459
return &gotoblas_NEOVERSEN1;
443460
break;
444461
default:
445462
snprintf(coremsg, 128, "Unknown CPU model - implementer %x part %x\n",implementer,part);
446463
openblas_warning(1, coremsg);
447464
}
448465

449-
#if !defined(NO_SME) && defined(HWCAP2_SME)
450-
if ((getauxval(AT_HWCAP2) & HWCAP2_SME)) {
466+
#if !defined(NO_SME)
467+
if (support_sme1()) {
451468
return &gotoblas_ARMV9SME;
452469
}
453470
#endif
@@ -511,6 +528,10 @@ int support_sme1(void) {
511528
if(getauxval(AT_HWCAP2) & HWCAP2_SME){
512529
ret = 1;
513530
}
531+
#endif
532+
#if defined(__APPLE__)
533+
sysctlbyname("hw.optional.arm.FEAT_SME",&value64,&length64,NULL,0);
534+
ret = value64;
514535
#endif
515536
return ret;
516537
}

kernel/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
208208
set(USE_TRMM true)
209209
endif ()
210210
set(USE_DIRECT_SGEMM false)
211-
if (X86_64 OR (ARM64 AND (UC_TARGET_CORE MATCHES ARMV9SME)))
211+
if (X86_64 OR ARM64)
212212
set(USE_DIRECT_SGEMM true)
213213
endif()
214214

@@ -225,9 +225,11 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
225225
set (SGEMMDIRECTSMEKERNEL sgemm_direct_sme1.S)
226226
set (SGEMMDIRECTPREKERNEL sgemm_direct_sme1_preprocess.S)
227227
GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTKERNEL}" "" "gemm_direct" false "" "" false SINGLE)
228+
if (HAVE_SME)
228229
GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTSMEKERNEL}" "" "gemm_direct_sme1" false "" "" false SINGLE)
229230
GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTPREKERNEL}" "" "gemm_direct_sme1_preprocess" false "" "" false SINGLE)
230231
endif ()
232+
endif ()
231233
endif()
232234

233235
foreach (float_type SINGLE DOUBLE)

kernel/Makefile.L3

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,8 @@ endif
103103
ifeq ($(ARCH), arm64)
104104
ifeq ($(TARGET_CORE), ARMV9SME)
105105
HAVE_SME = 1
106-
SGEMMDIRECTKERNEL = sgemm_direct_arm64_sme1.c
107106
endif
107+
SGEMMDIRECTKERNEL = sgemm_direct_arm64_sme1.c
108108
endif
109109
endif
110110
endif
@@ -143,9 +143,10 @@ SKERNELOBJS += \
143143
sgemm_direct_performant$(TSUFFIX).$(SUFFIX)
144144
endif
145145
ifeq ($(ARCH), arm64)
146+
SKERNELOBJS += \
147+
sgemm_direct$(TSUFFIX).$(SUFFIX)
146148
ifdef HAVE_SME
147149
SKERNELOBJS += \
148-
sgemm_direct$(TSUFFIX).$(SUFFIX) \
149150
sgemm_direct_sme1$(TSUFFIX).$(SUFFIX) \
150151
sgemm_direct_sme1_preprocess$(TSUFFIX).$(SUFFIX)
151152
endif
@@ -835,9 +836,9 @@ $(KDIR)sgemm_direct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMDIRECTKERNEL)
835836
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
836837
endif
837838
ifeq ($(ARCH), arm64)
838-
ifdef HAVE_SME
839839
$(KDIR)sgemm_direct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMDIRECTKERNEL)
840840
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
841+
ifdef HAVE_SME
841842
$(KDIR)sgemm_direct_sme1$(TSUFFIX).$(SUFFIX) :
842843
$(CC) $(CFLAGS) -c $(KERNELDIR)/sgemm_direct_sme1.S -UDOUBLE -UCOMPLEX -o $@
843844
$(KDIR)sgemm_direct_sme1_preprocess$(TSUFFIX).$(SUFFIX) :

kernel/arm64/sgemm_direct_arm64_sme1.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,4 +71,10 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A,\
7171
free(A_mod);
7272
}
7373

74+
#else
75+
76+
void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A,\
77+
BLASLONG strideA, float * __restrict B, BLASLONG strideB ,\
78+
float * __restrict R, BLASLONG strideR){}
79+
7480
#endif

kernel/setparam-ref.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,9 +180,7 @@ gotoblas_t TABLE_NAME = {
180180
sgemm_direct_performantTS,
181181
#endif
182182
#ifdef ARCH_ARM64
183-
#ifdef HAVE_SME
184183
sgemm_directTS,
185-
#endif
186184
#endif
187185

188186
sgemm_kernelTS, sgemm_betaTS,

0 commit comments

Comments
 (0)