Skip to content

Commit b52b29a

Browse files
es0m and cebtenzzre authored
arm64 support for windows (#3007)
Co-authored-by: Cebtenzzre <[email protected]>
1 parent 4f7cd6b commit b52b29a

File tree

4 files changed

+20
-6
lines changed

4 files changed

+20
-6
lines changed

CMakeLists.txt

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,13 @@ endif()
461461
# TODO: probably these flags need to be tweaked on some architectures
462462
# feel free to update the Makefile for your architecture and send a pull request or issue
463463
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
464+
if (MSVC)
465+
string(TOLOWER "${CMAKE_GENERATOR_PLATFORM}" CMAKE_GENERATOR_PLATFORM_LWR)
466+
message(STATUS "CMAKE_GENERATOR_PLATFORM: ${CMAKE_GENERATOR_PLATFORM}")
467+
else ()
468+
set(CMAKE_GENERATOR_PLATFORM_LWR "")
469+
endif ()
470+
464471
if (NOT MSVC)
465472
if (LLAMA_STATIC)
466473
add_link_options(-static)
@@ -476,10 +483,14 @@ if (NOT MSVC)
476483
endif()
477484
endif()
478485

479-
if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64"))
486+
if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") OR ("${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "arm64"))
480487
message(STATUS "ARM detected")
481488
if (MSVC)
482-
# TODO: arm msvc?
489+
add_compile_definitions(__ARM_NEON)
490+
add_compile_definitions(__ARM_FEATURE_FMA)
491+
add_compile_definitions(__ARM_FEATURE_DOTPROD)
492+
# add_compile_definitions(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) # MSVC doesn't support vdupq_n_f16, vld1q_f16, vst1q_f16
493+
add_compile_definitions(__aarch64__) # MSVC defines _M_ARM64 instead
483494
else()
484495
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
485496
# Raspberry Pi 1, Zero
@@ -494,7 +505,7 @@ if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATC
494505
add_compile_options(-mfp16-format=ieee -mno-unaligned-access)
495506
endif()
496507
endif()
497-
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$")
508+
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "^(x86_64|i686|amd64|x64)$" )
498509
message(STATUS "x86 detected")
499510
if (MSVC)
500511
if (LLAMA_AVX512)

ggml.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ typedef double ggml_float;
283283
// 16-bit float
284284
// on Arm, we use __fp16
285285
// on x86, we use uint16_t
286-
#ifdef __ARM_NEON
286+
#if defined(__ARM_NEON) && !defined(_MSC_VER)
287287

288288
// if YCM cannot find <arm_neon.h>, make a symbolic link to it, for example:
289289
//

ggml.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ extern "C" {
270270

271271
#if defined(__ARM_NEON) && defined(__CUDACC__)
272272
typedef half ggml_fp16_t;
273-
#elif defined(__ARM_NEON)
273+
#elif defined(__ARM_NEON) && !defined(_MSC_VER)
274274
typedef __fp16 ggml_fp16_t;
275275
#else
276276
typedef uint16_t ggml_fp16_t;

k_quants.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2609,7 +2609,10 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
26092609

26102610
memcpy(utmp, x[i].scales, 12);
26112611

2612-
const uint32x2_t mins8 = {utmp[1] & kmask1, ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4)};
2612+
uint32x2_t mins8 = { 0 };
2613+
mins8 = vset_lane_u32(utmp[1] & kmask1, mins8, 0);
2614+
mins8 = vset_lane_u32(((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4), mins8, 1);
2615+
26132616
utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
26142617
utmp[0] &= kmask1;
26152618

0 commit comments

Comments
 (0)