From 5f50d151207b866eb4bf4ec1c799323ae0800545 Mon Sep 17 00:00:00 2001 From: Howard Su Date: Sat, 1 Apr 2023 16:32:14 +0800 Subject: [PATCH 1/4] Add detection code for avx --- CMakeLists.txt | 108 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 21f4ec9ddd267..8321a5d3534d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -109,6 +109,114 @@ else() message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.") endif() +INCLUDE(CheckCSourceRuns) + +SET(AVX_CODE " + #include <immintrin.h> + int main() + { + __m256 a; + a = _mm256_set1_ps(0); + return 0; + } +") + +SET(AVX512_CODE " + #include <immintrin.h> + int main() + { + __m512i a = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0); + __m512i b = a; + __mmask64 equality_mask = _mm512_cmp_epi8_mask(a, b, _MM_CMPINT_EQ); + return 0; + } +") + +SET(AVX2_CODE " + #include <immintrin.h> + int main() + { + __m256i a = {0}; + a = _mm256_abs_epi16(a); + __m256i x; + _mm256_extract_epi64(x, 0); // we rely on this in our AVX2 code + return 0; + } +") + +SET(FMA_CODE " + #include <immintrin.h> + int main() + { + __m256 acc = _mm256_setzero_ps(); + const __m256 d = _mm256_setzero_ps(); + const __m256 p = _mm256_setzero_ps(); + acc = _mm256_fmadd_ps( d, p, acc ); + return 0; + } +") + +MACRO(CHECK_SSE type flags) + SET(__FLAG_I 1) + SET(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS}) + FOREACH(__FLAG ${flags}) + IF(NOT ${type}_FOUND) + SET(CMAKE_REQUIRED_FLAGS ${__FLAG}) + CHECK_C_SOURCE_RUNS("${${type}_CODE}" HAS_${type}_${__FLAG_I}) + IF(HAS_${type}_${__FLAG_I}) + SET(${type}_FOUND TRUE CACHE BOOL "${type} support") + SET(${type}_FLAGS "${__FLAG}" CACHE STRING "${type} flags") + ENDIF() + MATH(EXPR __FLAG_I "${__FLAG_I}+1") + 
ENDIF() + ENDFOREACH() + SET(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE}) + + IF(NOT ${type}_FOUND) + SET(${type}_FOUND FALSE CACHE BOOL "${type} support") + SET(${type}_FLAGS "" CACHE STRING "${type} flags") + ENDIF() + + MARK_AS_ADVANCED(${type}_FOUND ${type}_FLAGS) + +ENDMACRO() + +CHECK_SSE("AVX" " ;-mavx;/arch:AVX") +CHECK_SSE("AVX2" " ;-mavx2 -mfma;/arch:AVX2") +CHECK_SSE("AVX512" " ;-mavx512f -mavx512dq -mavx512vl -mavx512bw -mfma;/arch:AVX512") +CHECK_SSE("FMA" " ;-mfma;") + +IF(${AVX_FOUND}) + set(LLAMA_AVX ON) +ELSE() + set(LLAMA_AVX OFF) +ENDIF() + +IF (${FMA_FOUND}) + set(LLAMA_FMA ON) +ELSE() + set(LLAMA_FMA OFF) +ENDIF() + +IF(${AVX2_FOUND}) + set(LLAMA_AVX2 ON) +ELSE() + set(LLAMA_AVX2 OFF) +ENDIF() + +IF(${AVX512_FOUND}) + set(LLAMA_AVX512 ON) +ELSE() + set(LLAMA_AVX512 OFF) +ENDIF() + # # Compile flags # From 7adce4f64c6b37206be33fff48fe9b8bc9b20bf5 Mon Sep 17 00:00:00 2001 From: Howard Su Date: Fri, 7 Apr 2023 21:04:47 +0800 Subject: [PATCH 2/4] Only check hardware when option is ON --- CMakeLists.txt | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8321a5d3534d9..12c7e46e00e7a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -188,33 +188,32 @@ MACRO(CHECK_SSE type flags) ENDMACRO() -CHECK_SSE("AVX" " ;-mavx;/arch:AVX") -CHECK_SSE("AVX2" " ;-mavx2 -mfma;/arch:AVX2") -CHECK_SSE("AVX512" " ;-mavx512f -mavx512dq -mavx512vl -mavx512bw -mfma;/arch:AVX512") -CHECK_SSE("FMA" " ;-mfma;") - -IF(${AVX_FOUND}) - set(LLAMA_AVX ON) -ELSE() - set(LLAMA_AVX OFF) +IF(${LLAMA_AVX}) + CHECK_SSE("AVX" " ;-mavx;/arch:AVX") + IF(NOT ${AVX_FOUND}) + set(LLAMA_AVX OFF) + ENDIF() ENDIF() -IF (${FMA_FOUND}) - set(LLAMA_FMA ON) -ELSE() - set(LLAMA_FMA OFF) +IF(${LLAMA_AVX2}) + CHECK_SSE("AVX2" " ;-mavx2 -mfma;/arch:AVX2") + IF(NOT ${AVX2_FOUND}) + set(LLAMA_AVX2 OFF) + ENDIF() ENDIF() -IF(${AVX2_FOUND}) - set(LLAMA_AVX2 ON) -ELSE() - set(LLAMA_AVX2 OFF) 
+IF(${LLAMA_AVX512}) + CHECK_SSE("AVX512" " ;-mavx512f -mavx512dq -mavx512vl -mavx512bw -mfma;/arch:AVX512") + IF(NOT ${AVX512_FOUND}) + set(LLAMA_AVX512 OFF) + ENDIF() ENDIF() -IF(${AVX512_FOUND}) - set(LLAMA_AVX512 ON) -ELSE() - set(LLAMA_AVX512 OFF) +IF(${LLAMA_FMA}) + CHECK_SSE("FMA" " ;-mfma;") + IF (NOT ${FMA_FOUND}) + set(LLAMA_FMA OFF) + ENDIF() ENDIF() # From ac072d7c91ca6be205bbcfa0922418027d93f1a9 Mon Sep 17 00:00:00 2001 From: Howard Su Date: Sun, 16 Apr 2023 22:42:06 +0800 Subject: [PATCH 3/4] Modify per code review suggestions --- CMakeLists.txt | 108 ++----------------------------------------- cmake/FindSIMD.cmake | 99 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 105 deletions(-) create mode 100644 cmake/FindSIMD.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 12c7e46e00e7a..cbe1f5bab857c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -109,111 +109,9 @@ else() message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.") endif() -INCLUDE(CheckCSourceRuns) - -SET(AVX_CODE " - #include <immintrin.h> - int main() - { - __m256 a; - a = _mm256_set1_ps(0); - return 0; - } -") - -SET(AVX512_CODE " - #include <immintrin.h> - int main() - { - __m512i a = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0); - __m512i b = a; - __mmask64 equality_mask = _mm512_cmp_epi8_mask(a, b, _MM_CMPINT_EQ); - return 0; - } -") - -SET(AVX2_CODE " - #include <immintrin.h> - int main() - { - __m256i a = {0}; - a = _mm256_abs_epi16(a); - __m256i x; - _mm256_extract_epi64(x, 0); // we rely on this in our AVX2 code - return 0; - } -") - -SET(FMA_CODE " - #include <immintrin.h> - int main() - { - __m256 acc = _mm256_setzero_ps(); - const __m256 d = _mm256_setzero_ps(); - const __m256 p = _mm256_setzero_ps(); - acc = _mm256_fmadd_ps( 
d, p, acc ); - return 0; - } -") - -MACRO(CHECK_SSE type flags) - SET(__FLAG_I 1) - SET(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS}) - FOREACH(__FLAG ${flags}) - IF(NOT ${type}_FOUND) - SET(CMAKE_REQUIRED_FLAGS ${__FLAG}) - CHECK_C_SOURCE_RUNS("${${type}_CODE}" HAS_${type}_${__FLAG_I}) - IF(HAS_${type}_${__FLAG_I}) - SET(${type}_FOUND TRUE CACHE BOOL "${type} support") - SET(${type}_FLAGS "${__FLAG}" CACHE STRING "${type} flags") - ENDIF() - MATH(EXPR __FLAG_I "${__FLAG_I}+1") - ENDIF() - ENDFOREACH() - SET(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE}) - - IF(NOT ${type}_FOUND) - SET(${type}_FOUND FALSE CACHE BOOL "${type} support") - SET(${type}_FLAGS "" CACHE STRING "${type} flags") - ENDIF() - - MARK_AS_ADVANCED(${type}_FOUND ${type}_FLAGS) - -ENDMACRO() - -IF(${LLAMA_AVX}) - CHECK_SSE("AVX" " ;-mavx;/arch:AVX") - IF(NOT ${AVX_FOUND}) - set(LLAMA_AVX OFF) - ENDIF() -ENDIF() - -IF(${LLAMA_AVX2}) - CHECK_SSE("AVX2" " ;-mavx2 -mfma;/arch:AVX2") - IF(NOT ${AVX2_FOUND}) - set(LLAMA_AVX2 OFF) - ENDIF() -ENDIF() - -IF(${LLAMA_AVX512}) - CHECK_SSE("AVX512" " ;-mavx512f -mavx512dq -mavx512vl -mavx512bw -mfma;/arch:AVX512") - IF(NOT ${AVX512_FOUND}) - set(LLAMA_AVX512 OFF) - ENDIF() -ENDIF() - -IF(${LLAMA_FMA}) - CHECK_SSE("FMA" " ;-mfma;") - IF (NOT ${FMA_FOUND}) - set(LLAMA_FMA OFF) - ENDIF() +MESSAGE("NATIVE=" ${LLAMA_NATIVE} " MSVC=" ${MSVC}) +IF(LLAMA_NATIVE AND MSVC) + include(cmake/FindSIMD.cmake) ENDIF() # diff --git a/cmake/FindSIMD.cmake b/cmake/FindSIMD.cmake new file mode 100644 index 0000000000000..fc8b079bc3167 --- /dev/null +++ b/cmake/FindSIMD.cmake @@ -0,0 +1,99 @@ +INCLUDE(CheckCSourceRuns) + +SET(AVX_CODE " + #include <immintrin.h> + int main() + { + __m256 a; + a = _mm256_set1_ps(0); + return 0; + } +") + +SET(AVX512_CODE " + #include <immintrin.h> + int main() + { + __m512i a = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 
0, + 0, 0, 0, 0, 0, 0, 0, 0); + __m512i b = a; + __mmask64 equality_mask = _mm512_cmp_epi8_mask(a, b, _MM_CMPINT_EQ); + return 0; + } +") + +SET(AVX2_CODE " + #include <immintrin.h> + int main() + { + __m256i a = {0}; + a = _mm256_abs_epi16(a); + __m256i x; + _mm256_extract_epi64(x, 0); // we rely on this in our AVX2 code + return 0; + } +") + +SET(FMA_CODE " + #include <immintrin.h> + int main() + { + __m256 acc = _mm256_setzero_ps(); + const __m256 d = _mm256_setzero_ps(); + const __m256 p = _mm256_setzero_ps(); + acc = _mm256_fmadd_ps( d, p, acc ); + return 0; + } +") + +MACRO(CHECK_SSE type flags) + SET(__FLAG_I 1) + SET(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS}) + FOREACH(__FLAG ${flags}) + IF(NOT ${type}_FOUND) + SET(CMAKE_REQUIRED_FLAGS ${__FLAG}) + CHECK_C_SOURCE_RUNS("${${type}_CODE}" HAS_${type}_${__FLAG_I}) + IF(HAS_${type}_${__FLAG_I}) + SET(${type}_FOUND TRUE CACHE BOOL "${type} support") + SET(${type}_FLAGS "${__FLAG}" CACHE STRING "${type} flags") + ENDIF() + MATH(EXPR __FLAG_I "${__FLAG_I}+1") + ENDIF() + ENDFOREACH() + SET(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE}) + + IF(NOT ${type}_FOUND) + SET(${type}_FOUND FALSE CACHE BOOL "${type} support") + SET(${type}_FLAGS "" CACHE STRING "${type} flags") + ENDIF() + + MARK_AS_ADVANCED(${type}_FOUND ${type}_FLAGS) + +ENDMACRO() + +CHECK_SSE("AVX" " ;/arch:AVX") +IF(NOT ${AVX_FOUND}) + set(LLAMA_AVX OFF) +ELSE() + set(LLAMA_AVX ON) +ENDIF() + +CHECK_SSE("AVX2" " ;/arch:AVX2") +IF(NOT ${AVX2_FOUND}) + set(LLAMA_AVX2 OFF) +ELSE() + set(LLAMA_AVX2 ON) +ENDIF() + +CHECK_SSE("AVX512" " ;/arch:AVX512") +IF(NOT ${AVX512_FOUND}) + set(LLAMA_AVX512 OFF) +ELSE() + set(LLAMA_AVX512 ON) +ENDIF() From a8a22ff93fe25f0c9e0a25d83bce2cbd414a9008 Mon Sep 17 00:00:00 2001 From: Howard Su Date: Thu, 1 Jun 2023 23:00:12 +0800 Subject: [PATCH 4/4] Build locally will detect CPU --- .github/workflows/build.yml | 10 +++++----- CMakeLists.txt | 5 ++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build.yml 
b/.github/workflows/build.yml index c98cbcbbebd0c..bebb7f3db8bad 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -157,15 +157,15 @@ jobs: matrix: include: - build: 'avx2' - defines: '-DLLAMA_BUILD_SERVER=ON' + defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF' - build: 'avx' - defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF' + defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF -DLLAMA_AVX2=OFF' - build: 'avx512' - defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON' + defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON' - build: 'clblast' - defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"' + defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"' - build: 'openblas' - defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"' + defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"' steps: - name: Clone diff --git a/CMakeLists.txt b/CMakeLists.txt index cbe1f5bab857c..531240b02af5a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,7 +38,7 @@ endif() # general option(LLAMA_STATIC "llama: static link libraries" OFF) -option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) +option(LLAMA_NATIVE "llama: enable -march=native flag" ON) option(LLAMA_LTO "llama: enable link time optimization" OFF) # debug @@ -109,8 +109,7 @@ else() message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.") endif() -MESSAGE("NATIVE=" ${LLAMA_NATIVE} " MSVC=" ${MSVC}) 
-IF(LLAMA_NATIVE AND MSVC) +IF(LLAMA_NATIVE) include(cmake/FindSIMD.cmake) ENDIF()