diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index c98cbcbbebd0c..bebb7f3db8bad 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -157,15 +157,15 @@ jobs:
       matrix:
         include:
           - build: 'avx2'
-            defines: '-DLLAMA_BUILD_SERVER=ON'
+            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF'
           - build: 'avx'
-            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF'
+            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF -DLLAMA_AVX2=OFF'
           - build: 'avx512'
-            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'clblast'
-            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
+            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF -DLLAMA_CLBLAST=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
           - build: 'openblas'
-            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
+            defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
 
     steps:
       - name: Clone
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 21f4ec9ddd267..531240b02af5a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -38,7 +38,7 @@ endif()
 
 # general
 option(LLAMA_STATIC "llama: static link libraries" OFF)
-option(LLAMA_NATIVE "llama: enable -march=native flag" OFF)
+option(LLAMA_NATIVE "llama: enable -march=native flag" ON)
 option(LLAMA_LTO "llama: enable link time optimization" OFF)
 
 # debug
@@ -109,6 +109,10 @@ else()
     message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.")
 endif()
 
+IF(LLAMA_NATIVE)
+    include(cmake/FindSIMD.cmake)
+ENDIF()
+
 #
 # Compile flags
 #
diff --git a/cmake/FindSIMD.cmake b/cmake/FindSIMD.cmake
new file mode 100644
index 0000000000000..fc8b079bc3167
--- /dev/null
+++ b/cmake/FindSIMD.cmake
@@ -0,0 +1,133 @@
+# Detect which x86 SIMD extensions the build host can execute and switch the
+# matching LLAMA_AVX / LLAMA_AVX2 / LLAMA_AVX512 variables ON or OFF.
+#
+# Each probe below is a small C program that check_c_source_runs() compiles,
+# links and executes, so a probe only succeeds when both the compiler and the
+# machine running the configure step accept the instructions.
+#
+# Every probe includes <immintrin.h>, which declares the __m256/__m512 types
+# and the _mm256_*/_mm512_* intrinsics used here; without the header none of
+# the probes compile and every extension is reported as missing.
+
+include(CheckCSourceRuns)
+
+# AVX: 256-bit float vector type and broadcast.
+set(AVX_CODE "
+    #include <immintrin.h>
+    int main()
+    {
+        __m256 a;
+        a = _mm256_set1_ps(0);
+        return 0;
+    }
+")
+
+# AVX-512: 512-bit integer vector plus a byte-wise compare into a 64-bit mask.
+set(AVX512_CODE "
+    #include <immintrin.h>
+    int main()
+    {
+        __m512i a = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+                                    0, 0, 0, 0, 0, 0, 0, 0,
+                                    0, 0, 0, 0, 0, 0, 0, 0,
+                                    0, 0, 0, 0, 0, 0, 0, 0,
+                                    0, 0, 0, 0, 0, 0, 0, 0,
+                                    0, 0, 0, 0, 0, 0, 0, 0,
+                                    0, 0, 0, 0, 0, 0, 0, 0,
+                                    0, 0, 0, 0, 0, 0, 0, 0);
+        __m512i b = a;
+        __mmask64 equality_mask = _mm512_cmp_epi8_mask(a, b, _MM_CMPINT_EQ);
+        return 0;
+    }
+")
+
+# AVX2: 256-bit integer operations.
+set(AVX2_CODE "
+    #include <immintrin.h>
+    int main()
+    {
+        __m256i a = {0};
+        a = _mm256_abs_epi16(a);
+        __m256i x;
+        _mm256_extract_epi64(x, 0); // we rely on this in our AVX2 code
+        return 0;
+    }
+")
+
+# FMA: fused multiply-add on 256-bit floats.
+# NOTE(review): this probe is defined but never passed to check_sse() in this
+# file -- confirm whether an FMA check was intended.
+set(FMA_CODE "
+    #include <immintrin.h>
+    int main()
+    {
+        __m256 acc = _mm256_setzero_ps();
+        const __m256 d = _mm256_setzero_ps();
+        const __m256 p = _mm256_setzero_ps();
+        acc = _mm256_fmadd_ps( d, p, acc );
+        return 0;
+    }
+")
+
+# check_sse(<type> <flags>)
+#
+# Probe for one instruction-set extension.
+#
+#   type   Prefix naming the probe; the program is read from ${type}_CODE.
+#   flags  ;-separated list of candidate compiler flags, tried in order. Each
+#          one is used as CMAKE_REQUIRED_FLAGS for a build-and-run of the
+#          probe, and the search stops at the first that succeeds.
+#
+# Results are stored in the cache (and marked advanced):
+#   ${type}_FOUND  BOOL    TRUE if some flag worked, otherwise FALSE
+#   ${type}_FLAGS  STRING  the flag that worked, otherwise empty
+#
+# The caller's CMAKE_REQUIRED_FLAGS is saved and restored around the probes.
+macro(check_sse type flags)
+    set(__FLAG_I 1)
+    set(CMAKE_REQUIRED_FLAGS_SAVE "${CMAKE_REQUIRED_FLAGS}")
+    foreach(__FLAG ${flags})
+        if(NOT ${type}_FOUND)
+            set(CMAKE_REQUIRED_FLAGS "${__FLAG}")
+            # A distinct result variable per attempt: check_c_source_runs()
+            # caches its result and would not re-run under a reused name.
+            check_c_source_runs("${${type}_CODE}" HAS_${type}_${__FLAG_I})
+            if(HAS_${type}_${__FLAG_I})
+                set(${type}_FOUND TRUE CACHE BOOL "${type} support")
+                set(${type}_FLAGS "${__FLAG}" CACHE STRING "${type} flags")
+            endif()
+            math(EXPR __FLAG_I "${__FLAG_I}+1")
+        endif()
+    endforeach()
+    set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS_SAVE}")
+
+    if(NOT ${type}_FOUND)
+        set(${type}_FOUND FALSE CACHE BOOL "${type} support")
+        set(${type}_FLAGS "" CACHE STRING "${type} flags")
+    endif()
+
+    mark_as_advanced(${type}_FOUND ${type}_FLAGS)
+endmacro()
+
+# Translate the probe results into the LLAMA_* switches. The " " entry tries
+# the compiler's current flags unchanged; "/arch:..." is the MSVC spelling.
+check_sse("AVX" " ;/arch:AVX")
+if(AVX_FOUND)
+    set(LLAMA_AVX ON)
+else()
+    set(LLAMA_AVX OFF)
+endif()
+
+check_sse("AVX2" " ;/arch:AVX2")
+if(AVX2_FOUND)
+    set(LLAMA_AVX2 ON)
+else()
+    set(LLAMA_AVX2 OFF)
+endif()
+
+check_sse("AVX512" " ;/arch:AVX512")
+if(AVX512_FOUND)
+    set(LLAMA_AVX512 ON)
+else()
+    set(LLAMA_AVX512 OFF)
+endif()