Skip to content

Commit 081a86d

Browse files
committed
Merge branch 'master' into sync
ggml-ci
2 parents e50ab5a + 2833a6f commit 081a86d

File tree

9 files changed

+241
-125
lines changed

9 files changed

+241
-125
lines changed

.github/workflows/build.yml

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,7 @@ jobs:
288288
OPENBLAS_VERSION: 0.3.23
289289
OPENCL_VERSION: 2023.04.17
290290
CLBLAST_VERSION: 1.6.0
291+
SDE_VERSION: 9.21.1-2023-04-24
291292

292293
strategy:
293294
matrix:
@@ -383,11 +384,23 @@ jobs:
383384
384385
- name: Test
385386
id: cmake_test
386-
if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # Test AVX-512 only when possible
387+
if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # not all machines have native AVX-512
387388
run: |
388389
cd build
389390
ctest -C Release --verbose --timeout 900
390391
392+
- name: Test (Intel SDE)
393+
id: cmake_test_sde
394+
if: ${{ matrix.build == 'avx512' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
395+
run: |
396+
curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/777395/sde-external-${env:SDE_VERSION}-win.tar.xz"
397+
# for some weird reason windows tar doesn't like sde tar.xz
398+
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
399+
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
400+
$sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
401+
cd build
402+
& $sde -future -- ctest -C Release --verbose --timeout 900
403+
391404
- name: Determine tag name
392405
id: tag
393406
shell: bash

CMakeLists.txt

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ endif()
1010

1111
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
1212

13-
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
13+
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
1414
set(LLAMA_STANDALONE ON)
1515

1616
# configure project version
@@ -44,7 +44,7 @@ endif()
4444

4545
# general
4646
option(LLAMA_STATIC "llama: static link libraries" OFF)
47-
option(LLAMA_NATIVE "llama: enable -march=native flag" OFF)
47+
option(LLAMA_NATIVE "llama: enable -march=native flag" ON)
4848
option(LLAMA_LTO "llama: enable link time optimization" OFF)
4949

5050
# debug
@@ -510,6 +510,10 @@ if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATC
510510
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "^(x86_64|i686|amd64|x64)$" )
511511
message(STATUS "x86 detected")
512512
if (MSVC)
513+
# instruction set detection for MSVC only
514+
if (LLAMA_NATIVE)
515+
include(cmake/FindSIMD.cmake)
516+
endif ()
513517
if (LLAMA_AVX512)
514518
add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX512>)
515519
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX512>)

cmake/FindSIMD.cmake

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
include(CheckCSourceRuns)
2+
3+
# Probe program for AVX: exercises the 256-bit float intrinsic _mm256_set1_ps.
# check_sse() looks each of these strings up by name, as ${<type>_CODE}.
set(AVX_CODE "
4+
#include <immintrin.h>
5+
int main()
6+
{
7+
__m256 a;
8+
a = _mm256_set1_ps(0);
9+
return 0;
10+
}
11+
")
12+
13+
# Probe program for AVX-512: builds a 512-bit integer vector with
# _mm512_set_epi8 and compares it byte-wise with _mm512_cmp_epi8_mask.
set(AVX512_CODE "
14+
#include <immintrin.h>
15+
int main()
16+
{
17+
__m512i a = _mm512_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
18+
0, 0, 0, 0, 0, 0, 0, 0,
19+
0, 0, 0, 0, 0, 0, 0, 0,
20+
0, 0, 0, 0, 0, 0, 0, 0,
21+
0, 0, 0, 0, 0, 0, 0, 0,
22+
0, 0, 0, 0, 0, 0, 0, 0,
23+
0, 0, 0, 0, 0, 0, 0, 0,
24+
0, 0, 0, 0, 0, 0, 0, 0);
25+
__m512i b = a;
26+
__mmask64 equality_mask = _mm512_cmp_epi8_mask(a, b, _MM_CMPINT_EQ);
27+
return 0;
28+
}
29+
")
30+
31+
# Probe program for AVX2: exercises _mm256_abs_epi16 and _mm256_extract_epi64.
# The latter is included on purpose; see the inline note in the C source.
set(AVX2_CODE "
32+
#include <immintrin.h>
33+
int main()
34+
{
35+
__m256i a = {0};
36+
a = _mm256_abs_epi16(a);
37+
__m256i x;
38+
_mm256_extract_epi64(x, 0); // we rely on this in our AVX2 code
39+
return 0;
40+
}
41+
")
42+
43+
# Probe program for FMA: exercises the fused multiply-add intrinsic
# _mm256_fmadd_ps on 256-bit float vectors.
set(FMA_CODE "
44+
#include <immintrin.h>
45+
int main()
46+
{
47+
__m256 acc = _mm256_setzero_ps();
48+
const __m256 d = _mm256_setzero_ps();
49+
const __m256 p = _mm256_setzero_ps();
50+
acc = _mm256_fmadd_ps( d, p, acc );
51+
return 0;
52+
}
53+
")
54+
55+
# check_sse(<type> <flags>)
#
# Detects support for the instruction set <type> by compiling and running the
# probe program stored in ${<type>_CODE} (via check_c_source_runs) once per
# candidate in the ;-separated list <flags>, in order. Candidates after the
# first one that succeeds are skipped.
#
# Results are stored as cache entries and marked advanced:
#   <type>_FOUND - TRUE if some candidate worked, FALSE otherwise
#   <type>_FLAGS - the candidate that worked, or "" if none did
#
# CMAKE_REQUIRED_FLAGS is saved before the probes and restored afterwards.
# This is a macro, so it has no scope of its own: the helper variables
# __FLAG_I, __FLAG and CMAKE_REQUIRED_FLAGS_SAVE remain set in the caller.
macro(check_sse type flags)
56+
set(__FLAG_I 1)
57+
set(CMAKE_REQUIRED_FLAGS_SAVE ${CMAKE_REQUIRED_FLAGS})
58+
foreach (__FLAG ${flags})
59+
# once a candidate has succeeded, the remaining ones are not probed
if (NOT ${type}_FOUND)
60+
# the probe is built with this candidate as the only required flag
set(CMAKE_REQUIRED_FLAGS ${__FLAG})
61+
# the index suffix gives every candidate its own result variable
check_c_source_runs("${${type}_CODE}" HAS_${type}_${__FLAG_I})
62+
if (HAS_${type}_${__FLAG_I})
63+
set(${type}_FOUND TRUE CACHE BOOL "${type} support")
64+
set(${type}_FLAGS "${__FLAG}" CACHE STRING "${type} flags")
65+
endif()
66+
math(EXPR __FLAG_I "${__FLAG_I}+1")
67+
endif()
68+
endforeach()
69+
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_SAVE})
70+
71+
# no candidate worked: record the negative result in the cache as well
if (NOT ${type}_FOUND)
72+
set(${type}_FOUND FALSE CACHE BOOL "${type} support")
73+
set(${type}_FLAGS "" CACHE STRING "${type} flags")
74+
endif()
75+
76+
mark_as_advanced(${type}_FOUND ${type}_FLAGS)
77+
endmacro()
78+
79+
# flags are for MSVC only!
# Run the probes and switch LLAMA_AVX / LLAMA_AVX2 / LLAMA_AVX512 ON or OFF
# according to the results. Each candidate list tries a blank flag first and
# then the matching /arch: switch. The set() calls below are plain assignments
# (normal variables), not cache entries.
80+
check_sse("AVX" " ;/arch:AVX")
81+
if (NOT ${AVX_FOUND})
82+
set(LLAMA_AVX OFF)
83+
else()
84+
set(LLAMA_AVX ON)
85+
endif()
86+
87+
# LLAMA_AVX2 is enabled only when both the AVX2 probe and the FMA probe
# succeed; both are tried with the /arch:AVX2 switch.
check_sse("AVX2" " ;/arch:AVX2")
88+
check_sse("FMA" " ;/arch:AVX2")
89+
if ((NOT ${AVX2_FOUND}) OR (NOT ${FMA_FOUND}))
90+
set(LLAMA_AVX2 OFF)
91+
else()
92+
set(LLAMA_AVX2 ON)
93+
endif()
94+
95+
# AVX-512 follows the same pattern as AVX above
check_sse("AVX512" " ;/arch:AVX512")
96+
if (NOT ${AVX512_FOUND})
97+
set(LLAMA_AVX512 OFF)
98+
else()
99+
set(LLAMA_AVX512 ON)
100+
endif()

common/common.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,19 @@ void process_escapes(std::string& input) {
9090
case '\'': input[output_idx++] = '\''; break;
9191
case '\"': input[output_idx++] = '\"'; break;
9292
case '\\': input[output_idx++] = '\\'; break;
93+
case 'x':
94+
// Handle \x12, etc
95+
if (input_idx + 2 < input_len) {
96+
const char x[3] = { input[input_idx + 1], input[input_idx + 2], 0 };
97+
char *err_p = nullptr;
98+
const long val = std::strtol(x, &err_p, 16);
99+
if (err_p == x + 2) {
100+
input_idx += 2;
101+
input[output_idx++] = char(val);
102+
break;
103+
}
104+
}
105+
// fall through
93106
default: input[output_idx++] = '\\';
94107
input[output_idx++] = input[input_idx]; break;
95108
}

examples/server/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ Command line options:
77
- `--threads N`, `-t N`: Set the number of threads to use during generation.
88
- `-tb N, --threads-batch N`: Set the number of threads to use during batch and prompt processing. If not specified, the number of threads will be set to the number of threads used for generation.
99
- `-m FNAME`, `--model FNAME`: Specify the path to the LLaMA model file (e.g., `models/7B/ggml-model.gguf`).
10-
- `-m ALIAS`, `--alias ALIAS`: Set an alias for the model. The alias will be returned in API responses.
10+
- `-a ALIAS`, `--alias ALIAS`: Set an alias for the model. The alias will be returned in API responses.
1111
- `-c N`, `--ctx-size N`: Set the size of the prompt context. The default is 512, but LLaMA models were built with a context of 2048, which will provide better results for longer input/inference. The size may differ in other models, for example, baichuan models were built with a context of 4096.
1212
- `-ngl N`, `--n-gpu-layers N`: When compiled with appropriate support (currently CLBlast or cuBLAS), this option allows offloading some layers to the GPU for computation. Generally results in increased performance.
1313
- `-mg i, --main-gpu i`: When using multiple GPUs this option controls which GPU is used for small tensors for which the overhead of splitting the computation across all GPUs is not worthwhile. The GPU in question will use slightly more VRAM to store a scratch buffer for temporary results. By default GPU 0 is used. Requires cuBLAS.

0 commit comments

Comments
 (0)