From 1206b5f3be6509ea7f3611ea1712740e927e2439 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 30 Oct 2023 15:12:54 +0200 Subject: [PATCH 1/5] build : enable link-time optimizations --- CMakeLists.txt | 2 ++ Makefile | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3659279e2d7d0..410fc174a813f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -676,6 +676,8 @@ add_library(ggml OBJECT ${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA} ) +set_property(TARGET ggml PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) + target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES}) target_compile_features(ggml PUBLIC c_std_11) # don't bump target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS}) diff --git a/Makefile b/Makefile index c53c1e7260fef..048ad5d4ef35e 100644 --- a/Makefile +++ b/Makefile @@ -120,12 +120,12 @@ MK_CXXFLAGS = -std=c++11 -fPIC # -Ofast tends to produce faster code, but may not be available for some compilers. 
ifdef LLAMA_FAST -MK_CFLAGS += -Ofast -MK_HOST_CXXFLAGS += -Ofast -MK_CUDA_CXXFLAGS += -O3 +MK_CFLAGS += -flto -Ofast +MK_HOST_CXXFLAGS += -flto -Ofast +MK_CUDA_CXXFLAGS += -flto -O3 else -MK_CFLAGS += -O3 -MK_CXXFLAGS += -O3 +MK_CFLAGS += -flto -O3 +MK_CXXFLAGS += -flto -O3 endif # clock_gettime came in POSIX.1b (1993) From 6f6b0db6d114d1172e92f94238a58f9270b6470c Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 30 Oct 2023 15:40:01 +0200 Subject: [PATCH 2/5] build : disable lto for C++ (make) and enable existing LTO flag (cmake) --- CMakeLists.txt | 4 +--- Makefile | 10 +++++----- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 410fc174a813f..e388f1c220697 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,7 +45,7 @@ endif() # general option(LLAMA_STATIC "llama: static link libraries" OFF) option(LLAMA_NATIVE "llama: enable -march=native flag" ON) -option(LLAMA_LTO "llama: enable link time optimization" OFF) +option(LLAMA_LTO "llama: enable link time optimization" ON) # debug option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) @@ -676,8 +676,6 @@ add_library(ggml OBJECT ${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA} ) -set_property(TARGET ggml PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) - target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES}) target_compile_features(ggml PUBLIC c_std_11) # don't bump target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS}) diff --git a/Makefile b/Makefile index 048ad5d4ef35e..2a2ac8505bda2 100644 --- a/Makefile +++ b/Makefile @@ -120,12 +120,12 @@ MK_CXXFLAGS = -std=c++11 -fPIC # -Ofast tends to produce faster code, but may not be available for some compilers. 
ifdef LLAMA_FAST -MK_CFLAGS += -flto -Ofast -MK_HOST_CXXFLAGS += -flto -Ofast -MK_CUDA_CXXFLAGS += -flto -O3 +MK_CFLAGS += -Ofast -flto +MK_HOST_CXXFLAGS += -Ofast +MK_CUDA_CXXFLAGS += -O3 else -MK_CFLAGS += -flto -O3 -MK_CXXFLAGS += -flto -O3 +MK_CFLAGS += -O3 -flto +MK_CXXFLAGS += -O3 endif # clock_gettime came in POSIX.1b (1993) From a6aba2c85ca6f2c9d549ad7a025504976a6c4312 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 30 Oct 2023 15:43:05 +0200 Subject: [PATCH 3/5] ci : try to fix code coverage build --- .github/workflows/code-coverage.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/code-coverage.yml b/.github/workflows/code-coverage.yml index 392db8a089ac5..250c5f4b232bd 100644 --- a/.github/workflows/code-coverage.yml +++ b/.github/workflows/code-coverage.yml @@ -15,13 +15,13 @@ jobs: - name: Dependencies run: | sudo apt-get update - sudo apt-get install build-essential gcc-8 lcov + sudo apt-get install build-essential gcc-9 g++-9 lcov - name: Build - run: CC=gcc-8 make -j LLAMA_CODE_COVERAGE=1 tests + run: CC=gcc-9 CXX=g++-9 make -j LLAMA_CODE_COVERAGE=1 tests - name: Run tests - run: CC=gcc-8 make test + run: CC=gcc-9 CXX=g++-9 make test - name: Generate coverage report run: | From 57c4296cf06c58a1160d59788fc18f4e434acc8a Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 30 Oct 2023 15:58:40 +0200 Subject: [PATCH 4/5] ci : fix focal build --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5af497a3ce321..5d1ee623f4a0e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -33,17 +33,17 @@ jobs: id: depends run: | sudo apt-get update - sudo apt-get install build-essential gcc-8 + sudo apt-get install build-essential gcc-8 g++-8 - name: Build id: make_build run: | - CC=gcc-8 make -j $(nproc) + CC=gcc-8 CXX=g++-8 make -j $(nproc) - name: Test id: make_test 
run: | - CC=gcc-8 make tests -j $(nproc) + CC=gcc-8 CXX=g++-8 make tests -j $(nproc) make test -j $(nproc) ubuntu-latest-cmake: From bc28aaa8c20b81d6d2da56f954854a87ac6d2219 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 30 Oct 2023 16:00:53 +0200 Subject: [PATCH 5/5] make : use -flto=auto to avoid warnings and maintain perf --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 2a2ac8505bda2..c443cac866779 100644 --- a/Makefile +++ b/Makefile @@ -120,11 +120,11 @@ MK_CXXFLAGS = -std=c++11 -fPIC # -Ofast tends to produce faster code, but may not be available for some compilers. ifdef LLAMA_FAST -MK_CFLAGS += -Ofast -flto +MK_CFLAGS += -Ofast -flto=auto MK_HOST_CXXFLAGS += -Ofast MK_CUDA_CXXFLAGS += -O3 else -MK_CFLAGS += -O3 -flto +MK_CFLAGS += -O3 -flto=auto MK_CXXFLAGS += -O3 endif