Skip to content

Commit d8b373c

Browse files
committed
Merge branch 'master' into grammar-token
2 parents 8f76ba5 + 917dc8c commit d8b373c

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

51 files changed

+12396
-7226
lines changed

.devops/nix/package.nix

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,6 @@ effectiveStdenv.mkDerivation (
214214
(cmakeBool "LLAMA_CUDA" useCuda)
215215
(cmakeBool "LLAMA_HIPBLAS" useRocm)
216216
(cmakeBool "LLAMA_METAL" useMetalKit)
217-
(cmakeBool "LLAMA_MPI" useMpi)
218217
(cmakeBool "LLAMA_VULKAN" useVulkan)
219218
(cmakeBool "LLAMA_STATIC" enableStatic)
220219
]

.github/labeler.yml

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# https://github.com/actions/labeler
2+
3+
SYCL:
4+
- changed-files:
5+
- any-glob-to-any-file:
6+
- ggml-sycl.h
7+
- ggml-sycl.cpp
8+
- README-sycl.md
9+
Nvidia GPU:
10+
- changed-files:
11+
- any-glob-to-any-file:
12+
- ggml-cuda/**
13+
Vulkan:
14+
- changed-files:
15+
- any-glob-to-any-file:
16+
- ggml_vk_generate_shaders.py
17+
- ggml-vulkan*
18+
documentation:
19+
- changed-files:
20+
- any-glob-to-any-file:
21+
- docs/**
22+
- media/**
23+
testing:
24+
- changed-files:
25+
- any-glob-to-any-file:
26+
- tests/**
27+
build:
28+
- changed-files:
29+
- any-glob-to-any-file:
30+
- cmake/**
31+
- CMakeLists.txt
32+
- CMakePresets.json
33+
- codecov.yml
34+
examples:
35+
- changed-files:
36+
- any-glob-to-any-file: examples/**
37+
devops:
38+
- changed-files:
39+
- any-glob-to-any-file:
40+
- .devops/**
41+
- .github/**
42+
- ci/**
43+
python:
44+
- changed-files:
45+
- any-glob-to-any-file:
46+
- "**/*.py"
47+
- requirements/**
48+
- gguf-py/**
49+
- .flake8
50+
script:
51+
- changed-files:
52+
- any-glob-to-any-file:
53+
- scripts/**
54+
android:
55+
- changed-files:
56+
- any-glob-to-any-file:
57+
- examples/llama.android/**
58+
server:
59+
- changed-files:
60+
- any-glob-to-any-file:
61+
- examples/server/**
62+
ggml:
63+
- changed-files:
64+
- any-glob-to-any-file:
65+
- ggml-*.c
66+
- ggml-*.h
67+
- ggml-cuda/**
68+
nix:
69+
- changed-files:
70+
- any-glob-to-any-file:
71+
- "**/*.nix"
72+
- .github/workflows/nix-*.yml
73+
- .devops/nix/nixpkgs-instances.nix

.github/workflows/build.yml

Lines changed: 7 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -271,49 +271,15 @@ jobs:
271271
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip
272272
name: llama-bin-ubuntu-x64.zip
273273

274-
# ubuntu-latest-cmake-sanitizer:
275-
# runs-on: ubuntu-latest
276-
#
277-
# continue-on-error: true
278-
#
279-
# strategy:
280-
# matrix:
281-
# sanitizer: [ADDRESS, THREAD, UNDEFINED]
282-
# build_type: [Debug, Release]
283-
#
284-
# steps:
285-
# - name: Clone
286-
# id: checkout
287-
# uses: actions/checkout@v4
288-
#
289-
# - name: Dependencies
290-
# id: depends
291-
# run: |
292-
# sudo apt-get update
293-
# sudo apt-get install build-essential
294-
#
295-
# - name: Build
296-
# id: cmake_build
297-
# run: |
298-
# mkdir build
299-
# cd build
300-
# cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
301-
# cmake --build . --config ${{ matrix.build_type }} -j $(nproc)
302-
#
303-
# - name: Test
304-
# id: cmake_test
305-
# run: |
306-
# cd build
307-
# ctest -L main --verbose --timeout 900
308-
309-
ubuntu-latest-cmake-mpi:
274+
ubuntu-latest-cmake-sanitizer:
310275
runs-on: ubuntu-latest
311276

312277
continue-on-error: true
313278

314279
strategy:
315280
matrix:
316-
mpi_library: [mpich, libopenmpi-dev]
281+
sanitizer: [ADDRESS, THREAD, UNDEFINED]
282+
build_type: [Debug, Release]
317283

318284
steps:
319285
- name: Clone
@@ -324,21 +290,21 @@ jobs:
324290
id: depends
325291
run: |
326292
sudo apt-get update
327-
sudo apt-get install build-essential ${{ matrix.mpi_library }}
293+
sudo apt-get install build-essential
328294
329295
- name: Build
330296
id: cmake_build
331297
run: |
332298
mkdir build
333299
cd build
334-
cmake -DLLAMA_MPI=ON ..
335-
cmake --build . --config Release -j $(nproc)
300+
cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
301+
cmake --build . --config ${{ matrix.build_type }} -j $(nproc)
336302
337303
- name: Test
338304
id: cmake_test
339305
run: |
340306
cd build
341-
ctest -L main --verbose
307+
ctest -L main --verbose --timeout 900
342308
343309
ubuntu-latest-cmake-rpc:
344310
runs-on: ubuntu-latest

.github/workflows/labeler.yml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
name: "Pull Request Labeler"
2+
on:
3+
- pull_request_target
4+
5+
jobs:
6+
labeler:
7+
permissions:
8+
contents: read
9+
pull-requests: write
10+
runs-on: ubuntu-latest
11+
steps:
12+
- uses: actions/checkout@v4
13+
with:
14+
repository: "ggerganov/llama.cpp"
15+
- uses: actions/labeler@v5
16+
with:
17+
configuration-path: '.github/labeler.yml'

.github/workflows/server.yml

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,8 @@ jobs:
3232

3333
strategy:
3434
matrix:
35-
# TODO: temporary disabled due to linux kernel issues
36-
#sanitizer: [ADDRESS, THREAD, UNDEFINED]
37-
sanitizer: [UNDEFINED]
38-
build_type: [Debug]
35+
sanitizer: [ADDRESS, THREAD, UNDEFINED]
36+
build_type: [RelWithDebInfo]
3937
include:
4038
- build_type: Release
4139
sanitizer: ""
@@ -102,10 +100,8 @@ jobs:
102100
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
103101
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target server
104102
105-
106103
- name: Tests
107104
id: server_integration_tests
108-
if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
109105
run: |
110106
cd examples/server/tests
111107
PORT=8888 ./tests.sh

CMakeLists.txt

Lines changed: 23 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
1+
cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
22
project("llama.cpp" C CXX)
33
include(CheckIncludeFileCXX)
44

@@ -77,6 +77,7 @@ option(LLAMA_AVX2 "llama: enable AVX2"
7777
option(LLAMA_AVX512 "llama: enable AVX512" OFF)
7878
option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF)
7979
option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF)
80+
option(LLAMA_AVX512_BF16 "llama: enable AVX512-BF16" OFF)
8081
option(LLAMA_FMA "llama: enable FMA" ${INS_ENB})
8182
# in MSVC F16C is implied with AVX2/AVX512
8283
if (NOT MSVC)
@@ -122,7 +123,6 @@ set(LLAMA_METAL_MACOSX_VERSION_MIN "" CACHE STRING
122123
"llama: metal minimum macOS version")
123124
set(LLAMA_METAL_STD "" CACHE STRING "llama: metal standard version (-std flag)")
124125
option(LLAMA_KOMPUTE "llama: use Kompute" OFF)
125-
option(LLAMA_MPI "llama: use MPI" OFF)
126126
option(LLAMA_RPC "llama: use RPC" OFF)
127127
option(LLAMA_QKK_64 "llama: use super-block size of 64 for k-quants" OFF)
128128
option(LLAMA_SYCL "llama: use SYCL" OFF)
@@ -134,6 +134,8 @@ set(LLAMA_SCHED_MAX_COPIES "4" CACHE STRING "llama: max input copies for pipeli
134134
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
135135
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
136136
option(LLAMA_BUILD_SERVER "llama: build server example" ON)
137+
option(LLAMA_LASX "llama: enable lasx" ON)
138+
option(LLAMA_LSX "llama: enable lsx" ON)
137139

138140
# add perf arguments
139141
option(LLAMA_PERF "llama: enable perf" OFF)
@@ -466,35 +468,6 @@ if (LLAMA_CUDA)
466468
endif()
467469
endif()
468470

469-
if (LLAMA_MPI)
470-
cmake_minimum_required(VERSION 3.10)
471-
find_package(MPI)
472-
if (MPI_C_FOUND)
473-
message(STATUS "MPI found")
474-
475-
set(GGML_HEADERS_MPI ggml-mpi.h)
476-
set(GGML_SOURCES_MPI ggml-mpi.c)
477-
478-
add_compile_definitions(GGML_USE_MPI)
479-
add_compile_definitions(${MPI_C_COMPILE_DEFINITIONS})
480-
481-
if (NOT MSVC)
482-
add_compile_options(-Wno-cast-qual)
483-
endif()
484-
485-
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_C_LIBRARIES})
486-
set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${MPI_C_INCLUDE_DIRS})
487-
488-
# Even if you're only using the C header, C++ programs may bring in MPI
489-
# C++ functions, so more linkage is needed
490-
if (MPI_CXX_FOUND)
491-
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_CXX_LIBRARIES})
492-
endif()
493-
else()
494-
message(WARNING "MPI not found")
495-
endif()
496-
endif()
497-
498471
if (LLAMA_RPC)
499472
add_compile_definitions(GGML_USE_RPC)
500473

@@ -1090,6 +1063,10 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
10901063
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
10911064
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
10921065
endif()
1066+
if (LLAMA_AVX512_BF16)
1067+
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>)
1068+
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>)
1069+
endif()
10931070
elseif (LLAMA_AVX2)
10941071
list(APPEND ARCH_FLAGS /arch:AVX2)
10951072
elseif (LLAMA_AVX)
@@ -1121,6 +1098,9 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
11211098
if (LLAMA_AVX512_VNNI)
11221099
list(APPEND ARCH_FLAGS -mavx512vnni)
11231100
endif()
1101+
if (LLAMA_AVX512_BF16)
1102+
list(APPEND ARCH_FLAGS -mavx512bf16)
1103+
endif()
11241104
endif()
11251105
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
11261106
message(STATUS "PowerPC detected")
@@ -1130,6 +1110,17 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
11301110
list(APPEND ARCH_FLAGS -mcpu=native -mtune=native)
11311111
#TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be)
11321112
endif()
1113+
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
1114+
message(STATUS "loongarch64 detected")
1115+
1116+
list(APPEND ARCH_FLAGS -march=loongarch64)
1117+
if (LLAMA_LASX)
1118+
list(APPEND ARCH_FLAGS -mlasx)
1119+
endif()
1120+
if (LLAMA_LSX)
1121+
list(APPEND ARCH_FLAGS -mlsx)
1122+
endif()
1123+
11331124
else()
11341125
message(STATUS "Unknown architecture")
11351126
endif()
@@ -1218,7 +1209,6 @@ add_library(ggml OBJECT
12181209
${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA}
12191210
${GGML_SOURCES_OPENCL} ${GGML_HEADERS_OPENCL}
12201211
${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL}
1221-
${GGML_SOURCES_MPI} ${GGML_HEADERS_MPI}
12221212
${GGML_SOURCES_RPC} ${GGML_HEADERS_RPC}
12231213
${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA}
12241214
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
@@ -1306,7 +1296,7 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake
13061296

13071297
set(GGML_PUBLIC_HEADERS "ggml.h" "ggml-alloc.h" "ggml-backend.h"
13081298
"${GGML_HEADERS_CUDA}" "${GGML_HEADERS_OPENCL}"
1309-
"${GGML_HEADERS_METAL}" "${GGML_HEADERS_MPI}" "${GGML_HEADERS_EXTRA}")
1299+
"${GGML_HEADERS_METAL}" "${GGML_HEADERS_EXTRA}")
13101300

13111301
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
13121302
install(TARGETS ggml PUBLIC_HEADER)

Makefile

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,11 @@ ifneq ($(filter ppc64le%,$(UNAME_M)),)
379379
CUDA_POWER_ARCH = 1
380380
endif
381381

382+
ifneq ($(filter loongarch64%,$(UNAME_M)),)
383+
MK_CFLAGS += -mlasx
384+
MK_CXXFLAGS += -mlasx
385+
endif
386+
382387
else
383388
MK_CFLAGS += -march=rv64gcv -mabi=lp64d
384389
MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
@@ -399,13 +404,6 @@ ifndef LLAMA_NO_ACCELERATE
399404
endif
400405
endif # LLAMA_NO_ACCELERATE
401406

402-
ifdef LLAMA_MPI
403-
MK_CPPFLAGS += -DGGML_USE_MPI
404-
MK_CFLAGS += -Wno-cast-qual
405-
MK_CXXFLAGS += -Wno-cast-qual
406-
OBJS += ggml-mpi.o
407-
endif # LLAMA_MPI
408-
409407
ifdef LLAMA_OPENBLAS
410408
MK_CPPFLAGS += -DGGML_USE_OPENBLAS $(shell pkg-config --cflags-only-I openblas)
411409
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
@@ -629,11 +627,6 @@ ggml-metal-embed.o: ggml-metal.metal ggml-common.h
629627
endif
630628
endif # LLAMA_METAL
631629

632-
ifdef LLAMA_MPI
633-
ggml-mpi.o: ggml-mpi.c ggml-mpi.h
634-
$(CC) $(CFLAGS) -c $< -o $@
635-
endif # LLAMA_MPI
636-
637630
ifndef LLAMA_NO_LLAMAFILE
638631
sgemm.o: sgemm.cpp sgemm.h ggml.h
639632
$(CXX) $(CXXFLAGS) -c $< -o $@

0 commit comments

Comments
 (0)