Skip to content

ggml: separate musa into its own section in the Makefile #10294

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 110 additions & 45 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -531,21 +531,10 @@ ifndef GGML_NO_ACCELERATE
endif
endif # GGML_NO_ACCELERATE

ifdef GGML_MUSA
CC := clang
CXX := clang++
GGML_CUDA := 1
MK_CPPFLAGS += -DGGML_USE_MUSA
endif

ifndef GGML_NO_OPENMP
MK_CPPFLAGS += -DGGML_USE_OPENMP
MK_CFLAGS += -fopenmp
MK_CXXFLAGS += -fopenmp
ifdef GGML_MUSA
MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp
MK_LDFLAGS += -L/usr/lib/llvm-10/lib
endif # GGML_MUSA
endif # GGML_NO_OPENMP

ifdef GGML_OPENBLAS
Expand Down Expand Up @@ -601,27 +590,15 @@ else
endif # GGML_CUDA_FA_ALL_QUANTS

ifdef GGML_CUDA
ifdef GGML_MUSA
ifneq ('', '$(wildcard /opt/musa)')
CUDA_PATH ?= /opt/musa
else
CUDA_PATH ?= /usr/local/musa
endif

MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include
MK_LDFLAGS += -lmusa -lmublas -lmusart -lpthread -ldl -lrt -L$(CUDA_PATH)/lib -L/usr/lib64
MK_NVCCFLAGS += -x musa -mtgpu --cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22
ifneq ('', '$(wildcard /opt/cuda)')
CUDA_PATH ?= /opt/cuda
else
ifneq ('', '$(wildcard /opt/cuda)')
CUDA_PATH ?= /opt/cuda
else
CUDA_PATH ?= /usr/local/cuda
endif
CUDA_PATH ?= /usr/local/cuda
endif

MK_CPPFLAGS += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
MK_NVCCFLAGS += -use_fast_math
endif # GGML_MUSA
MK_CPPFLAGS += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
MK_NVCCFLAGS += -use_fast_math

OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o
OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
Expand All @@ -631,11 +608,9 @@ ifdef LLAMA_FATAL_WARNINGS
MK_NVCCFLAGS += -Werror all-warnings
endif # LLAMA_FATAL_WARNINGS

ifndef GGML_MUSA
ifndef JETSON_EOL_MODULE_DETECT
MK_NVCCFLAGS += --forward-unknown-to-host-compiler
endif # JETSON_EOL_MODULE_DETECT
endif # GGML_MUSA

ifdef LLAMA_DEBUG
MK_NVCCFLAGS += -lineinfo
Expand All @@ -648,11 +623,7 @@ endif # GGML_CUDA_DEBUG
ifdef GGML_CUDA_NVCC
NVCC = $(CCACHE) $(GGML_CUDA_NVCC)
else
ifdef GGML_MUSA
NVCC = $(CCACHE) mcc
else
NVCC = $(CCACHE) nvcc
endif # GGML_MUSA
NVCC = $(CCACHE) nvcc
endif # GGML_CUDA_NVCC

ifdef CUDA_DOCKER_ARCH
Expand Down Expand Up @@ -724,15 +695,9 @@ define NVCC_COMPILE
$(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
endef # NVCC_COMPILE
else
ifdef GGML_MUSA
define NVCC_COMPILE
$(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -c $< -o $@
endef # NVCC_COMPILE
else
define NVCC_COMPILE
$(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
endef # NVCC_COMPILE
endif # GGML_MUSA
endif # JETSON_EOL_MODULE_DETECT

ggml/src/ggml-cuda/%.o: \
Expand Down Expand Up @@ -874,6 +839,107 @@ ggml/src/ggml-cuda/%.o: \
$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
endif # GGML_HIPBLAS

ifdef GGML_MUSA
ifeq ($(wildcard /opt/musa),)
MUSA_PATH ?= /usr/local/musa
else
MUSA_PATH ?= /opt/musa
endif
MTGPU_TARGETS ?= mp_21 mp_22

MK_CPPFLAGS += -DGGML_USE_MUSA -DGGML_USE_CUDA
MK_LDFLAGS += -L$(MUSA_PATH)/lib -Wl,-rpath=$(MUSA_PATH)/lib
MK_LDFLAGS += -lmusa -lmusart -lmublas

ifndef GGML_NO_OPENMP
# For Ubuntu Focal
MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp
MK_LDFLAGS += -L/usr/lib/llvm-10/lib
# For Ubuntu Jammy
MK_CPPFLAGS += -I/usr/lib/llvm-14/lib/clang/14.0.0/include
MK_LDFLAGS += -L/usr/lib/llvm-14/lib
endif # GGML_NO_OPENMP

CC := $(MUSA_PATH)/bin/clang
CXX := $(MUSA_PATH)/bin/clang++
MCC := $(CCACHE) $(MUSA_PATH)/bin/mcc

MUSAFLAGS += $(addprefix --cuda-gpu-arch=, $(MTGPU_TARGETS))

ifdef GGML_CUDA_FORCE_DMMV
MUSAFLAGS += -DGGML_CUDA_FORCE_DMMV
endif # GGML_CUDA_FORCE_DMMV

ifdef GGML_CUDA_FORCE_MMQ
MUSAFLAGS += -DGGML_CUDA_FORCE_MMQ
endif # GGML_CUDA_FORCE_MMQ

ifdef GGML_CUDA_FORCE_CUBLAS
MUSAFLAGS += -DGGML_CUDA_FORCE_CUBLAS
endif # GGML_CUDA_FORCE_CUBLAS

ifdef GGML_CUDA_DMMV_X
MUSAFLAGS += -DGGML_CUDA_DMMV_X=$(GGML_CUDA_DMMV_X)
else
MUSAFLAGS += -DGGML_CUDA_DMMV_X=32
endif # GGML_CUDA_DMMV_X

ifdef GGML_CUDA_MMV_Y
MUSAFLAGS += -DGGML_CUDA_MMV_Y=$(GGML_CUDA_MMV_Y)
else
MUSAFLAGS += -DGGML_CUDA_MMV_Y=1
endif # GGML_CUDA_MMV_Y

ifdef GGML_CUDA_F16
MUSAFLAGS += -DGGML_CUDA_F16
endif # GGML_CUDA_F16

ifdef GGML_CUDA_DMMV_F16
MUSAFLAGS += -DGGML_CUDA_F16
endif # GGML_CUDA_DMMV_F16

ifdef GGML_CUDA_KQUANTS_ITER
MUSAFLAGS += -DK_QUANTS_PER_ITERATION=$(GGML_CUDA_KQUANTS_ITER)
else
MUSAFLAGS += -DK_QUANTS_PER_ITERATION=2
endif

ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE
MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(GGML_CUDA_PEER_MAX_BATCH_SIZE)
else
MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
endif # GGML_CUDA_PEER_MAX_BATCH_SIZE

ifdef GGML_CUDA_NO_PEER_COPY
MUSAFLAGS += -DGGML_CUDA_NO_PEER_COPY
endif # GGML_CUDA_NO_PEER_COPY

ifdef GGML_CUDA_FA_ALL_QUANTS
MUSAFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
endif # GGML_CUDA_FA_ALL_QUANTS

OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o
OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
OBJ_GGML += $(OBJ_CUDA_TMPL)

ggml/src/ggml-cuda/ggml-cuda.o: \
ggml/src/ggml-cuda/ggml-cuda.cu \
ggml/include/ggml-cuda.h \
ggml/include/ggml.h \
ggml/include/ggml-backend.h \
ggml/src/ggml-backend-impl.h \
ggml/src/ggml-common.h \
$(wildcard ggml/src/ggml-cuda/*.cuh)
$(MCC) $(CXXFLAGS) $(MUSAFLAGS) -x musa -mtgpu -c -o $@ $<

ggml/src/ggml-cuda/%.o: \
ggml/src/ggml-cuda/%.cu \
ggml/include/ggml.h \
ggml/src/ggml-common.h \
ggml/src/ggml-cuda/common.cuh
$(MCC) $(CXXFLAGS) $(MUSAFLAGS) -x musa -mtgpu -c -o $@ $<
endif # GGML_MUSA

ifdef GGML_METAL
MK_CPPFLAGS += -DGGML_USE_METAL
MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
Expand Down Expand Up @@ -1002,7 +1068,6 @@ $(info I CXX: $(shell $(CXX) --version | head -n 1))
ifdef GGML_CUDA
$(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])')
ifndef GGML_MUSA
ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)

ifndef CUDA_DOCKER_ARCH
Expand All @@ -1012,7 +1077,6 @@ endif # CUDA_POWER_ARCH
endif # CUDA_DOCKER_ARCH

endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1)
endif # GGML_MUSA
endif # GGML_CUDA
$(info )

Expand Down Expand Up @@ -1283,6 +1347,7 @@ clean:
rm -vrf ggml/src/ggml-rpc/*.o
rm -vrf ggml/src/ggml-sycl/*.o
rm -vrf ggml/src/ggml-vulkan/*.o
rm -vrf ggml/src/ggml-musa/*.o
rm -rvf $(BUILD_TARGETS)
rm -rvf $(TEST_TARGETS)
rm -f vulkan-shaders-gen ggml/src/ggml-vulkan-shaders.hpp ggml/src/ggml-vulkan-shaders.cpp
Expand Down
6 changes: 5 additions & 1 deletion ggml/src/ggml-musa/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
if (NOT EXISTS $ENV{MUSA_PATH})
set(MUSA_PATH /usr/local/musa)
if (NOT EXISTS /opt/musa)
set(MUSA_PATH /usr/local/musa)
else()
set(MUSA_PATH /opt/musa)
endif()
else()
set(MUSA_PATH $ENV{MUSA_PATH})
endif()
Expand Down
Loading