Skip to content

Commit f2f5c3b

Browse files
authored
ggml: separate musa into its own section in the Makefile (#10294)
Signed-off-by: Xiaodong Ye <[email protected]>
1 parent 4e49714 commit f2f5c3b

File tree

2 files changed

+115
-46
lines changed

2 files changed

+115
-46
lines changed

Makefile

Lines changed: 110 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -531,21 +531,10 @@ ifndef GGML_NO_ACCELERATE
531531
endif
532532
endif # GGML_NO_ACCELERATE
533533

534-
ifdef GGML_MUSA
535-
CC := clang
536-
CXX := clang++
537-
GGML_CUDA := 1
538-
MK_CPPFLAGS += -DGGML_USE_MUSA
539-
endif
540-
541534
ifndef GGML_NO_OPENMP
542535
MK_CPPFLAGS += -DGGML_USE_OPENMP
543536
MK_CFLAGS += -fopenmp
544537
MK_CXXFLAGS += -fopenmp
545-
ifdef GGML_MUSA
546-
MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp
547-
MK_LDFLAGS += -L/usr/lib/llvm-10/lib
548-
endif # GGML_MUSA
549538
endif # GGML_NO_OPENMP
550539

551540
ifdef GGML_OPENBLAS
@@ -601,27 +590,15 @@ else
601590
endif # GGML_CUDA_FA_ALL_QUANTS
602591

603592
ifdef GGML_CUDA
604-
ifdef GGML_MUSA
605-
ifneq ('', '$(wildcard /opt/musa)')
606-
CUDA_PATH ?= /opt/musa
607-
else
608-
CUDA_PATH ?= /usr/local/musa
609-
endif
610-
611-
MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include
612-
MK_LDFLAGS += -lmusa -lmublas -lmusart -lpthread -ldl -lrt -L$(CUDA_PATH)/lib -L/usr/lib64
613-
MK_NVCCFLAGS += -x musa -mtgpu --cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22
593+
ifneq ('', '$(wildcard /opt/cuda)')
594+
CUDA_PATH ?= /opt/cuda
614595
else
615-
ifneq ('', '$(wildcard /opt/cuda)')
616-
CUDA_PATH ?= /opt/cuda
617-
else
618-
CUDA_PATH ?= /usr/local/cuda
619-
endif
596+
CUDA_PATH ?= /usr/local/cuda
597+
endif
620598

621-
MK_CPPFLAGS += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
622-
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
623-
MK_NVCCFLAGS += -use_fast_math
624-
endif # GGML_MUSA
599+
MK_CPPFLAGS += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
600+
MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
601+
MK_NVCCFLAGS += -use_fast_math
625602

626603
OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o
627604
OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
@@ -631,11 +608,9 @@ ifdef LLAMA_FATAL_WARNINGS
631608
MK_NVCCFLAGS += -Werror all-warnings
632609
endif # LLAMA_FATAL_WARNINGS
633610

634-
ifndef GGML_MUSA
635611
ifndef JETSON_EOL_MODULE_DETECT
636612
MK_NVCCFLAGS += --forward-unknown-to-host-compiler
637613
endif # JETSON_EOL_MODULE_DETECT
638-
endif # GGML_MUSA
639614

640615
ifdef LLAMA_DEBUG
641616
MK_NVCCFLAGS += -lineinfo
@@ -648,11 +623,7 @@ endif # GGML_CUDA_DEBUG
648623
ifdef GGML_CUDA_NVCC
649624
NVCC = $(CCACHE) $(GGML_CUDA_NVCC)
650625
else
651-
ifdef GGML_MUSA
652-
NVCC = $(CCACHE) mcc
653-
else
654-
NVCC = $(CCACHE) nvcc
655-
endif # GGML_MUSA
626+
NVCC = $(CCACHE) nvcc
656627
endif # GGML_CUDA_NVCC
657628

658629
ifdef CUDA_DOCKER_ARCH
@@ -724,15 +695,9 @@ define NVCC_COMPILE
724695
$(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
725696
endef # NVCC_COMPILE
726697
else
727-
ifdef GGML_MUSA
728-
define NVCC_COMPILE
729-
$(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -c $< -o $@
730-
endef # NVCC_COMPILE
731-
else
732698
define NVCC_COMPILE
733699
$(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
734700
endef # NVCC_COMPILE
735-
endif # GGML_MUSA
736701
endif # JETSON_EOL_MODULE_DETECT
737702

738703
ggml/src/ggml-cuda/%.o: \
@@ -874,6 +839,107 @@ ggml/src/ggml-cuda/%.o: \
874839
$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
875840
endif # GGML_HIPBLAS
876841

842+
# MUSA backend section: active only when GGML_MUSA is defined.
# It picks the MUSA SDK location, switches CC/CXX to the SDK's clang,
# collects MUSAFLAGS from the GGML_CUDA_* tuning knobs, and defines the
# rules that build the ggml/src/ggml-cuda/*.cu sources with mcc.
ifdef GGML_MUSA
843+
# SDK location: /opt/musa if that path exists, otherwise /usr/local/musa.
# `?=` keeps any MUSA_PATH that was already set.
ifeq ($(wildcard /opt/musa),)
844+
MUSA_PATH ?= /usr/local/musa
845+
else
846+
MUSA_PATH ?= /opt/musa
847+
endif # /opt/musa probe
848+
# Target list; each word becomes one --cuda-gpu-arch=<target> flag below.
MTGPU_TARGETS ?= mp_21 mp_22
849+
850+
# GGML_USE_CUDA is defined alongside GGML_USE_MUSA; the objects built in this
# section come from the ggml-cuda source directory.
MK_CPPFLAGS += -DGGML_USE_MUSA -DGGML_USE_CUDA
851+
# Link against the SDK libraries and record the SDK lib directory as an rpath.
MK_LDFLAGS += -L$(MUSA_PATH)/lib -Wl,-rpath=$(MUSA_PATH)/lib
852+
MK_LDFLAGS += -lmusa -lmusart -lmublas
853+
854+
# Extra include/library search paths when OpenMP is not disabled.
# Both the Focal and the Jammy paths are added unconditionally.
ifndef GGML_NO_OPENMP
855+
# For Ubuntu Focal
856+
MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp
857+
MK_LDFLAGS += -L/usr/lib/llvm-10/lib
858+
# For Ubuntu Jammy
859+
MK_CPPFLAGS += -I/usr/lib/llvm-14/lib/clang/14.0.0/include
860+
MK_LDFLAGS += -L/usr/lib/llvm-14/lib
861+
endif # GGML_NO_OPENMP
862+
863+
# Host compilers are replaced (`:=`) with the clang binaries under MUSA_PATH;
# MCC is the device compiler, prefixed with $(CCACHE).
CC := $(MUSA_PATH)/bin/clang
864+
CXX := $(MUSA_PATH)/bin/clang++
865+
MCC := $(CCACHE) $(MUSA_PATH)/bin/mcc
866+
867+
# One --cuda-gpu-arch= flag per entry of MTGPU_TARGETS.
MUSAFLAGS += $(addprefix --cuda-gpu-arch=, $(MTGPU_TARGETS))
868+
869+
# The blocks below translate GGML_CUDA_* make variables into -D defines
# passed to mcc through MUSAFLAGS.
ifdef GGML_CUDA_FORCE_DMMV
870+
MUSAFLAGS += -DGGML_CUDA_FORCE_DMMV
871+
endif # GGML_CUDA_FORCE_DMMV
872+
873+
ifdef GGML_CUDA_FORCE_MMQ
874+
MUSAFLAGS += -DGGML_CUDA_FORCE_MMQ
875+
endif # GGML_CUDA_FORCE_MMQ
876+
877+
ifdef GGML_CUDA_FORCE_CUBLAS
878+
MUSAFLAGS += -DGGML_CUDA_FORCE_CUBLAS
879+
endif # GGML_CUDA_FORCE_CUBLAS
880+
881+
# GGML_CUDA_DMMV_X defaults to 32 when not provided.
ifdef GGML_CUDA_DMMV_X
882+
MUSAFLAGS += -DGGML_CUDA_DMMV_X=$(GGML_CUDA_DMMV_X)
883+
else
884+
MUSAFLAGS += -DGGML_CUDA_DMMV_X=32
885+
endif # GGML_CUDA_DMMV_X
886+
887+
# GGML_CUDA_MMV_Y defaults to 1 when not provided.
ifdef GGML_CUDA_MMV_Y
888+
MUSAFLAGS += -DGGML_CUDA_MMV_Y=$(GGML_CUDA_MMV_Y)
889+
else
890+
MUSAFLAGS += -DGGML_CUDA_MMV_Y=1
891+
endif # GGML_CUDA_MMV_Y
892+
893+
ifdef GGML_CUDA_F16
894+
MUSAFLAGS += -DGGML_CUDA_F16
895+
endif # GGML_CUDA_F16
896+
897+
# GGML_CUDA_DMMV_F16 sets the same define as GGML_CUDA_F16 (not a distinct
# macro); NOTE(review): looks like a compatibility alias - confirm.
ifdef GGML_CUDA_DMMV_F16
898+
MUSAFLAGS += -DGGML_CUDA_F16
899+
endif # GGML_CUDA_DMMV_F16
900+
901+
# K_QUANTS_PER_ITERATION defaults to 2 when GGML_CUDA_KQUANTS_ITER is unset.
ifdef GGML_CUDA_KQUANTS_ITER
902+
MUSAFLAGS += -DK_QUANTS_PER_ITERATION=$(GGML_CUDA_KQUANTS_ITER)
903+
else
904+
MUSAFLAGS += -DK_QUANTS_PER_ITERATION=2
905+
endif # GGML_CUDA_KQUANTS_ITER
906+
907+
# GGML_CUDA_PEER_MAX_BATCH_SIZE defaults to 128 when not provided.
ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE
908+
MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(GGML_CUDA_PEER_MAX_BATCH_SIZE)
909+
else
910+
MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
911+
endif # GGML_CUDA_PEER_MAX_BATCH_SIZE
912+
913+
ifdef GGML_CUDA_NO_PEER_COPY
914+
MUSAFLAGS += -DGGML_CUDA_NO_PEER_COPY
915+
endif # GGML_CUDA_NO_PEER_COPY
916+
917+
ifdef GGML_CUDA_FA_ALL_QUANTS
918+
MUSAFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
919+
endif # GGML_CUDA_FA_ALL_QUANTS
920+
921+
# Objects contributed by this backend: ggml-cuda.o, one object per .cu file in
# ggml/src/ggml-cuda, plus $(OBJ_CUDA_TMPL) (defined outside this section).
OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o
922+
OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
923+
OBJ_GGML += $(OBJ_CUDA_TMPL)
924+
925+
# Explicit rule for ggml-cuda.o: depends on its source, the listed ggml
# headers, and every .cuh in ggml/src/ggml-cuda. Only the first prerequisite
# ($<) is passed to the compiler.
ggml/src/ggml-cuda/ggml-cuda.o: \
926+
ggml/src/ggml-cuda/ggml-cuda.cu \
927+
ggml/include/ggml-cuda.h \
928+
ggml/include/ggml.h \
929+
ggml/include/ggml-backend.h \
930+
ggml/src/ggml-backend-impl.h \
931+
ggml/src/ggml-common.h \
932+
$(wildcard ggml/src/ggml-cuda/*.cuh)
933+
$(MCC) $(CXXFLAGS) $(MUSAFLAGS) -x musa -mtgpu -c -o $@ $<
934+
935+
# Pattern rule for the remaining ggml-cuda objects: each %.o is built from the
# matching %.cu and also depends on ggml.h, ggml-common.h and common.cuh.
ggml/src/ggml-cuda/%.o: \
936+
ggml/src/ggml-cuda/%.cu \
937+
ggml/include/ggml.h \
938+
ggml/src/ggml-common.h \
939+
ggml/src/ggml-cuda/common.cuh
940+
$(MCC) $(CXXFLAGS) $(MUSAFLAGS) -x musa -mtgpu -c -o $@ $<
941+
endif # GGML_MUSA
942+
877943
ifdef GGML_METAL
878944
MK_CPPFLAGS += -DGGML_USE_METAL
879945
MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
@@ -1002,7 +1068,6 @@ $(info I CXX: $(shell $(CXX) --version | head -n 1))
10021068
ifdef GGML_CUDA
10031069
$(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
10041070
CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])')
1005-
ifndef GGML_MUSA
10061071
ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
10071072

10081073
ifndef CUDA_DOCKER_ARCH
@@ -1012,7 +1077,6 @@ endif # CUDA_POWER_ARCH
10121077
endif # CUDA_DOCKER_ARCH
10131078

10141079
endif # eq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
1015-
endif # GGML_MUSA
10161080
endif # GGML_CUDA
10171081
$(info )
10181082

@@ -1283,6 +1347,7 @@ clean:
12831347
rm -vrf ggml/src/ggml-rpc/*.o
12841348
rm -vrf ggml/src/ggml-sycl/*.o
12851349
rm -vrf ggml/src/ggml-vulkan/*.o
1350+
rm -vrf ggml/src/ggml-musa/*.o
12861351
rm -rvf $(BUILD_TARGETS)
12871352
rm -rvf $(TEST_TARGETS)
12881353
rm -f vulkan-shaders-gen ggml/src/ggml-vulkan-shaders.hpp ggml/src/ggml-vulkan-shaders.cpp

ggml/src/ggml-musa/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,9 @@
11
if (NOT EXISTS $ENV{MUSA_PATH})
2-
set(MUSA_PATH /usr/local/musa)
2+
if (NOT EXISTS /opt/musa)
3+
set(MUSA_PATH /usr/local/musa)
4+
else()
5+
set(MUSA_PATH /opt/musa)
6+
endif()
37
else()
48
set(MUSA_PATH $ENV{MUSA_PATH})
59
endif()

0 commit comments

Comments
 (0)