@@ -531,21 +531,10 @@ ifndef GGML_NO_ACCELERATE
531
531
endif
532
532
endif # GGML_NO_ACCELERATE
533
533
534
- ifdef GGML_MUSA
535
- CC := clang
536
- CXX := clang++
537
- GGML_CUDA := 1
538
- MK_CPPFLAGS += -DGGML_USE_MUSA
539
- endif
540
-
541
534
ifndef GGML_NO_OPENMP
542
535
MK_CPPFLAGS += -DGGML_USE_OPENMP
543
536
MK_CFLAGS += -fopenmp
544
537
MK_CXXFLAGS += -fopenmp
545
- ifdef GGML_MUSA
546
- MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp
547
- MK_LDFLAGS += -L/usr/lib/llvm-10/lib
548
- endif # GGML_MUSA
549
538
endif # GGML_NO_OPENMP
550
539
551
540
ifdef GGML_OPENBLAS
@@ -601,27 +590,15 @@ else
601
590
endif # GGML_CUDA_FA_ALL_QUANTS
602
591
603
592
ifdef GGML_CUDA
604
- ifdef GGML_MUSA
605
- ifneq ('', '$(wildcard /opt/musa)')
606
- CUDA_PATH ?= /opt/musa
607
- else
608
- CUDA_PATH ?= /usr/local/musa
609
- endif
610
-
611
- MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include
612
- MK_LDFLAGS += -lmusa -lmublas -lmusart -lpthread -ldl -lrt -L$(CUDA_PATH)/lib -L/usr/lib64
613
- MK_NVCCFLAGS += -x musa -mtgpu --cuda-gpu-arch=mp_21 --cuda-gpu-arch=mp_22
593
+ ifneq ('', '$(wildcard /opt/cuda)')
594
+ CUDA_PATH ?= /opt/cuda
614
595
else
615
- ifneq ('', '$(wildcard /opt/cuda)')
616
- CUDA_PATH ?= /opt/cuda
617
- else
618
- CUDA_PATH ?= /usr/local/cuda
619
- endif
596
+ CUDA_PATH ?= /usr/local/cuda
597
+ endif
620
598
621
- MK_CPPFLAGS += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
622
- MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
623
- MK_NVCCFLAGS += -use_fast_math
624
- endif # GGML_MUSA
599
+ MK_CPPFLAGS += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
600
+ MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
601
+ MK_NVCCFLAGS += -use_fast_math
625
602
626
603
OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o
627
604
OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
@@ -631,11 +608,9 @@ ifdef LLAMA_FATAL_WARNINGS
631
608
MK_NVCCFLAGS += -Werror all-warnings
632
609
endif # LLAMA_FATAL_WARNINGS
633
610
634
- ifndef GGML_MUSA
635
611
ifndef JETSON_EOL_MODULE_DETECT
636
612
MK_NVCCFLAGS += --forward-unknown-to-host-compiler
637
613
endif # JETSON_EOL_MODULE_DETECT
638
- endif # GGML_MUSA
639
614
640
615
ifdef LLAMA_DEBUG
641
616
MK_NVCCFLAGS += -lineinfo
@@ -648,11 +623,7 @@ endif # GGML_CUDA_DEBUG
648
623
ifdef GGML_CUDA_NVCC
649
624
NVCC = $(CCACHE) $(GGML_CUDA_NVCC)
650
625
else
651
- ifdef GGML_MUSA
652
- NVCC = $(CCACHE) mcc
653
- else
654
- NVCC = $(CCACHE) nvcc
655
- endif # GGML_MUSA
626
+ NVCC = $(CCACHE) nvcc
656
627
endif # GGML_CUDA_NVCC
657
628
658
629
ifdef CUDA_DOCKER_ARCH
@@ -724,15 +695,9 @@ define NVCC_COMPILE
724
695
$(NVCC ) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS ) $(CPPFLAGS ) -Xcompiler "$(CUDA_CXXFLAGS ) " -c $< -o $@
725
696
endef # NVCC_COMPILE
726
697
else
727
- ifdef GGML_MUSA
728
- define NVCC_COMPILE
729
- $(NVCC ) $(NVCCFLAGS ) $(CPPFLAGS ) -c $< -o $@
730
- endef # NVCC_COMPILE
731
- else
732
698
define NVCC_COMPILE
733
699
$(NVCC ) $(NVCCFLAGS ) $(CPPFLAGS ) -Xcompiler "$(CUDA_CXXFLAGS ) " -c $< -o $@
734
700
endef # NVCC_COMPILE
735
- endif # GGML_MUSA
736
701
endif # JETSON_EOL_MODULE_DETECT
737
702
738
703
ggml/src/ggml-cuda/% .o : \
@@ -874,6 +839,107 @@ ggml/src/ggml-cuda/%.o: \
874
839
$(HIPCC ) $(CXXFLAGS ) $(HIPFLAGS ) -x hip -c -o $@ $<
875
840
endif # GGML_HIPBLAS
876
841
842
ifdef GGML_MUSA
  # MUSA backend.
  #
  # Builds the sources under ggml/src/ggml-cuda/ with the `mcc` compiler taken
  # from $(MUSA_PATH)/bin, and links against the musa / musart / mublas
  # libraries in $(MUSA_PATH)/lib.
  #
  # User-tunable variables (all assigned with ?= so they can be overridden):
  #   MUSA_PATH      toolkit root; /opt/musa when that directory exists,
  #                  otherwise /usr/local/musa
  #   MTGPU_TARGETS  list of GPU architectures; each entry becomes one
  #                  --cuda-gpu-arch=<entry> compiler flag
  #
  # NOTE(review): this section defines rules for the same ggml/src/ggml-cuda/
  # object files as the GGML_CUDA section, so enabling both at once looks
  # unsupported -- confirm.
  ifeq ($(wildcard /opt/musa),)
    MUSA_PATH ?= /usr/local/musa
  else
    MUSA_PATH ?= /opt/musa
  endif
  MTGPU_TARGETS ?= mp_21 mp_22

  # Both macros are defined: GGML_USE_MUSA in addition to GGML_USE_CUDA.
  MK_CPPFLAGS += -DGGML_USE_MUSA -DGGML_USE_CUDA
  MK_LDFLAGS  += -L$(MUSA_PATH)/lib -Wl,-rpath=$(MUSA_PATH)/lib
  MK_LDFLAGS  += -lmusa -lmusart -lmublas

  # Extra OpenMP search paths. Both sets are added unconditionally whenever
  # OpenMP is enabled.
  ifndef GGML_NO_OPENMP
    # For Ubuntu Focal
    MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp
    MK_LDFLAGS  += -L/usr/lib/llvm-10/lib
    # For Ubuntu Jammy
    MK_CPPFLAGS += -I/usr/lib/llvm-14/lib/clang/14.0.0/include
    MK_LDFLAGS  += -L/usr/lib/llvm-14/lib
  endif # GGML_NO_OPENMP

  # The host C/C++ compilers are replaced by the clang shipped in the toolkit.
  CC  := $(MUSA_PATH)/bin/clang
  CXX := $(MUSA_PATH)/bin/clang++
  MCC := $(CCACHE) $(MUSA_PATH)/bin/mcc

  MUSAFLAGS += $(addprefix --cuda-gpu-arch=,$(MTGPU_TARGETS))

  # Optional feature switches: each GGML_CUDA_* make variable is forwarded to
  # the compiler as a preprocessor definition.
  ifdef GGML_CUDA_FORCE_DMMV
    MUSAFLAGS += -DGGML_CUDA_FORCE_DMMV
  endif # GGML_CUDA_FORCE_DMMV

  ifdef GGML_CUDA_FORCE_MMQ
    MUSAFLAGS += -DGGML_CUDA_FORCE_MMQ
  endif # GGML_CUDA_FORCE_MMQ

  ifdef GGML_CUDA_FORCE_CUBLAS
    MUSAFLAGS += -DGGML_CUDA_FORCE_CUBLAS
  endif # GGML_CUDA_FORCE_CUBLAS

  # Tunables with a built-in default (32) when the variable is not set.
  ifdef GGML_CUDA_DMMV_X
    MUSAFLAGS += -DGGML_CUDA_DMMV_X=$(GGML_CUDA_DMMV_X)
  else
    MUSAFLAGS += -DGGML_CUDA_DMMV_X=32
  endif # GGML_CUDA_DMMV_X

  # Default: 1
  ifdef GGML_CUDA_MMV_Y
    MUSAFLAGS += -DGGML_CUDA_MMV_Y=$(GGML_CUDA_MMV_Y)
  else
    MUSAFLAGS += -DGGML_CUDA_MMV_Y=1
  endif # GGML_CUDA_MMV_Y

  ifdef GGML_CUDA_F16
    MUSAFLAGS += -DGGML_CUDA_F16
  endif # GGML_CUDA_F16

  # Defines the same macro as GGML_CUDA_F16 above (presumably kept as an
  # alias for the older variable name -- confirm).
  ifdef GGML_CUDA_DMMV_F16
    MUSAFLAGS += -DGGML_CUDA_F16
  endif # GGML_CUDA_DMMV_F16

  # Default: 2
  ifdef GGML_CUDA_KQUANTS_ITER
    MUSAFLAGS += -DK_QUANTS_PER_ITERATION=$(GGML_CUDA_KQUANTS_ITER)
  else
    MUSAFLAGS += -DK_QUANTS_PER_ITERATION=2
  endif # GGML_CUDA_KQUANTS_ITER

  # Default: 128
  ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE
    MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(GGML_CUDA_PEER_MAX_BATCH_SIZE)
  else
    MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
  endif # GGML_CUDA_PEER_MAX_BATCH_SIZE

  ifdef GGML_CUDA_NO_PEER_COPY
    MUSAFLAGS += -DGGML_CUDA_NO_PEER_COPY
  endif # GGML_CUDA_NO_PEER_COPY

  ifdef GGML_CUDA_FA_ALL_QUANTS
    MUSAFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
  endif # GGML_CUDA_FA_ALL_QUANTS

  # One object per .cu file found in ggml/src/ggml-cuda/, plus whatever
  # OBJ_CUDA_TMPL lists (defined outside this section).
  OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o
  OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
  OBJ_GGML += $(OBJ_CUDA_TMPL)

# ggml-cuda.o additionally depends on the public ggml headers and on every
# .cuh header in ggml/src/ggml-cuda/. Only the first prerequisite ($<, the
# .cu file) is passed to the compiler.
ggml/src/ggml-cuda/ggml-cuda.o: \
	ggml/src/ggml-cuda/ggml-cuda.cu \
	ggml/include/ggml-cuda.h \
	ggml/include/ggml.h \
	ggml/include/ggml-backend.h \
	ggml/src/ggml-backend-impl.h \
	ggml/src/ggml-common.h \
	$(wildcard ggml/src/ggml-cuda/*.cuh)
	$(MCC) $(CXXFLAGS) $(MUSAFLAGS) -x musa -mtgpu -c -o $@ $<

# Every other object in ggml/src/ggml-cuda/ is built from the .cu file that
# shares its stem.
ggml/src/ggml-cuda/%.o: \
	ggml/src/ggml-cuda/%.cu \
	ggml/include/ggml.h \
	ggml/src/ggml-common.h \
	ggml/src/ggml-cuda/common.cuh
	$(MCC) $(CXXFLAGS) $(MUSAFLAGS) -x musa -mtgpu -c -o $@ $<
endif # GGML_MUSA
942
+
877
943
ifdef GGML_METAL
878
944
MK_CPPFLAGS += -DGGML_USE_METAL
879
945
MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
@@ -1002,7 +1068,6 @@ $(info I CXX: $(shell $(CXX) --version | head -n 1))
1002
1068
ifdef GGML_CUDA
1003
1069
$(info I NVCC : $(shell $(NVCC ) --version | tail -n 1) )
1004
1070
CUDA_VERSION := $(shell $(NVCC ) --version | grep -oP 'release (\K[0-9]+\.[0-9]) ')
1005
- ifndef GGML_MUSA
1006
1071
ifeq ($(shell awk -v "v=$(CUDA_VERSION ) " 'BEGIN { print (v < 11.7) }'),1)
1007
1072
1008
1073
ifndef CUDA_DOCKER_ARCH
@@ -1012,7 +1077,6 @@ endif # CUDA_POWER_ARCH
1012
1077
endif # CUDA_DOCKER_ARCH
1013
1078
1014
1079
endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1)
1015
- endif # GGML_MUSA
1016
1080
endif # GGML_CUDA
1017
1081
$(info )
1018
1082
@@ -1283,6 +1347,7 @@ clean:
1283
1347
rm -vrf ggml/src/ggml-rpc/* .o
1284
1348
rm -vrf ggml/src/ggml-sycl/* .o
1285
1349
rm -vrf ggml/src/ggml-vulkan/* .o
1350
+ rm -vrf ggml/src/ggml-musa/* .o
1286
1351
rm -rvf $(BUILD_TARGETS )
1287
1352
rm -rvf $(TEST_TARGETS )
1288
1353
rm -f vulkan-shaders-gen ggml/src/ggml-vulkan-shaders.hpp ggml/src/ggml-vulkan-shaders.cpp
0 commit comments