From a8820c1a234e44ad45a3e51f4b737147bbeb81bd Mon Sep 17 00:00:00 2001 From: Divakar Verma Date: Tue, 18 Feb 2025 18:00:52 -0600 Subject: [PATCH] resolve diff for mixtral8x7B Signed-off-by: Divakar Verma --- .../E=8,N=14336,device_name=AMD_Instinct_MI300X.json | 8 ++++---- .../E=8,N=1792,device_name=AMD_Instinct_MI300X.json | 2 +- .../E=8,N=3584,device_name=AMD_Instinct_MI300X.json | 4 ++-- .../E=8,N=7168,device_name=AMD_Instinct_MI300X.json | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json index 022d5ece7f87..66f9106bd1be 100644 --- a/vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json +++ b/vllm/model_executor/layers/fused_moe/configs/E=8,N=14336,device_name=AMD_Instinct_MI300X.json @@ -45,8 +45,8 @@ }, "16": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 64, + "BLOCK_SIZE_N": 16, + "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, "num_warps": 2, "num_stages": 2, @@ -96,7 +96,7 @@ "num_stages": 2, "waves_per_eu": 0, "matrix_instr_nonkdim": 16, - "kpack": 1 + "kpack": 2 }, "96": { "BLOCK_SIZE_M": 32, @@ -123,7 +123,7 @@ "256": { "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, + "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 4, "num_warps": 8, "num_stages": 2, diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json index de2320e4b28c..1b46cb571651 100644 --- a/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json +++ b/vllm/model_executor/layers/fused_moe/configs/E=8,N=1792,device_name=AMD_Instinct_MI300X.json @@ -8,7 +8,7 @@ "num_stages": 2, "waves_per_eu": 0, "matrix_instr_nonkdim": 16, - "kpack": 1 + "kpack": 2 }, "2": { "BLOCK_SIZE_M": 16, diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json index 5a3f415d5414..ed5b655d8993 100644 --- a/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json +++ b/vllm/model_executor/layers/fused_moe/configs/E=8,N=3584,device_name=AMD_Instinct_MI300X.json @@ -45,8 +45,8 @@ }, "16": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 128, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, "num_warps": 2, "num_stages": 2, diff --git a/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json index 8dec5e3afaba..822f04e33e87 100644 --- a/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json +++ b/vllm/model_executor/layers/fused_moe/configs/E=8,N=7168,device_name=AMD_Instinct_MI300X.json @@ -8,7 +8,7 @@ "num_stages": 2, "waves_per_eu": 0, "matrix_instr_nonkdim": 16, - "kpack": 1 + "kpack": 2 }, "2": { "BLOCK_SIZE_M": 16, @@ -63,7 +63,7 @@ "num_stages": 2, "waves_per_eu": 0, "matrix_instr_nonkdim": 16, - "kpack": 2 + "kpack": 1 }, "32": { "BLOCK_SIZE_M": 16, @@ -128,7 +128,7 @@ "num_warps": 8, "num_stages": 2, "waves_per_eu": 0, - "matrix_instr_nonkdim": 16, + "matrix_instr_nonkdim": 32, "kpack": 2 }, "512": {