@@ -294,6 +294,7 @@ def test_slice_and_copy_similar_to_vllm(self, granularity):
         self._test_slice_and_copy_similar_to_vllm(config)

     @unittest.skipIf(not is_sm_at_least_90(), "Need sm90+")
+    @unittest.skipIf(not _is_fbgemm_gpu_genai_available(), "Need fbgemm_gpu_genai")
     def test_bmm(self):
         # only support per row quantization
         config = Float8DynamicActivationFloat8WeightConfig(granularity=PerRow())
@@ -406,6 +407,7 @@ def test_cat(self, granularity, sizes):
         self.assertEqual(cat_qweight2.scale, ref_scale)

     @unittest.skipIf(not is_sm_at_least_90(), "Need sm90+")
+    @unittest.skipIf(not _is_fbgemm_gpu_genai_available(), "Need fbgemm_gpu_genai")
     def test_moe_weight_reshape_ops(self):
         # only per row quantization is supported for bmm
         granularity = PerRow()
@@ -416,6 +418,7 @@ def test_moe_weight_reshape_ops(self):
     # that should be moved here after v1 config is deprecated:
     # https://github.com/pytorch/ao/issues/2649
     @unittest.skipIf(not is_sm_at_least_90(), "Need sm90+")
+    @unittest.skipIf(not _is_fbgemm_gpu_genai_available(), "Need fbgemm_gpu_genai")
     def test_expected_gpu_kernel_fbgemm(self):
         """Making sure KernelPreference.FBGEMM calls correct quantize and gemm kernels
         and the bias add happens in the gemm kernel for per row quantization