Skip to content

Commit ff3d03c

Browse files
y-sq authored and facebook-github-bot committed
Fix fp8-all-gather buck errors (#912)
Summary: Pull Request resolved: #912. Reviewed By: vkuzo. Differential Revision: D63048850.
1 parent 2dea315 commit ff3d03c

File tree

3 files changed

+4
-4
lines changed

3 files changed

+4
-4
lines changed

test/float8/test_fsdp2/test_fsdp2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from torchao.float8.config import CastConfig, Float8LinearConfig, ScalingType
1919
from torchao.float8.float8_linear_utils import convert_to_float8_training
2020
from torchao.float8.fsdp_utils import WeightWithDynamicFloat8CastTensor
21-
from fsdp2_common import check_parity_bf16_mp, check_parity_no_mp
21+
from torchao.testing.float8.float8_fsdp2_utils import check_parity_bf16_mp, check_parity_no_mp
2222
from torch.distributed._composable.fsdp import fully_shard, MixedPrecisionPolicy
2323
from torch.distributed._tensor import DTensor
2424
from torch.testing._internal.common_cuda import TEST_CUDA

torchao/testing/float8/__init__.py

Whitespace-only changes.

test/float8/test_fsdp2/fsdp2_common.py renamed to torchao/testing/float8/float8_fsdp2_utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@ def check_parity_no_mp(
4949
precompute_float8_dynamic_scale_for_fsdp(model)
5050

5151
if compile_transformer_block:
52-
test_cls.assertEqual(losses[0], losses[1], atol=1e-4, rtol=1e-4)
52+
test_cls.assertEqual(losses[0], losses[1], atol=1e-4, rtol=1e-4, msg = f"iter: {iter_idx}, loss-ref: {losses[0]}, loss-fp8: {losses[1]}")
5353
else:
54-
test_cls.assertEqual(losses[0], losses[1])
54+
test_cls.assertEqual(losses[0], losses[1], msg = f"iter: {iter_idx}, loss-ref: {losses[0]}, loss-fp8: {losses[1]}")
5555

5656

5757
def check_parity_bf16_mp(
@@ -86,4 +86,4 @@ def check_parity_bf16_mp(
8686
ref_model.parameters(), ref_model_bf16.parameters()
8787
):
8888
param_bf16.detach().copy_(param_fp32)
89-
test_cls.assertEqual(losses[0], losses[1])
89+
test_cls.assertEqual(losses[0], losses[1], msg = f"iter: {iter_idx}, loss-ref: {losses[0]}, loss-fp8: {losses[1]}")

0 commit comments

Comments
 (0)