Commit c8ebd7a

# Add a loss comparison script (#2029)
Stack from [ghstack](https://github.com/ezyang/ghstack/tree/0.12.0) (oldest at bottom):
* #2049
* __->__ #2029

## Summary

This PR adds `scripts/loss_compare.py` for comparing training losses between different git commits and/or training configurations.

## Key Features

- **Commit Comparison**: Compare losses between two different git commits with deterministic training
- **Configuration Comparison**: Compare different training configurations on the same commit
- **Reproducibility**: Automatically enables deterministic mode and seed checkpointing for reproducible comparisons
- **Real-time Output**: Streams training output to both the console and log files during execution
- **Statistical Analysis**: Generates step-by-step loss comparisons and summary statistics
- **CI Testing**: Includes an `--assert-equal` flag so automated tests can verify that losses are identical

## Usage Examples

#### Compare two commits

```
python3 ./scripts/loss_compare.py main my_branch
```

#### Compare two commits with a custom configuration

```
python3 ./scripts/loss_compare.py main my_branch \
    --baseline-config="./custom.toml" \
    --baseline-options="--parallelism.tensor_parallel_degree=2"
```

#### Compare different parallelization strategies on the same commit

```
python3 ./scripts/loss_compare.py . . \
    --baseline-config="./llama3_8b.toml" \
    --baseline-options="--parallelism.tensor_parallel_degree=2" \
    --test-options="--parallelism.tensor_parallel_degree=1"
```

#### Assert equality for CI testing

```
python3 ./scripts/loss_compare.py main my_branch --assert-equal
```

## Real Use Cases

Compare full-dtensor simple FSDP with FSDP2:

```
python3 scripts/loss_compare.py . . \
    --baseline-options='--activation_checkpoint.mode="none"' \
    --test-train-file='torchtitan.experiments.full_dtensor.train' \
    --test-options='--model.name full_dtensor.llama3 --activation_checkpoint.mode="none"' \
    --assert-equal --no-seed-checkpoint

[LOSS_COMPARE]
[LOSS_COMPARE] Asserting losses are equal...
[LOSS_COMPARE] Baseline log: /tmp/baseline_training.log
[LOSS_COMPARE] Test log: /tmp/test_training.log
[LOSS_COMPARE] Extracted 100 steps from baseline log
[LOSS_COMPARE] Extracted 100 steps from test log
test_losses_equal (__main__.assert_losses_equal.<locals>.LossEqualityTest.test_losses_equal) ... ok
```
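The step-by-step comparison described above can be sketched in a few lines. This is a hypothetical illustration, not the script's actual implementation: the `step: N loss: X` log pattern, `extract_losses`, and `compare` are all assumptions made for the example.

```python
# Hypothetical sketch of pulling per-step losses out of a training log
# and comparing two runs. The log-line format is an assumption.
import re

LOSS_RE = re.compile(r"step:\s*(\d+).*?loss:\s*([0-9.]+)")

def extract_losses(log_text: str) -> dict[int, float]:
    """Map each training step number to its reported loss."""
    losses = {}
    for line in log_text.splitlines():
        m = LOSS_RE.search(line)
        if m:
            losses[int(m.group(1))] = float(m.group(2))
    return losses

def compare(baseline: dict[int, float], test: dict[int, float]):
    """Yield (step, baseline_loss, test_loss, abs_diff) for steps in both runs."""
    for step in sorted(baseline.keys() & test.keys()):
        yield step, baseline[step], test[step], abs(baseline[step] - test[step])

# Example with fabricated log lines:
base = extract_losses("step: 1 loss: 10.5\nstep: 2 loss: 9.8")
test = extract_losses("step: 1 loss: 10.5\nstep: 2 loss: 9.7")
for step, b, t, d in compare(base, test):
    print(step, b, t, round(d, 4))
```

Keying on the step number rather than the line number makes the comparison robust to runs that log at different verbosity or were truncated at different points.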
1 parent 4a5fa99 · commit c8ebd7a

File tree: 2 files changed (+896, −0 lines)


`.github/workflows/integration_test_8gpu_features.yaml`

Lines changed: 7 additions & 0 deletions

```diff
@@ -92,5 +92,12 @@ jobs:
 
           python -m tests.integration_tests.run_tests --gpu_arch_type ${{ matrix.gpu-arch-type }} --test_suite features $RUNNER_TEMP/artifacts-to-be-uploaded --ngpu 8
+
+          # Verify the accuracy.
+          echo "Checking FSDP4 v.s. HSDP2FSDP2TP2 accuracy parity"
+          export baseline_options="--parallelism.data_parallel_replicate_degree=1"
+          export test_options="--parallelism.data_parallel_replicate_degree=2 --parallelism.tensor_parallel_degree=2"
+          python3 scripts/loss_compare.py . . --baseline-options="${baseline_options}" --test-options="${test_options}" --job-dump-folder="${RUNNER_TEMP}/artifacts-to-be-uploaded/accuracy_comparison_outputs" --assert-equal --baseline-ngpus=4 --test-ngpus=8 --steps=1
+
           # Cleanup the checkpoints so that we don't waste network bandwidth and time.
           rm -rf $RUNNER_TEMP/artifacts-to-be-uploaded/*/checkpoint
           rm -rf artifacts-to-be-uploaded/*/checkpoint
```
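The `--assert-equal` pass/fail reporting seen in the CI log (`test_losses_equal ... ok`) can be approximated by wrapping the comparison in a dynamically built `unittest` case. This is a hedged sketch of the pattern, not the script's real code; `assert_losses_equal` and its exit behavior are assumptions for illustration.

```python
# Hypothetical sketch: turn a loss-equality check into a unittest run so
# CI gets standard "... ok" / failure output and a nonzero exit on mismatch.
import unittest

def assert_losses_equal(baseline: dict[int, float], test: dict[int, float]) -> None:
    class LossEqualityTest(unittest.TestCase):
        def test_losses_equal(self):
            # Same set of steps must appear in both runs.
            self.assertEqual(sorted(baseline), sorted(test), "step sets differ")
            # Losses must match exactly at every step (deterministic runs).
            for step in baseline:
                self.assertEqual(baseline[step], test[step],
                                 f"loss differs at step {step}")

    suite = unittest.TestLoader().loadTestsFromTestCase(LossEqualityTest)
    result = unittest.TextTestRunner(verbosity=2).run(suite)
    if not result.wasSuccessful():
        raise SystemExit(1)  # fail the CI job on any mismatch

assert_losses_equal({1: 10.5, 2: 9.8}, {1: 10.5, 2: 9.8})
```

Defining the `TestCase` inside the function (hence the `<locals>` in the CI log's test name) lets the closure capture the two loss dictionaries without any globals or fixtures.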
