From d7beeb3d74bc463e4608632fbb17dc62c3f79f3c Mon Sep 17 00:00:00 2001
From: Chien-Chin Huang <chienchin@fb.com>
Date: Wed, 19 Nov 2025 12:48:20 -0800
Subject: [PATCH 1/8] Update

[ghstack-poisoned]
---
 .../integration_test_8gpu_features.yaml       |   2 +-
 scripts/loss_compare.py                       | 154 +++++++++++++++++-
 2 files changed, 149 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/integration_test_8gpu_features.yaml b/.github/workflows/integration_test_8gpu_features.yaml
index de0672eeef..ad0904dff3 100644
--- a/.github/workflows/integration_test_8gpu_features.yaml
+++ b/.github/workflows/integration_test_8gpu_features.yaml
@@ -96,7 +96,7 @@ jobs:
         echo "Checking FSDP4 v.s. HSDP2FSDP2TP2 accuracy parity"
         export baseline_options="--parallelism.data_parallel_replicate_degree=1"
         export test_options="--parallelism.data_parallel_replicate_degree=2 --parallelism.tensor_parallel_degree=2"
-        python3 scripts/loss_compare.py . . --baseline-options="${baseline_options}" --test-options="${test_options}" --job-dump-folder="${RUNNER_TEMP}/artifacts-to-be-uploaded/accuracy_comparison_outputs" --assert-equal --baseline-ngpus=4 --test-ngpus=8 --steps=1
+        python3 scripts/loss_compare.py . . --baseline-options="${baseline_options}" --test-options="${test_options}" --job-dump-folder="${RUNNER_TEMP}/artifacts-to-be-uploaded/accuracy_comparison_outputs" --assert-equal --baseline-ngpus=4 --test-ngpus=8 --steps=1 --export-result "${RUNNER_TEMP}/artifacts-to-be-uploaded/accuracy_comparison_outputs/loss_compare_result.txt"
 
         # Cleanup the checkpoints so that we don't waste network bandwidth and time.
         rm -rf $RUNNER_TEMP/artifacts-to-be-uploaded/*/checkpoint
diff --git a/scripts/loss_compare.py b/scripts/loss_compare.py
index 48a211c8f2..39c9c4b79e 100644
--- a/scripts/loss_compare.py
+++ b/scripts/loss_compare.py
@@ -168,6 +168,9 @@ def validate_arguments(
     test_train_file: str,
     test_options: str,
     steps: int,
+    assert_equal: bool,
+    export_result: str | None,
+    import_result: str | None,
 ) -> None:
     """Validate command line arguments."""
     # Validate commit arguments - if one is ".", both must be "."
@@ -201,6 +204,34 @@ def validate_arguments(
         log_print(f"Error: --steps must be a positive integer, got: {steps}")
         sys.exit(1)
 
+    # Validate export-result requires assert-equal
+    if export_result and not assert_equal:
+        log_print("Error: --export-result requires --assert-equal")
+        log_print("       Export only happens when losses are verified to match")
+        sys.exit(1)
+
+    # Validate import-result requires assert-equal
+    if import_result and not assert_equal:
+        log_print("Error: --import-result requires --assert-equal")
+        log_print("       Import is used to verify all losses match")
+        sys.exit(1)
+
+    # Validate export-result and import-result are mutually exclusive
+    if export_result and import_result:
+        log_print(
+            "Error: --export-result and --import-result cannot be " "used together"
+        )
+        log_print(
+            "       Use export to save results or import to compare "
+            "against saved results"
+        )
+        sys.exit(1)
+
+    # Validate import file exists
+    if import_result and not os.path.exists(import_result):
+        log_print(f"Error: Import file does not exist: {import_result}")
+        sys.exit(1)
+
 
 # =============================================================================
 # SETUP FUNCTIONS
@@ -321,7 +352,10 @@ def check_git_clean_state() -> None:
         for line in result.stdout.strip().split("\n"):
             log_print(f"  {line}")
         log_print("")
-        log_print("Please commit, stash, or discard your changes before running this script")
+        log_print(
+            "Please commit, stash, or discard your changes before "
+            "running this script"
+        )
         log_print("  - To commit: git add -A && git commit -m 'message'")
         log_print("  - To stash: git stash")
         log_print("  - To discard: git checkout -- . && git clean -fd")
@@ -431,6 +465,34 @@ def read_losses_from_file(loss_file: str) -> dict[int, float]:
     return losses
 
 
+def export_losses_to_file(losses: dict[int, float], export_path: str) -> None:
+    """Export losses to file and stdout.
+
+    Args:
+        losses: Dictionary mapping step numbers to loss values
+        export_path: Path to export file
+    """
+    log_print(f"Exporting losses to {export_path}")
+
+    # Write to file and collect output for stdout
+    with open(export_path, "w") as f:
+        for step in sorted(losses.keys()):
+            loss = losses[step]
+            line = f"{step} {loss}"
+            f.write(line + "\n")
+
+    log_print(f"Exported {len(losses)} loss values:")
+    log_print()
+
+    # Output to stdout in same format
+    for step in sorted(losses.keys()):
+        loss = losses[step]
+        print(f"{step} {loss}")
+
+    log_print()
+    log_print(f"Losses saved to: {export_path}")
+
+
 def extract_loss_data(output_folder: str | None) -> None:
     """Extract loss data from logs."""
     if not output_folder:
@@ -554,13 +616,18 @@ def perform_loss_analysis(
     generate_summary_statistics(baseline_losses, test_losses, stats_file)
 
 
-def assert_losses_equal(baseline_log: str, test_log: str) -> None:
-    """Assert that losses are equal between baseline and test using
-    unittest.
+def assert_losses_equal(
+    baseline_log: str, test_log: str, import_result: str | None = None
+) -> None:
+    """Assert that losses are equal between baseline and test using unittest.
+
+    If import_result is provided, also compares baseline with imported losses.
     """
     log_print("Asserting losses are equal...")
     log_print(f"Baseline log: {baseline_log}")
     log_print(f"Test log: {test_log}")
+    if import_result:
+        log_print(f"Import file: {import_result}")
 
     # Extract losses from both logs
     baseline_losses = extract_losses_from_log(baseline_log)
@@ -577,6 +644,15 @@ def assert_losses_equal(baseline_log: str, test_log: str) -> None:
         log_print("Error: No losses found in test log")
         sys.exit(1)
 
+    # Load imported losses if provided
+    imported_losses = None
+    if import_result:
+        imported_losses = read_losses_from_file(import_result)
+        log_print(f"Loaded {len(imported_losses)} steps from import file")
+        if not imported_losses:
+            log_print("Error: No losses found in import file")
+            sys.exit(1)
+
     # Create a test case
     class LossEqualityTest(unittest.TestCase):
         def test_losses_equal(self):
@@ -591,10 +667,22 @@ def test_losses_equal(self):
                 f"test has {len(test_steps)} steps",
             )
 
+            # If imported losses exist, check steps match
+            if imported_losses:
+                imported_steps = set(imported_losses.keys())
+                self.assertEqual(
+                    baseline_steps,
+                    imported_steps,
+                    f"Steps mismatch: baseline has {len(baseline_steps)} steps, "
+                    f"imported has {len(imported_steps)} steps",
+                )
+
             # Check that losses are equal for each step
             for step in sorted(baseline_steps):
                 baseline_loss = baseline_losses[step]
                 test_loss = test_losses[step]
+
+                # Compare baseline vs test
                 self.assertEqual(
                     baseline_loss,
                     test_loss,
@@ -602,6 +690,18 @@ def test_losses_equal(self):
                     f"baseline={baseline_loss}, test={test_loss}",
                 )
 
+                # Compare baseline vs imported (if provided)
+                # No need to compare test vs imported since:
+                # baseline==test and baseline==imported implies test==imported
+                if imported_losses:
+                    imported_loss = imported_losses[step]
+                    self.assertEqual(
+                        baseline_loss,
+                        imported_loss,
+                        f"Loss mismatch at step {step}: "
+                        f"baseline={baseline_loss}, imported={imported_loss}",
+                    )
+
     # Run the test
     suite = unittest.TestLoader().loadTestsFromTestCase(LossEqualityTest)
     runner = unittest.TextTestRunner(verbosity=2)
@@ -611,7 +711,13 @@ def test_losses_equal(self):
         log_print("Loss assertion failed!")
         sys.exit(1)
     else:
-        log_print("All losses are equal. Assertion passed!")
+        if import_result:
+            log_print(
+                "All losses are equal (baseline, test, and imported). "
+                "Assertion passed!"
+            )
+        else:
+            log_print("All losses are equal. Assertion passed!")
 
 
 def cleanup_temp_files(output_folder: str | None) -> None:
@@ -754,6 +860,24 @@ def parse_arguments() -> argparse.Namespace:
             "Script exits with error if losses differ."
         ),
     )
+    parser.add_argument(
+        "--export-result",
+        default="",
+        help=(
+            "Export losses to specified file path (requires --assert-equal). "
+            "Exports only when losses match. Format: '{step} {loss}' per line."
+        ),
+    )
+    parser.add_argument(
+        "--import-result",
+        default="",
+        help=(
+            "Import losses from specified file path for comparison "
+            "(requires --assert-equal). "
+            "Compares imported losses with both baseline and test "
+            "(all 3 must match)."
+        ),
+    )
     parser.add_argument(
         "--job-dump-folder",
         default="outputs",
@@ -785,6 +909,14 @@ def parse_arguments() -> argparse.Namespace:
     if not args.output_folder:
         args.output_folder = None
 
+    # Convert empty export_result to None
+    if not args.export_result:
+        args.export_result = None
+
+    # Convert empty import_result to None
+    if not args.import_result:
+        args.import_result = None
+
     return args
 
 
@@ -848,6 +980,9 @@ def main() -> None:
         args.test_train_file,
         args.test_options,
         args.steps,
+        args.assert_equal,
+        args.export_result,
+        args.import_result,
     )
 
     # Setup environment
@@ -910,7 +1045,14 @@ def main() -> None:
 
     # Assert losses are equal if requested
     if args.assert_equal:
-        assert_losses_equal(baseline_log, test_log)
+        # Pass import_result if provided for 3-way comparison
+        assert_losses_equal(baseline_log, test_log, args.import_result)
+
+        # Export losses if requested (only after assertion passes)
+        if args.export_result:
+            # Extract baseline losses (they equal test losses since assertion passed)
+            baseline_losses = extract_losses_from_log(baseline_log)
+            export_losses_to_file(baseline_losses, args.export_result)
 
     # Analysis and reporting
     perform_loss_analysis(baseline_log, test_log, stats_file)

From 25d13811975d24ad82a59c952c75a4a83afc48d7 Mon Sep 17 00:00:00 2001
From: Chien-Chin Huang <chienchin@fb.com>
Date: Wed, 19 Nov 2025 12:48:20 -0800
Subject: [PATCH 2/8] Update (base update)

[ghstack-poisoned]
---
 scripts/loss_compare.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/scripts/loss_compare.py b/scripts/loss_compare.py
index 31573c3bce..48a211c8f2 100644
--- a/scripts/loss_compare.py
+++ b/scripts/loss_compare.py
@@ -301,6 +301,33 @@ def print_configuration(
 # =============================================================================
 
 
+def check_git_clean_state() -> None:
+    """Check if git working directory is clean before switching commits.
+
+    Raises SystemExit if there are uncommitted changes or untracked files.
+    """
+    result = subprocess.run(
+        ["git", "status", "--porcelain"],
+        capture_output=True,
+        text=True,
+        check=True,
+    )
+
+    if result.stdout.strip():
+        log_print("Error: Git working directory is not clean")
+        log_print("       Cannot switch commits with uncommitted changes")
+        log_print("")
+        log_print("Modified/untracked files:")
+        for line in result.stdout.strip().split("\n"):
+            log_print(f"  {line}")
+        log_print("")
+        log_print("Please commit, stash, or discard your changes before running this script")
+        log_print("  - To commit: git add -A && git commit -m 'message'")
+        log_print("  - To stash: git stash")
+        log_print("  - To discard: git checkout -- . && git clean -fd")
+        sys.exit(1)
+
+
 def checkout_commit(commit: str, commit_name: str) -> None:
     """Checkout git commit."""
     if commit != ".":
@@ -840,6 +867,12 @@ def main() -> None:
         args.job_dump_folder,
     )
 
+    # Check if git working directory is clean before switching commits
+    # Skip check if both commits are "." (comparing configs on same commit)
+    needs_git_checkout = args.baseline_commit != "." or args.test_commit != "."
+    if needs_git_checkout:
+        check_git_clean_state()
+
     create_seed_checkpoint(
         enable_seed_checkpoint,
         args.baseline_config,

From 7274b815fbb352b4978eaffde58bcbb379175d32 Mon Sep 17 00:00:00 2001
From: Chien-Chin Huang <chienchin@fb.com>
Date: Wed, 19 Nov 2025 15:21:28 -0800
Subject: [PATCH 3/8] Update

[ghstack-poisoned]
---
 .github/workflows/integration_test_8gpu_features.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/integration_test_8gpu_features.yaml b/.github/workflows/integration_test_8gpu_features.yaml
index ad0904dff3..0011cd514b 100644
--- a/.github/workflows/integration_test_8gpu_features.yaml
+++ b/.github/workflows/integration_test_8gpu_features.yaml
@@ -93,10 +93,10 @@ jobs:
         python -m tests.integration_tests.run_tests --gpu_arch_type ${{ matrix.gpu-arch-type }} --test_suite features $RUNNER_TEMP/artifacts-to-be-uploaded --ngpu 8
 
         # Verify the accuracy.
-        echo "Checking FSDP4 v.s. HSDP2FSDP2TP2 accuracy parity"
+        echo "Checking FSDP4 v.s. HSDP2FSDP4 accuracy parity"
         export baseline_options="--parallelism.data_parallel_replicate_degree=1"
-        export test_options="--parallelism.data_parallel_replicate_degree=2 --parallelism.tensor_parallel_degree=2"
-        python3 scripts/loss_compare.py . . --baseline-options="${baseline_options}" --test-options="${test_options}" --job-dump-folder="${RUNNER_TEMP}/artifacts-to-be-uploaded/accuracy_comparison_outputs" --assert-equal --baseline-ngpus=4 --test-ngpus=8 --steps=1 --export-result "${RUNNER_TEMP}/artifacts-to-be-uploaded/accuracy_comparison_outputs/loss_compare_result.txt"
+        export test_options="--parallelism.data_parallel_replicate_degree=4"
+        python3 scripts/loss_compare.py . . --baseline-options="${baseline_options}" --test-options="${test_options}" --job-dump-folder="${RUNNER_TEMP}/artifacts-to-be-uploaded/accuracy_comparison_outputs" --assert-equal --steps=10 --export-result "${RUNNER_TEMP}/artifacts-to-be-uploaded/accuracy_comparison_outputs/loss_compare_result.txt"
 
         # Cleanup the checkpoints so that we don't waste network bandwidth and time.
         rm -rf $RUNNER_TEMP/artifacts-to-be-uploaded/*/checkpoint

From a699162088bab5e01b44e7660ef5e5b5c2ea97b5 Mon Sep 17 00:00:00 2001
From: Chien-Chin Huang <chienchin@fb.com>
Date: Wed, 19 Nov 2025 16:03:22 -0800
Subject: [PATCH 4/8] Update (base update)

[ghstack-poisoned]
---
 scripts/loss_compare.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/loss_compare.py b/scripts/loss_compare.py
index 48a211c8f2..42ad3a81be 100644
--- a/scripts/loss_compare.py
+++ b/scripts/loss_compare.py
@@ -321,7 +321,9 @@ def check_git_clean_state() -> None:
         for line in result.stdout.strip().split("\n"):
             log_print(f"  {line}")
         log_print("")
-        log_print("Please commit, stash, or discard your changes before running this script")
+        log_print(
+            "Please commit, stash, or discard your changes before running this script"
+        )
         log_print("  - To commit: git add -A && git commit -m 'message'")
         log_print("  - To stash: git stash")
         log_print("  - To discard: git checkout -- . && git clean -fd")

From 30bc03c2e7268d72c8745755d7fa4ac5025a8b0d Mon Sep 17 00:00:00 2001
From: Chien-Chin Huang <chienchin@fb.com>
Date: Wed, 19 Nov 2025 17:37:40 -0800
Subject: [PATCH 5/8] Update

[ghstack-poisoned]
---
 assets/ci_llama3_losses.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/assets/ci_llama3_losses.txt b/assets/ci_llama3_losses.txt
index 5ccea64b17..56ed044beb 100644
--- a/assets/ci_llama3_losses.txt
+++ b/assets/ci_llama3_losses.txt
@@ -7,4 +7,4 @@
 7 4.5606
 8 4.3724
 9 4.347
-10 4.2004
+10 4.2001

From 40b07a550732843b67e3d6743442888c6ababbf8 Mon Sep 17 00:00:00 2001
From: Chien-Chin Huang <chienchin@fb.com>
Date: Wed, 19 Nov 2025 17:39:40 -0800
Subject: [PATCH 6/8] Update

[ghstack-poisoned]
---
 .github/workflows/integration_test_8gpu_features.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/integration_test_8gpu_features.yaml b/.github/workflows/integration_test_8gpu_features.yaml
index 39409e8d82..9af3ce9292 100644
--- a/.github/workflows/integration_test_8gpu_features.yaml
+++ b/.github/workflows/integration_test_8gpu_features.yaml
@@ -90,14 +90,14 @@ jobs:
         sudo mkdir -p "$RUNNER_TEMP/artifacts-to-be-uploaded"
         sudo chown -R $(id -u):$(id -g) "$RUNNER_TEMP/artifacts-to-be-uploaded"
 
-        python -m tests.integration_tests.run_tests --gpu_arch_type ${{ matrix.gpu-arch-type }} --test_suite features $RUNNER_TEMP/artifacts-to-be-uploaded --ngpu 8
-
         # Verify the accuracy.
         echo "Checking FSDP4 v.s. HSDP2FSDP4 accuracy parity"
         export baseline_options="--parallelism.data_parallel_replicate_degree=1"
         export test_options="--parallelism.data_parallel_replicate_degree=4"
         python3 scripts/loss_compare.py . . --baseline-options="${baseline_options}" --test-options="${test_options}" --job-dump-folder="${RUNNER_TEMP}/artifacts-to-be-uploaded/accuracy_comparison_outputs" --assert-equal --steps=10  --import-result assets/ci_llama3_losses.txt
 
+        python -m tests.integration_tests.run_tests --gpu_arch_type ${{ matrix.gpu-arch-type }} --test_suite features $RUNNER_TEMP/artifacts-to-be-uploaded --ngpu 8
+
         # Cleanup the checkpoints so that we don't waste network bandwidth and time.
         rm -rf $RUNNER_TEMP/artifacts-to-be-uploaded/*/checkpoint
         rm -rf artifacts-to-be-uploaded/*/checkpoint

From 0a94bfc64380dc41ea57d145a8541073b9de888f Mon Sep 17 00:00:00 2001
From: Chien-Chin Huang <chienchin@fb.com>
Date: Wed, 19 Nov 2025 17:55:22 -0800
Subject: [PATCH 7/8] Update

[ghstack-poisoned]
---
 assets/ci_llama3_losses.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/assets/ci_llama3_losses.txt b/assets/ci_llama3_losses.txt
index 56ed044beb..5ccea64b17 100644
--- a/assets/ci_llama3_losses.txt
+++ b/assets/ci_llama3_losses.txt
@@ -7,4 +7,4 @@
 7 4.5606
 8 4.3724
 9 4.347
-10 4.2001
+10 4.2004

From 49ce58c97b187dbf8ce58e83e190a361181f0950 Mon Sep 17 00:00:00 2001
From: Chien-Chin Huang <chienchin@fb.com>
Date: Wed, 19 Nov 2025 22:49:19 -0800
Subject: [PATCH 8/8] Update

[ghstack-poisoned]
---
 .github/workflows/integration_test_8gpu_features.yaml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/integration_test_8gpu_features.yaml b/.github/workflows/integration_test_8gpu_features.yaml
index 9af3ce9292..bf17194efd 100644
--- a/.github/workflows/integration_test_8gpu_features.yaml
+++ b/.github/workflows/integration_test_8gpu_features.yaml
@@ -90,11 +90,12 @@ jobs:
         sudo mkdir -p "$RUNNER_TEMP/artifacts-to-be-uploaded"
         sudo chown -R $(id -u):$(id -g) "$RUNNER_TEMP/artifacts-to-be-uploaded"
 
-        # Verify the accuracy.
-        echo "Checking FSDP4 v.s. HSDP2FSDP4 accuracy parity"
+        # Verify the accuracy first.
+        echo "Checking FSDP8 v.s. HSDP (4, 2) accuracy parity"
         export baseline_options="--parallelism.data_parallel_replicate_degree=1"
         export test_options="--parallelism.data_parallel_replicate_degree=4"
         python3 scripts/loss_compare.py . . --baseline-options="${baseline_options}" --test-options="${test_options}" --job-dump-folder="${RUNNER_TEMP}/artifacts-to-be-uploaded/accuracy_comparison_outputs" --assert-equal --steps=10  --import-result assets/ci_llama3_losses.txt
+        rm -rf $RUNNER_TEMP/artifacts-to-be-uploaded/*
 
         python -m tests.integration_tests.run_tests --gpu_arch_type ${{ matrix.gpu-arch-type }} --test_suite features $RUNNER_TEMP/artifacts-to-be-uploaded --ngpu 8