2 changes: 1 addition & 1 deletion .ci/scripts/setup-windows.ps1
@@ -1,5 +1,5 @@
 param (
-    [string]$editable = $false
+    [string]$editable = "false"
 )
 
 conda create --yes --quiet -n et python=3.12
33 changes: 28 additions & 5 deletions .ci/scripts/unittest-windows.ps1
@@ -1,15 +1,38 @@
 param (
-    [string]$editable = $false
+    [string]$buildMode = "Release"
 )
 
 Set-PSDebug -Trace 1
 $ErrorActionPreference = 'Stop'
 $PSNativeCommandUseErrorActionPreference = $true
 
-# Run pytest with coverage
-# pytest -n auto --cov=./ --cov-report=xml
-pytest -v --full-trace -c pytest-windows.ini
+# Run native unit tests (via ctest)
+New-Item -Path "test-build" -ItemType Directory
+cd "test-build"
+
+cmake .. --preset windows -B . -DEXECUTORCH_BUILD_TESTS=ON -DCMAKE_BUILD_TYPE=$buildMode
 if ($LASTEXITCODE -ne 0) {
-    Write-Host "Pytest invocation was unsuccessful. Exit code: $LASTEXITCODE."
+    Write-Host "CMake configuration was unsuccessful. Exit code: $LASTEXITCODE."
     exit $LASTEXITCODE
 }
+
+cmake --build . -j8 --config $buildMode --verbose
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "CMake build was unsuccessful. Exit code: $LASTEXITCODE."
+    exit $LASTEXITCODE
+}
+
+ctest -j8 . --build-config $buildMode --output-on-failure -E "method_test|tensor_parser_test"
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "CTest run was unsuccessful. Exit code: $LASTEXITCODE."
+    exit $LASTEXITCODE
+}
+
+cd ..
+
+# Run pytest
+pytest -v -c pytest-windows.ini
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "Pytest invocation was unsuccessful. Exit code: $LASTEXITCODE."
+    exit $LASTEXITCODE
+}
12 changes: 10 additions & 2 deletions .github/workflows/_unittest.yml
@@ -69,7 +69,15 @@ jobs:
 \$ErrorActionPreference = 'Stop'
 \$PSNativeCommandUseErrorActionPreference = \$true
 
-.ci/scripts/setup-windows.ps1
+.ci/scripts/setup-windows.ps1 -editable "${{ inputs.editable }}"
+if (\$LASTEXITCODE -ne 0) {
+  Write-Host "Setup failed. Exit code: \$LASTEXITCODE."
+  exit \$LASTEXITCODE
+}
 
-powershell .ci/scripts/unittest-windows.ps1 -editable "${{ inputs.editable }}"
+.ci/scripts/unittest-windows.ps1 -buildMode "${{ inputs.build-mode }}"
+if (\$LASTEXITCODE -ne 0) {
+  Write-Host "Unit tests failed. Exit code: \$LASTEXITCODE."
+  exit \$LASTEXITCODE
+}
 }"
2 changes: 1 addition & 1 deletion .github/workflows/trunk.yml
@@ -1032,5 +1032,5 @@ jobs:
 
 .ci/scripts/setup-windows.ps1
 
-powershell .ci/scripts/test_model.ps1 -modelName ${{ matrix.model }} -backend ${{ matrix.backend }}
+.ci/scripts/test_model.ps1 -modelName ${{ matrix.model }} -backend ${{ matrix.backend }}
 }"
10 changes: 7 additions & 3 deletions CMakeLists.txt
@@ -143,9 +143,13 @@ endif()
 
 # -ffunction-sections -fdata-sections: breaks function and data into sections so
 # they can be properly gc'd. -s: strip symbol.
-set(CMAKE_CXX_FLAGS_RELEASE
-  "-ffunction-sections -fdata-sections ${CMAKE_CXX_FLAGS_RELEASE}"
-)
+if(WIN32)
+  set(CMAKE_CXX_FLAGS_RELEASE "/Gy /Gw ${CMAKE_CXX_FLAGS_RELEASE}")
+else()
+  set(CMAKE_CXX_FLAGS_RELEASE
+    "-ffunction-sections -fdata-sections ${CMAKE_CXX_FLAGS_RELEASE}"
+  )
+endif()
 if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
   set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s")
 endif()
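(Annotation, not part of the diff: /Gy enables function-level linking and /Gw places global data in individual COMDAT sections; these are MSVC's closest analogues of -ffunction-sections and -fdata-sections, so the linker can still discard unreferenced code and data on Windows. The GNU-only -s strip flag remains guarded by the compiler-ID check.)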
2 changes: 1 addition & 1 deletion backends/xnnpack/CMakeLists.txt
@@ -59,7 +59,7 @@ foreach(fbs_file ${_xnnpack_schema__srcs})
   )
 endforeach()
 
-if(WIN32 AND NOT CMAKE_CROSSCOMPILING)
+if(CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows")
   set(MV_COMMAND
     powershell -Command
     "Move-Item -Path ${_xnnpack_flatbuffer__outputs} -Destination ${_xnnpack_schema__outputs} -Force"
26 changes: 16 additions & 10 deletions export/target_recipes.py
@@ -11,26 +11,32 @@
 selection and combine multiple backends optimally for target hardware.
 """
 
+import sys
 from typing import Dict, List
 
-import coremltools as ct
+if sys.platform != "win32":
+    import coremltools as ct
+    from executorch.backends.apple.coreml.recipes import CoreMLRecipeType
 
-# pyre-ignore
-from executorch.backends.apple.coreml.recipes import CoreMLRecipeType
 from executorch.backends.xnnpack.recipes import XNNPackRecipeType
 from executorch.export.recipe import ExportRecipe, RecipeType
 
 
 ## IOS Target configs
 # The following list of recipes are not exhaustive for CoreML; refer to CoreMLRecipeType for more detailed recipes.
-IOS_CONFIGS: Dict[str, List[RecipeType]] = {
-    # pyre-ignore
-    "ios-arm64-coreml-fp32": [CoreMLRecipeType.FP32, XNNPackRecipeType.FP32],
-    # pyre-ignore
-    "ios-arm64-coreml-fp16": [CoreMLRecipeType.FP16],
-    # pyre-ignore
-    "ios-arm64-coreml-int8": [CoreMLRecipeType.PT2E_INT8_STATIC],
-}
+IOS_CONFIGS: Dict[str, List[RecipeType]] = (
+    {
+        # pyre-ignore
+        "ios-arm64-coreml-fp32": [CoreMLRecipeType.FP32, XNNPackRecipeType.FP32],
+        # pyre-ignore
+        "ios-arm64-coreml-fp16": [CoreMLRecipeType.FP16],
+        # pyre-ignore
+        "ios-arm64-coreml-int8": [CoreMLRecipeType.PT2E_INT8_STATIC],
+    }
+    if sys.platform != "win32"
+    else {}
+)
 
 
 def _create_target_recipe(
14 changes: 11 additions & 3 deletions export/tests/test_target_recipes.py
@@ -7,17 +7,22 @@
 # pyre-strict
 
 import logging
+import sys
 import unittest
 
 import torch
-from executorch.backends.apple.coreml.recipes import CoreMLRecipeProvider  # pyre-ignore
 from executorch.backends.xnnpack.recipes.xnnpack_recipe_provider import (
     XNNPACKRecipeProvider,
 )
 from executorch.export import export, recipe_registry
 from executorch.export.target_recipes import get_ios_recipe
 from executorch.runtime import Runtime
 
+if sys.platform != "win32":
+    from executorch.backends.apple.coreml.recipes import (  # pyre-ignore
+        CoreMLRecipeProvider,
+    )
+
 
 class TestTargetRecipes(unittest.TestCase):
     """Test target recipes."""
@@ -26,12 +31,14 @@ def setUp(self) -> None:
         torch._dynamo.reset()
         super().setUp()
         recipe_registry.register_backend_recipe_provider(XNNPACKRecipeProvider())
-        # pyre-ignore
-        recipe_registry.register_backend_recipe_provider(CoreMLRecipeProvider())
+        if sys.platform != "win32":
+            # pyre-ignore
+            recipe_registry.register_backend_recipe_provider(CoreMLRecipeProvider())
 
     def tearDown(self) -> None:
         super().tearDown()
 
+    @unittest.skipIf(sys.platform == "win32", "Core ML is not available on Windows.")

Contributor:
This seems like evidence that we shouldn't have a mono recipe test with real backends.

GregoryComer (Member, Author), Sep 11, 2025:
Yeah, it might make sense to refactor it as a follow-up. CC @abhinaykukkadapu: do you have any preferences on handling platform-specific recipes?

abhinaykukkadapu (Contributor), Sep 11, 2025:
I was under the impression that lowering works across all platforms, although I haven't thought about Windows, to be fair.

> This seems like evidence that we shouldn't have a mono recipe test with real backends.

These tests only call forward if those specific ET backends are available (see https://github.com/pytorch/executorch/blob/main/export/tests/test_target_recipes.py#L103); otherwise they just lower the model.

@metascroy, do you know if coremltools works on Windows?

GregoryComer (Member, Author):
Thanks. From what I read, the coremltools AOT flow is not available on Windows.

     def test_ios_fp32_recipe_with_xnnpack_fallback(self) -> None:
         # Linear ops skipped by coreml but handled by xnnpack
         class Model(torch.nn.Module):
@@ -107,6 +114,7 @@ def forward(self, x, y):
         et_output = session.run_method("forward", example_inputs[0])
         logging.info(f"et output {et_output}")
 
+    @unittest.skipIf(sys.platform == "win32", "Core ML is not available on Windows.")
     def test_ios_quant_recipes(self) -> None:
         class Model(torch.nn.Module):
             def __init__(self):
4 changes: 2 additions & 2 deletions extension/evalue_util/test/print_evalue_test.cpp
@@ -267,7 +267,7 @@ TEST(PrintEvalueTest, UnelidedBoolLists) {
   // case; the other scalar types use the same underlying code, so they don't
   // need to test this again.
   {
-    EValue value(ArrayRef<bool>(list.data(), 0ul));
+    EValue value(ArrayRef<bool>(list.data(), static_cast<size_t>(0ul)));
     expect_output(value, "(len=0)[]");
   }
   {
@@ -419,7 +419,7 @@ TEST(PrintEvalueTest, UnelidedDoubleLists) {
   std::array<double, 6> list = {-2.2, -1, 0, INFINITY, NAN, 3.3};
 
   {
-    EValue value(ArrayRef<double>(list.data(), 0ul));
+    EValue value(ArrayRef<double>(list.data(), static_cast<size_t>(0ul)));
     expect_output(value, "(len=0)[]");
   }
   {
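As an aside on why the cast helps, here is a minimal standalone sketch (not part of the PR): a 0ul literal has type unsigned long, which is the same 64-bit type as size_t on LP64 Linux/macOS but a distinct 32-bit type on 64-bit Windows (LLP64), so spelling out static_cast<size_t> keeps the constructor argument the intended type on both ABIs.

#include <cstddef>
#include <cstdio>
#include <type_traits>

int main() {
  // LP64 (Linux/macOS 64-bit): unsigned long and size_t are the same 64-bit type.
  // LLP64 (64-bit Windows): unsigned long is 32-bit while size_t is 64-bit.
  constexpr bool same = std::is_same_v<unsigned long, std::size_t>;
  std::printf("sizeof(unsigned long) = %zu\n", sizeof(unsigned long));
  std::printf("sizeof(size_t)        = %zu\n", sizeof(std::size_t));
  std::printf("same type: %s\n", same ? "yes (LP64)" : "no (LLP64)");

  // Spelling out the conversion, as the updated test does, keeps the
  // argument exactly size_t on both ABIs:
  std::size_t len = static_cast<std::size_t>(0ul);
  (void)len;
  return 0;
}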
2 changes: 1 addition & 1 deletion extension/flat_tensor/test/CMakeLists.txt
@@ -23,7 +23,7 @@ add_custom_command(
     "${CMAKE_CURRENT_BINARY_DIR}/ModuleAddMulProgram.ptd"
   COMMAND
     ${PYTHON_EXECUTABLE} -m test.models.export_program --modules "ModuleAddMul"
-    --external-constants --outdir "${CMAKE_CURRENT_BINARY_DIR}" 2> /dev/null
+    --external-constants --outdir "${CMAKE_CURRENT_BINARY_DIR}"
   WORKING_DIRECTORY ${EXECUTORCH_ROOT}
 )

4 changes: 2 additions & 2 deletions extension/module/test/CMakeLists.txt
@@ -24,10 +24,10 @@ add_custom_command(
     "${CMAKE_CURRENT_BINARY_DIR}/ModuleAddMulProgram.pte"
     "${CMAKE_CURRENT_BINARY_DIR}/ModuleAddMulProgram.ptd"
   COMMAND ${PYTHON_EXECUTABLE} -m test.models.export_program --modules
-    "ModuleAdd" --outdir "${CMAKE_CURRENT_BINARY_DIR}" 2> /dev/null
+    "ModuleAdd" --outdir "${CMAKE_CURRENT_BINARY_DIR}"
   COMMAND
     ${PYTHON_EXECUTABLE} -m test.models.export_program --modules "ModuleAddMul"
-    --external-constants --outdir "${CMAKE_CURRENT_BINARY_DIR}" 2> /dev/null
+    --external-constants --outdir "${CMAKE_CURRENT_BINARY_DIR}"
   WORKING_DIRECTORY ${EXECUTORCH_ROOT}
 )

2 changes: 1 addition & 1 deletion extension/runner_util/test/CMakeLists.txt
@@ -20,7 +20,7 @@ include(${EXECUTORCH_ROOT}/tools/cmake/Test.cmake)
 add_custom_command(
   OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/ModuleAdd.pte"
   COMMAND ${PYTHON_EXECUTABLE} -m test.models.export_program --modules
-    "ModuleAdd" --outdir "${CMAKE_CURRENT_BINARY_DIR}" 2> /dev/null
+    "ModuleAdd" --outdir "${CMAKE_CURRENT_BINARY_DIR}"
   WORKING_DIRECTORY ${EXECUTORCH_ROOT}
 )

24 changes: 8 additions & 16 deletions extension/testing_util/temp_file.h
@@ -9,13 +9,11 @@
 #pragma once
 
 #include <array>
+#include <fstream>
 #include <memory>
 #include <string>
 
-#include <fcntl.h> // open()
 #include <stdio.h> // tmpnam(), remove()
-#include <unistd.h> // write(), close()
 
 #include <gtest/gtest.h>
 
 namespace executorch {
@@ -72,19 +70,13 @@ class TempFile {
     }
 
     // Write the contents to the file.
-    int fd = open(
-        path.c_str(),
-        // O_EXCL ensures that we are the ones creating this file, to help
-        // protect against race conditions.
-        O_CREAT | O_EXCL | O_RDWR,
-        // User can read and write, group can read.
-        S_IRUSR | S_IWUSR | S_IRGRP);
-    ASSERT_GE(fd, 0) << "open(" << path << ") failed: " << strerror(errno);
-
-    ssize_t nwrite = write(fd, data, size);
-    ASSERT_EQ(nwrite, size) << "Failed to write " << size << " bytes (wrote "
-                            << nwrite << "): " << strerror(errno);
-    close(fd);
+    std::ofstream file(path, std::ios::out | std::ios::binary);
+    ASSERT_TRUE(file.is_open())
+        << "open(" << path << ") failed: " << strerror(errno);
+
+    file.write((const char*)data, size);
+    ASSERT_TRUE(file.good())
+        << "Failed to write " << size << " bytes: " << strerror(errno);
 
     *out_path = path;
   }
8 changes: 4 additions & 4 deletions kernels/portable/cpu/op_argmax.cpp
@@ -49,21 +49,21 @@ Tensor& argmax_out(
   static constexpr const char op_name[] = "argmax.out";
 
   ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, op_name, CTYPE, [&] {
-    long* out_data = out.mutable_data_ptr<long>();
+    int64_t* out_data = out.mutable_data_ptr<int64_t>();
Contributor:
Nice. We should have a lint for this. cc @manuelcandales

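For context on why this matters, here is a minimal standalone sketch (not part of the PR): long is 64-bit on LP64 platforms such as Linux and macOS but only 32-bit on 64-bit Windows (LLP64), so int64_t tensor data cannot portably be accessed through long pointers.

#include <cstdint>
#include <cstdio>

int main() {
  // LP64 (Linux/macOS 64-bit): sizeof(long) == 8, same layout as int64_t.
  // LLP64 (64-bit Windows/MSVC): sizeof(long) == 4, so long* and int64_t*
  // point at differently sized objects and must not be interchanged.
  std::printf("sizeof(long)    = %zu\n", sizeof(long));
  std::printf("sizeof(int64_t) = %zu\n", sizeof(std::int64_t));

  std::int64_t indices[2] = {0, 1};
  std::int64_t* ok = indices; // portable: matches the tensor's element type
  // long* bad = indices;     // breaks on LLP64, where long is not int64_t
  (void)ok;
  return 0;
}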

     const bool success = parallel_for_each_reduce_over_dim_output_index(
         in, dim, out, [&](const auto begin, const auto end) {
           for (const auto out_ix : c10::irange(begin, end)) {
-            std::tuple<CTYPE, long> acc = reduce_over_dim<CTYPE>(
-                [](CTYPE v, long ix, CTYPE acc_val, long acc_ix) {
+            std::tuple<CTYPE, int64_t> acc = reduce_over_dim<CTYPE>(
+                [](CTYPE v, int64_t ix, CTYPE acc_val, int64_t acc_ix) {
                   // the below condition as written is equivalent to
                   // !isnan(accval) && (isnan(v) || v > acc_val). See
                   // argument in op_argmin.cpp.
                   if (!utils::isnan_override(acc_val) && !(v <= acc_val)) {
                     acc_val = v;
                     acc_ix = ix;
                   }
-                  return std::tuple<CTYPE, long>{acc_val, acc_ix};
+                  return std::tuple<CTYPE, int64_t>{acc_val, acc_ix};
                 },
                 in,
                 dim,
8 changes: 4 additions & 4 deletions kernels/portable/cpu/op_argmin.cpp
@@ -49,13 +49,13 @@ Tensor& argmin_out(
   static constexpr const char op_name[] = "argmin.out";
 
   ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, op_name, CTYPE, [&] {
-    long* out_data = out.mutable_data_ptr<long>();
+    int64_t* out_data = out.mutable_data_ptr<int64_t>();
 
     const bool success = parallel_for_each_reduce_over_dim_output_index(
         in, dim, out, [&](const auto begin, const auto end) {
           for (const auto out_ix : c10::irange(begin, end)) {
-            std::tuple<CTYPE, long> acc = reduce_over_dim<CTYPE>(
-                [](CTYPE v, long ix, CTYPE acc_val, long acc_ix) {
+            std::tuple<CTYPE, int64_t> acc = reduce_over_dim<CTYPE>(
+                [](CTYPE v, int64_t ix, CTYPE acc_val, int64_t acc_ix) {
                   // the below condition as written is equivalent to
                   // !isnan(accval) && (isnan(v) || v < acc_val). cases:
                   // - if neither acc_val nor v is NaN, !(v >= acc_val) is
@@ -70,7 +70,7 @@ Tensor& argmin_out(
                     acc_val = v;
                     acc_ix = ix;
                   }
-                  return std::tuple<CTYPE, long>{acc_val, acc_ix};
+                  return std::tuple<CTYPE, int64_t>{acc_val, acc_ix};
                 },
                 in,
                 dim,
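The NaN condition described in the comment above can be checked in isolation; the following standalone sketch (not PR code) shows that !(v >= acc_val) is true exactly when v is smaller than acc_val or v is NaN, since every ordered comparison involving NaN is false.

#include <cmath>
#include <cstdio>

int main() {
  // Ordered comparisons involving NaN are always false, so !(v >= acc)
  // is true when v < acc OR v is NaN -- the exact "take v" rule argmin
  // uses: prefer a NaN, otherwise prefer the smaller value.
  double acc = 1.0;

  double v1 = 0.5; // smaller: taken
  std::printf("take 0.5: %d\n", !std::isnan(acc) && !(v1 >= acc)); // 1

  double v2 = 2.0; // larger: skipped
  std::printf("take 2.0: %d\n", !std::isnan(acc) && !(v2 >= acc)); // 0

  double v3 = NAN; // NaN: taken, then kept, since the first check goes false
  std::printf("take NaN: %d\n", !std::isnan(acc) && !(v3 >= acc)); // 1
  return 0;
}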
4 changes: 2 additions & 2 deletions kernels/portable/cpu/op_clamp.cpp
@@ -45,9 +45,9 @@ ET_NODISCARD bool check_bounds(
   static constexpr const char op_name[] = "clamp.out";
 
   if (isIntegralType(out_type, /*includeBool=*/false)) {
-    const long val_long = utils::scalar_to<long>(val_scalar);
+    const int64_t val_long = utils::scalar_to<int64_t>(val_scalar);
     ET_SWITCH_INT_TYPES(out_type, ctx, op_name, CTYPE_OUT, [&]() {
-      if (is_out_of_bounds<CTYPE_OUT, long>(val_long)) {
+      if (is_out_of_bounds<CTYPE_OUT, int64_t>(val_long)) {
         ET_LOG(Error, "%s value out of bounds", val_name);
         is_valid = false;
       }
2 changes: 1 addition & 1 deletion kernels/portable/cpu/op_gather.cpp
@@ -30,7 +30,7 @@ void gather_helper(
     Tensor& out,
     int64_t dim) {
   const CTYPE* in_data = in.const_data_ptr<CTYPE>();
-  const long* index_data = index.const_data_ptr<long>();
+  const int64_t* index_data = index.const_data_ptr<int64_t>();
   CTYPE* out_data = out.mutable_data_ptr<CTYPE>();
 
   if (index.dim() == 0) {
8 changes: 4 additions & 4 deletions kernels/portable/cpu/op_max.cpp
@@ -82,19 +82,19 @@ std::tuple<Tensor&, Tensor&> max_out(
   ET_SWITCH_REALHBBF16_TYPES(
       in.scalar_type(), ctx, "max.dim_max", CTYPE, [&]() {
         CTYPE* max_data = max.mutable_data_ptr<CTYPE>();
-        long* max_indices_data = max_indices.mutable_data_ptr<long>();
+        int64_t* max_indices_data = max_indices.mutable_data_ptr<int64_t>();
 
         const bool success = parallel_for_each_reduce_over_dim_output_index(
             in, dim, max, [&](const auto begin, const auto end) {
               for (const auto out_ix : c10::irange(begin, end)) {
-                std::tuple<CTYPE, long> acc = reduce_over_dim<CTYPE>(
-                    [](CTYPE v, long ix, CTYPE acc_val, long acc_ix) {
+                std::tuple<CTYPE, int64_t> acc = reduce_over_dim<CTYPE>(
+                    [](CTYPE v, int64_t ix, CTYPE acc_val, int64_t acc_ix) {
                       if (!utils::isnan_override(acc_val) &&
                           (utils::isnan_override(v) || v > acc_val)) {
                         acc_val = v;
                         acc_ix = ix;
                       }
-                      return std::tuple<CTYPE, long>{acc_val, acc_ix};
+                      return std::tuple<CTYPE, int64_t>{acc_val, acc_ix};
                     },
                     in,
                     dim,