Skip to content

Commit f7e3128

Browse files
committed
Update on "Some minor type stub improvements"
I was just playing around with improving the typing of symbolic_shapes. The PR is not "complete" but I in particular wanted to get feedback on whether or not people liked making ValueRanges Generic; it seems that distinguishing if you have an Expr ValueRange or a SympyBoolean ValueRange is a lot of trouble for downstream. Using TypeGuard, we can perform refinements on the generic parameter inside methods, although we still have to cast back to ValueRange[T] due to python/mypy#14425 (comment) Signed-off-by: Edward Z. Yang <ezyang@meta.com> cc voznesenskym penguinwu EikanWang jgong5 Guobing-Chen XiaobingSuper zhuhaozhe blzheng wenzhe-nrv jiayisunx peterbell10 ipiszy yf225 chenyang78 kadeng muchulee8 aakhundov ColinPeppler [ghstack-poisoned]
2 parents 5e03519 + ebdfc27 commit f7e3128

File tree

585 files changed

+19316
-9142
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the searchbox below for content that may be hidden.

585 files changed

+19316
-9142
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
a34cbd4093a2eaf7267aa122cd580ea7f9b15f72
1+
export-D53340041

.ci/docker/requirements-ci.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ librosa>=0.6.2 ; python_version < "3.11"
7676
#Description: A testing library that allows you to replace parts of your
7777
#system under test with mock objects
7878
#Pinned versions:
79-
#test that import: test_module_init.py, test_modules.py, test_nn.py,
79+
#test that import: test_modules.py, test_nn.py,
8080
#test_testing.py
8181

8282
#MonkeyType # breaks pytorch-xla-linux-bionic-py3.7-clang8

.circleci/README.md

Lines changed: 1 addition & 465 deletions
Large diffs are not rendered by default.

.clang-tidy

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ misc-*,
4242
-misc-non-private-member-variables-in-classes,
4343
-misc-confusable-identifiers,
4444
modernize-*,
45-
-modernize-concat-nested-namespaces,
4645
-modernize-macro-to-enum,
4746
-modernize-return-braced-init-list,
4847
-modernize-use-auto,

.github/auto_request_review.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ reviewers:
66
- albanD
77
- miladm
88
- bdhirsh
9-
- voznesenskym
109

1110
per_author:
1211
symbolic-shapes:

.github/ci_commit_pins/audio.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
b2d9c3e315405f2b5cfdfa5b93f849d5b27a4109
1+
02586da797dbfa201721d2080e2171805202f72c

.github/ci_commit_pins/vision.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
315f31527e720999eecbb986679b3177d4ed5e37
1+
806dba678d5b01f6e8a46f7c48fdf8c09369a267

.github/ci_commit_pins/xla.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
e265fd7b403d4508a1bcdaad2af9519e6c2d0b4c
1+
21653dc9859c2fa2cf5e18ce79d7ac1e392c5381

.github/scripts/filter_test_configs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -588,7 +588,7 @@ def main() -> None:
588588
labels=labels,
589589
test_matrix=filtered_test_matrix,
590590
job_name=args.job_name,
591-
pr_body=pr_body,
591+
pr_body=pr_body if pr_body else "",
592592
)
593593

594594
# Set the filtered test matrix as the output

.github/scripts/generate_docker_release_matrix.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
55
Will output a condensed version of the matrix. Will include fllowing:
66
* CUDA version short
7-
* CUDA full verison
7+
* CUDA full version
88
* CUDNN version short
99
* Image type either runtime or devel
1010
* Platform linux/arm64,linux/amd64

.github/scripts/trymerge.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -748,7 +748,7 @@ def get_merge_base(self) -> str:
748748
# work for ghstack where the base is the custom branch, i.e. gh/USER/ID/base,
749749
# so let's just use main instead
750750
self.merge_base = gh_fetch_merge_base(
751-
self.org, self.project, last_commit_oid, "main"
751+
self.org, self.project, last_commit_oid, self.default_branch()
752752
)
753753

754754
# Fallback to baseRefOid if the API call fails, i.e. rate limit. Note that baseRefOid
@@ -2307,7 +2307,6 @@ def handle_exception(e: Exception, title: str = "Merge failed") -> None:
23072307
get_ghstack_prs(repo, pr) # raises error if out of sync
23082308
pr.merge_changes(
23092309
repo,
2310-
branch="main",
23112310
skip_mandatory_checks=True,
23122311
skip_all_rule_checks=True,
23132312
)

.github/workflows/check_mergeability_ghstack.yml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,34 @@ jobs:
2020
git config --global user.name "PyTorch MergeBot"
2121
git fetch origin main:main
2222
23+
- name: Wait for orig branch
24+
shell: bash
25+
run: |
26+
BRANCH="${{ github.base_ref }}"
27+
echo "$BRANCH"
28+
BRANCH="${BRANCH%/base}/orig"
29+
echo "$BRANCH"
30+
31+
WAIT_SECONDS=300
32+
END_WAIT=$((SECONDS+WAIT_SECONDS))
33+
BRANCH_EXISTS=0
34+
35+
while [ $SECONDS -lt $END_WAIT ]; do
36+
git fetch --prune origin "${BRANCH}" || true
37+
if git rev-parse --verify "origin/${BRANCH}"; then
38+
BRANCH_EXISTS=1
39+
break
40+
fi
41+
echo "Waiting for branch ${BRANCH} to exist..."
42+
sleep 30 # Wait for 30 seconds before retrying
43+
done
44+
45+
if [ $BRANCH_EXISTS -eq 0 ]; then
46+
echo "Branch ${BRANCH} not found after ${WAIT_SECONDS} seconds."
47+
echo "Mergeability check failed for infrastructure reasons."
48+
exit 1
49+
fi
50+
2351
- name: Setup Python
2452
uses: actions/setup-python@v4
2553
with:

.github/workflows/create_release.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ jobs:
1515
if: ${{ github.repository == 'pytorch/pytorch' }}
1616
name: Create Release
1717
runs-on: ubuntu-latest
18+
# https://github.com/softprops/action-gh-release?tab=readme-ov-file#permissions
19+
permissions:
20+
contents: write
1821
steps:
1922
- uses: malfet/checkout@silent-checkout
2023
with:

.github/workflows/lint.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ jobs:
5151
# Run lintrunner on all files
5252
if ! lintrunner --force-color --all-files --tee-json=lint.json 2> /dev/null; then
5353
echo ""
54-
echo -e "\e[1m\e[36mYou can reproduce these results locally by using \`lintrunner\`. (If you don't get the same results, run \'lintrunner init\' to update your local linter)\e[0m"
54+
echo -e "\e[1m\e[36mYou can reproduce these results locally by using \`lintrunner -m origin/main\`. (If you don't get the same results, run \'lintrunner init\' to update your local linter)\e[0m"
5555
echo -e "\e[1m\e[36mSee https://github.com/pytorch/pytorch/wiki/lintrunner for setup instructions.\e[0m"
5656
RC=1
5757
fi

.lintrunner.toml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
merge_base_with = "origin/main"
2-
31
[[linter]]
42
code = 'FLAKE8'
53
include_patterns = ['**/*.py']
@@ -1549,7 +1547,6 @@ exclude_patterns = [
15491547
'test/test_mkldnn_verbose.py',
15501548
'test/test_mobile_optimizer.py',
15511549
'test/test_model_dump.py',
1552-
'test/test_module_init.py',
15531550
'test/test_modules.py',
15541551
'test/test_monitor.py',
15551552
'test/test_mps.py',

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.4)
4848
message(FATAL "GCC-9.4 or newer is required to compile PyTorch, but found ${CMAKE_CXX_COMPILER_VERSION}")
4949
endif()
5050

51+
# This define is needed to preserve behavior given anticpated changes to cccl/thrust
52+
# https://nvidia.github.io/libcudacxx/standard_api/numerics_library/complex.html
53+
string(APPEND CMAKE_CUDA_FLAGS "-DLIBCUDACXX_ENABLE_SIMPLIFIED_COMPLEX_OPERATIONS")
54+
5155
if(LINUX)
5256
include(cmake/CheckAbi.cmake)
5357
string(APPEND CMAKE_CXX_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")

CODEOWNERS

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,9 @@ test/functorch/test_ops.py @zou3519 @chillee @kshitij12345
9797
test/functorch/test_vmap.py @zou3519 @chillee @kshitij12345
9898

9999
# torch MPS
100-
test/test_mps.py @kulinseth
101-
aten/src/ATen/mps/ @kulinseth
102-
aten/src/ATen/native/mps/ @kulinseth
100+
test/test_mps.py @kulinseth @malfet
101+
aten/src/ATen/mps/ @kulinseth @malfet
102+
aten/src/ATen/native/mps/ @kulinseth @malfet
103103

104104
# Profiler
105105
torch/csrc/autograd/profiler* @aaronenyeshi
@@ -130,3 +130,12 @@ caffe2/utils/hip @jeffdaily @jithunnair-amd
130130
# torch.export
131131
/torch/export/ @avikchaudhuri @gmagogsfm @tugsbayasgalan @zhxchen17
132132
/torch/_export/ @avikchaudhuri @gmagogsfm @tugsbayasgalan @zhxchen17
133+
134+
# serialization-related files
135+
/aten/src/ATen/MapAllocator* @mikaylagawarecki
136+
/caffe2/serialize/ @mikaylagawarecki
137+
/torch/serialization.py @mikaylagawarecki
138+
/torch/storage.py @mikaylagawarecki
139+
/torch/csrc/Storage* @mikaylagawarecki
140+
# subscribing for PyTorchFileWriter/PyTorchFileReader changes
141+
/torch/csrc/jit/python/init.cpp @mikaylagawarecki

android/pytorch_android/generate_test_torchscripts.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,15 @@ def conv2d(self, x: Tensor, w: Tensor, toChannelsLast: bool) -> Tensor:
125125
r = r.contiguous()
126126
return r
127127

128+
@torch.jit.script_method
129+
def conv3d(self, x: Tensor, w: Tensor, toChannelsLast: bool) -> Tensor:
130+
r = torch.nn.functional.conv3d(x, w)
131+
if toChannelsLast:
132+
r = r.contiguous(memory_format=torch.channels_last_3d)
133+
else:
134+
r = r.contiguous()
135+
return r
136+
128137
@torch.jit.script_method
129138
def contiguous(self, x: Tensor) -> Tensor:
130139
return x.contiguous()
Binary file not shown.

android/pytorch_android/src/androidTest/java/org/pytorch/PytorchTestBase.java

Lines changed: 114 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -348,15 +348,32 @@ public void testChannelsLast3d() throws IOException {
348348
@Test
349349
public void testChannelsLastConv2d() throws IOException {
350350
long[] inputShape = new long[] {1, 3, 2, 2};
351-
long[] dataNCHW = new long[] {1, 2, 3, 4, 11, 12, 13, 14, 101, 102, 103, 104};
351+
long[] dataNCHW = new long[] {
352+
111, 112,
353+
121, 122,
354+
355+
211, 212,
356+
221, 222,
357+
358+
311, 312,
359+
321, 322};
352360
Tensor inputNCHW = Tensor.fromBlob(dataNCHW, inputShape, MemoryFormat.CONTIGUOUS);
353-
long[] dataNHWC = new long[] {1, 11, 101, 2, 12, 102, 3, 13, 103, 4, 14, 104};
354-
Tensor inputNHWC = Tensor.fromBlob(dataNHWC, inputShape, MemoryFormat.CHANNELS_LAST);
361+
long[] dataNHWC = new long[] {
362+
111, 211, 311, 112, 212, 312,
355363

364+
121, 221, 321, 122, 222, 322};
365+
Tensor inputNHWC = Tensor.fromBlob(dataNHWC, inputShape, MemoryFormat.CHANNELS_LAST);
356366
long[] weightShape = new long[] {3, 3, 1, 1};
357-
long[] dataWeightOIHW = new long[] {2, 0, 0, 0, 1, 0, 0, 0, -1};
367+
long[] dataWeightOIHW = new long[] {
368+
2, 0, 0,
369+
0, 1, 0,
370+
0, 0, -1};
358371
Tensor wNCHW = Tensor.fromBlob(dataWeightOIHW, weightShape, MemoryFormat.CONTIGUOUS);
359-
long[] dataWeightOHWI = new long[] {2, 0, 0, 0, 1, 0, 0, 0, -1};
372+
long[] dataWeightOHWI = new long[] {
373+
2, 0, 0,
374+
0, 1, 0,
375+
0, 0, -1};
376+
360377
Tensor wNHWC = Tensor.fromBlob(dataWeightOHWI, weightShape, MemoryFormat.CHANNELS_LAST);
361378

362379
final Module module = loadModel(TEST_MODULE_ASSET_NAME);
@@ -367,15 +384,105 @@ public void testChannelsLastConv2d() throws IOException {
367384
outputNCHW,
368385
MemoryFormat.CONTIGUOUS,
369386
new long[] {1, 3, 2, 2},
370-
new long[] {2, 4, 6, 8, 11, 12, 13, 14, -101, -102, -103, -104});
387+
new long[] {
388+
2*111, 2*112,
389+
2*121, 2*122,
390+
391+
211, 212,
392+
221, 222,
393+
394+
-311, -312,
395+
-321, -322});
371396

372397
final IValue outputNHWC =
373398
module.runMethod("conv2d", IValue.from(inputNHWC), IValue.from(wNHWC), IValue.from(true));
374399
assertIValueTensor(
375400
outputNHWC,
376401
MemoryFormat.CHANNELS_LAST,
377402
new long[] {1, 3, 2, 2},
378-
new long[] {2, 11, -101, 4, 12, -102, 6, 13, -103, 8, 14, -104});
403+
new long[] {
404+
2*111, 211, -311, 2*112, 212, -312,
405+
2*121, 221, -321, 2*122, 222, -322});
406+
}
407+
408+
@Test
409+
public void testChannelsLastConv3d() throws IOException {
410+
long[] inputShape = new long[] {1, 3, 2, 2, 2};
411+
long[] dataNCDHW = new long[] {
412+
1111, 1112,
413+
1121, 1122,
414+
1211, 1212,
415+
1221, 1222,
416+
417+
2111, 2112,
418+
2121, 2122,
419+
2211, 2212,
420+
2221, 2222,
421+
422+
3111, 3112,
423+
3121, 3122,
424+
3211, 3212,
425+
3221, 3222};
426+
Tensor inputNCDHW = Tensor.fromBlob(dataNCDHW, inputShape, MemoryFormat.CONTIGUOUS);
427+
long[] dataNDHWC = new long[] {
428+
1111, 2111, 3111,
429+
1112, 2112, 3112,
430+
431+
1121, 2121, 3121,
432+
1122, 2122, 3122,
433+
434+
1211, 2211, 3211,
435+
1212, 2212, 3212,
436+
437+
1221, 2221, 3221,
438+
1222, 2222, 3222};
439+
440+
Tensor inputNDHWC = Tensor.fromBlob(dataNDHWC, inputShape, MemoryFormat.CHANNELS_LAST_3D);
441+
442+
long[] weightShape = new long[] {3, 3, 1, 1, 1};
443+
long[] dataWeightOIDHW = new long[] {
444+
2, 0, 0,
445+
0, 1, 0,
446+
0, 0, -1,
447+
};
448+
Tensor wNCDHW = Tensor.fromBlob(dataWeightOIDHW, weightShape, MemoryFormat.CONTIGUOUS);
449+
long[] dataWeightODHWI = new long[] {
450+
2, 0, 0,
451+
0, 1, 0,
452+
0, 0, -1,
453+
};
454+
Tensor wNDHWC = Tensor.fromBlob(dataWeightODHWI, weightShape, MemoryFormat.CHANNELS_LAST_3D);
455+
456+
final Module module = loadModel(TEST_MODULE_ASSET_NAME);
457+
458+
final IValue outputNCDHW =
459+
module.runMethod("conv3d", IValue.from(inputNCDHW), IValue.from(wNCDHW), IValue.from(false));
460+
assertIValueTensor(
461+
outputNCDHW,
462+
MemoryFormat.CONTIGUOUS,
463+
new long[] {1, 3, 2, 2, 2},
464+
new long[] {
465+
2*1111, 2*1112, 2*1121, 2*1122,
466+
2*1211, 2*1212, 2*1221, 2*1222,
467+
468+
2111, 2112, 2121, 2122,
469+
2211, 2212, 2221, 2222,
470+
471+
-3111, -3112, -3121, -3122,
472+
-3211, -3212, -3221, -3222});
473+
474+
final IValue outputNDHWC =
475+
module.runMethod("conv3d", IValue.from(inputNDHWC), IValue.from(wNDHWC), IValue.from(true));
476+
assertIValueTensor(
477+
outputNDHWC,
478+
MemoryFormat.CHANNELS_LAST_3D,
479+
new long[] {1, 3, 2, 2, 2},
480+
new long[] {
481+
2*1111, 2111, -3111, 2*1112, 2112, -3112,
482+
2*1121, 2121, -3121, 2*1122, 2122, -3122,
483+
484+
2*1211, 2211, -3211, 2*1212, 2212, -3212,
485+
2*1221, 2221, -3221, 2*1222, 2222, -3222});
379486
}
380487

381488
@Test

android/pytorch_android/test_asset.jit

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,15 @@ def conv2d(self, x: Tensor, w: Tensor, toChannelsLast: bool) -> Tensor:
8484
r = r.contiguous()
8585
return r
8686

87+
def conv3d(self, x: Tensor, w: Tensor, toChannelsLast: bool) -> Tensor:
88+
r = torch.conv3d(x, w)
89+
if (toChannelsLast):
90+
# memory_format=torch.channels_last_3d
91+
r = r.contiguous(memory_format=2)
92+
else:
93+
r = r.contiguous()
94+
return r
95+
8796
def contiguous(self, x: Tensor) -> Tensor:
8897
return x.contiguous()
8998

aten/src/ATen/MapAllocator.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,11 @@ MapAllocator::MapAllocator(WithFd, c10::string_view filename, int fd, int flags,
325325
TORCH_CHECK(false, "unable to mmap ", size_, " bytes from file <", filename_, ">: ", strerror(errno), " (", errno, ")");
326326
}
327327

328+
#if !defined(__APPLE__) && !defined(__ANDROID__)
329+
/* attempt to use larger block size on Linux, which is important for getting better CUDA upload speed */
330+
posix_fadvise(fd, 0, size, POSIX_FADV_SEQUENTIAL);
331+
#endif
332+
328333
if (flags_ & ALLOCATOR_MAPPED_KEEPFD) {
329334
fd_ = fd;
330335
} else {

aten/src/ATen/core/DeprecatedTypeProperties.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,10 @@ class TORCH_API DeprecatedTypeProperties {
9494
return toBackend(Backend::HIP);
9595
}
9696

97+
DeprecatedTypeProperties & privateUser1() const {
98+
return toBackend(Backend::PrivateUse1);
99+
}
100+
97101
/// Constructs the `TensorOptions` from a type and a `device_index`.
98102
TensorOptions options(int16_t device_index = -1) const {
99103
return TensorOptions().dtype(typeMeta())

aten/src/ATen/cpu/vec/vec256/vsx/vec256_float_vsx.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -234,11 +234,6 @@ class Vectorized<float> {
234234
return ret._nor();
235235
}
236236

237-
Vectorized<float> _isinf() const {
238-
auto x = *this;
239-
return (x == v_inf) | (x == v_minus_inf);
240-
}
241-
242237
bool has_inf_nan() const {
243238
for (const auto i : c10::irange(size()/2)) {
244239
if(_isnan(_vec0[i]) || _isinf(_vec0[i])) {

0 commit comments

Comments
 (0)