Skip to content

Commit e5ab2fe

Browse files
committed
Update on "[ONNX] Enable _jit_pass_onnx_fold_if only when dynamic_axes is None (#50582)"
Fixing pytorch/vision#3251 (PR #49410 triggers the torchvision test build failure on three tests: test_faster_rcnn, test_mask_rcnn, test_keypoint_rcnn.) The offending PR is fine on pytorch UT, because the torchvision and pytorch tests have a gap when we merge them — we are using different test APIs on the two sides, therefore causing some discrepancy. This PR bridges the gap for the above three tests, and disables the _jit_pass_onnx_fold_if pass until it gets fixed. Allow _jit_pass_onnx_fold_if only when dynamic_axes is None. Differential Revision: [D26023934](https://our.internmc.facebook.com/intern/diff/D26023934) [ghstack-poisoned]
2 parents 25fbc1e + f7215fb commit e5ab2fe

File tree

234 files changed

+4661
-2025
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

234 files changed

+4661
-2025
lines changed

.circleci/docker/common/install_conda.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
9292
conda_install magma-cuda110 -c pytorch
9393
elif [[ "$CUDA_VERSION" == 11.1* ]]; then
9494
conda_install magma-cuda111 -c pytorch
95+
elif [[ "$CUDA_VERSION" == 11.2* ]]; then
96+
conda_install magma-cuda112 -c pytorch
9597
fi
9698

9799
# TODO: This isn't working atm

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ docs/cpp/source/html/
3838
docs/cpp/source/latex/
3939
docs/source/generated/
4040
log
41+
test-reports/
4142
test/.coverage
4243
test/.hypothesis/
4344
test/cpp/api/mnist
@@ -50,7 +51,6 @@ dropout_model.pt
5051
test/generated_type_hints_smoketest.py
5152
test/htmlcov
5253
test/cpp_extensions/install/
53-
test/test-reports/
5454
third_party/build/
5555
tools/shared/_utils_internal.py
5656
tools/fast_nvcc/wrap_nvcc.sh

CMakeLists.txt

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,8 @@ cmake_dependent_option(
171171
USE_STATIC_CUDNN "Use cuDNN static libraries" OFF
172172
"USE_CUDNN" OFF)
173173
option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
174-
option(USE_KINETO "Use Kineto profiling library" OFF)
174+
option(USE_KINETO "Use Kineto profiling library" ON)
175+
option(USE_CUPTI_SO "Use CUPTI as a shared library" OFF)
175176
option(USE_FAKELOWP "Use FakeLowp operators" OFF)
176177
option(USE_FFMPEG "Use ffmpeg" OFF)
177178
option(USE_GFLAGS "Use GFLAGS" OFF)
@@ -248,6 +249,7 @@ cmake_dependent_option(
248249
option(USE_TBB "Use TBB" OFF)
249250
option(ONNX_ML "Enable traditional ONNX ML API." ON)
250251
option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF)
252+
option(USE_DEPLOY "Enable torch::deploy embedded python interpreter" OFF)
251253

252254
# Since TensorPipe does not support Windows, set it to OFF when WIN32 detected
253255
# On Windows platform, if user does not install libuv in build conda env and
@@ -545,31 +547,12 @@ if(USE_FBGEMM AND ((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SIZEOF_VO
545547
set(USE_FBGEMM OFF)
546548
endif()
547549

548-
if(USE_KINETO AND INTERN_BUILD_MOBILE)
549-
message(STATUS "Not using libkineto in a mobile build.")
550-
set(USE_KINETO OFF)
551-
endif()
552-
553-
if(USE_KINETO AND (NOT USE_CUDA))
554-
message(STATUS "Not using libkineto in a non-CUDA build.")
555-
set(USE_KINETO OFF)
556-
endif()
557-
558-
if(USE_KINETO AND MSVC)
559-
message(STATUS "Not using libkineto in a Windows build.")
560-
set(USE_KINETO OFF)
561-
endif()
562-
563550
include(cmake/Dependencies.cmake)
564551

565552
if(USE_FBGEMM)
566553
string(APPEND CMAKE_CXX_FLAGS " -DUSE_FBGEMM")
567554
endif()
568555

569-
if(USE_KINETO)
570-
string(APPEND CMAKE_CXX_FLAGS " -DUSE_KINETO")
571-
endif()
572-
573556
if(USE_QNNPACK)
574557
string(APPEND CMAKE_CXX_FLAGS " -DUSE_QNNPACK")
575558
endif()

android/pytorch_android_torchvision/src/main/cpp/pytorch_vision_jni.cpp

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44

55
#include "jni.h"
66

7-
#define clamp0255(x) x > 255 ? 255 : x < 0 ? 0 : x
8-
97
namespace pytorch_vision_jni {
108

119
static void imageYUV420CenterCropToFloatBuffer(
@@ -65,7 +63,7 @@ static void imageYUV420CenterCropToFloatBuffer(
6563
const uint8_t* vData = (uint8_t*)jniEnv->GetDirectBufferAddress(vBuffer);
6664

6765
float scale = cropWidthAfterRtn / tensorWidth;
68-
int uvRowStride = uRowStride >> 1;
66+
int uvRowStride = uRowStride;
6967
int cropXMult = 1;
7068
int cropYMult = 1;
7169
int cropXAdd = offsetX;
@@ -91,7 +89,7 @@ static void imageYUV420CenterCropToFloatBuffer(
9189
float normStdBm255 = 255 * normStdRGB[2];
9290

9391
int xBeforeRtn, yBeforeRtn;
94-
int yIdx, uvIdx, ui, vi, a0, ri, gi, bi;
92+
int yi, yIdx, uvIdx, ui, vi, a0, ri, gi, bi;
9593
int channelSize = tensorWidth * tensorHeight;
9694
int wr = outOffset;
9795
int wg = wr + channelSize;
@@ -101,16 +99,23 @@ static void imageYUV420CenterCropToFloatBuffer(
10199
xBeforeRtn = cropXAdd + cropXMult * (int)(x * scale);
102100
yBeforeRtn = cropYAdd + cropYMult * (int)(y * scale);
103101
yIdx = yBeforeRtn * yRowStride + xBeforeRtn * yPixelStride;
104-
uvIdx = (yBeforeRtn >> 1) * uvRowStride + xBeforeRtn * uvPixelStride;
102+
uvIdx = (yBeforeRtn >> 1) * uvRowStride + (xBeforeRtn >> 1) * uvPixelStride;
105103
ui = uData[uvIdx];
106104
vi = vData[uvIdx];
107-
a0 = 1192 * (yData[yIdx] - 16);
108-
ri = (a0 + 1634 * (vi - 128)) >> 10;
109-
gi = (a0 - 832 * (vi - 128) - 400 * (ui - 128)) >> 10;
110-
bi = (a0 + 2066 * (ui - 128)) >> 10;
111-
outData[wr++] = (clamp0255(ri) - normMeanRm255) / normStdRm255;
112-
outData[wg++] = (clamp0255(gi) - normMeanGm255) / normStdGm255;
113-
outData[wb++] = (clamp0255(bi) - normMeanBm255) / normStdBm255;
105+
yi = yData[yIdx];
106+
yi = (yi - 16) < 0 ? 0 : (yi - 16);
107+
ui -= 128;
108+
vi -= 128;
109+
a0 = 1192 * yi;
110+
ri = (a0 + 1634 * vi) >> 10;
111+
gi = (a0 - 833 * vi - 400 * ui) >> 10;
112+
bi = (a0 + 2066 * ui) >> 10;
113+
ri = ri > 255 ? 255 : ri < 0 ? 0 : ri;
114+
gi = gi > 255 ? 255 : gi < 0 ? 0 : gi;
115+
bi = bi > 255 ? 255 : bi < 0 ? 0 : bi;
116+
outData[wr++] = (ri - normMeanRm255) / normStdRm255;
117+
outData[wg++] = (gi - normMeanGm255) / normStdGm255;
118+
outData[wb++] = (bi - normMeanBm255) / normStdBm255;
114119
}
115120
}
116121
}

aten/src/ATen/CUDAGeneratorImpl.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ struct PhiloxCudaState {
119119
bool captured_ = false;
120120
};
121121

122-
struct TORCH_CUDA_API CUDAGeneratorImpl : public c10::GeneratorImpl {
122+
struct TORCH_CUDA_CPP_API CUDAGeneratorImpl : public c10::GeneratorImpl {
123123
// Constructors
124124
CUDAGeneratorImpl(DeviceIndex device_index = -1);
125125
~CUDAGeneratorImpl() = default;
@@ -155,10 +155,10 @@ struct TORCH_CUDA_API CUDAGeneratorImpl : public c10::GeneratorImpl {
155155
namespace cuda {
156156
namespace detail {
157157

158-
TORCH_CUDA_API const Generator& getDefaultCUDAGenerator(DeviceIndex device_index = -1);
159-
TORCH_CUDA_API Generator createCUDAGenerator(DeviceIndex device_index = -1);
158+
TORCH_CUDA_CPP_API const Generator& getDefaultCUDAGenerator(
159+
DeviceIndex device_index = -1);
160+
TORCH_CUDA_CPP_API Generator createCUDAGenerator(DeviceIndex device_index = -1);
160161

161162
} // namespace detail
162163
} // namespace cuda
163164
} // namespace at
164-

aten/src/ATen/Context.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -60,25 +60,25 @@ void Context::setDeterministicCuDNN(bool b) {
6060
deterministic_cudnn = b;
6161
}
6262

63-
bool Context::deterministic() const {
64-
return _deterministic;
63+
bool Context::deterministicAlgorithms() const {
64+
return _deterministic_algorithms;
6565
}
6666

67-
void Context::setDeterministic(bool b) {
67+
void Context::setDeterministicAlgorithms(bool b) {
6868
if (b) {
69-
TORCH_WARN_ONCE("torch.set_deterministic is in beta, and its design and "
69+
TORCH_WARN_ONCE("torch.use_deterministic_algorithms is in beta, and its design and"
7070
" functionality may change in the future.");
7171
}
7272

73-
_deterministic = b;
73+
_deterministic_algorithms = b;
7474
}
7575

7676
void Context::alertNotDeterministic(c10::string_view const& caller) {
77-
if (globalContext().deterministic()) {
77+
if (globalContext().deterministicAlgorithms()) {
7878
TORCH_CHECK(false,
7979
caller, " does not have a deterministic implementation, but you set "
80-
"'torch.set_deterministic(True)'. You can turn off determinism just "
81-
"for this operation if that's acceptable for your application. You "
80+
"'torch.use_deterministic_algorithms(True)'. You can turn off determinism ",
81+
"just for this operation if that's acceptable for your application. You "
8282
"can also file an issue at https://github.com/pytorch/pytorch/issues "
8383
"to help us prioritize adding deterministic support for this operation.");
8484
}
@@ -111,9 +111,9 @@ bool Context::checkCuBLASConfigDeterministic() {
111111

112112
void Context::alertCuBLASConfigNotDeterministic() {
113113
static bool cublas_config_deterministic = checkCuBLASConfigDeterministic();
114-
TORCH_CHECK(!deterministic() || cublas_config_deterministic,
115-
"Deterministic behavior was enabled with either `torch.set_deterministic(True)` or ",
116-
"`at::Context::setDeterministic(true)`, but this operation is not deterministic because ",
114+
TORCH_CHECK(!deterministicAlgorithms() || cublas_config_deterministic,
115+
"Deterministic behavior was enabled with either `torch.use_deterministic_algorithms(True)` or ",
116+
"`at::Context::setDeterministicAlgorithms(true)`, but this operation is not deterministic because ",
117117
"it uses CuBLAS and you have CUDA >= 10.2. To enable deterministic behavior in this ",
118118
"case, you must set an environment variable before running your PyTorch application: ",
119119
cublas_config_var_name, "=", cublas_deterministic_configs[0], " or ",

aten/src/ATen/Context.h

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -120,27 +120,27 @@ class TORCH_API Context {
120120
//
121121
// * Include this comment: "See Note [Enabling Deterministic Operations]"
122122
//
123-
// * Check the value of `at::globalContext().deterministic()` to toggle between
124-
// nondeterministic and deterministic implementations.
123+
// * Check the value of `at::globalContext().deterministicAlgorithms()` to toggle
124+
// between nondeterministic and deterministic implementations.
125125
//
126126
// * Have an entry in the list of PyTorch operations that toggle between nondeterministic
127-
// and deterministic implementations, in the docstring of `set_deterministic()`
127+
// and deterministic implementations, in the docstring of `use_deterministic_algorithms()`
128128
// in torch/__init__.py
129129
//
130130
// `example_func()` below shows an example of toggling between nondeterministic and
131131
// deterministic implementations:
132132
//
133133
// void example_func() {
134134
// // See Note [Enabling Deterministic Operations]
135-
// if (at::globalContext().deterministic()) {
135+
// if (at::globalContext().deterministicAlgorithms()) {
136136
// example_func_deterministic();
137137
// } else {
138138
// example_func_nondeterministic();
139139
// }
140140
// }
141141

142-
bool deterministic() const;
143-
void setDeterministic(bool);
142+
bool deterministicAlgorithms() const;
143+
void setDeterministicAlgorithms(bool);
144144

145145
// Note [Writing Nondeterministic Operations]
146146
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -151,16 +151,18 @@ class TORCH_API Context {
151151
//
152152
// * Include a comment explaining why the operation is nondeterministic.
153153
//
154-
// * Throw an error when `Context::deterministic()` is true. Most of the time, this
155-
// should be accomplished by calling `at::globalContext().alertNotDeterminstic()`.
156-
// However, if the nondeterministic behavior is caused by the CuBLAS workspace
154+
// * Throw an error when `Context::deterministicAlgorithms()` is true. Most
155+
// of the time, this should be accomplished by calling
156+
// `at::globalContext().alertNotDeterminstic()`. However, if the
157+
// nondeterministic behavior is caused by the CuBLAS workspace
157158
// configuration in CUDA >= 10.2,
158-
// `at::globalContext().alertCuBLASConfigNotDeterministic()` should
159-
// be called instead (in this case, a comment explaining why the operation is
160-
// nondeterministic is not necessary). See below for details on these methods.
159+
// `at::globalContext().alertCuBLASConfigNotDeterministic()` should be
160+
// called instead (in this case, a comment explaining why the operation is
161+
// nondeterministic is not necessary). See below for details on these
162+
// methods.
161163
//
162164
// * Have an entry in the list of nondeterministic PyTorch operations in the
163-
// docstring of `set_deterministic()` in torch/__init__.py
165+
// docstring of `use_deterministic_algorithms()` in torch/__init__.py
164166
//
165167
// `example_func()` below shows an example of the comments and error-throwing code
166168
// for a nondeterministic operation:
@@ -172,10 +174,10 @@ class TORCH_API Context {
172174
// ...
173175
// }
174176

175-
// Throws an error if `Context::deterministic()` is true
177+
// Throws an error if `Context::deterministicAlgorithms()` is true
176178
void alertNotDeterministic(c10::string_view const& caller);
177179

178-
// Throws an error if `Context::deterministic()` is true, CUDA >= 10.2, and
180+
// Throws an error if `Context::deterministicAlgorithms()` is true, CUDA >= 10.2, and
179181
// CUBLAS_WORKSPACE_CONFIG is not set to either ":16:8" or ":4096:8". For more details:
180182
// https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
181183
void alertCuBLASConfigNotDeterministic();
@@ -210,7 +212,7 @@ class TORCH_API Context {
210212
std::once_flag thh_init;
211213
bool enabled_cudnn = true;
212214
bool deterministic_cudnn = false;
213-
bool _deterministic = false;
215+
bool _deterministic_algorithms = false;
214216
bool benchmark_cudnn = false;
215217
bool allow_tf32_cudnn = true;
216218
bool allow_tf32_cublas = true;

0 commit comments

Comments
 (0)