Skip to content

Commit e5ab2fe

Browse files
committed
Update on "[ONNX] Enable _jit_pass_onnx_fold_if only when dynamic_axes is None (#50582)"
Fixing pytorch/vision#3251 (PR #49410 triggers the torchvision test build failure on three tests: test_faster_rcnn, test_mask_rcnn, test_keypoint_rcnn.) The offending PR is fine on pytorch UT, because the torchvision and pytorch tests have a gap when we merge them — we are using different test APIs on the two sides, therefore causing some discrepancy. This PR bridges the gap for the above three tests, and disables the _jit_pass_onnx_fold_if pass until it gets fixed. Allow _jit_pass_onnx_fold_if only when dynamic_axes is None. Differential Revision: [D26023934](https://our.internmc.facebook.com/intern/diff/D26023934) [ghstack-poisoned]
2 parents 25fbc1e + f7215fb commit e5ab2fe

File tree

234 files changed

+4661
-2025
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

234 files changed

+4661
-2025
lines changed

.circleci/docker/common/install_conda.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
9292
conda_install magma-cuda110 -c pytorch
9393
elif [[ "$CUDA_VERSION" == 11.1* ]]; then
9494
conda_install magma-cuda111 -c pytorch
95+
elif [[ "$CUDA_VERSION" == 11.2* ]]; then
96+
conda_install magma-cuda112 -c pytorch
9597
fi
9698

9799
# TODO: This isn't working atm

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ docs/cpp/source/html/
3838
docs/cpp/source/latex/
3939
docs/source/generated/
4040
log
41+
test-reports/
4142
test/.coverage
4243
test/.hypothesis/
4344
test/cpp/api/mnist
@@ -50,7 +51,6 @@ dropout_model.pt
5051
test/generated_type_hints_smoketest.py
5152
test/htmlcov
5253
test/cpp_extensions/install/
53-
test/test-reports/
5454
third_party/build/
5555
tools/shared/_utils_internal.py
5656
tools/fast_nvcc/wrap_nvcc.sh

CMakeLists.txt

Lines changed: 3 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,8 @@ cmake_dependent_option(
171171
USE_STATIC_CUDNN "Use cuDNN static libraries" OFF
172172
"USE_CUDNN" OFF)
173173
option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
174-
option(USE_KINETO "Use Kineto profiling library" OFF)
174+
option(USE_KINETO "Use Kineto profiling library" ON)
175+
option(USE_CUPTI_SO "Use CUPTI as a shared library" OFF)
175176
option(USE_FAKELOWP "Use FakeLowp operators" OFF)
176177
option(USE_FFMPEG "Use ffmpeg" OFF)
177178
option(USE_GFLAGS "Use GFLAGS" OFF)
@@ -248,6 +249,7 @@ cmake_dependent_option(
248249
option(USE_TBB "Use TBB" OFF)
249250
option(ONNX_ML "Enable traditional ONNX ML API." ON)
250251
option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF)
252+
option(USE_DEPLOY "Enable torch::deploy embedded python interpreter" OFF)
251253

252254
# Since TensorPipe does not support Windows, set it to OFF when WIN32 detected
253255
# On Windows platform, if user does not install libuv in build conda env and
@@ -545,31 +547,12 @@ if(USE_FBGEMM AND ((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SIZEOF_VO
545547
set(USE_FBGEMM OFF)
546548
endif()
547549

548-
if(USE_KINETO AND INTERN_BUILD_MOBILE)
549-
message(STATUS "Not using libkineto in a mobile build.")
550-
set(USE_KINETO OFF)
551-
endif()
552-
553-
if(USE_KINETO AND (NOT USE_CUDA))
554-
message(STATUS "Not using libkineto in a non-CUDA build.")
555-
set(USE_KINETO OFF)
556-
endif()
557-
558-
if(USE_KINETO AND MSVC)
559-
message(STATUS "Not using libkineto in a Windows build.")
560-
set(USE_KINETO OFF)
561-
endif()
562-
563550
include(cmake/Dependencies.cmake)
564551

565552
if(USE_FBGEMM)
566553
string(APPEND CMAKE_CXX_FLAGS " -DUSE_FBGEMM")
567554
endif()
568555

569-
if(USE_KINETO)
570-
string(APPEND CMAKE_CXX_FLAGS " -DUSE_KINETO")
571-
endif()
572-
573556
if(USE_QNNPACK)
574557
string(APPEND CMAKE_CXX_FLAGS " -DUSE_QNNPACK")
575558
endif()

android/pytorch_android_torchvision/src/main/cpp/pytorch_vision_jni.cpp

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44

55
#include "jni.h"
66

7-
#define clamp0255(x) x > 255 ? 255 : x < 0 ? 0 : x
8-
97
namespace pytorch_vision_jni {
108

119
static void imageYUV420CenterCropToFloatBuffer(
@@ -65,7 +63,7 @@ static void imageYUV420CenterCropToFloatBuffer(
6563
const uint8_t* vData = (uint8_t*)jniEnv->GetDirectBufferAddress(vBuffer);
6664

6765
float scale = cropWidthAfterRtn / tensorWidth;
68-
int uvRowStride = uRowStride >> 1;
66+
int uvRowStride = uRowStride;
6967
int cropXMult = 1;
7068
int cropYMult = 1;
7169
int cropXAdd = offsetX;
@@ -91,7 +89,7 @@ static void imageYUV420CenterCropToFloatBuffer(
9189
float normStdBm255 = 255 * normStdRGB[2];
9290

9391
int xBeforeRtn, yBeforeRtn;
94-
int yIdx, uvIdx, ui, vi, a0, ri, gi, bi;
92+
int yi, yIdx, uvIdx, ui, vi, a0, ri, gi, bi;
9593
int channelSize = tensorWidth * tensorHeight;
9694
int wr = outOffset;
9795
int wg = wr + channelSize;
@@ -101,16 +99,23 @@ static void imageYUV420CenterCropToFloatBuffer(
10199
xBeforeRtn = cropXAdd + cropXMult * (int)(x * scale);
102100
yBeforeRtn = cropYAdd + cropYMult * (int)(y * scale);
103101
yIdx = yBeforeRtn * yRowStride + xBeforeRtn * yPixelStride;
104-
uvIdx = (yBeforeRtn >> 1) * uvRowStride + xBeforeRtn * uvPixelStride;
102+
uvIdx = (yBeforeRtn >> 1) * uvRowStride + (xBeforeRtn >> 1) * uvPixelStride;
105103
ui = uData[uvIdx];
106104
vi = vData[uvIdx];
107-
a0 = 1192 * (yData[yIdx] - 16);
108-
ri = (a0 + 1634 * (vi - 128)) >> 10;
109-
gi = (a0 - 832 * (vi - 128) - 400 * (ui - 128)) >> 10;
110-
bi = (a0 + 2066 * (ui - 128)) >> 10;
111-
outData[wr++] = (clamp0255(ri) - normMeanRm255) / normStdRm255;
112-
outData[wg++] = (clamp0255(gi) - normMeanGm255) / normStdGm255;
113-
outData[wb++] = (clamp0255(bi) - normMeanBm255) / normStdBm255;
105+
yi = yData[yIdx];
106+
yi = (yi - 16) < 0 ? 0 : (yi - 16);
107+
ui -= 128;
108+
vi -= 128;
109+
a0 = 1192 * yi;
110+
ri = (a0 + 1634 * vi) >> 10;
111+
gi = (a0 - 833 * vi - 400 * ui) >> 10;
112+
bi = (a0 + 2066 * ui) >> 10;
113+
ri = ri > 255 ? 255 : ri < 0 ? 0 : ri;
114+
gi = gi > 255 ? 255 : gi < 0 ? 0 : gi;
115+
bi = bi > 255 ? 255 : bi < 0 ? 0 : bi;
116+
outData[wr++] = (ri - normMeanRm255) / normStdRm255;
117+
outData[wg++] = (gi - normMeanGm255) / normStdGm255;
118+
outData[wb++] = (bi - normMeanBm255) / normStdBm255;
114119
}
115120
}
116121
}

aten/src/ATen/CUDAGeneratorImpl.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ struct PhiloxCudaState {
119119
bool captured_ = false;
120120
};
121121

122-
struct TORCH_CUDA_API CUDAGeneratorImpl : public c10::GeneratorImpl {
122+
struct TORCH_CUDA_CPP_API CUDAGeneratorImpl : public c10::GeneratorImpl {
123123
// Constructors
124124
CUDAGeneratorImpl(DeviceIndex device_index = -1);
125125
~CUDAGeneratorImpl() = default;
@@ -155,10 +155,10 @@ struct TORCH_CUDA_API CUDAGeneratorImpl : public c10::GeneratorImpl {
155155
namespace cuda {
156156
namespace detail {
157157

158-
TORCH_CUDA_API const Generator& getDefaultCUDAGenerator(DeviceIndex device_index = -1);
159-
TORCH_CUDA_API Generator createCUDAGenerator(DeviceIndex device_index = -1);
158+
TORCH_CUDA_CPP_API const Generator& getDefaultCUDAGenerator(
159+
DeviceIndex device_index = -1);
160+
TORCH_CUDA_CPP_API Generator createCUDAGenerator(DeviceIndex device_index = -1);
160161

161162
} // namespace detail
162163
} // namespace cuda
163164
} // namespace at
164-

aten/src/ATen/Context.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -60,25 +60,25 @@ void Context::setDeterministicCuDNN(bool b) {
6060
deterministic_cudnn = b;
6161
}
6262

63-
bool Context::deterministic() const {
64-
return _deterministic;
63+
bool Context::deterministicAlgorithms() const {
64+
return _deterministic_algorithms;
6565
}
6666

67-
void Context::setDeterministic(bool b) {
67+
void Context::setDeterministicAlgorithms(bool b) {
6868
if (b) {
69-
TORCH_WARN_ONCE("torch.set_deterministic is in beta, and its design and "
69+
TORCH_WARN_ONCE("torch.use_deterministic_algorithms is in beta, and its design and"
7070
" functionality may change in the future.");
7171
}
7272

73-
_deterministic = b;
73+
_deterministic_algorithms = b;
7474
}
7575

7676
void Context::alertNotDeterministic(c10::string_view const& caller) {
77-
if (globalContext().deterministic()) {
77+
if (globalContext().deterministicAlgorithms()) {
7878
TORCH_CHECK(false,
7979
caller, " does not have a deterministic implementation, but you set "
80-
"'torch.set_deterministic(True)'. You can turn off determinism just "
81-
"for this operation if that's acceptable for your application. You "
80+
"'torch.use_deterministic_algorithms(True)'. You can turn off determinism ",
81+
"just for this operation if that's acceptable for your application. You "
8282
"can also file an issue at https://github.com/pytorch/pytorch/issues "
8383
"to help us prioritize adding deterministic support for this operation.");
8484
}
@@ -111,9 +111,9 @@ bool Context::checkCuBLASConfigDeterministic() {
111111

112112
void Context::alertCuBLASConfigNotDeterministic() {
113113
static bool cublas_config_deterministic = checkCuBLASConfigDeterministic();
114-
TORCH_CHECK(!deterministic() || cublas_config_deterministic,
115-
"Deterministic behavior was enabled with either `torch.set_deterministic(True)` or ",
116-
"`at::Context::setDeterministic(true)`, but this operation is not deterministic because ",
114+
TORCH_CHECK(!deterministicAlgorithms() || cublas_config_deterministic,
115+
"Deterministic behavior was enabled with either `torch.use_deterministic_algorithms(True)` or ",
116+
"`at::Context::setDeterministicAlgorithms(true)`, but this operation is not deterministic because ",
117117
"it uses CuBLAS and you have CUDA >= 10.2. To enable deterministic behavior in this ",
118118
"case, you must set an environment variable before running your PyTorch application: ",
119119
cublas_config_var_name, "=", cublas_deterministic_configs[0], " or ",

aten/src/ATen/Context.h

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -120,27 +120,27 @@ class TORCH_API Context {
120120
//
121121
// * Include this comment: "See Note [Enabling Deterministic Operations]"
122122
//
123-
// * Check the value of `at::globalContext().deterministic()` to toggle between
124-
// nondeterministic and deterministic implementations.
123+
// * Check the value of `at::globalContext().deterministicAlgorithms()` to toggle
124+
// between nondeterministic and deterministic implementations.
125125
//
126126
// * Have an entry in the list of PyTorch operations that toggle between nondeterministic
127-
// and deterministic implementations, in the docstring of `set_deterministic()`
127+
// and deterministic implementations, in the docstring of `use_deterministic_algorithms()`
128128
// in torch/__init__.py
129129
//
130130
// `example_func()` below shows an example of toggling between nondeterministic and
131131
// deterministic implementations:
132132
//
133133
// void example_func() {
134134
// // See Note [Enabling Deterministic Operations]
135-
// if (at::globalContext().deterministic()) {
135+
// if (at::globalContext().deterministicAlgorithms()) {
136136
// example_func_deterministic();
137137
// } else {
138138
// example_func_nondeterministic();
139139
// }
140140
// }
141141

142-
bool deterministic() const;
143-
void setDeterministic(bool);
142+
bool deterministicAlgorithms() const;
143+
void setDeterministicAlgorithms(bool);
144144

145145
// Note [Writing Nondeterministic Operations]
146146
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -151,16 +151,18 @@ class TORCH_API Context {
151151
//
152152
// * Include a comment explaining why the operation is nondeterministic.
153153
//
154-
// * Throw an error when `Context::deterministic()` is true. Most of the time, this
155-
// should be accomplished by calling `at::globalContext().alertNotDeterminstic()`.
156-
// However, if the nondeterministic behavior is caused by the CuBLAS workspace
154+
// * Throw an error when `Context::deterministicAlgorithms()` is true. Most
155+
// of the time, this should be accomplished by calling
156+
// `at::globalContext().alertNotDeterminstic()`. However, if the
157+
// nondeterministic behavior is caused by the CuBLAS workspace
157158
// configuration in CUDA >= 10.2,
158-
// `at::globalContext().alertCuBLASConfigNotDeterministic()` should
159-
// be called instead (in this case, a comment explaining why the operation is
160-
// nondeterministic is not necessary). See below for details on these methods.
159+
// `at::globalContext().alertCuBLASConfigNotDeterministic()` should be
160+
// called instead (in this case, a comment explaining why the operation is
161+
// nondeterministic is not necessary). See below for details on these
162+
// methods.
161163
//
162164
// * Have an entry in the list of nondeterministic PyTorch operations in the
163-
// docstring of `set_deterministic()` in torch/__init__.py
165+
// docstring of `use_deterministic_algorithms()` in torch/__init__.py
164166
//
165167
// `example_func()` below shows an example of the comments and error-throwing code
166168
// for a nondeterministic operation:
@@ -172,10 +174,10 @@ class TORCH_API Context {
172174
// ...
173175
// }
174176

175-
// Throws an error if `Context::deterministic()` is true
177+
// Throws an error if `Context::deterministicAlgorithms()` is true
176178
void alertNotDeterministic(c10::string_view const& caller);
177179

178-
// Throws an error if `Context::deterministic()` is true, CUDA >= 10.2, and
180+
// Throws an error if `Context::deterministicAlgorithms()` is true, CUDA >= 10.2, and
179181
// CUBLAS_WORKSPACE_CONFIG is not set to either ":16:8" or ":4096:8". For more details:
180182
// https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
181183
void alertCuBLASConfigNotDeterministic();
@@ -210,7 +212,7 @@ class TORCH_API Context {
210212
std::once_flag thh_init;
211213
bool enabled_cudnn = true;
212214
bool deterministic_cudnn = false;
213-
bool _deterministic = false;
215+
bool _deterministic_algorithms = false;
214216
bool benchmark_cudnn = false;
215217
bool allow_tf32_cudnn = true;
216218
bool allow_tf32_cublas = true;

0 commit comments

Comments
 (0)