
Commit ad88371

zou3519 authored and facebook-github-bot committed

Implement at::has_internal_overlap helper function (#17926)

Summary: Pull Request resolved: pytorch/pytorch#17926
ghimport-source-id: 9f7572b5d43e474492363fa17dcb86a6c27ca13c

Stack:
* **#17926 Implement at::has_internal_overlap helper function**
* #17927 Error out on in-place (unary) ops on tensors that have internal overlap

On the way to #17935.

Checks whether a tensor's sizes and strides indicate that multiple elements share the same memory location. This problem is hard in general, so at::has_internal_overlap avoids solving it and instead applies two heuristics:

* if a tensor is contiguous, it cannot have internal overlap;
* if a tensor has any zero strides, it does have internal overlap;
* otherwise, return MemOverlap::TOO_HARD to indicate that there might be overlap, but we don't know.

Reviewed By: ezyang
Differential Revision: D14438858
fbshipit-source-id: 607ab31771315921ab6165b2a1f072ac3e75925a

1 parent ea84420 commit ad88371
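The two heuristics above can be sketched in a few lines of standalone Python. This is an illustration, not the ATen implementation: the `sizes`/`strides` lists and the row-major contiguity check are simplified assumptions standing in for `TensorImpl`.

```python
from enum import Enum

class MemOverlap(Enum):
    NO = 0
    YES = 1
    TOO_HARD = 2

def is_contiguous(sizes, strides):
    # Row-major contiguity: each dimension's stride should equal the
    # product of the sizes of all later dimensions (size-1 dims exempt).
    expected = 1
    for size, stride in reversed(list(zip(sizes, strides))):
        if size != 1 and stride != expected:
            return False
        expected *= size
    return True

def has_internal_overlap(sizes, strides):
    # Heuristic 1: a contiguous tensor cannot alias itself.
    if is_contiguous(sizes, strides):
        return MemOverlap.NO
    # Heuristic 2: a zero stride maps every index along that
    # dimension to the same memory location.
    if any(s == 0 for s in strides):
        return MemOverlap.YES
    # Otherwise: deciding overlap exactly is expensive, so give up.
    return MemOverlap.TOO_HARD

# A 3x4 row-major tensor: contiguous, so no overlap.
print(has_internal_overlap([3, 4], [4, 1]))   # MemOverlap.NO
# A row broadcast to shape [3, 4] via stride 0: definite overlap.
print(has_internal_overlap([3, 4], [0, 1]))   # MemOverlap.YES
# A transposed view: not contiguous, no zero strides -> unknown.
print(has_internal_overlap([4, 3], [1, 4]))   # MemOverlap.TOO_HARD
```

This mirrors why zero strides arise in practice: broadcasting views (e.g. `Tensor::expand`) materialize repeated elements by setting a stride to 0 rather than copying data.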

File tree

4 files changed: +62 −0 lines changed

aten/src/ATen/MemoryOverlap.cpp (new file, 33 additions)

```cpp
#include <ATen/MemoryOverlap.h>
#include <c10/core/Layout.h>

#include <algorithm>

namespace at {

MemOverlap has_internal_overlap(const Tensor& tensor) {
  auto* t = tensor.unsafeGetTensorImpl();

  AT_ASSERT(tensor.layout() == kStrided);

  if (t->is_contiguous()) {
    return MemOverlap::NO;
  }

  auto strides = t->strides();
  if (std::find_if(
          strides.begin(), strides.end(),
          [](int64_t s) { return s == 0; }) != strides.end()) {
    return MemOverlap::YES;
  }

  return MemOverlap::TOO_HARD;
}

void assert_no_internal_overlap(const Tensor& t, std::string op) {
  if (has_internal_overlap(t) == MemOverlap::YES) {
    AT_ERROR(
        op, ": unsupported operation: more than one element of the written-to "
        "tensor refers to a single memory location. Please clone() the tensor "
        "before calling ", op);
  }
}

} // namespace at
```

aten/src/ATen/MemoryOverlap.h (new file, 20 additions)

```cpp
#pragma once

#include <ATen/ATen.h>

namespace at {

// MemOverlap: Whether or not there is memory overlap
//
// NO: Absolutely no memory overlap
// YES: Absolutely yes memory overlap
// TOO_HARD: There might be memory overlap, but it was too expensive to compute.
//
// NB: Please update the python test for these if you renumber them.
enum class MemOverlap { NO, YES, TOO_HARD };

MemOverlap has_internal_overlap(const Tensor& t);

void assert_no_internal_overlap(const Tensor& t, std::string op);

} // namespace at
```

aten/src/ATen/native/Memory.cpp (6 additions, 0 deletions)

```diff
@@ -1,4 +1,5 @@
 #include <ATen/ATen.h>
+#include <ATen/MemoryOverlap.h>
 #include <ATen/NativeFunctions.h>
 #include <ATen/detail/CUDAHooksInterface.h>
 #include <c10/util/Exception.h>
@@ -16,5 +17,10 @@ Tensor pin_memory(const Tensor& self) {
   return tensor;
 }

+// Exposes at::has_internal_overlap as an operator for testing purposes
+int64_t _debug_has_internal_overlap(const Tensor& self) {
+  return static_cast<int64_t>(at::has_internal_overlap(self));
+}
+
 }
 }
```

aten/src/ATen/native/native_functions.yaml (3 additions, 0 deletions)

```diff
@@ -61,6 +61,9 @@
   dispatch:
     CUDA: _cudnn_init_dropout_state

+- func: _debug_has_internal_overlap(Tensor self) -> int
+  variants: function
+
 - func: _fused_dropout(Tensor self, float p, Generator? generator=None) -> (Tensor, Tensor)
   matches_jit_signature: True
   variants: function
```
