Commit 410d539

Merge remote-tracking branch 'upstream/master'

2 parents 4903af7 + 9b1a65b

File tree: 16 files changed (+129, -69 lines)

aten/src/ATen/code_template.py

Lines changed: 2 additions & 2 deletions
@@ -11,13 +11,13 @@
 class CodeTemplate(object):
-    substitution_str = '(^[^\n\S]*)?\$([^\d\W]\w*|\{,?[^\d\W]\w*\,?})'
+    substitution_str = r'(^[^\n\S]*)?\$([^\d\W]\w*|\{,?[^\d\W]\w*\,?})'

     # older versions of Python have a bug where \w* does not work,
     # so we need to replace with the non-shortened version [a-zA-Z0-9_]*
     # https://bugs.python.org/issue18647
-    substitution_str = substitution_str.replace('\w', '[a-zA-Z0-9_]')
+    substitution_str = substitution_str.replace(r'\w', r'[a-zA-Z0-9_]')

     subtitution = re.compile(substitution_str, re.MULTILINE)

aten/src/ATen/preprocess_declarations.py

Lines changed: 1 addition & 1 deletion
@@ -124,7 +124,7 @@ def should_generate_out_variant(option):

 def sanitize_return(option):
     ret = option['return']
-    m = re.match('argument (\d+(,\d+)*)', ret)
+    m = re.match(r'argument (\d+(,\d+)*)', ret)
     if m is not None:
         arguments = [int(x) for x in m.group(1).split(',')]
         option['return'] = {'kind': 'arguments', 'arguments': arguments}
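
Both of the changes above swap plain string literals for raw strings in regex patterns. A minimal standalone sketch (not part of the commit) of why this matters:

    import re

    # In an ordinary string literal, '\w' and '\d' are invalid escape
    # sequences. CPython currently leaves the backslash in place, so the
    # old patterns still worked, but Python 3.6+ emits a DeprecationWarning
    # for them and the escape is slated to become a SyntaxError. The raw
    # string spells out the intent and yields the identical pattern:
    assert '\w' == r'\w'

    m = re.match(r'argument (\d+(,\d+)*)', 'argument 0,1')
    assert m.group(1) == '0,1'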

aten/src/THC/THCReduce.cuh

Lines changed: 3 additions & 3 deletions
@@ -517,9 +517,9 @@ bool THC_reduceDim(THCState* state,
       (TYPE) outElements, init, modifyOp, reduceOp, finalizeOp);    \
   }                                                                 \
   else                                                              \
-  {                                                                 \
-    void* stagingData;                                              \
-    void* semaphores;                                               \
+  {                                                                 \
+    void* stagingData = nullptr;                                    \
+    void* semaphores = nullptr;                                     \
                                                                     \
     if(grid.y > 1)                                                  \
     {                                                               \

docker/caffe2/jenkins/common/install_rocm.sh

Lines changed: 16 additions & 2 deletions
@@ -21,8 +21,6 @@ install_ubuntu() {
          miopengemm \
          rocblas \
          hipblas \
-         rocrand \
-         hcsparse \
          rocm-profiler \
          cxlactivitylogger

@@ -65,6 +63,20 @@ install_hcrng() {
   dpkg -i /opt/rocm/debians/hcrng.deb
 }

+# This will be removed after merging an upcoming PR.
+install_hcsparse() {
+  mkdir -p /opt/rocm/debians
+  curl https://s3.amazonaws.com/ossci-linux/hcsparse-master-907a505-Linux.deb -o /opt/rocm/debians/hcsparse.deb
+  dpkg -i /opt/rocm/debians/hcsparse.deb
+}
+
+# Install an updated version of rocRand that's PyTorch compatible.
+install_rocrand() {
+  mkdir -p /opt/rocm/debians
+  curl https://s3.amazonaws.com/ossci-linux/rocrand-1.8.0-Linux.deb -o /opt/rocm/debians/rocrand.deb
+  dpkg -i /opt/rocm/debians/rocrand.deb
+}
+
 # Install Python packages depending on the base OS
 if [ -f /etc/lsb-release ]; then
   install_ubuntu

@@ -77,3 +89,5 @@ fi

 install_hip_thrust
 install_hcrng
+install_rocrand
+install_hcsparse

tools/autograd/templates/VariableType.cpp

Lines changed: 2 additions & 2 deletions
@@ -343,7 +343,7 @@ static void throw_error_out_requires_grad(const char* name) {

 static void rebase_history(Variable& var, std::shared_ptr<Function> grad_fn) {
   if (grad_fn && var.defined()) {
-    grad_fn->add_input_metadata(var.type(), var.sizes());
+    grad_fn->add_input_metadata(var);
     var.rebase_history({std::move(grad_fn), 0});
   }
 }

@@ -353,7 +353,7 @@ static void rebase_history(ArrayRef<Variable> vars, std::shared_ptr<Function> grad_fn)
   for (auto& var : vars) {
     if (var.defined()) {
       // TODO: eliminate const_cast
-      auto output_nr = grad_fn->add_input_metadata(var.type(), var.sizes());
+      auto output_nr = grad_fn->add_input_metadata(var);
       const_cast<Variable&>(var).rebase_history({grad_fn, output_nr});
     } else {
       grad_fn->add_input_metadata(Function::undefined_input());

torch/_torch_docs.py

Lines changed: 15 additions & 8 deletions
@@ -425,18 +425,21 @@ def parse_kwargs(desc):

 Example::

-    >>> torch.tensor([[0.1, 1.2], [2.2, 3.1], [4.9, 5.2]])
-    tensor([[ 0.1000,  1.2000],
-            [ 2.2000,  3.1000],
-            [ 4.9000,  5.2000]])
-
     >>> a = numpy.array([1, 2, 3])
-    >>> t = torch.from_numpy(a)
+    >>> t = torch.as_tensor(a)
     >>> t
     tensor([ 1, 2, 3])
     >>> t[0] = -1
     >>> a
     array([-1, 2, 3])
+
+    >>> a = numpy.array([1, 2, 3])
+    >>> t = torch.as_tensor(a, device=torch.device('cuda'))
+    >>> t
+    tensor([ 1, 2, 3])
+    >>> t[0] = -1
+    >>> a
+    array([1, 2, 3])
 """.format(**factory_data_common_args))

 add_docstr(torch.asin,

@@ -4136,8 +4139,10 @@ def parse_kwargs(desc):
 r"""
 sparse_coo_tensor(indices, values, size=None, dtype=None, device=None, requires_grad=False) -> Tensor

-Constructs a sparse_coo_tensor with non-zero elements at the given :attr:`indices` with the given
-:attr:`values`.
+Constructs a sparse tensor in COO(rdinate) format with non-zero elements at the given :attr:`indices`
+with the given :attr:`values`. A sparse tensor can be `uncoalesced`; in that case, there are duplicate
+coordinates in the indices, and the value at that index is the sum of all duplicate value entries:
+`torch.sparse`_.

 Args:
     indices (array_like): Initial data for the tensor. Can be a list, tuple,

@@ -4192,6 +4197,8 @@ def parse_kwargs(desc):
     tensor([], dtype=torch.int64)
 and values:
     tensor([])
+
+.. _torch.sparse: https://pytorch.org/docs/stable/sparse.html
 """)

 add_docstr(torch.sqrt,
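
The `uncoalesced` behaviour described in the new docstring is easiest to see with duplicate coordinates. A short sketch (not part of the commit) using the documented torch.sparse_coo_tensor API:

    import torch

    i = torch.tensor([[0, 0, 1],
                      [1, 1, 0]])       # the coordinate (0, 1) appears twice
    v = torch.tensor([3.0, 4.0, 5.0])
    s = torch.sparse_coo_tensor(i, v, (2, 2))

    # The tensor starts out uncoalesced: both entries for (0, 1) are stored.
    # coalesce() merges duplicates by summing them, so (0, 1) becomes 3 + 4.
    print(s.coalesce().to_dense())
    # tensor([[0., 7.],
    #         [5., 0.]])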

torch/csrc/autograd/engine.cpp

Lines changed: 7 additions & 0 deletions
@@ -338,6 +338,13 @@ static void validate_outputs(const edge_list& edges, variable_list& grads, const
       ss << metadata.type() << " but got " << grads[i].type();
       throw std::runtime_error(format_error(ss.str()));
     }
+    const auto output_device = output.is_cuda() ? output.get_device() : -1;
+    if (output_device != metadata.device()) {
+      std::stringstream ss;
+      ss << "invalid gradient at index " << i << " - expected device ";
+      ss << metadata.device() << " but got " << output_device;
+      throw std::runtime_error(format_error(ss.str()));
+    }
   }
 }
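
From Python, this new check turns a cross-GPU gradient mix-up into an explicit error. A hypothetical repro sketch (assumes a build with at least two CUDA devices; the hook is deliberately wrong and the exact message may differ):

    import torch

    # Moving a gradient across GPUs keeps its type (CUDA float) unchanged,
    # so the pre-existing type check passes and the new device check fires.
    x = torch.randn(3, device='cuda:0', requires_grad=True)
    y = x * 2
    y.register_hook(lambda grad: grad.to('cuda:1'))   # wrong device on purpose

    try:
        y.sum().backward()
    except RuntimeError as err:
        # Expected along the lines of:
        # "invalid gradient at index 0 - expected device 0 but got 1"
        print(err)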

torch/csrc/autograd/function.h

Lines changed: 15 additions & 6 deletions
@@ -5,7 +5,7 @@
 #include "torch/csrc/autograd/anomaly_mode.h"
 #include "torch/csrc/autograd/profiler.h"
 #include "torch/csrc/autograd/saved_variable.h"
-#include "torch/csrc/autograd/type_and_shape.h"
+#include "torch/csrc/autograd/input_metadata.h"
 #include "torch/csrc/autograd/variable.h"
 #include "torch/csrc/utils/python_stub.h"
 #include "torch/csrc/utils/variadic.h"

@@ -128,9 +128,18 @@ struct TORCH_API Function : std::enable_shared_from_this<Function> {

   /// Adds the type and shape metadata for a new input. Returns the index of
   /// of the new input.
-  uint32_t add_input_metadata(const at::Type& type, at::IntList shape) noexcept {
+  uint32_t add_input_metadata(
+      const at::Type& type
+    , at::IntList shape
+    , const int64_t device) noexcept {
     uint32_t input_nr = input_metadata_.size();
-    input_metadata_.emplace_back(type, shape);
+    input_metadata_.emplace_back(type, shape, device);
+    return input_nr;
+  }
+
+  uint32_t add_input_metadata(const at::Tensor& t) noexcept {
+    uint32_t input_nr = input_metadata_.size();
+    input_metadata_.emplace_back(t);
     return input_nr;
   }

@@ -145,7 +154,7 @@ struct TORCH_API Function : std::enable_shared_from_this<Function> {
     return input_metadata_.size();
   }

-  const TypeAndShape& input_metadata(size_t index) const {
+  const InputMetadata& input_metadata(size_t index) const {
     return input_metadata_[index];
   }

@@ -322,7 +331,7 @@ struct TORCH_API Function : std::enable_shared_from_this<Function> {
   std::unique_ptr<AnomalyMetadata> anomaly_metadata_ = nullptr;
   std::vector<std::unique_ptr<FunctionPreHook>> pre_hooks_;
   std::vector<std::unique_ptr<FunctionPostHook>> post_hooks_;
-  at::SmallVector<TypeAndShape, 2> input_metadata_;
+  at::SmallVector<InputMetadata, 2> input_metadata_;
 };

 /// See Function::is_traceable() for definition.

@@ -367,7 +376,7 @@ inline void create_gradient_edge(
     Variable& variable,
     std::shared_ptr<Function> function) {
   // Copy before move.
-  const auto input_nr = function->add_input_metadata(variable.type(), variable.sizes());
+  const auto input_nr = function->add_input_metadata(variable);
   variable.set_gradient_edge({std::move(function), input_nr});
 }
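
InputMetadata (renamed here from TypeAndShape) is the per-input record that validate_outputs in engine.cpp consults. A hypothetical Python-level sketch of the kind of mismatch it already catches, here the pre-existing shape check (assumes the truncated gradient cannot be broadcast back to the recorded shape; exact wording may differ):

    import torch

    x = torch.randn(3, requires_grad=True)
    y = x * 2
    y.register_hook(lambda grad: grad[:2])   # wrong shape on purpose

    try:
        y.sum().backward()
    except RuntimeError as err:
        # validate_outputs compares each gradient against the recorded
        # input metadata (type, shape, and now device) and rejects it.
        print(err)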

torch/csrc/autograd/functions/accumulate_grad.cpp

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ namespace torch { namespace autograd {
 AccumulateGrad::AccumulateGrad(Variable variable_)
     : Function(/*sequence_nr=*/UINT64_MAX)
     , variable(std::move(variable_)) {
-  add_input_metadata(variable.type(), variable.sizes());
+  add_input_metadata(variable);
 }

 auto AccumulateGrad::apply(variable_list&& grads) -> variable_list {

torch/csrc/autograd/functions/tensor.cpp

Lines changed: 1 addition & 1 deletion
@@ -43,7 +43,7 @@ CopySlices::CopySlices(
     fn(std::move(fn_)) {
   // Take the next_edges of fn as our own, except for index 0 which goes
   // to base instead of the view.
-  add_input_metadata(base_var.type(), base_var.sizes());
+  add_input_metadata(base_var);
   const auto num_outputs = fn->num_outputs();
   next_edges_.reserve(num_outputs);
   add_next_edge(base_var.gradient_edge());
