stan-dev
diff --git a/stan/math/fwd/mat.hpp b/stan/math/fwd/mat.hpp
Lines changed: 1 addition & 2 deletions
diff --git a/stan/math/fwd/mat/meta/operands_and_partials.hpp b/stan/math/fwd/mat/meta/operands_and_partials.hpp
Lines changed: 3 additions & 2 deletions
diff --git a/stan/math/opencl/cholesky_decompose.hpp b/stan/math/opencl/cholesky_decompose.hpp
Lines changed: 2 additions & 2 deletions
diff --git a/stan/math/opencl/kernels/diag_inv.hpp b/stan/math/opencl/kernels/diag_inv.hpp
Lines changed: 1 addition & 1 deletion
diff --git a/stan/math/opencl/kernels/inv_lower_tri_multiply.hpp b/stan/math/opencl/kernels/inv_lower_tri_multiply.hpp
Lines changed: 1 addition & 1 deletion
diff --git a/stan/math/opencl/kernels/neg_rect_lower_tri_multiply.hpp b/stan/math/opencl/kernels/neg_rect_lower_tri_multiply.hpp
Lines changed: 1 addition & 1 deletion
diff --git a/stan/math/opencl/opencl.hpp b/stan/math/opencl/opencl.hpp
Lines changed: 1 addition & 1 deletion
diff --git a/stan/math/opencl/opencl_context.hpp b/stan/math/opencl/opencl_context.hpp
Lines changed: 3 additions & 0 deletions
diff --git a/stan/math/opencl/lower_tri_inverse.hpp b/stan/math/opencl/tri_inverse.hpp
Renamed: stan/math/opencl/lower_tri_inverse.hpp to stan/math/opencl/tri_inverse.hpp
Lines changed: 21 additions & 8 deletions
diff --git a/stan/math/prim/mat.hpp b/stan/math/prim/mat.hpp
Lines changed: 4 additions & 0 deletions
@@ -4,6 +4,7 @@
 #include <stan/math/fwd/core.hpp>
 #include <stan/math/fwd/scal/meta/is_fvar.hpp>
 #include <stan/math/fwd/scal/meta/partials_type.hpp>
+#include <stan/math/fwd/mat/meta/operands_and_partials.hpp>
 
 #include <stan/math/fwd/mat/vectorize/apply_scalar_unary.hpp>
 #include <stan/math/prim/mat.hpp>
@@ -47,6 +48,4 @@
 #include <stan/math/fwd/mat/functor/hessian.hpp>
 #include <stan/math/fwd/mat/functor/jacobian.hpp>
 
-#include <stan/math/fwd/mat/meta/operands_and_partials.hpp>
-
 #endif
@@ -1,9 +1,10 @@
 #ifndef STAN_MATH_FWD_MAT_META_OPERANDS_AND_PARTIALS_HPP
 #define STAN_MATH_FWD_MAT_META_OPERANDS_AND_PARTIALS_HPP
 
-#include <stan/math/fwd/scal/meta/operands_and_partials.hpp>
-#include <stan/math/prim/scal/meta/broadcast_array.hpp>
 #include <stan/math/prim/mat/fun/Eigen.hpp>
+#include <stan/math/prim/arr/meta/length.hpp>
+#include <stan/math/prim/scal/meta/broadcast_array.hpp>
+#include <stan/math/fwd/scal/meta/operands_and_partials.hpp>
 #include <vector>
 
 namespace stan {
 
@@ -6,7 +6,7 @@
 #include <stan/math/opencl/kernels/cholesky_decompose.hpp>
 #include <stan/math/opencl/multiply.hpp>
 #include <stan/math/opencl/multiply_transpose.hpp>
-#include <stan/math/opencl/lower_tri_inverse.hpp>
+#include <stan/math/opencl/tri_inverse.hpp>
 #include <stan/math/opencl/transpose.hpp>
 #include <stan/math/opencl/subtract.hpp>
 #include <stan/math/opencl/err/check_diagonal_zeros.hpp>
@@ -78,7 +78,7 @@ inline void cholesky_decompose(matrix_cl& A) {
   // and copies the resulting submatrix to the lower left hand corner of A
   matrix_cl L_21
       = opencl::multiply<TriangularViewCL::Entire, TriangularViewCL::Upper>(
-          A_21, transpose(lower_triangular_inverse(A_11)));
+          A_21, transpose(tri_inverse<TriangularViewCL::Lower>(A_11)));
   A.sub_block(L_21, 0, 0, block, 0, block_subset, block);
   matrix_cl A_22(block_subset, block_subset);
   A_22.sub_block(A, block, block, 0, 0, block_subset, block_subset);
 
@@ -36,7 +36,7 @@ static const char* diag_inv_kernel_code = STRINGIFY(
      * @param rows The number of rows for A.
      * @note Code is a <code>const char*</code> held in
      * <code>diag_inv_kernel_code.</code>
-     *  Used in math/opencl/lower_tri_inverse.hpp.
+     *  Used in math/opencl/tri_inverse.hpp.
      *  This kernel uses the helper macros available in helpers.cl.
      */
     __kernel void diag_inv(__global double* A, __global double* tmp_inv,
 
@@ -39,7 +39,7 @@ static const char* inv_lower_tri_multiply_kernel_code = STRINGIFY(
      * @param rows The number of rows in a single matrix of the batch
      * @note Code is a <code>const char*</code> held in
      * <code>inv_lower_tri_multiply_kernel_code.</code>
-     *  Used in math/opencl/lower_tri_inverse.hpp.
+     *  Used in math/opencl/tri_inverse.hpp.
      *  This kernel uses the helper macros available in helpers.cl.
      */
     __kernel void inv_lower_tri_multiply(__global double* A,
 
@@ -33,7 +33,7 @@ static const char* neg_rect_lower_tri_multiply_kernel_code = STRINGIFY(
      * @param rows The number of rows in a single matrix of the batch
      * @note Code is a <code>const char*</code> held in
      *  neg_rect_lower_tri_multiply_kernel_code
-     *  Used in math/opencl/lower_tri_inverse.hpp.
+     *  Used in math/opencl/tri_inverse.hpp.
      *  This kernel uses the helper macros available in helpers.cl.
      */
     __kernel void neg_rect_lower_tri_multiply(
 
@@ -9,7 +9,7 @@
 #include <stan/math/opencl/cholesky_decompose.hpp>
 #include <stan/math/opencl/diagonal_multiply.hpp>
 #include <stan/math/opencl/identity.hpp>
-#include <stan/math/opencl/lower_tri_inverse.hpp>
+#include <stan/math/opencl/tri_inverse.hpp>
 #include <stan/math/opencl/matrix_cl.hpp>
 #include <stan/math/opencl/multiply.hpp>
 #include <stan/math/opencl/multiply_transpose.hpp>
 
@@ -195,6 +195,9 @@ class opencl_context_base {
     int cholesky_rev_block_partition = 8;
     // used in math/opencl/multiply
     int multiply_split_upper_limit = 2000000;
+    // used in math/prim/mat/fun/mdivide_left_tri and its math/rev counterpart;
+    // presumably the smallest matrix size worth an OpenCL transfer - TODO confirm
+    int tri_inverse_size_worth_transfer = 100;
   } tuning_opts_;
 
   static opencl_context_base& getInstance() {
 
@@ -1,13 +1,14 @@
-#ifndef STAN_MATH_OPENCL_LOWER_TRI_INVERSE_HPP
-#define STAN_MATH_OPENCL_LOWER_TRI_INVERSE_HPP
+#ifndef STAN_MATH_OPENCL_TRI_INVERSE_HPP
+#define STAN_MATH_OPENCL_TRI_INVERSE_HPP
 
 #ifdef STAN_OPENCL
 #include <stan/math/opencl/matrix_cl.hpp>
+#include <stan/math/opencl/constants.hpp>
 #include <stan/math/opencl/kernels/diag_inv.hpp>
 #include <stan/math/opencl/kernels/inv_lower_tri_multiply.hpp>
 #include <stan/math/opencl/kernels/neg_rect_lower_tri_multiply.hpp>
 #include <stan/math/opencl/err/check_opencl.hpp>
-
+#include <stan/math/opencl/transpose.hpp>
 #include <stan/math/opencl/identity.hpp>
 #include <stan/math/opencl/err/check_square.hpp>
 #include <stan/math/opencl/sub_block.hpp>
@@ -19,22 +20,26 @@
 namespace stan {
 namespace math {
 /**
- * Computes the inverse of the lower triangular matrix
+ * Computes the inverse of a triangular matrix
  *
  * For a full guide to how this works and fits into Cholesky decompositions,
  * see the reference report
  * <a href="https://github.com/SteveBronder/stancon2018/blob/master/report.pdf">
  * here</a> and kernel doc
  * <a href="https://github.com/stan-dev/math/wiki/GPU-Kernels">here</a>.
  *
+ * @tparam triangular_view whether A is Lower or Upper triangular (not Entire)
  * @param A matrix on the OpenCL device
  * @return the inverse of A
  *
  * @throw <code>std::invalid_argument</code> if the matrix
  *    is not square
  */
-inline matrix_cl lower_triangular_inverse(const matrix_cl& A) {
-  check_square("lower_triangular_inverse (OpenCL)", "A", A);
+template <TriangularViewCL triangular_view>
+inline matrix_cl tri_inverse(const matrix_cl& A) {
+  static_assert(triangular_view != TriangularViewCL::Entire,
+                "tri_inverse(OpenCL) only supports triangular input matrices");
+  check_square("tri_inverse (OpenCL)", "A", A);
 
   int thread_block_2D_dim = 32;
   int max_1D_thread_block_size = opencl_context.max_thread_block_size();
@@ -69,7 +74,9 @@ inline matrix_cl lower_triangular_inverse(const matrix_cl& A) {
   zero_mat.zeros<stan::math::TriangularViewCL::Entire>();
   temp.zeros<stan::math::TriangularViewCL::Entire>();
   inv_padded.zeros<stan::math::TriangularViewCL::Entire>();
-
+  if (triangular_view == TriangularViewCL::Upper) {
+    inv_mat = transpose(inv_mat);
+  }
   int work_per_thread
       = opencl_kernels::inv_lower_tri_multiply.make_functor.get_opts().at(
           "WORK_PER_THREAD");
@@ -95,6 +102,9 @@ inline matrix_cl lower_triangular_inverse(const matrix_cl& A) {
   inv_padded.zeros<stan::math::TriangularViewCL::Upper>();
   if (parts == 1) {
     inv_mat.sub_block(inv_padded, 0, 0, 0, 0, inv_mat.rows(), inv_mat.rows());
+    if (triangular_view == TriangularViewCL::Upper) {
+      inv_mat = transpose(inv_mat);
+    }
     return inv_mat;
   }
   parts = ceil(parts / 2.0);
@@ -132,7 +142,10 @@ inline matrix_cl lower_triangular_inverse(const matrix_cl& A) {
     inv_padded.zeros<stan::math::TriangularViewCL::Upper>();
   }
   // un-pad and return
-  inv_mat.sub_block(inv_padded, 0, 0, 0, 0, A.rows(), A.rows());
+  inv_mat.sub_block(inv_padded, 0, 0, 0, 0, inv_mat.rows(), inv_mat.rows());
+  if (triangular_view == TriangularViewCL::Upper) {
+    inv_mat = transpose(inv_mat);
+  }
   return inv_mat;
 }
 }  // namespace math
 
@@ -281,6 +281,7 @@
 #include <stan/math/prim/mat/functor/map_rect_reduce.hpp>
 #include <stan/math/prim/mat/prob/bernoulli_logit_glm_log.hpp>
 #include <stan/math/prim/mat/prob/bernoulli_logit_glm_lpmf.hpp>
+#include <stan/math/prim/mat/prob/bernoulli_logit_glm_rng.hpp>
 #include <stan/math/prim/mat/prob/categorical_log.hpp>
 #include <stan/math/prim/mat/prob/categorical_logit_log.hpp>
 #include <stan/math/prim/mat/prob/categorical_logit_lpmf.hpp>
@@ -292,6 +293,7 @@
 #include <stan/math/prim/mat/prob/dirichlet_rng.hpp>
 #include <stan/math/prim/mat/prob/gaussian_dlm_obs_log.hpp>
 #include <stan/math/prim/mat/prob/gaussian_dlm_obs_lpdf.hpp>
+#include <stan/math/prim/mat/prob/gaussian_dlm_obs_rng.hpp>
 #include <stan/math/prim/mat/prob/inv_wishart_log.hpp>
 #include <stan/math/prim/mat/prob/inv_wishart_lpdf.hpp>
 #include <stan/math/prim/mat/prob/inv_wishart_rng.hpp>
@@ -305,6 +307,7 @@
 #include <stan/math/prim/mat/prob/lkj_cov_lpdf.hpp>
 #include <stan/math/prim/mat/prob/matrix_normal_prec_log.hpp>
 #include <stan/math/prim/mat/prob/matrix_normal_prec_lpdf.hpp>
+#include <stan/math/prim/mat/prob/matrix_normal_prec_rng.hpp>
 #include <stan/math/prim/mat/prob/multi_gp_cholesky_log.hpp>
 #include <stan/math/prim/mat/prob/multi_gp_cholesky_lpdf.hpp>
 #include <stan/math/prim/mat/prob/multi_gp_log.hpp>
@@ -316,6 +319,7 @@
 #include <stan/math/prim/mat/prob/multi_normal_lpdf.hpp>
 #include <stan/math/prim/mat/prob/multi_normal_prec_log.hpp>
 #include <stan/math/prim/mat/prob/multi_normal_prec_lpdf.hpp>
+#include <stan/math/prim/mat/prob/multi_normal_prec_rng.hpp>
 #include <stan/math/prim/mat/prob/multi_normal_rng.hpp>
 #include <stan/math/prim/mat/prob/multi_student_t_log.hpp>
 #include <stan/math/prim/mat/prob/multi_student_t_lpdf.hpp>