diff --git a/.travis.yml b/.travis.yml index 537bfb905..41dae76f8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,16 +1,24 @@ language: rust -sudo: false +# use trusty for newer openblas +sudo: required +dist: trusty matrix: include: - rust: stable + env: + - FEATURES='test' - rust: beta - rust: nightly env: - - FEATURES='assign_ops rustc-serialize' + - FEATURES='test' - BENCH=1 branches: only: - master +addons: + apt: + packages: + - libopenblas-dev script: - | cargo build --verbose && diff --git a/Cargo.toml b/Cargo.toml index 0b0bd3bb7..b477bf401 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,18 +35,24 @@ version = "0.3.16" optional = true [dependencies] -rblas = { version = "0.0.13", optional = true } +blas-sys = { version = "0.5", optional = true, default-features = false } -#[dependencies.serde] -#version = "0.4" -#optional = true +# rblas is deprecatedish +rblas = { version = "0.0.13", optional = true } [features] assign_ops = [] +blas = ["blas-sys"] +blas-openblas-sys = [ + "blas", + "blas-sys/openblas", + "blas-sys/openblas-provider/system-openblas", +] + # This feature is used for testing -all = ["assign_ops", "rblas", "rustc-serialize"] +test = ["blas-openblas-sys", "rustc-serialize"] [profile.release] [profile.bench] diff --git a/Makefile b/Makefile index a295b0769..39079dea5 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ DOCCRATES = ndarray # deps to delete the generated docs RMDOCS = -FEATURES = "assign_ops rustc-serialize rblas" +FEATURES = "assign_ops rustc-serialize rblas blas" VERSIONS = $(patsubst %,target/VERS/%,$(DOCCRATES)) diff --git a/benches/bench1.rs b/benches/bench1.rs index 1fe94f682..382ec94fb 100644 --- a/benches/bench1.rs +++ b/benches/bench1.rs @@ -433,19 +433,52 @@ fn bench_col_iter(bench: &mut test::Bencher) bench.iter(|| for elt in it.clone() { black_box(elt); }) } -#[bench] -fn bench_mat_mul_large(bench: &mut test::Bencher) -{ - let a = OwnedArray::::zeros((64, 64)); - let b = OwnedArray::::zeros((64, 64)); - let a = black_box(a.view()); - let b = black_box(b.view()); - bench.iter(|| a.mat_mul(&b)); +macro_rules! mat_mul { + ($modname:ident, $ty:ident, $(($name:ident, $m:expr, $n:expr, $k:expr))+) => { + mod $modname { + use test::{black_box, Bencher}; + use ndarray::OwnedArray; + $( + #[bench] + fn $name(bench: &mut Bencher) + { + let a = OwnedArray::<$ty, _>::zeros(($m, $n)); + let b = OwnedArray::<$ty, _>::zeros(($n, $k)); + let a = black_box(a.view()); + let b = black_box(b.view()); + bench.iter(|| a.mat_mul(&b)); + } + )+ + } + } +} + +mat_mul!{mat_mul_f32, f32, + (m004, 4, 4, 4) + (m007, 7, 7, 7) + (m008, 8, 8, 8) + (m012, 12, 12, 12) + (m016, 16, 16, 16) + (m032, 32, 32, 32) + (m064, 64, 64, 64) + (m127, 127, 127, 127) + (mix16x4, 32, 4, 32) + (mix32x2, 32, 2, 32) +} + +mat_mul!{mat_mul_i32, i32, + (m004, 4, 4, 4) + (m007, 7, 7, 7) + (m008, 8, 8, 8) + (m012, 12, 12, 12) + (m016, 16, 16, 16) + (m032, 32, 32, 32) + (m064, 64, 64, 64) } #[cfg(feature = "rblas")] #[bench] -fn bench_mat_mul_rblas_large(bench: &mut test::Bencher) +fn bench_mat_mul_rblas_64(bench: &mut test::Bencher) { use rblas::Gemm; use rblas::attribute::Transpose; diff --git a/src/arrayformat.rs b/src/arrayformat.rs index bf3e9ee2d..53232d94c 100644 --- a/src/arrayformat.rs +++ b/src/arrayformat.rs @@ -105,7 +105,10 @@ impl<'a, A: fmt::Debug, S, D: Dimension> fmt::Debug for ArrayBase where S: Data, { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - format_array(self, f, <_>::fmt) + // Add extra information for Debug + try!(format_array(self, f, <_>::fmt)); + try!(write!(f, " shape={:?}, strides={:?}", self.shape(), self.strides())); + Ok(()) } } diff --git a/src/blas.rs b/src/blas.rs index 22faf253b..25dd5f0d0 100644 --- a/src/blas.rs +++ b/src/blas.rs @@ -55,6 +55,7 @@ //! I know), instead output its own error conditions, for example on dimension //! mismatch in a matrix multiplication. //! +#![cfg_attr(has_deprecated, deprecated(note="`rblas` integration will move to its own crate."))] use std::os::raw::{c_int}; diff --git a/src/impl_linalg.rs b/src/impl_linalg.rs index 776a97db2..de8298765 100644 --- a/src/impl_linalg.rs +++ b/src/impl_linalg.rs @@ -6,7 +6,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use libnum; +use libnum::Zero; use itertools::free::enumerate; use imp_prelude::*; @@ -16,6 +16,28 @@ use { LinalgScalar, }; +#[cfg(feature="blas")] +use std::mem::swap; +#[cfg(feature="blas")] +use std::os::raw::c_int; +#[cfg(feature="blas")] +use std::any::{Any, TypeId}; + +#[cfg(feature="blas")] +use blas_sys::c::{CblasNoTrans, CblasTrans, CblasRowMajor}; +#[cfg(feature="blas")] +use blas_sys; + +/// len of vector before we use blas +#[cfg(feature="blas")] +const DOT_BLAS_CUTOFF: usize = 32; +/// side of matrix before we use blas +#[cfg(feature="blas")] +const GEMM_BLAS_CUTOFF: usize = 7; +#[cfg(feature="blas")] +#[allow(non_camel_case_types)] +type blas_index = c_int; // blas index type + impl ArrayBase where S: Data, { @@ -52,7 +74,7 @@ impl ArrayBase sum } - #[cfg(not(feature="rblas"))] + #[cfg(not(feature="blas"))] fn dot_impl(&self, rhs: &ArrayBase) -> A where S2: Data, A: LinalgScalar, @@ -60,40 +82,36 @@ impl ArrayBase self.dot_generic(rhs) } - #[cfg(feature="rblas")] + #[cfg(feature="blas")] fn dot_impl(&self, rhs: &ArrayBase) -> A where S2: Data, A: LinalgScalar, { - use std::any::{Any, TypeId}; - use rblas::vector::ops::Dot; - use linalg::AsBlasAny; - - // Read pointer to type `A` as type `B`. - // - // **Panics** if `A` and `B` are not the same type - fn cast_as(a: &A) -> B { - assert_eq!(TypeId::of::(), TypeId::of::()); - unsafe { - ::std::ptr::read(a as *const _ as *const B) - } - } // Use only if the vector is large enough to be worth it - if self.len() >= 32 { + if self.len() >= DOT_BLAS_CUTOFF { debug_assert_eq!(self.len(), rhs.len()); assert!(self.len() == rhs.len()); - if let Ok(self_v) = self.blas_view_as_type::() { - if let Ok(rhs_v) = rhs.blas_view_as_type::() { - let f_ret = f32::dot(&self_v, &rhs_v); - return cast_as::(&f_ret); - } + macro_rules! dot { + ($ty:ty, $func:ident) => {{ + if blas_compat_1d::<$ty, _>(self) && blas_compat_1d::<$ty, _>(rhs) { + let n = self.len() as blas_index; + let incx = self.strides()[0] as blas_index; + let incy = rhs.strides()[0] as blas_index; + let ret = unsafe { + blas_sys::c::$func( + n, + self.ptr as *const $ty, + incx, + rhs.ptr as *const $ty, + incy) + }; + return cast_as::<$ty, A>(&ret); } - if let Ok(self_v) = self.blas_view_as_type::() { - if let Ok(rhs_v) = rhs.blas_view_as_type::() { - let f_ret = f64::dot(&self_v, &rhs_v); - return cast_as::(&f_ret); - } + }} } + + dot!{f32, cblas_sdot}; + dot!{f64, cblas_ddot}; } self.dot_generic(rhs) } @@ -163,36 +181,12 @@ impl ArrayBase pub fn mat_mul(&self, rhs: &ArrayBase) -> OwnedArray where A: LinalgScalar, { - // NOTE: Matrix multiplication only defined for Copy types to - // avoid trouble with panicking + and *, and destructors - let ((m, a), (b, n)) = (self.dim, rhs.dim); - let (self_columns, other_rows) = (a, b); - assert!(self_columns == other_rows); + let (lhs_columns, rhs_rows) = (a, b); + assert!(lhs_columns == rhs_rows); + assert!(m.checked_mul(n).is_some()); - // Avoid initializing the memory in vec -- set it during iteration - // Panic safe because A: Copy - let mut res_elems = Vec::::with_capacity(m as usize * n as usize); - unsafe { - res_elems.set_len(m as usize * n as usize); - } - let mut i = 0; - let mut j = 0; - for rr in &mut res_elems { - unsafe { - *rr = (0..a).fold(libnum::zero::(), - move |s, k| s + *self.uget((i, k)) * *rhs.uget((k, j)) - ); - } - j += 1; - if j == n { - j = 0; - i += 1; - } - } - unsafe { - ArrayBase::from_vec_dim_unchecked((m, n), res_elems) - } + mat_mul_impl(self, rhs) } /// Perform the matrix multiplication of the rectangular array `self` and @@ -218,7 +212,7 @@ impl ArrayBase } for (i, rr) in enumerate(&mut res_elems) { unsafe { - *rr = (0..a).fold(libnum::zero::(), + *rr = (0..a).fold(A::zero(), move |s, k| s + *self.uget((i, k)) * *rhs.uget(k) ); } @@ -229,4 +223,210 @@ impl ArrayBase } } +#[cfg(not(feature="blas"))] +use self::mat_mul_general as mat_mul_impl; + +#[cfg(feature="blas")] +fn mat_mul_impl(lhs: &ArrayBase, rhs: &ArrayBase) + -> OwnedArray + where A: LinalgScalar, + S: Data, +{ + // size cutoff for using BLAS + let cut = GEMM_BLAS_CUTOFF; + let ((mut m, a), (_, mut n)) = (lhs.dim, rhs.dim); + if !(m > cut || n > cut || a > cut) || + !(same_type::() || same_type::()) { + return mat_mul_general(lhs, rhs); + } + // Use `c` for c-order and `f` for an f-order matrix + // We can handle c * c, f * f generally and + // c * f and f * c if the `f` matrix is square. + let mut lhs_ = lhs.view(); + let mut rhs_ = rhs.view(); + let lhs_s0 = lhs_.strides()[0]; + let rhs_s0 = rhs_.strides()[0]; + let both_f = lhs_s0 == 1 && rhs_s0 == 1; + let mut lhs_trans = CblasNoTrans; + let mut rhs_trans = CblasNoTrans; + if both_f { + // A^t B^t = C^t => B A = C + lhs_ = lhs_.reversed_axes(); + rhs_ = rhs_.reversed_axes(); + swap(&mut lhs_, &mut rhs_); + swap(&mut m, &mut n); + } else if lhs_s0 == 1 && m == a { + lhs_ = lhs_.reversed_axes(); + lhs_trans = CblasTrans; + } else if rhs_s0 == 1 && a == n { + rhs_ = rhs_.reversed_axes(); + rhs_trans = CblasTrans; + } + + macro_rules! gemm { + ($ty:ty, $gemm:ident) => { + if blas_row_major_2d::<$ty, _>(&lhs_) && blas_row_major_2d::<$ty, _>(&rhs_) { + let mut elems = Vec::::with_capacity(m * n); + let c; + unsafe { + elems.set_len(m * n); + c = OwnedArray::from_vec_dim_unchecked((m, n), elems); + } + { + let (m, k) = match lhs_trans { + CblasNoTrans => lhs_.dim(), + _ => { + let (rows, cols) = lhs_.dim(); + (cols, rows) + } + }; + let n = match rhs_trans { + CblasNoTrans => rhs_.dim().1, + _ => rhs_.dim().0, + }; + // gemm is C ← αA^Op B^Op + βC + // Where Op is notrans/trans/conjtrans + unsafe { + blas_sys::c::$gemm( + CblasRowMajor, + lhs_trans, + rhs_trans, + m as blas_index, // m, rows of Op(a) + n as blas_index, // n, cols of Op(b) + k as blas_index, // k, cols of Op(a) + 1.0, // alpha + lhs_.ptr as *const _, // a + lhs_.strides()[0] as blas_index, // lda + rhs_.ptr as *const _, // b + rhs_.strides()[0] as blas_index, // ldb + 0.0, // beta + c.ptr as *mut _, // c + c.strides()[0] as blas_index, // ldc + ); + } + } + return if both_f { + c.reversed_axes() + } else { + c + }; + } + } + } + gemm!(f32, cblas_sgemm); + gemm!(f64, cblas_dgemm); + return mat_mul_general(lhs, rhs); +} +fn mat_mul_general(lhs: &ArrayBase, rhs: &ArrayBase) + -> OwnedArray + where A: LinalgScalar, + S: Data, +{ + let ((m, a), (_, n)) = (lhs.dim, rhs.dim); + + let lhs_s0 = lhs.strides()[0]; + let rhs_s0 = rhs.strides()[0]; + let column_major = lhs_s0 == 1 && rhs_s0 == 1; + + // Avoid initializing the memory in vec -- set it during iteration + // Panic safe because A: Copy + let mut res_elems = Vec::::with_capacity(m as usize * n as usize); + unsafe { + res_elems.set_len(m as usize * n as usize); + } + let mut i = 0; + let mut j = 0; + for rr in &mut res_elems { + unsafe { + *rr = (0..a).fold(A::zero(), + move |s, k| s + *lhs.uget((i, k)) * *rhs.uget((k, j))); + } + if !column_major { + j += 1; + if j == n { + j = 0; + i += 1; + } + } else { + i += 1; + if i == m { + i = 0; + j += 1; + } + } + } + unsafe { + if !column_major { + ArrayBase::from_vec_dim_unchecked((m, n), res_elems) + } else { + ArrayBase::from_vec_dim_unchecked_f((m, n), res_elems) + } + } +} + +#[cfg(feature="blas")] +#[inline(always)] +/// Return `true` if `A` and `B` are the same type +fn same_type() -> bool { + TypeId::of::() == TypeId::of::() +} + +#[cfg(feature="blas")] +// Read pointer to type `A` as type `B`. +// +// **Panics** if `A` and `B` are not the same type +fn cast_as(a: &A) -> B { + assert!(same_type::()); + unsafe { + ::std::ptr::read(a as *const _ as *const B) + } +} + +#[cfg(feature="blas")] +fn blas_compat_1d(a: &ArrayBase) -> bool + where S: Data, + A: Any, + S::Elem: Any, +{ + if !same_type::() { + return false; + } + if a.len() > blas_index::max_value() as usize { + return false; + } + let stride = a.strides()[0]; + if stride > blas_index::max_value() as isize || + stride < blas_index::min_value() as isize { + return false; + } + true +} + +#[cfg(feature="blas")] +fn blas_row_major_2d(a: &ArrayBase) -> bool + where S: Data, + A: Any, + S::Elem: Any, +{ + if !same_type::() { + return false; + } + let s0 = a.strides()[0]; + let s1 = a.strides()[1]; + if s1 != 1 { + return false; + } + if (s0 > blas_index::max_value() as isize || s0 < blas_index::min_value() as isize) || + (s1 > blas_index::max_value() as isize || s1 < blas_index::min_value() as isize) + { + return false; + } + let (m, n) = a.dim(); + if m > blas_index::max_value() as usize || + n > blas_index::max_value() as usize + { + return false; + } + true +} diff --git a/src/lib.rs b/src/lib.rs index 76aa0779d..1b0be291f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -74,6 +74,8 @@ extern crate rustc_serialize as serialize; #[cfg(feature = "rblas")] extern crate rblas; +#[cfg(feature="blas")] +extern crate blas_sys; extern crate itertools; extern crate num as libnum; diff --git a/src/linalg.rs b/src/linalg.rs index b7ab51470..a4c57e257 100644 --- a/src/linalg.rs +++ b/src/linalg.rs @@ -19,21 +19,6 @@ use std::ops::{ }; use ScalarOperand; -#[cfg(feature="rblas")] -use std::any::TypeId; - -#[cfg(feature="rblas")] -use ShapeError; - -#[cfg(feature="rblas")] -use error::{from_kind, ErrorKind}; - -#[cfg(feature="rblas")] -use blas::{AsBlas, BlasArrayView}; - -#[cfg(feature="rblas")] -use imp_prelude::*; - /// Elements that support linear algebra operations. /// /// `Any` for type-based specialization, `Copy` so that they don't need move @@ -90,32 +75,3 @@ pub trait NdFloat : impl NdFloat for f32 { } impl NdFloat for f64 { } - - -#[cfg(feature = "rblas")] -pub trait AsBlasAny : AsBlas { - fn blas_view_as_type(&self) -> Result, ShapeError> - where S: Data; -} - -#[cfg(feature = "rblas")] -/// ***Requires `features = "rblas"`*** -impl AsBlasAny for ArrayBase - where S: Data, - D: Dimension, - A: Any, -{ - fn blas_view_as_type(&self) -> Result, ShapeError> - where S: Data - { - if TypeId::of::() == TypeId::of::() { - unsafe { - let v = self.view(); - let u = ArrayView::new_(v.ptr as *const T, v.dim, v.strides); - Priv(u).into_blas_view() - } - } else { - Err(from_kind(ErrorKind::IncompatibleLayout)) - } - } -} diff --git a/tests/array.rs b/tests/array.rs index 48d2636ac..9bdefd236 100644 --- a/tests/array.rs +++ b/tests/array.rs @@ -2,6 +2,7 @@ #[macro_use] extern crate ndarray; +extern crate itertools; use ndarray::{RcArray, S, Si, OwnedArray, @@ -737,3 +738,26 @@ fn test_range() { assert_eq!(d[10], 2.); assert_eq!(d[12], 2.2); } + +#[test] +fn test_f_order() { + // Test that arrays are logically equal in every way, + // even if the underlying memory order is different + let c = arr2(&[[1, 2, 3], + [4, 5, 6]]); + let mut f = OwnedArray::zeros_f(c.dim()); + f.assign(&c); + assert_eq!(f, c); + assert_eq!(f.shape(), c.shape()); + assert_eq!(c.strides(), &[3, 1]); + assert_eq!(f.strides(), &[1, 2]); + itertools::assert_equal(f.iter(), c.iter()); + itertools::assert_equal(f.inner_iter(), c.inner_iter()); + itertools::assert_equal(f.outer_iter(), c.outer_iter()); + itertools::assert_equal(f.axis_iter(Axis(0)), c.axis_iter(Axis(0))); + itertools::assert_equal(f.axis_iter(Axis(1)), c.axis_iter(Axis(1))); + + let dupc = &c + &c; + let dupf = &f + &f; + assert_eq!(dupc, dupf); +} diff --git a/tests/format.rs b/tests/format.rs index 9bfc293fa..c3adb2a2c 100644 --- a/tests/format.rs +++ b/tests/format.rs @@ -7,25 +7,25 @@ use ndarray::{arr0, rcarr1, aview1}; fn formatting() { let a = rcarr1::(&[1., 2., 3., 4.]); - assert_eq!(format!("{:?}", a), + assert_eq!(format!("{}", a), //"[ 1, 2, 3, 4]"); "[1, 2, 3, 4]"); - assert_eq!(format!("{:4?}", a), + assert_eq!(format!("{:4}", a), "[ 1, 2, 3, 4]"); let a = a.reshape((4, 1, 1)); - assert_eq!(format!("{:4?}", a), + assert_eq!(format!("{:4}", a), "[[[ 1]],\n [[ 2]],\n [[ 3]],\n [[ 4]]]"); let a = a.reshape((2, 2)); assert_eq!(format!("{}", a), "[[1, 2],\n [3, 4]]"); - assert_eq!(format!("{:?}", a), + assert_eq!(format!("{}", a), "[[1, 2],\n [3, 4]]"); - assert_eq!(format!("{:#4?}", a), + assert_eq!(format!("{:#4}", a), "[[ 1, 2], [ 3, 4]]"); let b = arr0::(3.5); - assert_eq!(format!("{:?}", b), + assert_eq!(format!("{}", b), "3.5"); let s = format!("{:.3e}", aview1::(&[1.1, 2.2, 33., 440.])); diff --git a/tests/oper.rs b/tests/oper.rs index 24c27cde6..69930ec82 100644 --- a/tests/oper.rs +++ b/tests/oper.rs @@ -5,6 +5,7 @@ use ndarray::RcArray; use ndarray::{arr0, rcarr1, rcarr2}; use ndarray::{ OwnedArray, + Ix, }; use std::fmt; @@ -131,3 +132,68 @@ fn dot_product() { let b = b.map(|f| *f as i32); assert_eq!(a.dot(&b), dot as i32); } + +fn range_mat(m: Ix, n: Ix) -> OwnedArray { + OwnedArray::linspace(0., (m * n - 1) as f32, m * n).into_shape((m, n)).unwrap() +} + +#[cfg(has_assign)] +#[test] +fn mat_mul() { + let (m, n, k) = (8, 8, 8); + let a = range_mat(m, n); + let b = range_mat(n, k); + let mut b = b / 4.; + { + let mut c = b.column_mut(0); + c += 1.0; + } + let ab = a.mat_mul(&b); + + let mut af = OwnedArray::zeros_f(a.dim()); + let mut bf = OwnedArray::zeros_f(b.dim()); + af.assign(&a); + bf.assign(&b); + + assert_eq!(ab, a.mat_mul(&bf)); + assert_eq!(ab, af.mat_mul(&b)); + assert_eq!(ab, af.mat_mul(&bf)); + + let (m, n, k) = (10, 5, 11); + let a = range_mat(m, n); + let b = range_mat(n, k); + let mut b = b / 4.; + { + let mut c = b.column_mut(0); + c += 1.0; + } + let ab = a.mat_mul(&b); + + let mut af = OwnedArray::zeros_f(a.dim()); + let mut bf = OwnedArray::zeros_f(b.dim()); + af.assign(&a); + bf.assign(&b); + + assert_eq!(ab, a.mat_mul(&bf)); + assert_eq!(ab, af.mat_mul(&b)); + assert_eq!(ab, af.mat_mul(&bf)); +} + +// Check that matrix multiplication of contiguous matrices returns a +// matrix with the same order +#[test] +fn mat_mul_order() { + let (m, n, k) = (8, 8, 8); + let a = range_mat(m, n); + let b = range_mat(n, k); + let mut af = OwnedArray::zeros_f(a.dim()); + let mut bf = OwnedArray::zeros_f(b.dim()); + af.assign(&a); + bf.assign(&b); + + let cc = a.mat_mul(&b); + let ff = af.mat_mul(&bf); + + assert_eq!(cc.strides()[1], 1); + assert_eq!(ff.strides()[0], 1); +}