diff --git a/.travis.yml b/.travis.yml
index 537bfb905..41dae76f8 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,16 +1,24 @@
 language: rust
-sudo: false
+# use trusty for newer openblas
+sudo: required
+dist: trusty
 matrix:
   include:
     - rust: stable
+      env:
+       - FEATURES='test'
     - rust: beta
     - rust: nightly
       env:
-       - FEATURES='assign_ops rustc-serialize'
+       - FEATURES='test'
        - BENCH=1
 branches:
   only:
     - master
+addons:
+  apt:
+    packages:
+      - libopenblas-dev
 script:
   - |
       cargo build --verbose &&
diff --git a/Cargo.toml b/Cargo.toml
index 0b0bd3bb7..b477bf401 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -35,18 +35,24 @@ version = "0.3.16"
 optional = true
 
 [dependencies]
-rblas = { version = "0.0.13", optional = true }
+blas-sys = { version = "0.5", optional = true, default-features = false }
 
-#[dependencies.serde]
-#version = "0.4"
-#optional = true
+# rblas is deprecatedish
+rblas = { version = "0.0.13", optional = true }
 
 [features]
 
 assign_ops = []
 
+blas = ["blas-sys"]
+blas-openblas-sys = [
+    "blas",
+    "blas-sys/openblas",
+    "blas-sys/openblas-provider/system-openblas",
+]
+
 # This feature is used for testing
-all = ["assign_ops", "rblas", "rustc-serialize"]
+test = ["blas-openblas-sys", "rustc-serialize"]
 
 [profile.release]
 [profile.bench]
diff --git a/Makefile b/Makefile
index a295b0769..39079dea5 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ DOCCRATES = ndarray
 # deps to delete the generated docs
 RMDOCS =
 
-FEATURES = "assign_ops rustc-serialize rblas"
+FEATURES = "assign_ops rustc-serialize rblas blas"
 
 VERSIONS = $(patsubst %,target/VERS/%,$(DOCCRATES))
 
diff --git a/benches/bench1.rs b/benches/bench1.rs
index 1fe94f682..382ec94fb 100644
--- a/benches/bench1.rs
+++ b/benches/bench1.rs
@@ -433,19 +433,52 @@ fn bench_col_iter(bench: &mut test::Bencher)
     bench.iter(|| for elt in it.clone() { black_box(elt); })
 }
 
-#[bench]
-fn bench_mat_mul_large(bench: &mut test::Bencher)
-{
-    let a = OwnedArray::<f32, _>::zeros((64, 64));
-    let b = OwnedArray::<f32, _>::zeros((64, 64));
-    let a = black_box(a.view());
-    let b = black_box(b.view());
-    bench.iter(|| a.mat_mul(&b));
+macro_rules! mat_mul {
+    ($modname:ident, $ty:ident, $(($name:ident, $m:expr, $n:expr, $k:expr))+) => {
+        mod $modname {
+            use test::{black_box, Bencher};
+            use ndarray::OwnedArray;
+            $(
+            #[bench]
+            fn $name(bench: &mut Bencher)
+            {
+                let a = OwnedArray::<$ty, _>::zeros(($m, $n));
+                let b = OwnedArray::<$ty, _>::zeros(($n, $k));
+                let a = black_box(a.view());
+                let b = black_box(b.view());
+                bench.iter(|| a.mat_mul(&b));
+            }
+            )+
+        }
+    }
+}
+
+mat_mul!{mat_mul_f32, f32,
+    (m004, 4, 4, 4)
+    (m007, 7, 7, 7)
+    (m008, 8, 8, 8)
+    (m012, 12, 12, 12)
+    (m016, 16, 16, 16)
+    (m032, 32, 32, 32)
+    (m064, 64, 64, 64)
+    (m127, 127, 127, 127)
+    (mix16x4, 32, 4, 32)
+    (mix32x2, 32, 2, 32)
+}
+
+mat_mul!{mat_mul_i32, i32,
+    (m004, 4, 4, 4)
+    (m007, 7, 7, 7)
+    (m008, 8, 8, 8)
+    (m012, 12, 12, 12)
+    (m016, 16, 16, 16)
+    (m032, 32, 32, 32)
+    (m064, 64, 64, 64)
 }
 
 #[cfg(feature = "rblas")]
 #[bench]
-fn bench_mat_mul_rblas_large(bench: &mut test::Bencher)
+fn bench_mat_mul_rblas_64(bench: &mut test::Bencher)
 {
     use rblas::Gemm;
     use rblas::attribute::Transpose;
diff --git a/src/arrayformat.rs b/src/arrayformat.rs
index bf3e9ee2d..53232d94c 100644
--- a/src/arrayformat.rs
+++ b/src/arrayformat.rs
@@ -105,7 +105,10 @@ impl<'a, A: fmt::Debug, S, D: Dimension> fmt::Debug for ArrayBase<S, D>
     where S: Data<Elem=A>,
 {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        format_array(self, f, <_>::fmt)
+        // Add extra information for Debug
+        try!(format_array(self, f, <_>::fmt));
+        try!(write!(f, " shape={:?}, strides={:?}", self.shape(), self.strides()));
+        Ok(())
     }
 }
 
diff --git a/src/blas.rs b/src/blas.rs
index 22faf253b..25dd5f0d0 100644
--- a/src/blas.rs
+++ b/src/blas.rs
@@ -55,6 +55,7 @@
 //! I know), instead output its own error conditions, for example on dimension
 //! mismatch in a matrix multiplication.
 //!
+#![cfg_attr(has_deprecated, deprecated(note="`rblas` integration will move to its own crate."))]
 
 use std::os::raw::{c_int};
 
diff --git a/src/impl_linalg.rs b/src/impl_linalg.rs
index 776a97db2..de8298765 100644
--- a/src/impl_linalg.rs
+++ b/src/impl_linalg.rs
@@ -6,7 +6,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-use libnum;
+use libnum::Zero;
 use itertools::free::enumerate;
 
 use imp_prelude::*;
@@ -16,6 +16,28 @@ use {
     LinalgScalar,
 };
 
+#[cfg(feature="blas")]
+use std::mem::swap;
+#[cfg(feature="blas")]
+use std::os::raw::c_int;
+#[cfg(feature="blas")]
+use std::any::{Any, TypeId};
+
+#[cfg(feature="blas")]
+use blas_sys::c::{CblasNoTrans, CblasTrans, CblasRowMajor};
+#[cfg(feature="blas")]
+use blas_sys;
+
+/// len of vector before we use blas
+#[cfg(feature="blas")]
+const DOT_BLAS_CUTOFF: usize = 32;
+/// side of matrix before we use blas
+#[cfg(feature="blas")]
+const GEMM_BLAS_CUTOFF: usize = 7;
+#[cfg(feature="blas")]
+#[allow(non_camel_case_types)]
+type blas_index = c_int; // blas index type
+
 impl<A, S> ArrayBase<S, Ix>
     where S: Data<Elem=A>,
 {
@@ -52,7 +74,7 @@ impl<A, S> ArrayBase<S, Ix>
         sum
     }
 
-    #[cfg(not(feature="rblas"))]
+    #[cfg(not(feature="blas"))]
     fn dot_impl<S2>(&self, rhs: &ArrayBase<S2, Ix>) -> A
         where S2: Data<Elem=A>,
               A: LinalgScalar,
@@ -60,40 +82,36 @@ impl<A, S> ArrayBase<S, Ix>
         self.dot_generic(rhs)
     }
 
-    #[cfg(feature="rblas")]
+    #[cfg(feature="blas")]
     fn dot_impl<S2>(&self, rhs: &ArrayBase<S2, Ix>) -> A
         where S2: Data<Elem=A>,
               A: LinalgScalar,
     {
-        use std::any::{Any, TypeId};
-        use rblas::vector::ops::Dot;
-        use linalg::AsBlasAny;
-
-        // Read pointer to type `A` as type `B`.
-        //
-        // **Panics** if `A` and `B` are not the same type
-        fn cast_as<A: Any + Copy, B: Any + Copy>(a: &A) -> B {
-            assert_eq!(TypeId::of::<A>(), TypeId::of::<B>());
-            unsafe {
-                ::std::ptr::read(a as *const _ as *const B)
-            }
-        }
         // Use only if the vector is large enough to be worth it
-        if self.len() >= 32 {
+        if self.len() >= DOT_BLAS_CUTOFF {
             debug_assert_eq!(self.len(), rhs.len());
             assert!(self.len() == rhs.len());
-            if let Ok(self_v) = self.blas_view_as_type::<f32>() {
-                if let Ok(rhs_v) = rhs.blas_view_as_type::<f32>() {
-                    let f_ret = f32::dot(&self_v, &rhs_v);
-                    return cast_as::<f32, A>(&f_ret);
-                }
+            macro_rules! dot {
+                ($ty:ty, $func:ident) => {{
+            if blas_compat_1d::<$ty, _>(self) && blas_compat_1d::<$ty, _>(rhs) {
+                let n = self.len() as blas_index;
+                let incx = self.strides()[0] as blas_index;
+                let incy = rhs.strides()[0] as blas_index;
+                let ret = unsafe {
+                    blas_sys::c::$func(
+                        n,
+                        self.ptr as *const $ty,
+                        incx,
+                        rhs.ptr as *const $ty,
+                        incy)
+                };
+                return cast_as::<$ty, A>(&ret);
             }
-            if let Ok(self_v) = self.blas_view_as_type::<f64>() {
-                if let Ok(rhs_v) = rhs.blas_view_as_type::<f64>() {
-                    let f_ret = f64::dot(&self_v, &rhs_v);
-                    return cast_as::<f64, A>(&f_ret);
-                }
+                }}
             }
+
+            dot!{f32, cblas_sdot};
+            dot!{f64, cblas_ddot};
         }
         self.dot_generic(rhs)
     }
@@ -163,36 +181,12 @@ impl<A, S> ArrayBase<S, (Ix, Ix)>
     pub fn mat_mul(&self, rhs: &ArrayBase<S, (Ix, Ix)>) -> OwnedArray<A, (Ix, Ix)>
         where A: LinalgScalar,
     {
-        // NOTE: Matrix multiplication only defined for Copy types to
-        // avoid trouble with panicking + and *, and destructors
-
         let ((m, a), (b, n)) = (self.dim, rhs.dim);
-        let (self_columns, other_rows) = (a, b);
-        assert!(self_columns == other_rows);
+        let (lhs_columns, rhs_rows) = (a, b);
+        assert!(lhs_columns == rhs_rows);
+        assert!(m.checked_mul(n).is_some());
 
-        // Avoid initializing the memory in vec -- set it during iteration
-        // Panic safe because A: Copy
-        let mut res_elems = Vec::<A>::with_capacity(m as usize * n as usize);
-        unsafe {
-            res_elems.set_len(m as usize * n as usize);
-        }
-        let mut i = 0;
-        let mut j = 0;
-        for rr in &mut res_elems {
-            unsafe {
-                *rr = (0..a).fold(libnum::zero::<A>(),
-                    move |s, k| s + *self.uget((i, k)) * *rhs.uget((k, j))
-                );
-            }
-            j += 1;
-            if j == n {
-                j = 0;
-                i += 1;
-            }
-        }
-        unsafe {
-            ArrayBase::from_vec_dim_unchecked((m, n), res_elems)
-        }
+        mat_mul_impl(self, rhs)
     }
 
     /// Perform the matrix multiplication of the rectangular array `self` and
@@ -218,7 +212,7 @@ impl<A, S> ArrayBase<S, (Ix, Ix)>
         }
         for (i, rr) in enumerate(&mut res_elems) {
             unsafe {
-                *rr = (0..a).fold(libnum::zero::<A>(),
+                *rr = (0..a).fold(A::zero(),
                     move |s, k| s + *self.uget((i, k)) * *rhs.uget(k)
                 );
             }
@@ -229,4 +223,210 @@ impl<A, S> ArrayBase<S, (Ix, Ix)>
     }
 }
 
+#[cfg(not(feature="blas"))]
+use self::mat_mul_general as mat_mul_impl;
+
+#[cfg(feature="blas")]
+fn mat_mul_impl<A, S>(lhs: &ArrayBase<S, (Ix, Ix)>, rhs: &ArrayBase<S, (Ix, Ix)>)
+    -> OwnedArray<A, (Ix, Ix)>
+    where A: LinalgScalar,
+          S: Data<Elem=A>,
+{
+    // size cutoff for using BLAS
+    let cut = GEMM_BLAS_CUTOFF;
+    let ((mut m, a), (_, mut n)) = (lhs.dim, rhs.dim);
+    if !(m > cut || n > cut || a > cut) ||
+        !(same_type::<A, f32>() || same_type::<A, f64>()) {
+        return mat_mul_general(lhs, rhs);
+    }
+    // Use `c` for c-order and `f` for an f-order matrix
+    // We can handle c * c, f * f generally and
+    // c * f and f * c if the `f` matrix is square.
+    let mut lhs_ = lhs.view();
+    let mut rhs_ = rhs.view();
+    let lhs_s0 = lhs_.strides()[0];
+    let rhs_s0 = rhs_.strides()[0];
+    let both_f = lhs_s0 == 1 && rhs_s0 == 1;
+    let mut lhs_trans = CblasNoTrans;
+    let mut rhs_trans = CblasNoTrans;
+    if both_f {
+        // A^t B^t = C^t => B A = C
+        lhs_ = lhs_.reversed_axes();
+        rhs_ = rhs_.reversed_axes();
+        swap(&mut lhs_, &mut rhs_);
+        swap(&mut m, &mut n);
+    } else if lhs_s0 == 1 && m == a {
+        lhs_ = lhs_.reversed_axes();
+        lhs_trans = CblasTrans;
+    } else if rhs_s0 == 1 && a == n {
+        rhs_ = rhs_.reversed_axes();
+        rhs_trans = CblasTrans;
+    }
+
+    macro_rules! gemm {
+        ($ty:ty, $gemm:ident) => {
+        if blas_row_major_2d::<$ty, _>(&lhs_) && blas_row_major_2d::<$ty, _>(&rhs_) {
+            let mut elems = Vec::<A>::with_capacity(m * n);
+            let c;
+            unsafe {
+                elems.set_len(m * n);
+                c = OwnedArray::from_vec_dim_unchecked((m, n), elems);
+            }
+            {
+                let (m, k) = match lhs_trans {
+                    CblasNoTrans => lhs_.dim(),
+                    _ => {
+                        let (rows, cols) = lhs_.dim();
+                        (cols, rows)
+                    }
+                };
+                let n = match rhs_trans {
+                    CblasNoTrans => rhs_.dim().1,
+                    _ => rhs_.dim().0,
+                };
+                // gemm is C ← αA^Op B^Op + βC
+                // Where Op is notrans/trans/conjtrans
+                unsafe {
+                    blas_sys::c::$gemm(
+                    CblasRowMajor,
+                    lhs_trans,
+                    rhs_trans,
+                    m as blas_index, // m, rows of Op(a)
+                    n as blas_index, // n, cols of Op(b)
+                    k as blas_index, // k, cols of Op(a)
+                    1.0,                  // alpha
+                    lhs_.ptr as *const _, // a
+                    lhs_.strides()[0] as blas_index, // lda
+                    rhs_.ptr as *const _, // b
+                    rhs_.strides()[0] as blas_index, // ldb
+                    0.0,                   // beta
+                    c.ptr as *mut _,       // c
+                    c.strides()[0] as blas_index, // ldc
+                );
+                }
+            }
+            return if both_f {
+                c.reversed_axes()
+            } else {
+                c
+            };
+        }
+        }
+    }
+    gemm!(f32, cblas_sgemm);
+    gemm!(f64, cblas_dgemm);
+    return mat_mul_general(lhs, rhs);
+}
 
+fn mat_mul_general<A, S>(lhs: &ArrayBase<S, (Ix, Ix)>, rhs: &ArrayBase<S, (Ix, Ix)>)
+    -> OwnedArray<A, (Ix, Ix)>
+    where A: LinalgScalar,
+          S: Data<Elem=A>,
+{
+    let ((m, a), (_, n)) = (lhs.dim, rhs.dim);
+
+    let lhs_s0 = lhs.strides()[0];
+    let rhs_s0 = rhs.strides()[0];
+    let column_major = lhs_s0 == 1 && rhs_s0 == 1;
+
+    // Avoid initializing the memory in vec -- set it during iteration
+    // Panic safe because A: Copy
+    let mut res_elems = Vec::<A>::with_capacity(m as usize * n as usize);
+    unsafe {
+        res_elems.set_len(m as usize * n as usize);
+    }
+    let mut i = 0;
+    let mut j = 0;
+    for rr in &mut res_elems {
+        unsafe {
+            *rr = (0..a).fold(A::zero(),
+                move |s, k| s + *lhs.uget((i, k)) * *rhs.uget((k, j)));
+        }
+        if !column_major {
+            j += 1;
+            if j == n {
+                j = 0;
+                i += 1;
+            }
+        } else {
+            i += 1;
+            if i == m {
+                i = 0;
+                j += 1;
+            }
+        }
+    }
+    unsafe {
+        if !column_major {
+            ArrayBase::from_vec_dim_unchecked((m, n), res_elems)
+        } else {
+            ArrayBase::from_vec_dim_unchecked_f((m, n), res_elems)
+        }
+    }
+}
+
+#[cfg(feature="blas")]
+#[inline(always)]
+/// Return `true` if `A` and `B` are the same type
+fn same_type<A: Any, B: Any>() -> bool {
+    TypeId::of::<A>() == TypeId::of::<B>()
+}
+
+#[cfg(feature="blas")]
+// Read pointer to type `A` as type `B`.
+//
+// **Panics** if `A` and `B` are not the same type
+fn cast_as<A: Any + Copy, B: Any + Copy>(a: &A) -> B {
+    assert!(same_type::<A, B>());
+    unsafe {
+        ::std::ptr::read(a as *const _ as *const B)
+    }
+}
+
+#[cfg(feature="blas")]
+fn blas_compat_1d<A, S>(a: &ArrayBase<S, Ix>) -> bool
+    where S: Data,
+          A: Any,
+          S::Elem: Any,
+{
+    if !same_type::<A, S::Elem>() {
+        return false;
+    }
+    if a.len() > blas_index::max_value() as usize {
+        return false;
+    }
+    let stride = a.strides()[0];
+    if stride > blas_index::max_value() as isize ||
+        stride < blas_index::min_value() as isize {
+        return false;
+    }
+    true
+}
+
+#[cfg(feature="blas")]
+fn blas_row_major_2d<A, S>(a: &ArrayBase<S, (Ix, Ix)>) -> bool
+    where S: Data,
+          A: Any,
+          S::Elem: Any,
+{
+    if !same_type::<A, S::Elem>() {
+        return false;
+    }
+    let s0 = a.strides()[0];
+    let s1 = a.strides()[1];
+    if s1 != 1 {
+        return false;
+    }
+    if (s0 > blas_index::max_value() as isize || s0 < blas_index::min_value() as isize) ||
+        (s1 > blas_index::max_value() as isize || s1 < blas_index::min_value() as isize)
+    {
+        return false;
+    }
+    let (m, n) = a.dim();
+    if m > blas_index::max_value() as usize ||
+        n > blas_index::max_value() as usize
+    {
+        return false;
+    }
+    true
+}
diff --git a/src/lib.rs b/src/lib.rs
index 76aa0779d..1b0be291f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -74,6 +74,8 @@ extern crate rustc_serialize as serialize;
 
 #[cfg(feature = "rblas")]
 extern crate rblas;
+#[cfg(feature="blas")]
+extern crate blas_sys;
 
 extern crate itertools;
 extern crate num as libnum;
diff --git a/src/linalg.rs b/src/linalg.rs
index b7ab51470..a4c57e257 100644
--- a/src/linalg.rs
+++ b/src/linalg.rs
@@ -19,21 +19,6 @@ use std::ops::{
 };
 use ScalarOperand;
 
-#[cfg(feature="rblas")]
-use std::any::TypeId;
-
-#[cfg(feature="rblas")]
-use ShapeError;
-
-#[cfg(feature="rblas")]
-use error::{from_kind, ErrorKind};
-
-#[cfg(feature="rblas")]
-use blas::{AsBlas, BlasArrayView};
-
-#[cfg(feature="rblas")]
-use imp_prelude::*;
-
 /// Elements that support linear algebra operations.
 ///
 /// `Any` for type-based specialization, `Copy` so that they don't need move
@@ -90,32 +75,3 @@ pub trait NdFloat :
 
 impl NdFloat for f32 { }
 impl NdFloat for f64 { }
-
-
-#[cfg(feature = "rblas")]
-pub trait AsBlasAny<A, S, D> : AsBlas<A, S, D> {
-    fn blas_view_as_type<T: Any>(&self) -> Result<BlasArrayView<T, D>, ShapeError>
-        where S: Data;
-}
-
-#[cfg(feature = "rblas")]
-/// ***Requires `features = "rblas"`***
-impl<A, S, D> AsBlasAny<A, S, D> for ArrayBase<S, D>
-    where S: Data<Elem=A>,
-          D: Dimension,
-          A: Any,
-{
-    fn blas_view_as_type<T: Any>(&self) -> Result<BlasArrayView<T, D>, ShapeError>
-        where S: Data
-    {
-        if TypeId::of::<A>() == TypeId::of::<T>() {
-            unsafe {
-                let v = self.view();
-                let u = ArrayView::new_(v.ptr as *const T, v.dim, v.strides);
-                Priv(u).into_blas_view()
-            }
-        } else {
-            Err(from_kind(ErrorKind::IncompatibleLayout))
-        }
-    }
-}
diff --git a/tests/array.rs b/tests/array.rs
index 48d2636ac..9bdefd236 100644
--- a/tests/array.rs
+++ b/tests/array.rs
@@ -2,6 +2,7 @@
 
 #[macro_use]
 extern crate ndarray;
+extern crate itertools;
 
 use ndarray::{RcArray, S, Si,
     OwnedArray,
@@ -737,3 +738,26 @@ fn test_range() {
     assert_eq!(d[10], 2.);
     assert_eq!(d[12], 2.2);
 }
+
+#[test]
+fn test_f_order() {
+    // Test that arrays are logically equal in every way,
+    // even if the underlying memory order is different
+    let c = arr2(&[[1, 2, 3],
+                   [4, 5, 6]]);
+    let mut f = OwnedArray::zeros_f(c.dim());
+    f.assign(&c);
+    assert_eq!(f, c);
+    assert_eq!(f.shape(), c.shape());
+    assert_eq!(c.strides(), &[3, 1]);
+    assert_eq!(f.strides(), &[1, 2]);
+    itertools::assert_equal(f.iter(), c.iter());
+    itertools::assert_equal(f.inner_iter(), c.inner_iter());
+    itertools::assert_equal(f.outer_iter(), c.outer_iter());
+    itertools::assert_equal(f.axis_iter(Axis(0)), c.axis_iter(Axis(0)));
+    itertools::assert_equal(f.axis_iter(Axis(1)), c.axis_iter(Axis(1)));
+
+    let dupc = &c + &c;
+    let dupf = &f + &f;
+    assert_eq!(dupc, dupf);
+}
diff --git a/tests/format.rs b/tests/format.rs
index 9bfc293fa..c3adb2a2c 100644
--- a/tests/format.rs
+++ b/tests/format.rs
@@ -7,25 +7,25 @@ use ndarray::{arr0, rcarr1, aview1};
 fn formatting()
 {
     let a = rcarr1::<f32>(&[1., 2., 3., 4.]);
-    assert_eq!(format!("{:?}", a),
+    assert_eq!(format!("{}", a),
                //"[   1,    2,    3,    4]");
                "[1, 2, 3, 4]");
-    assert_eq!(format!("{:4?}", a),
+    assert_eq!(format!("{:4}", a),
                "[   1,    2,    3,    4]");
     let a = a.reshape((4, 1, 1));
-    assert_eq!(format!("{:4?}", a),
+    assert_eq!(format!("{:4}", a),
                "[[[   1]],\n [[   2]],\n [[   3]],\n [[   4]]]");
 
     let a = a.reshape((2, 2));
     assert_eq!(format!("{}", a), 
                "[[1, 2],\n [3, 4]]");
-    assert_eq!(format!("{:?}", a), 
+    assert_eq!(format!("{}", a),
                "[[1, 2],\n [3, 4]]");
-    assert_eq!(format!("{:#4?}", a),
+    assert_eq!(format!("{:#4}", a),
                "[[   1,    2], [   3,    4]]");
 
     let b = arr0::<f32>(3.5);
-    assert_eq!(format!("{:?}", b),
+    assert_eq!(format!("{}", b),
                "3.5");
 
     let s = format!("{:.3e}", aview1::<f32>(&[1.1, 2.2, 33., 440.]));
diff --git a/tests/oper.rs b/tests/oper.rs
index 24c27cde6..69930ec82 100644
--- a/tests/oper.rs
+++ b/tests/oper.rs
@@ -5,6 +5,7 @@ use ndarray::RcArray;
 use ndarray::{arr0, rcarr1, rcarr2};
 use ndarray::{
     OwnedArray,
+    Ix,
 };
 
 use std::fmt;
@@ -131,3 +132,68 @@ fn dot_product() {
     let b = b.map(|f| *f as i32);
     assert_eq!(a.dot(&b), dot as i32);
 }
+
+fn range_mat(m: Ix, n: Ix) -> OwnedArray<f32, (Ix, Ix)> {
+    OwnedArray::linspace(0., (m * n - 1) as f32, m * n).into_shape((m, n)).unwrap()
+}
+
+#[cfg(has_assign)]
+#[test]
+fn mat_mul() {
+    let (m, n, k) = (8, 8, 8);
+    let a = range_mat(m, n);
+    let b = range_mat(n, k);
+    let mut b = b / 4.;
+    {
+        let mut c = b.column_mut(0);
+        c += 1.0;
+    }
+    let ab = a.mat_mul(&b);
+
+    let mut af = OwnedArray::zeros_f(a.dim());
+    let mut bf = OwnedArray::zeros_f(b.dim());
+    af.assign(&a);
+    bf.assign(&b);
+
+    assert_eq!(ab, a.mat_mul(&bf));
+    assert_eq!(ab, af.mat_mul(&b));
+    assert_eq!(ab, af.mat_mul(&bf));
+
+    let (m, n, k) = (10, 5, 11);
+    let a = range_mat(m, n);
+    let b = range_mat(n, k);
+    let mut b = b / 4.;
+    {
+        let mut c = b.column_mut(0);
+        c += 1.0;
+    }
+    let ab = a.mat_mul(&b);
+
+    let mut af = OwnedArray::zeros_f(a.dim());
+    let mut bf = OwnedArray::zeros_f(b.dim());
+    af.assign(&a);
+    bf.assign(&b);
+
+    assert_eq!(ab, a.mat_mul(&bf));
+    assert_eq!(ab, af.mat_mul(&b));
+    assert_eq!(ab, af.mat_mul(&bf));
+}
+
+// Check that matrix multiplication of contiguous matrices returns a
+// matrix with the same order 
+#[test]
+fn mat_mul_order() {
+    let (m, n, k) = (8, 8, 8);
+    let a = range_mat(m, n);
+    let b = range_mat(n, k);
+    let mut af = OwnedArray::zeros_f(a.dim());
+    let mut bf = OwnedArray::zeros_f(b.dim());
+    af.assign(&a);
+    bf.assign(&b);
+
+    let cc = a.mat_mul(&b);
+    let ff = af.mat_mul(&bf);
+
+    assert_eq!(cc.strides()[1], 1);
+    assert_eq!(ff.strides()[0], 1);
+}