diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py
index 069a6564..e42ef4b8 100644
--- a/metric_learn/constraints.py
+++ b/metric_learn/constraints.py
@@ -6,6 +6,7 @@
 import warnings
 from six.moves import xrange
 from scipy.sparse import coo_matrix
+from sklearn.utils import check_random_state
 
 __all__ = ['Constraints']
 
@@ -23,7 +24,8 @@ def __init__(self, partial_labels):
     self.known_label_idx, = np.where(partial_labels >= 0)
     self.known_labels = partial_labels[self.known_label_idx]
 
-  def adjacency_matrix(self, num_constraints, random_state=np.random):
+  def adjacency_matrix(self, num_constraints, random_state=None):
+    random_state = check_random_state(random_state)
     a, b, c, d = self.positive_negative_pairs(num_constraints,
                                               random_state=random_state)
     row = np.concatenate((a, c))
@@ -35,7 +37,8 @@ def adjacency_matrix(self, num_constraints, random_state=np.random):
     return adj + adj.T
 
   def positive_negative_pairs(self, num_constraints, same_length=False,
-                              random_state=np.random):
+                              random_state=None):
+    random_state = check_random_state(random_state)
     a, b = self._pairs(num_constraints, same_label=True,
                        random_state=random_state)
     c, d = self._pairs(num_constraints, same_label=False,
@@ -68,13 +71,14 @@ def _pairs(self, num_constraints, same_label=True, max_iter=10,
     ab = np.array(list(ab)[:num_constraints], dtype=int)
     return self.known_label_idx[ab.T]
 
-  def chunks(self, num_chunks=100, chunk_size=2, random_state=np.random):
+  def chunks(self, num_chunks=100, chunk_size=2, random_state=None):
     """
     the random state object to be passed must be a numpy random seed
     """
+    random_state = check_random_state(random_state)
     chunks = -np.ones_like(self.known_label_idx, dtype=int)
     uniq, lookup = np.unique(self.known_labels, return_inverse=True)
-    all_inds = [set(np.where(lookup==c)[0]) for c in xrange(len(uniq))]
+    all_inds = [set(np.where(lookup == c)[0]) for c in xrange(len(uniq))]
     idx = 0
     while idx < num_chunks and all_inds:
       if len(all_inds) == 1:
diff --git a/metric_learn/itml.py b/metric_learn/itml.py
index 16fc21db..36f5d715 100644
--- a/metric_learn/itml.py
+++ b/metric_learn/itml.py
@@ -6,6 +6,7 @@
 import warnings
 import numpy as np
 from six.moves import xrange
+from sklearn.exceptions import ChangedBehaviorWarning
 from sklearn.metrics import pairwise_distances
 from sklearn.utils.validation import check_array
 from sklearn.base import TransformerMixin
@@ -298,7 +299,6 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
            A positive definite (PD) matrix of shape
            (n_features, n_features), that will be used as such to set the
            prior.
-
   A0 : Not used
     .. deprecated:: 0.5.0
        `A0` was deprecated in version 0.5.0 and will
@@ -310,7 +310,9 @@ class ITML_Supervised(_BaseITML, TransformerMixin):
       tuples will be formed like this: X[indices].
   random_state : int or numpy.RandomState or None, optional (default=None)
       A pseudo random number generator object or a seed for it if int. If
-      ``prior='random'``, ``random_state`` is used to set the prior.
+      ``prior='random'``, ``random_state`` is used to set the prior. In any
+      case, `random_state` is also used to randomly sample constraints from
+      labels.
 
 
   Attributes
@@ -350,7 +352,7 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3,
     self.num_constraints = num_constraints
     self.bounds = bounds
 
-  def fit(self, X, y, random_state=np.random, bounds=None):
+  def fit(self, X, y, random_state='deprecated', bounds=None):
     """Create constraints from labels and learn the ITML model.
 
 
@@ -362,8 +364,11 @@ def fit(self, X, y, random_state=np.random, bounds=None):
     y : (n) array-like
         Data labels.
 
-    random_state : numpy.random.RandomState, optional
-        If provided, controls random number generation.
+    random_state : Not used
+      .. deprecated:: 0.5.0
+        `random_state` in the `fit` function was deprecated in version 0.5.0
+        and will be removed in 0.6.0. Set `random_state` at initialization
+        instead (when instantiating a new `ITML_Supervised` object).
 
     bounds : array-like of two numbers
         Bounds on similarity, aside slack variables, s.t.
@@ -384,6 +389,18 @@ def fit(self, X, y, random_state=np.random, bounds=None):
                     ' It has been deprecated in version 0.5.0 and will be'
                     ' removed in 0.6.0. Use the "bounds" parameter of this '
                     'fit method instead.', DeprecationWarning)
+    if random_state != 'deprecated':
+      warnings.warn('"random_state" parameter in the `fit` function is '
+                    'deprecated. Set `random_state` at initialization '
+                    'instead (when instantiating a new `ITML_Supervised` '
+                    'object).', DeprecationWarning)
+    else:
+      warnings.warn('As of v0.5.0, `ITML_Supervised` now uses the '
+                    '`random_state` given at initialization to sample '
+                    'constraints, not the default `np.random` from the `fit` '
+                    'method, since this argument is now deprecated. '
+                    'This warning will disappear in v0.6.0.',
+                    ChangedBehaviorWarning)
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
     num_constraints = self.num_constraints
     if num_constraints is None:
@@ -392,6 +409,6 @@ def fit(self, X, y, random_state=np.random, bounds=None):
 
     c = Constraints(y)
     pos_neg = c.positive_negative_pairs(num_constraints,
-                                        random_state=random_state)
+                                        random_state=self.random_state)
     pairs, y = wrap_pairs(X, pos_neg)
     return _BaseITML._fit(self, pairs, y, bounds=bounds)
diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py
index e3b0d323..72a448ec 100644
--- a/metric_learn/lsml.py
+++ b/metric_learn/lsml.py
@@ -286,7 +286,8 @@ class LSML_Supervised(_BaseLSML, TransformerMixin):
   random_state : int or numpy.RandomState or None, optional (default=None)
       A pseudo random number generator object or a seed for it if int. If
       ``init='random'``, ``random_state`` is used to set the random
-      prior.
+      prior. In any case, `random_state` is also used to randomly sample
+      constraints from labels.
 
   Attributes
   ----------
@@ -308,7 +309,7 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None,
     self.num_constraints = num_constraints
     self.weights = weights
 
-  def fit(self, X, y, random_state=np.random):
+  def fit(self, X, y, random_state='deprecated'):
     """Create constraints from labels and learn the LSML model.
 
     Parameters
@@ -319,13 +320,28 @@ def fit(self, X, y, random_state=np.random):
     y : (n) array-like
         Data labels.
 
-    random_state : numpy.random.RandomState, optional
-        If provided, controls random number generation.
+    random_state : Not used
+      .. deprecated:: 0.5.0
+        `random_state` in the `fit` function was deprecated in version 0.5.0
+        and will be removed in 0.6.0. Set `random_state` at initialization
+        instead (when instantiating a new `LSML_Supervised` object).
     """
     if self.num_labeled != 'deprecated':
       warnings.warn('"num_labeled" parameter is not used.'
                     ' It has been deprecated in version 0.5.0 and will be'
                     ' removed in 0.6.0', DeprecationWarning)
+    if random_state != 'deprecated':
+      warnings.warn('"random_state" parameter in the `fit` function is '
+                    'deprecated. Set `random_state` at initialization '
+                    'instead (when instantiating a new `LSML_Supervised` '
+                    'object).', DeprecationWarning)
+    else:
+      warnings.warn('As of v0.5.0, `LSML_Supervised` now uses the '
+                    '`random_state` given at initialization to sample '
+                    'constraints, not the default `np.random` from the `fit` '
+                    'method, since this argument is now deprecated. '
+                    'This warning will disappear in v0.6.0.',
+                    ChangedBehaviorWarning)
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
     num_constraints = self.num_constraints
     if num_constraints is None:
@@ -334,6 +350,6 @@ def fit(self, X, y, random_state=np.random):
 
     c = Constraints(y)
     pos_neg = c.positive_negative_pairs(num_constraints, same_length=True,
-                                        random_state=random_state)
+                                        random_state=self.random_state)
     return _BaseLSML._fit(self, X[np.column_stack(pos_neg)],
                           weights=self.weights)
diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py
index 9f02425c..55337b2e 100644
--- a/metric_learn/mmc.py
+++ b/metric_learn/mmc.py
@@ -538,7 +538,8 @@ class MMC_Supervised(_BaseMMC, TransformerMixin):
   random_state : int or numpy.RandomState or None, optional (default=None)
       A pseudo random number generator object or a seed for it if int. If
       ``init='random'``, ``random_state`` is used to initialize the random
-      Mahalanobis matrix.
+      Mahalanobis matrix. In any case, `random_state` is also used to
+      randomly sample constraints from labels.
 
   `MMC_Supervised` creates pairs of similar sample by taking same class
   samples, and pairs of dissimilar samples by taking different class
@@ -566,7 +567,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6,
     self.num_labeled = num_labeled
     self.num_constraints = num_constraints
 
-  def fit(self, X, y, random_state=np.random):
+  def fit(self, X, y, random_state='deprecated'):
     """Create constraints from labels and learn the MMC model.
 
     Parameters
@@ -575,13 +576,28 @@ def fit(self, X, y, random_state=np.random):
         Input data, where each row corresponds to a single instance.
     y : (n) array-like
         Data labels.
-    random_state : numpy.random.RandomState, optional
-        If provided, controls random number generation.
+    random_state : Not used
+      .. deprecated:: 0.5.0
+        `random_state` in the `fit` function was deprecated in version 0.5.0
+        and will be removed in 0.6.0. Set `random_state` at initialization
+        instead (when instantiating a new `MMC_Supervised` object).
     """
     if self.num_labeled != 'deprecated':
       warnings.warn('"num_labeled" parameter is not used.'
                     ' It has been deprecated in version 0.5.0 and will be'
                     ' removed in 0.6.0', DeprecationWarning)
+    if random_state != 'deprecated':
+      warnings.warn('"random_state" parameter in the `fit` function is '
+                    'deprecated. Set `random_state` at initialization '
+                    'instead (when instantiating a new `MMC_Supervised` '
+                    'object).', DeprecationWarning)
+    else:
+      warnings.warn('As of v0.5.0, `MMC_Supervised` now uses the '
+                    '`random_state` given at initialization to sample '
+                    'constraints, not the default `np.random` from the `fit` '
+                    'method, since this argument is now deprecated. '
+                    'This warning will disappear in v0.6.0.',
+                    ChangedBehaviorWarning)
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
     num_constraints = self.num_constraints
     if num_constraints is None:
@@ -590,6 +606,6 @@ def fit(self, X, y, random_state=np.random):
 
     c = Constraints(y)
     pos_neg = c.positive_negative_pairs(num_constraints,
-                                        random_state=random_state)
+                                        random_state=self.random_state)
     pairs, y = wrap_pairs(X, pos_neg)
     return _BaseMMC._fit(self, pairs, y)
diff --git a/metric_learn/rca.py b/metric_learn/rca.py
index 503e2408..8686f02d 100644
--- a/metric_learn/rca.py
+++ b/metric_learn/rca.py
@@ -184,11 +184,17 @@ class RCA_Supervised(RCA):
         be removed in 0.6.0. Use `n_components` instead.
 
   num_chunks: int, optional
+
   chunk_size: int, optional
+
   preprocessor : array-like, shape=(n_samples, n_features) or callable
       The preprocessor to call to get tuples from indices. If array-like,
       tuples will be formed like this: X[indices].
 
+  random_state : int or numpy.RandomState or None, optional (default=None)
+      A pseudo random number generator object or a seed for it if int.
+      It is used to randomly sample constraints from labels.
+
   Attributes
   ----------
   transformer_ : `numpy.ndarray`, shape=(n_components, n_features)
@@ -197,13 +203,15 @@ class RCA_Supervised(RCA):
 
   def __init__(self, num_dims='deprecated', n_components=None,
                pca_comps='deprecated', num_chunks=100, chunk_size=2,
-               preprocessor=None):
+               preprocessor=None, random_state=None):
+    """Initialize the supervised version of `RCA`."""
     RCA.__init__(self, num_dims=num_dims, n_components=n_components,
                  pca_comps=pca_comps, preprocessor=preprocessor)
     self.num_chunks = num_chunks
     self.chunk_size = chunk_size
+    self.random_state = random_state
 
-  def fit(self, X, y, random_state=np.random):
+  def fit(self, X, y, random_state='deprecated'):
     """Create constraints from labels and learn the RCA model.
     Needs num_constraints specified in constructor.
 
@@ -212,10 +220,26 @@ def fit(self, X, y, random_state=np.random):
     X : (n x d) data matrix
         each row corresponds to a single instance
     y : (n) data labels
-    random_state : a random.seed object to fix the random_state if needed.
+    random_state : Not used
+      .. deprecated:: 0.5.0
+        `random_state` in the `fit` function was deprecated in version 0.5.0
+        and will be removed in 0.6.0. Set `random_state` at initialization
+        instead (when instantiating a new `RCA_Supervised` object).
     """
+    if random_state != 'deprecated':
+      warnings.warn('"random_state" parameter in the `fit` function is '
+                    'deprecated. Set `random_state` at initialization '
+                    'instead (when instantiating a new `RCA_Supervised` '
+                    'object).', DeprecationWarning)
+    else:
+      warnings.warn('As of v0.5.0, `RCA_Supervised` now uses the '
+                    '`random_state` given at initialization to sample '
+                    'constraints, not the default `np.random` from the `fit` '
+                    'method, since this argument is now deprecated. '
+                    'This warning will disappear in v0.6.0.',
+                    ChangedBehaviorWarning)
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
     chunks = Constraints(y).chunks(num_chunks=self.num_chunks,
                                    chunk_size=self.chunk_size,
-                                   random_state=random_state)
+                                   random_state=self.random_state)
     return RCA.fit(self, X, chunks)
diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py
index 70e65c86..9344ef7c 100644
--- a/metric_learn/sdml.py
+++ b/metric_learn/sdml.py
@@ -310,7 +310,8 @@ class SDML_Supervised(_BaseSDML, TransformerMixin):
   random_state : int or numpy.RandomState or None, optional (default=None)
       A pseudo random number generator object or a seed for it if int. If
       ``init='random'``, ``random_state`` is used to set the random
-      prior.
+      prior. In any case, `random_state` is also used to randomly sample
+      constraints from labels.
 
   Attributes
   ----------
@@ -336,7 +337,7 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, prior=None,
     self.num_labeled = num_labeled
     self.num_constraints = num_constraints
 
-  def fit(self, X, y, random_state=np.random):
+  def fit(self, X, y, random_state='deprecated'):
     """Create constraints from labels and learn the SDML model.
 
     Parameters
@@ -345,9 +346,11 @@ def fit(self, X, y, random_state=np.random):
         data matrix, where each row corresponds to a single instance
     y : array-like, shape (n,)
         data labels, one for each instance
-    random_state : {numpy.random.RandomState, int}, optional
-        Random number generator or random seed. If not given, the singleton
-        numpy.random will be used.
+    random_state : Not used
+      .. deprecated:: 0.5.0
+        `random_state` in the `fit` function was deprecated in version 0.5.0
+        and will be removed in 0.6.0. Set `random_state` at initialization
+        instead (when instantiating a new `SDML_Supervised` object).
 
     Returns
     -------
@@ -358,6 +361,18 @@ def fit(self, X, y, random_state=np.random):
       warnings.warn('"num_labeled" parameter is not used.'
                     ' It has been deprecated in version 0.5.0 and will be'
                     ' removed in 0.6.0', DeprecationWarning)
+    if random_state != 'deprecated':
+      warnings.warn('"random_state" parameter in the `fit` function is '
+                    'deprecated. Set `random_state` at initialization '
+                    'instead (when instantiating a new `SDML_Supervised` '
+                    'object).', DeprecationWarning)
+    else:
+      warnings.warn('As of v0.5.0, `SDML_Supervised` now uses the '
+                    '`random_state` given at initialization to sample '
+                    'constraints, not the default `np.random` from the `fit` '
+                    'method, since this argument is now deprecated. '
+                    'This warning will disappear in v0.6.0.',
+                    ChangedBehaviorWarning)
     X, y = self._prepare_inputs(X, y, ensure_min_samples=2)
     num_constraints = self.num_constraints
     if num_constraints is None:
@@ -366,6 +381,6 @@ def fit(self, X, y, random_state=np.random):
 
     c = Constraints(y)
     pos_neg = c.positive_negative_pairs(num_constraints,
-                                        random_state=random_state)
+                                        random_state=self.random_state)
     pairs, y = wrap_pairs(X, pos_neg)
     return _BaseSDML._fit(self, pairs, y)
diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py
index c49c9ef5..0f47a58a 100644
--- a/test/metric_learn_test.py
+++ b/test/metric_learn_test.py
@@ -119,6 +119,37 @@ def test_changed_behaviour_warning(self):
       lsml.fit(pairs)
     assert any(msg == str(wrn.message) for wrn in raised_warning)
 
+  def test_deprecation_random_state(self):
+    # test that a deprecation message is thrown if random_state is set at
+    # fit time
+    # TODO: remove in v.0.6
+    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
+    y = np.array([1, 0, 1, 0])
+    lsml_supervised = LSML_Supervised()
+    msg = ('"random_state" parameter in the `fit` function is '
+           'deprecated. Set `random_state` at initialization '
+           'instead (when instantiating a new `LSML_Supervised` '
+           'object).')
+    with pytest.warns(DeprecationWarning) as raised_warning:
+      lsml_supervised.fit(X, y, random_state=np.random)
+    assert any(msg == str(wrn.message) for wrn in raised_warning)
+
+  def test_changed_behaviour_warning_random_state(self):
+    # test that a ChangedBehavior warning is thrown if the random_state is
+    # not set in fit.
+    # TODO: remove in v.0.6
+    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
+    y = np.array([1, 0, 1, 0])
+    lsml_supervised = LSML_Supervised()
+    msg = ('As of v0.5.0, `LSML_Supervised` now uses the '
+           '`random_state` given at initialization to sample '
+           'constraints, not the default `np.random` from the `fit` '
+           'method, since this argument is now deprecated. '
+           'This warning will disappear in v0.6.0.')
+    with pytest.warns(ChangedBehaviorWarning) as raised_warning:
+      lsml_supervised.fit(X, y)
+    assert any(msg == str(wrn.message) for wrn in raised_warning)
+
 
 class TestITML(MetricTestCase):
   def test_iris(self):
@@ -174,6 +205,37 @@ def test_deprecation_A0(self):
       itml.fit(pairs, y_pairs)
     assert any(msg == str(wrn.message) for wrn in raised_warning)
 
+  def test_deprecation_random_state(self):
+    # test that a deprecation message is thrown if random_state is set at
+    # fit time
+    # TODO: remove in v.0.6
+    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
+    y = np.array([1, 0, 1, 0])
+    itml_supervised = ITML_Supervised()
+    msg = ('"random_state" parameter in the `fit` function is '
+           'deprecated. Set `random_state` at initialization '
+           'instead (when instantiating a new `ITML_Supervised` '
+           'object).')
+    with pytest.warns(DeprecationWarning) as raised_warning:
+      itml_supervised.fit(X, y, random_state=np.random)
+    assert any(msg == str(wrn.message) for wrn in raised_warning)
+
+  def test_changed_behaviour_warning_random_state(self):
+    # test that a ChangedBehavior warning is thrown if the random_state is
+    # not set in fit.
+    # TODO: remove in v.0.6
+    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
+    y = np.array([1, 0, 1, 0])
+    itml_supervised = ITML_Supervised()
+    msg = ('As of v0.5.0, `ITML_Supervised` now uses the '
+           '`random_state` given at initialization to sample '
+           'constraints, not the default `np.random` from the `fit` '
+           'method, since this argument is now deprecated. '
+           'This warning will disappear in v0.6.0.')
+    with pytest.warns(ChangedBehaviorWarning) as raised_warning:
+      itml_supervised.fit(X, y)
+    assert any(msg == str(wrn.message) for wrn in raised_warning)
+
 
 @pytest.mark.parametrize('bounds', [None, (20., 100.), [20., 100.],
                                     np.array([20., 100.]),
@@ -446,11 +508,11 @@ def test_sdml_supervised_raises_warning_msg_installed_skggm(self):
     X = np.array([[-10., 0.], [10., 0.], [5., 0.], [3., 0.]])
     y = [0, 0, 1, 1]
     sdml_supervised = SDML_Supervised(balance_param=0.5, prior='identity',
-                                      sparsity_param=0.01)
+                                      sparsity_param=0.01, random_state=rng)
     msg = ("There was a problem in SDML when using skggm's graphical "
            "lasso solver.")
     with pytest.raises(RuntimeError) as raised_error:
-      sdml_supervised.fit(X, y, random_state=rng)
+      sdml_supervised.fit(X, y)
     assert msg == str(raised_error.value)
 
   @pytest.mark.skipif(not HAS_SKGGM,
@@ -535,8 +597,9 @@ def test_sdml_works_on_non_spd_pb_with_skggm(self):
     it should work, but scikit-learn's graphical_lasso does not work"""
     X, y = load_iris(return_X_y=True)
     sdml = SDML_Supervised(balance_param=0.5, sparsity_param=0.01,
-                           prior='covariance')
-    sdml.fit(X, y, random_state=np.random.RandomState(42))
+                           prior='covariance',
+                           random_state=np.random.RandomState(42))
+    sdml.fit(X, y)
 
   def test_deprecation_use_cov(self):
     # test that a deprecation message is thrown if use_cov  is set at
@@ -586,6 +649,35 @@ def test_changed_behaviour_warning(self):
       sdml.fit(pairs, y_pairs)
     assert any(msg == str(wrn.message) for wrn in raised_warning)
 
+  def test_deprecation_random_state(self):
+    # test that a deprecation message is thrown if random_state is set at
+    # fit time
+    # TODO: remove in v.0.6
+    X, y = load_iris(return_X_y=True)
+    sdml_supervised = SDML_Supervised(balance_param=5e-5)
+    msg = ('"random_state" parameter in the `fit` function is '
+           'deprecated. Set `random_state` at initialization '
+           'instead (when instantiating a new `SDML_Supervised` '
+           'object).')
+    with pytest.warns(DeprecationWarning) as raised_warning:
+      sdml_supervised.fit(X, y, random_state=np.random)
+    assert any(msg == str(wrn.message) for wrn in raised_warning)
+
+  def test_changed_behaviour_warning_random_state(self):
+    # test that a ChangedBehavior warning is thrown if the random_state is
+    # not set in fit.
+    # TODO: remove in v.0.6
+    X, y = load_iris(return_X_y=True)
+    sdml_supervised = SDML_Supervised(balance_param=5e-5)
+    msg = ('As of v0.5.0, `SDML_Supervised` now uses the '
+           '`random_state` given at initialization to sample '
+           'constraints, not the default `np.random` from the `fit` '
+           'method, since this argument is now deprecated. '
+           'This warning will disappear in v0.6.0.')
+    with pytest.warns(ChangedBehaviorWarning) as raised_warning:
+      sdml_supervised.fit(X, y)
+    assert any(msg == str(wrn.message) for wrn in raised_warning)
+
 
 @pytest.mark.skipif(not HAS_SKGGM,
                     reason='The message should be printed only if skggm is '
@@ -819,24 +911,7 @@ def test_iris(self):
     rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2)
     rca.fit(self.iris_points, self.iris_labels)
     csep = class_separation(rca.transform(self.iris_points), self.iris_labels)
-    self.assertLess(csep, 0.25)
-
-  def test_feature_null_variance(self):
-    X = np.hstack((self.iris_points, np.eye(len(self.iris_points), M=1)))
-
-    # Apply PCA with the number of components
-    rca = RCA_Supervised(n_components=2, pca_comps=3, num_chunks=30,
-                         chunk_size=2)
-    rca.fit(X, self.iris_labels)
-    csep = class_separation(rca.transform(X), self.iris_labels)
-    self.assertLess(csep, 0.30)
-
-    # Apply PCA with the minimum variance ratio
-    rca = RCA_Supervised(n_components=2, pca_comps=0.95, num_chunks=30,
-                         chunk_size=2)
-    rca.fit(X, self.iris_labels)
-    csep = class_separation(rca.transform(X), self.iris_labels)
-    self.assertLess(csep, 0.30)
+    self.assertLess(csep, 0.29)
 
   def test_deprecation_pca_comps(self):
     # test that a deprecation message is thrown if pca_comps is set at
@@ -851,12 +926,12 @@ def test_deprecation_pca_comps(self):
            '`sklearn.decomposition.PCA` and an `sklearn.pipeline.Pipeline`.')
     with pytest.warns(ChangedBehaviorWarning) as expected_msg:
       rca_supervised.fit(X, y)
-    assert str(expected_msg[0].message) == msg
+    assert any(str(w.message) == msg for w in expected_msg)
 
     rca = RCA(pca_comps=X.shape[1])
     with pytest.warns(ChangedBehaviorWarning) as expected_msg:
       rca.fit(X, y)
-    assert str(expected_msg[0].message) == msg
+    assert any(str(w.message) == msg for w in expected_msg)
 
   def test_changedbehaviorwarning_preprocessing(self):
     # test that a ChangedBehaviorWarning is thrown when using RCA
@@ -871,12 +946,12 @@ def test_changedbehaviorwarning_preprocessing(self):
     rca_supervised = RCA_Supervised(num_chunks=20)
     with pytest.warns(ChangedBehaviorWarning) as expected_msg:
       rca_supervised.fit(X, y)
-    assert str(expected_msg[0].message) == msg
+    assert any(str(w.message) == msg for w in expected_msg)
 
     rca = RCA()
     with pytest.warns(ChangedBehaviorWarning) as expected_msg:
       rca.fit(X, y)
-    assert str(expected_msg[0].message) == msg
+    assert any(str(w.message) == msg for w in expected_msg)
 
   def test_rank_deficient_returns_warning(self):
     """Checks that if the covariance matrix is not invertible, we raise a
@@ -895,6 +970,35 @@ def test_rank_deficient_returns_warning(self):
       rca.fit(X, y)
     assert any(str(w.message) == msg for w in raised_warnings)
 
+  def test_deprecation_random_state(self):
+    # test that a deprecation message is thrown if random_state is set at
+    # fit time
+    # TODO: remove in v.0.6
+    X, y = make_classification(random_state=42, n_samples=100)
+    rca_supervised = RCA_Supervised(num_chunks=20)
+    msg = ('"random_state" parameter in the `fit` function is '
+           'deprecated. Set `random_state` at initialization '
+           'instead (when instantiating a new `RCA_Supervised` '
+           'object).')
+    with pytest.warns(DeprecationWarning) as raised_warning:
+      rca_supervised.fit(X, y, random_state=np.random)
+    assert any(msg == str(wrn.message) for wrn in raised_warning)
+
+  def test_changed_behaviour_warning_random_state(self):
+    # test that a ChangedBehavior warning is thrown if the random_state is
+    # not set in fit.
+    # TODO: remove in v.0.6
+    X, y = make_classification(random_state=42, n_samples=100)
+    rca_supervised = RCA_Supervised(num_chunks=20)
+    msg = ('As of v0.5.0, `RCA_Supervised` now uses the '
+           '`random_state` given at initialization to sample '
+           'constraints, not the default `np.random` from the `fit` '
+           'method, since this argument is now deprecated. '
+           'This warning will disappear in v0.6.0.')
+    with pytest.warns(ChangedBehaviorWarning) as raised_warning:
+      rca_supervised.fit(X, y)
+    assert any(msg == str(wrn.message) for wrn in raised_warning)
+
 
 @pytest.mark.parametrize('num_dims', [None, 2])
 def test_deprecation_num_dims_rca(num_dims):
@@ -908,7 +1012,7 @@ def test_deprecation_num_dims_rca(num_dims):
          ' removed in 0.6.0. Use "n_components" instead')
   with pytest.warns(DeprecationWarning) as raised_warning:
     rca.fit(X, y)
-  assert (str(raised_warning[0].message) == msg)
+  assert any(str(w.message) == msg for w in raised_warning)
 
   # we take a small number of chunks so that RCA works on iris
   rca_supervised = RCA_Supervised(num_dims=num_dims, num_chunks=10)
@@ -917,7 +1021,7 @@ def test_deprecation_num_dims_rca(num_dims):
          ' removed in 0.6.0. Use "n_components" instead')
   with pytest.warns(DeprecationWarning) as raised_warning:
     rca_supervised.fit(X, y)
-  assert (str(raised_warning[0].message) == msg)
+  assert any(str(w.message) == msg for w in raised_warning)
 
 
 class TestMLKR(MetricTestCase):
@@ -1095,6 +1199,37 @@ def test_changed_behaviour_warning(self):
       mmc.fit(pairs, y_pairs)
     assert any(msg == str(wrn.message) for wrn in raised_warning)
 
+  def test_deprecation_random_state(self):
+    # test that a deprecation message is thrown if random_state is set at
+    # fit time
+    # TODO: remove in v.0.6
+    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
+    y = np.array([1, 0, 1, 0])
+    mmc_supervised = MMC_Supervised()
+    msg = ('"random_state" parameter in the `fit` function is '
+           'deprecated. Set `random_state` at initialization '
+           'instead (when instantiating a new `MMC_Supervised` '
+           'object).')
+    with pytest.warns(DeprecationWarning) as raised_warning:
+      mmc_supervised.fit(X, y, random_state=np.random)
+    assert any(msg == str(wrn.message) for wrn in raised_warning)
+
+  def test_changed_behaviour_warning_random_state(self):
+    # test that a ChangedBehavior warning is thrown if the random_state is
+    # not set in fit.
+    # TODO: remove in v.0.6
+    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
+    y = np.array([1, 0, 1, 0])
+    mmc_supervised = MMC_Supervised()
+    msg = ('As of v0.5.0, `MMC_Supervised` now uses the '
+           '`random_state` given at initialization to sample '
+           'constraints, not the default `np.random` from the `fit` '
+           'method, since this argument is now deprecated. '
+           'This warning will disappear in v0.6.0.')
+    with pytest.warns(ChangedBehaviorWarning) as raised_warning:
+      mmc_supervised.fit(X, y)
+    assert any(msg == str(wrn.message) for wrn in raised_warning)
+
 
 @pytest.mark.parametrize(('algo_class', 'dataset'),
                          [(NCA, make_classification()),
diff --git a/test/test_base_metric.py b/test/test_base_metric.py
index 313948ec..0c1117ed 100644
--- a/test/test_base_metric.py
+++ b/test/test_base_metric.py
@@ -96,7 +96,7 @@ def test_rca(self):
                        "RCA_Supervised(chunk_size=2, "
                        "n_components=None, num_chunks=100, "
                        "num_dims='deprecated', pca_comps='deprecated', "
-                       "preprocessor=None)"))
+                       "preprocessor=None, random_state=None)"))
 
   def test_mlkr(self):
     self.assertEqual(remove_spaces(str(metric_learn.MLKR())),
diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py
index b7255ea9..a9b2719e 100644
--- a/test/test_fit_transform.py
+++ b/test/test_fit_transform.py
@@ -30,25 +30,25 @@ def test_cov(self):
 
   def test_lsml_supervised(self):
     seed = np.random.RandomState(1234)
-    lsml = LSML_Supervised(num_constraints=200)
-    lsml.fit(self.X, self.y, random_state=seed)
+    lsml = LSML_Supervised(num_constraints=200, random_state=seed)
+    lsml.fit(self.X, self.y)
     res_1 = lsml.transform(self.X)
 
     seed = np.random.RandomState(1234)
-    lsml = LSML_Supervised(num_constraints=200)
-    res_2 = lsml.fit_transform(self.X, self.y, random_state=seed)
+    lsml = LSML_Supervised(num_constraints=200, random_state=seed)
+    res_2 = lsml.fit_transform(self.X, self.y)
 
     assert_array_almost_equal(res_1, res_2)
 
   def test_itml_supervised(self):
     seed = np.random.RandomState(1234)
-    itml = ITML_Supervised(num_constraints=200)
-    itml.fit(self.X, self.y, random_state=seed)
+    itml = ITML_Supervised(num_constraints=200, random_state=seed)
+    itml.fit(self.X, self.y)
     res_1 = itml.transform(self.X)
 
     seed = np.random.RandomState(1234)
-    itml = ITML_Supervised(num_constraints=200)
-    res_2 = itml.fit_transform(self.X, self.y, random_state=seed)
+    itml = ITML_Supervised(num_constraints=200, random_state=seed)
+    res_2 = itml.fit_transform(self.X, self.y)
 
     assert_array_almost_equal(res_1, res_2)
 
@@ -65,14 +65,14 @@ def test_lmnn(self):
   def test_sdml_supervised(self):
     seed = np.random.RandomState(1234)
     sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5,
-                           prior='identity')
-    sdml.fit(self.X, self.y, random_state=seed)
+                           prior='identity', random_state=seed)
+    sdml.fit(self.X, self.y)
     res_1 = sdml.transform(self.X)
 
     seed = np.random.RandomState(1234)
     sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5,
-                           prior='identity')
-    res_2 = sdml.fit_transform(self.X, self.y, random_state=seed)
+                           prior='identity', random_state=seed)
+    res_2 = sdml.fit_transform(self.X, self.y)
 
     assert_array_almost_equal(res_1, res_2)
 
@@ -100,13 +100,15 @@ def test_lfda(self):
 
   def test_rca_supervised(self):
     seed = np.random.RandomState(1234)
-    rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2)
-    rca.fit(self.X, self.y, random_state=seed)
+    rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2,
+                         random_state=seed)
+    rca.fit(self.X, self.y)
     res_1 = rca.transform(self.X)
 
     seed = np.random.RandomState(1234)
-    rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2)
-    res_2 = rca.fit_transform(self.X, self.y, random_state=seed)
+    rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2,
+                         random_state=seed)
+    res_2 = rca.fit_transform(self.X, self.y)
 
     assert_array_almost_equal(res_1, res_2)
 
@@ -122,13 +124,13 @@ def test_mlkr(self):
 
   def test_mmc_supervised(self):
     seed = np.random.RandomState(1234)
-    mmc = MMC_Supervised(num_constraints=200)
-    mmc.fit(self.X, self.y, random_state=seed)
+    mmc = MMC_Supervised(num_constraints=200, random_state=seed)
+    mmc.fit(self.X, self.y)
     res_1 = mmc.transform(self.X)
 
     seed = np.random.RandomState(1234)
-    mmc = MMC_Supervised(num_constraints=200)
-    res_2 = mmc.fit_transform(self.X, self.y, random_state=seed)
+    mmc = MMC_Supervised(num_constraints=200, random_state=seed)
+    res_2 = mmc.fit_transform(self.X, self.y)
 
     assert_array_almost_equal(res_1, res_2)
 
diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py
index 4c511263..b2056c09 100644
--- a/test/test_sklearn_compat.py
+++ b/test/test_sklearn_compat.py
@@ -24,31 +24,28 @@
                              quadruplets_learners)
 
 
-# Wrap the _Supervised methods with a deterministic wrapper for testing.
-class deterministic_mixin(object):
-  def fit(self, X, y):
-    rs = np.random.RandomState(1234)
-    return super(deterministic_mixin, self).fit(X, y, random_state=rs)
+class Stable_RCA_Supervised(RCA_Supervised):
 
+  def __init__(self, n_components=None, pca_comps=None,
+               chunk_size=2, preprocessor=None, random_state=None):
+    # this init makes RCA stable for scikit-learn examples.
+    super(Stable_RCA_Supervised, self).__init__(
+        num_chunks=2, n_components=n_components, pca_comps=pca_comps,
+        chunk_size=chunk_size, preprocessor=preprocessor,
+        random_state=random_state)
 
-class dLSML(deterministic_mixin, LSML_Supervised):
-  pass
 
+class Stable_SDML_Supervised(SDML_Supervised):
 
-class dITML(deterministic_mixin, ITML_Supervised):
-  pass
-
-
-class dMMC(deterministic_mixin, MMC_Supervised):
-  pass
-
-
-class dSDML(deterministic_mixin, SDML_Supervised):
-  pass
-
-
-class dRCA(deterministic_mixin, RCA_Supervised):
-  pass
+  def __init__(self, sparsity_param=0.01, num_labeled='deprecated',
+               num_constraints=None, verbose=False, preprocessor=None,
+               random_state=None):
+    # this init makes SDML stable for scikit-learn examples.
+    super(Stable_SDML_Supervised, self).__init__(
+        sparsity_param=sparsity_param, num_labeled=num_labeled,
+        num_constraints=num_constraints, verbose=verbose,
+        preprocessor=preprocessor, balance_param=1e-5, prior='identity',
+        random_state=random_state)
 
 
 class TestSklearnCompat(unittest.TestCase):
@@ -68,36 +65,19 @@ def test_nca(self):
     check_estimator(NCA)
 
   def test_lsml(self):
-    check_estimator(dLSML)
+    check_estimator(LSML_Supervised)
 
   def test_itml(self):
-    check_estimator(dITML)
+    check_estimator(ITML_Supervised)
 
   def test_mmc(self):
-    check_estimator(dMMC)
+    check_estimator(MMC_Supervised)
 
   def test_sdml(self):
-    def stable_init(self, sparsity_param=0.01, num_labeled='deprecated',
-                    num_constraints=None, verbose=False, preprocessor=None):
-      # this init makes SDML stable for scikit-learn examples.
-      SDML_Supervised.__init__(self, sparsity_param=sparsity_param,
-                               num_labeled=num_labeled,
-                               num_constraints=num_constraints,
-                               verbose=verbose,
-                               preprocessor=preprocessor,
-                               balance_param=1e-5, prior='identity')
-    dSDML.__init__ = stable_init
-    check_estimator(dSDML)
+    check_estimator(Stable_SDML_Supervised)
 
   def test_rca(self):
-    def stable_init(self, n_components=None, pca_comps=None,
-                    chunk_size=2, preprocessor=None):
-      # this init makes RCA stable for scikit-learn examples.
-      RCA_Supervised.__init__(self, num_chunks=2, n_components=n_components,
-                              pca_comps=pca_comps, chunk_size=chunk_size,
-                              preprocessor=preprocessor)
-    dRCA.__init__ = stable_init
-    check_estimator(dRCA)
+    check_estimator(Stable_RCA_Supervised)
 
 
 RNG = check_random_state(0)
diff --git a/test/test_transformer_metric_conversion.py b/test/test_transformer_metric_conversion.py
index 651f60ea..62ac8777 100644
--- a/test/test_transformer_metric_conversion.py
+++ b/test/test_transformer_metric_conversion.py
@@ -30,8 +30,8 @@ def test_cov(self):
 
   def test_lsml_supervised(self):
     seed = np.random.RandomState(1234)
-    lsml = LSML_Supervised(num_constraints=200)
-    lsml.fit(self.X, self.y, random_state=seed)
+    lsml = LSML_Supervised(num_constraints=200, random_state=seed)
+    lsml.fit(self.X, self.y)
     L = lsml.transformer_
     assert_array_almost_equal(L.T.dot(L), lsml.get_mahalanobis_matrix())
 
@@ -51,8 +51,8 @@ def test_lmnn(self):
   def test_sdml_supervised(self):
     seed = np.random.RandomState(1234)
     sdml = SDML_Supervised(num_constraints=1500, prior='identity',
-                           balance_param=1e-5)
-    sdml.fit(self.X, self.y, random_state=seed)
+                           balance_param=1e-5, random_state=seed)
+    sdml.fit(self.X, self.y)
     L = sdml.transformer_
     assert_array_almost_equal(L.T.dot(L), sdml.get_mahalanobis_matrix())