diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index 069a6564..e42ef4b8 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -6,6 +6,7 @@ import warnings from six.moves import xrange from scipy.sparse import coo_matrix +from sklearn.utils import check_random_state __all__ = ['Constraints'] @@ -23,7 +24,8 @@ def __init__(self, partial_labels): self.known_label_idx, = np.where(partial_labels >= 0) self.known_labels = partial_labels[self.known_label_idx] - def adjacency_matrix(self, num_constraints, random_state=np.random): + def adjacency_matrix(self, num_constraints, random_state=None): + random_state = check_random_state(random_state) a, b, c, d = self.positive_negative_pairs(num_constraints, random_state=random_state) row = np.concatenate((a, c)) @@ -35,7 +37,8 @@ def adjacency_matrix(self, num_constraints, random_state=np.random): return adj + adj.T def positive_negative_pairs(self, num_constraints, same_length=False, - random_state=np.random): + random_state=None): + random_state = check_random_state(random_state) a, b = self._pairs(num_constraints, same_label=True, random_state=random_state) c, d = self._pairs(num_constraints, same_label=False, @@ -68,13 +71,14 @@ def _pairs(self, num_constraints, same_label=True, max_iter=10, ab = np.array(list(ab)[:num_constraints], dtype=int) return self.known_label_idx[ab.T] - def chunks(self, num_chunks=100, chunk_size=2, random_state=np.random): + def chunks(self, num_chunks=100, chunk_size=2, random_state=None): """ the random state object to be passed must be a numpy random seed """ + random_state = check_random_state(random_state) chunks = -np.ones_like(self.known_label_idx, dtype=int) uniq, lookup = np.unique(self.known_labels, return_inverse=True) - all_inds = [set(np.where(lookup==c)[0]) for c in xrange(len(uniq))] + all_inds = [set(np.where(lookup == c)[0]) for c in xrange(len(uniq))] idx = 0 while idx < num_chunks and all_inds: if len(all_inds) == 1: diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 16fc21db..36f5d715 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -6,6 +6,7 @@ import warnings import numpy as np from six.moves import xrange +from sklearn.exceptions import ChangedBehaviorWarning from sklearn.metrics import pairwise_distances from sklearn.utils.validation import check_array from sklearn.base import TransformerMixin @@ -298,7 +299,6 @@ class ITML_Supervised(_BaseITML, TransformerMixin): A positive definite (PD) matrix of shape (n_features, n_features), that will be used as such to set the prior. - A0 : Not used .. deprecated:: 0.5.0 `A0` was deprecated in version 0.5.0 and will @@ -310,7 +310,9 @@ class ITML_Supervised(_BaseITML, TransformerMixin): tuples will be formed like this: X[indices]. random_state : int or numpy.RandomState or None, optional (default=None) A pseudo random number generator object or a seed for it if int. If - ``prior='random'``, ``random_state`` is used to set the prior. + ``prior='random'``, ``random_state`` is used to set the prior. In any + case, `random_state` is also used to randomly sample constraints from + labels. Attributes @@ -350,7 +352,7 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, self.num_constraints = num_constraints self.bounds = bounds - def fit(self, X, y, random_state=np.random, bounds=None): + def fit(self, X, y, random_state='deprecated', bounds=None): """Create constraints from labels and learn the ITML model. @@ -362,8 +364,11 @@ def fit(self, X, y, random_state=np.random, bounds=None): y : (n) array-like Data labels. - random_state : numpy.random.RandomState, optional - If provided, controls random number generation. + random_state : Not used + .. deprecated:: 0.5.0 + `random_state` in the `fit` function was deprecated in version 0.5.0 + and will be removed in 0.6.0. Set `random_state` at initialization + instead (when instantiating a new `ITML_Supervised` object). bounds : array-like of two numbers Bounds on similarity, aside slack variables, s.t. @@ -384,6 +389,18 @@ def fit(self, X, y, random_state=np.random, bounds=None): ' It has been deprecated in version 0.5.0 and will be' ' removed in 0.6.0. Use the "bounds" parameter of this ' 'fit method instead.', DeprecationWarning) + if random_state != 'deprecated': + warnings.warn('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `ITML_Supervised` ' + 'object).', DeprecationWarning) + else: + warnings.warn('As of v0.5.0, `ITML_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.', + ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: @@ -392,6 +409,6 @@ def fit(self, X, y, random_state=np.random, bounds=None): c = Constraints(y) pos_neg = c.positive_negative_pairs(num_constraints, - random_state=random_state) + random_state=self.random_state) pairs, y = wrap_pairs(X, pos_neg) return _BaseITML._fit(self, pairs, y, bounds=bounds) diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index e3b0d323..72a448ec 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -286,7 +286,8 @@ class LSML_Supervised(_BaseLSML, TransformerMixin): random_state : int or numpy.RandomState or None, optional (default=None) A pseudo random number generator object or a seed for it if int. If ``init='random'``, ``random_state`` is used to set the random - prior. + prior. In any case, `random_state` is also used to randomly sample + constraints from labels. Attributes ---------- @@ -308,7 +309,7 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, self.num_constraints = num_constraints self.weights = weights - def fit(self, X, y, random_state=np.random): + def fit(self, X, y, random_state='deprecated'): """Create constraints from labels and learn the LSML model. Parameters @@ -319,13 +320,28 @@ def fit(self, X, y, random_state=np.random): y : (n) array-like Data labels. - random_state : numpy.random.RandomState, optional - If provided, controls random number generation. + random_state : Not used + .. deprecated:: 0.5.0 + `random_state` in the `fit` function was deprecated in version 0.5.0 + and will be removed in 0.6.0. Set `random_state` at initialization + instead (when instantiating a new `LSML_Supervised` object). """ if self.num_labeled != 'deprecated': warnings.warn('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' ' removed in 0.6.0', DeprecationWarning) + if random_state != 'deprecated': + warnings.warn('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `LSML_Supervised` ' + 'object).', DeprecationWarning) + else: + warnings.warn('As of v0.5.0, `LSML_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.', + ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: @@ -334,6 +350,6 @@ def fit(self, X, y, random_state=np.random): c = Constraints(y) pos_neg = c.positive_negative_pairs(num_constraints, same_length=True, - random_state=random_state) + random_state=self.random_state) return _BaseLSML._fit(self, X[np.column_stack(pos_neg)], weights=self.weights) diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 9f02425c..55337b2e 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -538,7 +538,8 @@ class MMC_Supervised(_BaseMMC, TransformerMixin): random_state : int or numpy.RandomState or None, optional (default=None) A pseudo random number generator object or a seed for it if int. If ``init='random'``, ``random_state`` is used to initialize the random - Mahalanobis matrix. + Mahalanobis matrix. In any case, `random_state` is also used to + randomly sample constraints from labels. `MMC_Supervised` creates pairs of similar sample by taking same class samples, and pairs of dissimilar samples by taking different class @@ -566,7 +567,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, self.num_labeled = num_labeled self.num_constraints = num_constraints - def fit(self, X, y, random_state=np.random): + def fit(self, X, y, random_state='deprecated'): """Create constraints from labels and learn the MMC model. Parameters @@ -575,13 +576,28 @@ def fit(self, X, y, random_state=np.random): Input data, where each row corresponds to a single instance. y : (n) array-like Data labels. - random_state : numpy.random.RandomState, optional - If provided, controls random number generation. + random_state : Not used + .. deprecated:: 0.5.0 + `random_state` in the `fit` function was deprecated in version 0.5.0 + and will be removed in 0.6.0. Set `random_state` at initialization + instead (when instantiating a new `MMC_Supervised` object). """ if self.num_labeled != 'deprecated': warnings.warn('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' ' removed in 0.6.0', DeprecationWarning) + if random_state != 'deprecated': + warnings.warn('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `MMC_Supervised` ' + 'object).', DeprecationWarning) + else: + warnings.warn('As of v0.5.0, `MMC_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.', + ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: @@ -590,6 +606,6 @@ def fit(self, X, y, random_state=np.random): c = Constraints(y) pos_neg = c.positive_negative_pairs(num_constraints, - random_state=random_state) + random_state=self.random_state) pairs, y = wrap_pairs(X, pos_neg) return _BaseMMC._fit(self, pairs, y) diff --git a/metric_learn/rca.py b/metric_learn/rca.py index 503e2408..8686f02d 100644 --- a/metric_learn/rca.py +++ b/metric_learn/rca.py @@ -184,11 +184,17 @@ class RCA_Supervised(RCA): be removed in 0.6.0. Use `n_components` instead. num_chunks: int, optional + chunk_size: int, optional + preprocessor : array-like, shape=(n_samples, n_features) or callable The preprocessor to call to get tuples from indices. If array-like, tuples will be formed like this: X[indices]. + random_state : int or numpy.RandomState or None, optional (default=None) + A pseudo random number generator object or a seed for it if int. + It is used to randomly sample constraints from labels. + Attributes ---------- transformer_ : `numpy.ndarray`, shape=(n_components, n_features) @@ -197,13 +203,15 @@ class RCA_Supervised(RCA): def __init__(self, num_dims='deprecated', n_components=None, pca_comps='deprecated', num_chunks=100, chunk_size=2, - preprocessor=None): + preprocessor=None, random_state=None): + """Initialize the supervised version of `RCA`.""" RCA.__init__(self, num_dims=num_dims, n_components=n_components, pca_comps=pca_comps, preprocessor=preprocessor) self.num_chunks = num_chunks self.chunk_size = chunk_size + self.random_state = random_state - def fit(self, X, y, random_state=np.random): + def fit(self, X, y, random_state='deprecated'): """Create constraints from labels and learn the RCA model. Needs num_constraints specified in constructor. @@ -212,10 +220,26 @@ def fit(self, X, y, random_state=np.random): X : (n x d) data matrix each row corresponds to a single instance y : (n) data labels - random_state : a random.seed object to fix the random_state if needed. + random_state : Not used + .. deprecated:: 0.5.0 + `random_state` in the `fit` function was deprecated in version 0.5.0 + and will be removed in 0.6.0. Set `random_state` at initialization + instead (when instantiating a new `RCA_Supervised` object). """ + if random_state != 'deprecated': + warnings.warn('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `RCA_Supervised` ' + 'object).', DeprecationWarning) + else: + warnings.warn('As of v0.5.0, `RCA_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.', + ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) chunks = Constraints(y).chunks(num_chunks=self.num_chunks, chunk_size=self.chunk_size, - random_state=random_state) + random_state=self.random_state) return RCA.fit(self, X, chunks) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 70e65c86..9344ef7c 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -310,7 +310,8 @@ class SDML_Supervised(_BaseSDML, TransformerMixin): random_state : int or numpy.RandomState or None, optional (default=None) A pseudo random number generator object or a seed for it if int. If ``init='random'``, ``random_state`` is used to set the random - prior. + prior. In any case, `random_state` is also used to randomly sample + constraints from labels. Attributes ---------- @@ -336,7 +337,7 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, prior=None, self.num_labeled = num_labeled self.num_constraints = num_constraints - def fit(self, X, y, random_state=np.random): + def fit(self, X, y, random_state='deprecated'): """Create constraints from labels and learn the SDML model. Parameters @@ -345,9 +346,11 @@ def fit(self, X, y, random_state=np.random): data matrix, where each row corresponds to a single instance y : array-like, shape (n,) data labels, one for each instance - random_state : {numpy.random.RandomState, int}, optional - Random number generator or random seed. If not given, the singleton - numpy.random will be used. + random_state : Not used + .. deprecated:: 0.5.0 + `random_state` in the `fit` function was deprecated in version 0.5.0 + and will be removed in 0.6.0. Set `random_state` at initialization + instead (when instantiating a new `SDML_Supervised` object). Returns ------- @@ -358,6 +361,18 @@ def fit(self, X, y, random_state=np.random): warnings.warn('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.5.0 and will be' ' removed in 0.6.0', DeprecationWarning) + if random_state != 'deprecated': + warnings.warn('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `SDML_Supervised` ' + 'object).', DeprecationWarning) + else: + warnings.warn('As of v0.5.0, `SDML_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.', + ChangedBehaviorWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: @@ -366,6 +381,6 @@ def fit(self, X, y, random_state=np.random): c = Constraints(y) pos_neg = c.positive_negative_pairs(num_constraints, - random_state=random_state) + random_state=self.random_state) pairs, y = wrap_pairs(X, pos_neg) return _BaseSDML._fit(self, pairs, y) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index c49c9ef5..0f47a58a 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -119,6 +119,37 @@ def test_changed_behaviour_warning(self): lsml.fit(pairs) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_deprecation_random_state(self): + # test that a deprecation message is thrown if random_state is set at + # fit time + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + lsml_supervised = LSML_Supervised() + msg = ('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `LSML_Supervised` ' + 'object).') + with pytest.warns(DeprecationWarning) as raised_warning: + lsml_supervised.fit(X, y, random_state=np.random) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + def test_changed_behaviour_warning_random_state(self): + # test that a ChangedBehavior warning is thrown if the random_state is + # not set in fit. + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + lsml_supervised = LSML_Supervised() + msg = ('As of v0.5.0, `LSML_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.') + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + lsml_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + class TestITML(MetricTestCase): def test_iris(self): @@ -174,6 +205,37 @@ def test_deprecation_A0(self): itml.fit(pairs, y_pairs) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_deprecation_random_state(self): + # test that a deprecation message is thrown if random_state is set at + # fit time + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + itml_supervised = ITML_Supervised() + msg = ('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `ITML_Supervised` ' + 'object).') + with pytest.warns(DeprecationWarning) as raised_warning: + itml_supervised.fit(X, y, random_state=np.random) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + def test_changed_behaviour_warning_random_state(self): + # test that a ChangedBehavior warning is thrown if the random_state is + # not set in fit. + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + itml_supervised = ITML_Supervised() + msg = ('As of v0.5.0, `ITML_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.') + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + itml_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.parametrize('bounds', [None, (20., 100.), [20., 100.], np.array([20., 100.]), @@ -446,11 +508,11 @@ def test_sdml_supervised_raises_warning_msg_installed_skggm(self): X = np.array([[-10., 0.], [10., 0.], [5., 0.], [3., 0.]]) y = [0, 0, 1, 1] sdml_supervised = SDML_Supervised(balance_param=0.5, prior='identity', - sparsity_param=0.01) + sparsity_param=0.01, random_state=rng) msg = ("There was a problem in SDML when using skggm's graphical " "lasso solver.") with pytest.raises(RuntimeError) as raised_error: - sdml_supervised.fit(X, y, random_state=rng) + sdml_supervised.fit(X, y) assert msg == str(raised_error.value) @pytest.mark.skipif(not HAS_SKGGM, @@ -535,8 +597,9 @@ def test_sdml_works_on_non_spd_pb_with_skggm(self): it should work, but scikit-learn's graphical_lasso does not work""" X, y = load_iris(return_X_y=True) sdml = SDML_Supervised(balance_param=0.5, sparsity_param=0.01, - prior='covariance') - sdml.fit(X, y, random_state=np.random.RandomState(42)) + prior='covariance', + random_state=np.random.RandomState(42)) + sdml.fit(X, y) def test_deprecation_use_cov(self): # test that a deprecation message is thrown if use_cov is set at @@ -586,6 +649,35 @@ def test_changed_behaviour_warning(self): sdml.fit(pairs, y_pairs) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_deprecation_random_state(self): + # test that a deprecation message is thrown if random_state is set at + # fit time + # TODO: remove in v.0.6 + X, y = load_iris(return_X_y=True) + sdml_supervised = SDML_Supervised(balance_param=5e-5) + msg = ('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `SDML_Supervised` ' + 'object).') + with pytest.warns(DeprecationWarning) as raised_warning: + sdml_supervised.fit(X, y, random_state=np.random) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + def test_changed_behaviour_warning_random_state(self): + # test that a ChangedBehavior warning is thrown if the random_state is + # not set in fit. + # TODO: remove in v.0.6 + X, y = load_iris(return_X_y=True) + sdml_supervised = SDML_Supervised(balance_param=5e-5) + msg = ('As of v0.5.0, `SDML_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.') + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + sdml_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.skipif(not HAS_SKGGM, reason='The message should be printed only if skggm is ' @@ -819,24 +911,7 @@ def test_iris(self): rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) rca.fit(self.iris_points, self.iris_labels) csep = class_separation(rca.transform(self.iris_points), self.iris_labels) - self.assertLess(csep, 0.25) - - def test_feature_null_variance(self): - X = np.hstack((self.iris_points, np.eye(len(self.iris_points), M=1))) - - # Apply PCA with the number of components - rca = RCA_Supervised(n_components=2, pca_comps=3, num_chunks=30, - chunk_size=2) - rca.fit(X, self.iris_labels) - csep = class_separation(rca.transform(X), self.iris_labels) - self.assertLess(csep, 0.30) - - # Apply PCA with the minimum variance ratio - rca = RCA_Supervised(n_components=2, pca_comps=0.95, num_chunks=30, - chunk_size=2) - rca.fit(X, self.iris_labels) - csep = class_separation(rca.transform(X), self.iris_labels) - self.assertLess(csep, 0.30) + self.assertLess(csep, 0.29) def test_deprecation_pca_comps(self): # test that a deprecation message is thrown if pca_comps is set at @@ -851,12 +926,12 @@ def test_deprecation_pca_comps(self): '`sklearn.decomposition.PCA` and an `sklearn.pipeline.Pipeline`.') with pytest.warns(ChangedBehaviorWarning) as expected_msg: rca_supervised.fit(X, y) - assert str(expected_msg[0].message) == msg + assert any(str(w.message) == msg for w in expected_msg) rca = RCA(pca_comps=X.shape[1]) with pytest.warns(ChangedBehaviorWarning) as expected_msg: rca.fit(X, y) - assert str(expected_msg[0].message) == msg + assert any(str(w.message) == msg for w in expected_msg) def test_changedbehaviorwarning_preprocessing(self): # test that a ChangedBehaviorWarning is thrown when using RCA @@ -871,12 +946,12 @@ def test_changedbehaviorwarning_preprocessing(self): rca_supervised = RCA_Supervised(num_chunks=20) with pytest.warns(ChangedBehaviorWarning) as expected_msg: rca_supervised.fit(X, y) - assert str(expected_msg[0].message) == msg + assert any(str(w.message) == msg for w in expected_msg) rca = RCA() with pytest.warns(ChangedBehaviorWarning) as expected_msg: rca.fit(X, y) - assert str(expected_msg[0].message) == msg + assert any(str(w.message) == msg for w in expected_msg) def test_rank_deficient_returns_warning(self): """Checks that if the covariance matrix is not invertible, we raise a @@ -895,6 +970,35 @@ def test_rank_deficient_returns_warning(self): rca.fit(X, y) assert any(str(w.message) == msg for w in raised_warnings) + def test_deprecation_random_state(self): + # test that a deprecation message is thrown if random_state is set at + # fit time + # TODO: remove in v.0.6 + X, y = make_classification(random_state=42, n_samples=100) + rca_supervised = RCA_Supervised(num_chunks=20) + msg = ('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `RCA_Supervised` ' + 'object).') + with pytest.warns(DeprecationWarning) as raised_warning: + rca_supervised.fit(X, y, random_state=np.random) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + def test_changed_behaviour_warning_random_state(self): + # test that a ChangedBehavior warning is thrown if the random_state is + # not set in fit. + # TODO: remove in v.0.6 + X, y = make_classification(random_state=42, n_samples=100) + rca_supervised = RCA_Supervised(num_chunks=20) + msg = ('As of v0.5.0, `RCA_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.') + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + rca_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.parametrize('num_dims', [None, 2]) def test_deprecation_num_dims_rca(num_dims): @@ -908,7 +1012,7 @@ def test_deprecation_num_dims_rca(num_dims): ' removed in 0.6.0. Use "n_components" instead') with pytest.warns(DeprecationWarning) as raised_warning: rca.fit(X, y) - assert (str(raised_warning[0].message) == msg) + assert any(str(w.message) == msg for w in raised_warning) # we take a small number of chunks so that RCA works on iris rca_supervised = RCA_Supervised(num_dims=num_dims, num_chunks=10) @@ -917,7 +1021,7 @@ def test_deprecation_num_dims_rca(num_dims): ' removed in 0.6.0. Use "n_components" instead') with pytest.warns(DeprecationWarning) as raised_warning: rca_supervised.fit(X, y) - assert (str(raised_warning[0].message) == msg) + assert any(str(w.message) == msg for w in raised_warning) class TestMLKR(MetricTestCase): @@ -1095,6 +1199,37 @@ def test_changed_behaviour_warning(self): mmc.fit(pairs, y_pairs) assert any(msg == str(wrn.message) for wrn in raised_warning) + def test_deprecation_random_state(self): + # test that a deprecation message is thrown if random_state is set at + # fit time + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + mmc_supervised = MMC_Supervised() + msg = ('"random_state" parameter in the `fit` function is ' + 'deprecated. Set `random_state` at initialization ' + 'instead (when instantiating a new `MMC_Supervised` ' + 'object).') + with pytest.warns(DeprecationWarning) as raised_warning: + mmc_supervised.fit(X, y, random_state=np.random) + assert any(msg == str(wrn.message) for wrn in raised_warning) + + def test_changed_behaviour_warning_random_state(self): + # test that a ChangedBehavior warning is thrown if the random_state is + # not set in fit. + # TODO: remove in v.0.6 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + mmc_supervised = MMC_Supervised() + msg = ('As of v0.5.0, `MMC_Supervised` now uses the ' + '`random_state` given at initialization to sample ' + 'constraints, not the default `np.random` from the `fit` ' + 'method, since this argument is now deprecated. ' + 'This warning will disappear in v0.6.0.') + with pytest.warns(ChangedBehaviorWarning) as raised_warning: + mmc_supervised.fit(X, y) + assert any(msg == str(wrn.message) for wrn in raised_warning) + @pytest.mark.parametrize(('algo_class', 'dataset'), [(NCA, make_classification()), diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 313948ec..0c1117ed 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -96,7 +96,7 @@ def test_rca(self): "RCA_Supervised(chunk_size=2, " "n_components=None, num_chunks=100, " "num_dims='deprecated', pca_comps='deprecated', " - "preprocessor=None)")) + "preprocessor=None, random_state=None)")) def test_mlkr(self): self.assertEqual(remove_spaces(str(metric_learn.MLKR())), diff --git a/test/test_fit_transform.py b/test/test_fit_transform.py index b7255ea9..a9b2719e 100644 --- a/test/test_fit_transform.py +++ b/test/test_fit_transform.py @@ -30,25 +30,25 @@ def test_cov(self): def test_lsml_supervised(self): seed = np.random.RandomState(1234) - lsml = LSML_Supervised(num_constraints=200) - lsml.fit(self.X, self.y, random_state=seed) + lsml = LSML_Supervised(num_constraints=200, random_state=seed) + lsml.fit(self.X, self.y) res_1 = lsml.transform(self.X) seed = np.random.RandomState(1234) - lsml = LSML_Supervised(num_constraints=200) - res_2 = lsml.fit_transform(self.X, self.y, random_state=seed) + lsml = LSML_Supervised(num_constraints=200, random_state=seed) + res_2 = lsml.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) def test_itml_supervised(self): seed = np.random.RandomState(1234) - itml = ITML_Supervised(num_constraints=200) - itml.fit(self.X, self.y, random_state=seed) + itml = ITML_Supervised(num_constraints=200, random_state=seed) + itml.fit(self.X, self.y) res_1 = itml.transform(self.X) seed = np.random.RandomState(1234) - itml = ITML_Supervised(num_constraints=200) - res_2 = itml.fit_transform(self.X, self.y, random_state=seed) + itml = ITML_Supervised(num_constraints=200, random_state=seed) + res_2 = itml.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) @@ -65,14 +65,14 @@ def test_lmnn(self): def test_sdml_supervised(self): seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, - prior='identity') - sdml.fit(self.X, self.y, random_state=seed) + prior='identity', random_state=seed) + sdml.fit(self.X, self.y) res_1 = sdml.transform(self.X) seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500, balance_param=1e-5, - prior='identity') - res_2 = sdml.fit_transform(self.X, self.y, random_state=seed) + prior='identity', random_state=seed) + res_2 = sdml.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) @@ -100,13 +100,15 @@ def test_lfda(self): def test_rca_supervised(self): seed = np.random.RandomState(1234) - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) - rca.fit(self.X, self.y, random_state=seed) + rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2, + random_state=seed) + rca.fit(self.X, self.y) res_1 = rca.transform(self.X) seed = np.random.RandomState(1234) - rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2) - res_2 = rca.fit_transform(self.X, self.y, random_state=seed) + rca = RCA_Supervised(n_components=2, num_chunks=30, chunk_size=2, + random_state=seed) + res_2 = rca.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) @@ -122,13 +124,13 @@ def test_mlkr(self): def test_mmc_supervised(self): seed = np.random.RandomState(1234) - mmc = MMC_Supervised(num_constraints=200) - mmc.fit(self.X, self.y, random_state=seed) + mmc = MMC_Supervised(num_constraints=200, random_state=seed) + mmc.fit(self.X, self.y) res_1 = mmc.transform(self.X) seed = np.random.RandomState(1234) - mmc = MMC_Supervised(num_constraints=200) - res_2 = mmc.fit_transform(self.X, self.y, random_state=seed) + mmc = MMC_Supervised(num_constraints=200, random_state=seed) + res_2 = mmc.fit_transform(self.X, self.y) assert_array_almost_equal(res_1, res_2) diff --git a/test/test_sklearn_compat.py b/test/test_sklearn_compat.py index 4c511263..b2056c09 100644 --- a/test/test_sklearn_compat.py +++ b/test/test_sklearn_compat.py @@ -24,31 +24,28 @@ quadruplets_learners) -# Wrap the _Supervised methods with a deterministic wrapper for testing. -class deterministic_mixin(object): - def fit(self, X, y): - rs = np.random.RandomState(1234) - return super(deterministic_mixin, self).fit(X, y, random_state=rs) +class Stable_RCA_Supervised(RCA_Supervised): + def __init__(self, n_components=None, pca_comps=None, + chunk_size=2, preprocessor=None, random_state=None): + # this init makes RCA stable for scikit-learn examples. + super(Stable_RCA_Supervised, self).__init__( + num_chunks=2, n_components=n_components, pca_comps=pca_comps, + chunk_size=chunk_size, preprocessor=preprocessor, + random_state=random_state) -class dLSML(deterministic_mixin, LSML_Supervised): - pass +class Stable_SDML_Supervised(SDML_Supervised): -class dITML(deterministic_mixin, ITML_Supervised): - pass - - -class dMMC(deterministic_mixin, MMC_Supervised): - pass - - -class dSDML(deterministic_mixin, SDML_Supervised): - pass - - -class dRCA(deterministic_mixin, RCA_Supervised): - pass + def __init__(self, sparsity_param=0.01, num_labeled='deprecated', + num_constraints=None, verbose=False, preprocessor=None, + random_state=None): + # this init makes SDML stable for scikit-learn examples. + super(Stable_SDML_Supervised, self).__init__( + sparsity_param=sparsity_param, num_labeled=num_labeled, + num_constraints=num_constraints, verbose=verbose, + preprocessor=preprocessor, balance_param=1e-5, prior='identity', + random_state=random_state) class TestSklearnCompat(unittest.TestCase): @@ -68,36 +65,19 @@ def test_nca(self): check_estimator(NCA) def test_lsml(self): - check_estimator(dLSML) + check_estimator(LSML_Supervised) def test_itml(self): - check_estimator(dITML) + check_estimator(ITML_Supervised) def test_mmc(self): - check_estimator(dMMC) + check_estimator(MMC_Supervised) def test_sdml(self): - def stable_init(self, sparsity_param=0.01, num_labeled='deprecated', - num_constraints=None, verbose=False, preprocessor=None): - # this init makes SDML stable for scikit-learn examples. - SDML_Supervised.__init__(self, sparsity_param=sparsity_param, - num_labeled=num_labeled, - num_constraints=num_constraints, - verbose=verbose, - preprocessor=preprocessor, - balance_param=1e-5, prior='identity') - dSDML.__init__ = stable_init - check_estimator(dSDML) + check_estimator(Stable_SDML_Supervised) def test_rca(self): - def stable_init(self, n_components=None, pca_comps=None, - chunk_size=2, preprocessor=None): - # this init makes RCA stable for scikit-learn examples. - RCA_Supervised.__init__(self, num_chunks=2, n_components=n_components, - pca_comps=pca_comps, chunk_size=chunk_size, - preprocessor=preprocessor) - dRCA.__init__ = stable_init - check_estimator(dRCA) + check_estimator(Stable_RCA_Supervised) RNG = check_random_state(0) diff --git a/test/test_transformer_metric_conversion.py b/test/test_transformer_metric_conversion.py index 651f60ea..62ac8777 100644 --- a/test/test_transformer_metric_conversion.py +++ b/test/test_transformer_metric_conversion.py @@ -30,8 +30,8 @@ def test_cov(self): def test_lsml_supervised(self): seed = np.random.RandomState(1234) - lsml = LSML_Supervised(num_constraints=200) - lsml.fit(self.X, self.y, random_state=seed) + lsml = LSML_Supervised(num_constraints=200, random_state=seed) + lsml.fit(self.X, self.y) L = lsml.transformer_ assert_array_almost_equal(L.T.dot(L), lsml.get_mahalanobis_matrix()) @@ -51,8 +51,8 @@ def test_lmnn(self): def test_sdml_supervised(self): seed = np.random.RandomState(1234) sdml = SDML_Supervised(num_constraints=1500, prior='identity', - balance_param=1e-5) - sdml.fit(self.X, self.y, random_state=seed) + balance_param=1e-5, random_state=seed) + sdml.fit(self.X, self.y) L = sdml.transformer_ assert_array_almost_equal(L.T.dot(L), sdml.get_mahalanobis_matrix())