From ab4e1a678bd65c04486484c71d137c17a7b7b15c Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 31 Aug 2018 15:24:46 +0200 Subject: [PATCH 01/14] API: remove num_labeled parameter --- metric_learn/constraints.py | 12 ------------ metric_learn/itml.py | 9 ++------- metric_learn/lsml.py | 8 ++------ metric_learn/mmc.py | 10 +++------- metric_learn/sdml.py | 8 ++------ test/test_base_metric.py | 13 ++++++------- 6 files changed, 15 insertions(+), 45 deletions(-) diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index 8824450a..437ac3f0 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -88,15 +88,3 @@ def chunks(self, num_chunks=100, chunk_size=2, random_state=np.random): raise ValueError('Unable to make %d chunks of %d examples each' % (num_chunks, chunk_size)) return chunks - - @staticmethod - def random_subset(all_labels, num_preserved=np.inf, random_state=np.random): - """ - the random state object to be passed must be a numpy random seed - """ - n = len(all_labels) - num_ignored = max(0, n - num_preserved) - idx = random_state.randint(n, size=num_ignored) - partial_labels = np.array(all_labels, copy=True) - partial_labels[idx] = -1 - return Constraints(partial_labels) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 4d27c412..b02ceacf 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -143,8 +143,7 @@ def metric(self): class ITML_Supervised(ITML): """Information Theoretic Metric Learning (ITML)""" def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, - num_labeled=np.inf, num_constraints=None, bounds=None, A0=None, - verbose=False): + num_constraints=None, bounds=None, A0=None, verbose=False): """Initialize the learner. Parameters @@ -153,8 +152,6 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, value for slack variables max_iter : int, optional convergence_threshold : float, optional - num_labeled : int, optional - number of labels to preserve for training num_constraints: int, optional number of constraints to generate bounds : list (pos,neg) pairs, optional @@ -167,7 +164,6 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, ITML.__init__(self, gamma=gamma, max_iter=max_iter, convergence_threshold=convergence_threshold, A0=A0, verbose=verbose) - self.num_labeled = num_labeled self.num_constraints = num_constraints self.bounds = bounds @@ -191,8 +187,7 @@ def fit(self, X, y, random_state=np.random): num_classes = len(np.unique(y)) num_constraints = 20 * num_classes**2 - c = Constraints.random_subset(y, self.num_labeled, - random_state=random_state) + c = Constraints(y) pos_neg = c.positive_negative_pairs(num_constraints, random_state=random_state) return ITML.fit(self, X, pos_neg, bounds=self.bounds) diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 404fe286..c22f658f 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -132,7 +132,7 @@ def _gradient(self, metric): class LSML_Supervised(LSML): - def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_labeled=np.inf, + def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_constraints=None, weights=None, verbose=False): """Initialize the learner. @@ -142,8 +142,6 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_labeled=np.inf, max_iter : int, optional prior : (d x d) matrix, optional guess at a metric [default: covariance(X)] - num_labeled : int, optional - number of labels to preserve for training num_constraints: int, optional number of constraints to generate weights : (m,) array of floats, optional @@ -153,7 +151,6 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, num_labeled=np.inf, """ LSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior, verbose=verbose) - self.num_labeled = num_labeled self.num_constraints = num_constraints self.weights = weights @@ -177,8 +174,7 @@ def fit(self, X, y, random_state=np.random): num_classes = len(np.unique(y)) num_constraints = 20 * num_classes**2 - c = Constraints.random_subset(y, self.num_labeled, - random_state=random_state) + c = Constraints(y) pairs = c.positive_negative_pairs(num_constraints, same_length=True, random_state=random_state) return LSML.fit(self, X, pairs, weights=self.weights) diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index ef08aeef..7adc95ea 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -384,8 +384,8 @@ def transformer(self): class MMC_Supervised(MMC): """Mahalanobis Metric for Clustering (MMC)""" def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, - num_labeled=np.inf, num_constraints=None, - A0=None, diagonal=False, diagonal_c=1.0, verbose=False): + num_constraints=None, A0=None, diagonal=False, + diagonal_c=1.0, verbose=False): """Initialize the learner. Parameters @@ -393,8 +393,6 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, max_iter : int, optional max_proj : int, optional convergence_threshold : float, optional - num_labeled : int, optional - number of labels to preserve for training num_constraints: int, optional number of constraints to generate A0 : (d x d) matrix, optional @@ -413,7 +411,6 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, convergence_threshold=convergence_threshold, A0=A0, diagonal=diagonal, diagonal_c=diagonal_c, verbose=verbose) - self.num_labeled = num_labeled self.num_constraints = num_constraints def fit(self, X, y, random_state=np.random): @@ -434,8 +431,7 @@ def fit(self, X, y, random_state=np.random): num_classes = len(np.unique(y)) num_constraints = 20 * num_classes**2 - c = Constraints.random_subset(y, self.num_labeled, - random_state=random_state) + c = Constraints(y) pos_neg = c.positive_negative_pairs(num_constraints, random_state=random_state) return MMC.fit(self, X, pos_neg) diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 93280334..599860cb 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -82,7 +82,7 @@ def fit(self, X, W): class SDML_Supervised(SDML): def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, - num_labeled=np.inf, num_constraints=None, verbose=False): + num_constraints=None, verbose=False): """ Parameters ---------- @@ -92,8 +92,6 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, trade off between optimizer and sparseness (see graph_lasso) use_cov : bool, optional controls prior matrix, will use the identity if use_cov=False - num_labeled : int, optional - number of labels to preserve for training num_constraints : int, optional number of constraints to generate verbose : bool, optional @@ -102,7 +100,6 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, SDML.__init__(self, balance_param=balance_param, sparsity_param=sparsity_param, use_cov=use_cov, verbose=verbose) - self.num_labeled = num_labeled self.num_constraints = num_constraints def fit(self, X, y, random_state=np.random): @@ -129,7 +126,6 @@ def fit(self, X, y, random_state=np.random): num_classes = len(np.unique(y)) num_constraints = 20 * num_classes**2 - c = Constraints.random_subset(y, self.num_labeled, - random_state=random_state) + c = Constraints(y) adj = c.adjacency_matrix(num_constraints, random_state=random_state) return SDML.fit(self, X, adj) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 4b132af4..7ac90da8 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -30,8 +30,7 @@ def test_itml(self): """.strip('\n')) self.assertEqual(str(metric_learn.ITML_Supervised()), """ ITML_Supervised(A0=None, bounds=None, convergence_threshold=0.001, gamma=1.0, - max_iter=1000, num_constraints=None, num_labeled=inf, - verbose=False) + max_iter=1000, num_constraints=None, verbose=False) """.strip('\n')) def test_lsml(self): @@ -39,8 +38,8 @@ def test_lsml(self): str(metric_learn.LSML()), "LSML(max_iter=1000, prior=None, tol=0.001, verbose=False)") self.assertEqual(str(metric_learn.LSML_Supervised()), """ -LSML_Supervised(max_iter=1000, num_constraints=None, num_labeled=inf, - prior=None, tol=0.001, verbose=False, weights=None) +LSML_Supervised(max_iter=1000, num_constraints=None, prior=None, tol=0.001, + verbose=False, weights=None) """.strip('\n')) def test_sdml(self): @@ -48,8 +47,8 @@ def test_sdml(self): "SDML(balance_param=0.5, sparsity_param=0.01, " "use_cov=True, verbose=False)") self.assertEqual(str(metric_learn.SDML_Supervised()), """ -SDML_Supervised(balance_param=0.5, num_constraints=None, num_labeled=inf, - sparsity_param=0.01, use_cov=True, verbose=False) +SDML_Supervised(balance_param=0.5, num_constraints=None, sparsity_param=0.01, + use_cov=True, verbose=False) """.strip('\n')) def test_rca(self): @@ -72,7 +71,7 @@ def test_mmc(self): self.assertEqual(str(metric_learn.MMC_Supervised()), """ MMC_Supervised(A0=None, convergence_threshold=1e-06, diagonal=False, diagonal_c=1.0, max_iter=100, max_proj=10000, num_constraints=None, - num_labeled=inf, verbose=False) + verbose=False) """.strip('\n')) if __name__ == '__main__': From f0ffdfdb519c44cafd1e62926f3894180c330e98 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 31 Aug 2018 18:17:53 +0200 Subject: [PATCH 02/14] DEP: Add deprecation warnings for num_labels --- metric_learn/itml.py | 14 ++++++++++++- metric_learn/lsml.py | 13 +++++++++++- metric_learn/mmc.py | 14 +++++++++++-- metric_learn/sdml.py | 12 ++++++++++- test/metric_learn_test.py | 44 +++++++++++++++++++++++++++++++++++++++ test/test_base_metric.py | 14 +++++++------ 6 files changed, 100 insertions(+), 11 deletions(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 4da4321c..3c3f3d43 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -14,6 +14,7 @@ """ from __future__ import print_function, absolute_import +import warnings import numpy as np from six.moves import xrange from sklearn.metrics import pairwise_distances @@ -143,7 +144,8 @@ def metric(self): class ITML_Supervised(ITML): """Information Theoretic Metric Learning (ITML)""" def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, - num_constraints=None, bounds=None, A0=None, verbose=False): + num_labeled='deprecated', num_constraints=None, bounds=None, + A0=None, verbose=False): """Initialize the supervised version of `ITML`. `ITML_Supervised` creates pairs of similar sample by taking same class @@ -156,6 +158,10 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, value for slack variables max_iter : int, optional convergence_threshold : float, optional + num_labeled : Not used + .. deprecated:: 0.4.0 + `num_labeled` was deprecated in version 0.4.0 and will + be removed in 0.5.0. num_constraints: int, optional number of constraints to generate bounds : list (pos,neg) pairs, optional @@ -164,10 +170,12 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, initial regularization matrix, defaults to identity verbose : bool, optional if True, prints information while learning + learning_rate : Not used """ ITML.__init__(self, gamma=gamma, max_iter=max_iter, convergence_threshold=convergence_threshold, A0=A0, verbose=verbose) + self.num_labeled = num_labeled self.num_constraints = num_constraints self.bounds = bounds @@ -185,6 +193,10 @@ def fit(self, X, y, random_state=np.random): random_state : numpy.random.RandomState, optional If provided, controls random number generation. """ + if self.num_labeled != 'deprecated': + warnings.warn('"num_labeled" parameter is not used.' + ' It has been deprecated in version 0.4 and will be' + 'removed in 0.5', DeprecationWarning) X, y = check_X_y(X, y) num_constraints = self.num_constraints if num_constraints is None: diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index d3b01044..1e8e2aff 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -8,6 +8,7 @@ """ from __future__ import print_function, absolute_import, division +import warnings import numpy as np import scipy.linalg from six.moves import xrange @@ -133,7 +134,8 @@ def _gradient(self, metric): class LSML_Supervised(LSML): def __init__(self, tol=1e-3, max_iter=1000, prior=None, - num_constraints=None, weights=None, verbose=False): + num_labeled='deprecated', num_constraints=None, weights=None, + verbose=False): """Initialize the supervised version of `LSML`. `LSML_Supervised` creates quadruplets from labeled samples by taking two @@ -147,6 +149,10 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, max_iter : int, optional prior : (d x d) matrix, optional guess at a metric [default: covariance(X)] + num_labeled : Not used + .. deprecated:: 0.4.0 + `num_labeled` was deprecated in version 0.4.0 and will + be removed in 0.5.0. num_constraints: int, optional number of constraints to generate weights : (m,) array of floats, optional @@ -156,6 +162,7 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, """ LSML.__init__(self, tol=tol, max_iter=max_iter, prior=prior, verbose=verbose) + self.num_labeled = num_labeled self.num_constraints = num_constraints self.weights = weights @@ -173,6 +180,10 @@ def fit(self, X, y, random_state=np.random): random_state : numpy.random.RandomState, optional If provided, controls random number generation. """ + if self.num_labeled != 'deprecated': + warnings.warn('"num_labeled" parameter is not used.' + ' It has been deprecated in version 0.4 and will be' + 'removed in 0.5', DeprecationWarning) X, y = check_X_y(X, y) num_constraints = self.num_constraints if num_constraints is None: diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index f703896f..4e76a75d 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -17,6 +17,7 @@ """ from __future__ import print_function, absolute_import, division +import warnings import numpy as np from six.moves import xrange from sklearn.metrics import pairwise_distances @@ -384,8 +385,8 @@ def transformer(self): class MMC_Supervised(MMC): """Mahalanobis Metric for Clustering (MMC)""" def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, - num_constraints=None, A0=None, diagonal=False, - diagonal_c=1.0, verbose=False): + num_labeled='deprecated', num_constraints=None, A0=None, + diagonal=False, diagonal_c=1.0, verbose=False): """Initialize the supervised version of `MMC`. `MMC_Supervised` creates pairs of similar sample by taking same class @@ -397,6 +398,10 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, max_iter : int, optional max_proj : int, optional convergence_threshold : float, optional + num_labeled : Not used + .. deprecated:: 0.4.0 + `num_labeled` was deprecated in version 0.4.0 and will + be removed in 0.5.0. num_constraints: int, optional number of constraints to generate A0 : (d x d) matrix, optional @@ -415,6 +420,7 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, convergence_threshold=convergence_threshold, A0=A0, diagonal=diagonal, diagonal_c=diagonal_c, verbose=verbose) + self.num_labeled = num_labeled self.num_constraints = num_constraints def fit(self, X, y, random_state=np.random): @@ -429,6 +435,10 @@ def fit(self, X, y, random_state=np.random): random_state : numpy.random.RandomState, optional If provided, controls random number generation. """ + if self.num_labeled != 'deprecated': + warnings.warn('"num_labeled" parameter is not used.' + ' It has been deprecated in version 0.4 and will be' + 'removed in 0.5', DeprecationWarning) X, y = check_X_y(X, y) num_constraints = self.num_constraints if num_constraints is None: diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 6506d97c..7c119f49 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -9,6 +9,7 @@ """ from __future__ import absolute_import +import warnings import numpy as np from scipy.sparse.csgraph import laplacian from sklearn.covariance import graph_lasso @@ -82,7 +83,7 @@ def fit(self, X, W): class SDML_Supervised(SDML): def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, - num_constraints=None, verbose=False): + num_labeled='deprecated', num_constraints=None, verbose=False): """Initialize the supervised version of `SDML`. `SDML_Supervised` creates pairs of similar sample by taking same class @@ -97,6 +98,10 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, trade off between optimizer and sparseness (see graph_lasso) use_cov : bool, optional controls prior matrix, will use the identity if use_cov=False + num_labeled : Not used + .. deprecated:: 0.4.0 + `num_labeled` was deprecated in version 0.4.0 and will + be removed in 0.5.0. num_constraints : int, optional number of constraints to generate verbose : bool, optional @@ -105,6 +110,7 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, SDML.__init__(self, balance_param=balance_param, sparsity_param=sparsity_param, use_cov=use_cov, verbose=verbose) + self.num_labeled = num_labeled self.num_constraints = num_constraints def fit(self, X, y, random_state=np.random): @@ -125,6 +131,10 @@ def fit(self, X, y, random_state=np.random): self : object Returns the instance. """ + if self.num_labeled != 'deprecated': + warnings.warn('"num_labeled" parameter is not used.' + ' It has been deprecated in version 0.4 and will be' + 'removed in 0.5', DeprecationWarning) y = check_array(y, ensure_2d=False) num_constraints = self.num_constraints if num_constraints is None: diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 1d0a5d02..d0fdf4b3 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -55,6 +55,17 @@ def test_iris(self): csep = class_separation(lsml.transform(), self.iris_labels) self.assertLess(csep, 0.8) # it's pretty terrible + def test_deprecation(self): + # test that the right deprecation message is thrown. + # TODO: remove in v.0.5 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + lsml_supervised = LSML_Supervised(num_labeled=np.inf) + msg = ('"num_labeled" parameter is not used.' + ' It has been deprecated in version 0.4 and will be' + 'removed in 0.5') + assert_warns_message(DeprecationWarning, msg, lsml_supervised.fit, X, y) + class TestITML(MetricTestCase): def test_iris(self): @@ -64,6 +75,17 @@ def test_iris(self): csep = class_separation(itml.transform(), self.iris_labels) self.assertLess(csep, 0.2) + def test_deprecation(self): + # test that the right deprecation message is thrown. + # TODO: remove in v.0.5 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + itml_supervised = ITML_Supervised(num_labeled=np.inf) + msg = ('"num_labeled" parameter is not used.' + ' It has been deprecated in version 0.4 and will be' + 'removed in 0.5') + assert_warns_message(DeprecationWarning, msg, itml_supervised.fit, X, y) + class TestLMNN(MetricTestCase): def test_iris(self): @@ -118,6 +140,17 @@ def test_iris(self): csep = class_separation(sdml.transform(), self.iris_labels) self.assertLess(csep, 0.25) + def test_deprecation(self): + # test that the right deprecation message is thrown. + # TODO: remove in v.0.5 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + sdml_supervised = SDML_Supervised(num_labeled=np.inf) + msg = ('"num_labeled" parameter is not used.' + ' It has been deprecated in version 0.4 and will be' + 'removed in 0.5') + assert_warns_message(DeprecationWarning, msg, sdml_supervised.fit, X, y) + class TestNCA(MetricTestCase): def test_iris(self): @@ -343,6 +376,17 @@ def test_iris(self): csep = class_separation(mmc.transform(), self.iris_labels) self.assertLess(csep, 0.2) + def test_deprecation(self): + # test that the right deprecation message is thrown. + # TODO: remove in v.0.5 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + mmc_supervised = MMC_Supervised(num_labeled=np.inf) + msg = ('"num_labeled" parameter is not used.' + ' It has been deprecated in version 0.4 and will be' + 'removed in 0.5') + assert_warns_message(DeprecationWarning, msg, mmc_supervised.fit, X, y) + @pytest.mark.parametrize(('algo_class', 'dataset'), [(NCA, make_classification()), diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 7ac90da8..e66074ee 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -30,7 +30,8 @@ def test_itml(self): """.strip('\n')) self.assertEqual(str(metric_learn.ITML_Supervised()), """ ITML_Supervised(A0=None, bounds=None, convergence_threshold=0.001, gamma=1.0, - max_iter=1000, num_constraints=None, verbose=False) + max_iter=1000, num_constraints=None, num_labeled='deprecated', + verbose=False) """.strip('\n')) def test_lsml(self): @@ -38,8 +39,8 @@ def test_lsml(self): str(metric_learn.LSML()), "LSML(max_iter=1000, prior=None, tol=0.001, verbose=False)") self.assertEqual(str(metric_learn.LSML_Supervised()), """ -LSML_Supervised(max_iter=1000, num_constraints=None, prior=None, tol=0.001, - verbose=False, weights=None) +LSML_Supervised(max_iter=1000, num_constraints=None, num_labeled='deprecated', + prior=None, tol=0.001, verbose=False, weights=None) """.strip('\n')) def test_sdml(self): @@ -47,8 +48,9 @@ def test_sdml(self): "SDML(balance_param=0.5, sparsity_param=0.01, " "use_cov=True, verbose=False)") self.assertEqual(str(metric_learn.SDML_Supervised()), """ -SDML_Supervised(balance_param=0.5, num_constraints=None, sparsity_param=0.01, - use_cov=True, verbose=False) +SDML_Supervised(balance_param=0.5, num_constraints=None, + num_labeled='deprecated', sparsity_param=0.01, use_cov=True, + verbose=False) """.strip('\n')) def test_rca(self): @@ -71,7 +73,7 @@ def test_mmc(self): self.assertEqual(str(metric_learn.MMC_Supervised()), """ MMC_Supervised(A0=None, convergence_threshold=1e-06, diagonal=False, diagonal_c=1.0, max_iter=100, max_proj=10000, num_constraints=None, - verbose=False) + num_labeled='deprecated', verbose=False) """.strip('\n')) if __name__ == '__main__': From 8727c44213b2c25c0aebfa6fa12b25c322abada7 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 3 Sep 2018 11:51:11 +0200 Subject: [PATCH 03/14] MAINT: put deprecation for version 0.5.0 --- metric_learn/itml.py | 6 +++--- metric_learn/lsml.py | 6 +++--- metric_learn/mmc.py | 6 +++--- metric_learn/sdml.py | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 3c3f3d43..cfbbe9b1 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -159,9 +159,9 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, max_iter : int, optional convergence_threshold : float, optional num_labeled : Not used - .. deprecated:: 0.4.0 - `num_labeled` was deprecated in version 0.4.0 and will - be removed in 0.5.0. + .. deprecated:: 0.5.0 + `num_labeled` was deprecated in version 0.5.0 and will + be removed in 0.6.0. num_constraints: int, optional number of constraints to generate bounds : list (pos,neg) pairs, optional diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 1e8e2aff..c32a7cb2 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -150,9 +150,9 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, prior : (d x d) matrix, optional guess at a metric [default: covariance(X)] num_labeled : Not used - .. deprecated:: 0.4.0 - `num_labeled` was deprecated in version 0.4.0 and will - be removed in 0.5.0. + .. deprecated:: 0.5.0 + `num_labeled` was deprecated in version 0.5.0 and will + be removed in 0.6.0. num_constraints: int, optional number of constraints to generate weights : (m,) array of floats, optional diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 4e76a75d..490e55b1 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -399,9 +399,9 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, max_proj : int, optional convergence_threshold : float, optional num_labeled : Not used - .. deprecated:: 0.4.0 - `num_labeled` was deprecated in version 0.4.0 and will - be removed in 0.5.0. + .. deprecated:: 0.5.0 + `num_labeled` was deprecated in version 0.5.0 and will + be removed in 0.6.0. num_constraints: int, optional number of constraints to generate A0 : (d x d) matrix, optional diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 7c119f49..7fd48036 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -99,9 +99,9 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, use_cov : bool, optional controls prior matrix, will use the identity if use_cov=False num_labeled : Not used - .. deprecated:: 0.4.0 - `num_labeled` was deprecated in version 0.4.0 and will - be removed in 0.5.0. + .. deprecated:: 0.5.0 + `num_labeled` was deprecated in version 0.5.0 and will + be removed in 0.6.0. num_constraints : int, optional number of constraints to generate verbose : bool, optional From bc1eb32f95d0aea84ba84d60e6d320d5e809e4fe Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 2 Jan 2019 15:52:15 +0100 Subject: [PATCH 04/14] Revert "MAINT: put deprecation for version 0.5.0" This reverts commit 8727c44213b2c25c0aebfa6fa12b25c322abada7. --- metric_learn/itml.py | 6 +++--- metric_learn/lsml.py | 6 +++--- metric_learn/mmc.py | 6 +++--- metric_learn/sdml.py | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index ab9db7be..adc018d6 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -159,9 +159,9 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, max_iter : int, optional convergence_threshold : float, optional num_labeled : Not used - .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. + .. deprecated:: 0.4.0 + `num_labeled` was deprecated in version 0.4.0 and will + be removed in 0.5.0. num_constraints: int, optional number of constraints to generate bounds : list (pos,neg) pairs, optional diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index a14404f5..8569cc65 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -150,9 +150,9 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, prior : (d x d) matrix, optional guess at a metric [default: covariance(X)] num_labeled : Not used - .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. + .. deprecated:: 0.4.0 + `num_labeled` was deprecated in version 0.4.0 and will + be removed in 0.5.0. num_constraints: int, optional number of constraints to generate weights : (m,) array of floats, optional diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 9a71e29b..908b8e28 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -399,9 +399,9 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, max_proj : int, optional convergence_threshold : float, optional num_labeled : Not used - .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. + .. deprecated:: 0.4.0 + `num_labeled` was deprecated in version 0.4.0 and will + be removed in 0.5.0. num_constraints: int, optional number of constraints to generate A0 : (d x d) matrix, optional diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 7fd48036..7c119f49 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -99,9 +99,9 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, use_cov : bool, optional controls prior matrix, will use the identity if use_cov=False num_labeled : Not used - .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. + .. deprecated:: 0.4.0 + `num_labeled` was deprecated in version 0.4.0 and will + be removed in 0.5.0. num_constraints : int, optional number of constraints to generate verbose : bool, optional From d6bd0d40e3ab191b894b0e6d025d123992c0d9f7 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 2 Jan 2019 16:17:47 +0100 Subject: [PATCH 05/14] Revert "Merge remote-tracking branch 'origin/master' into fix/remove_num_labeled_parameter" This reverts commit 944bb3ec80a6b27ae247465a9bdfa042ce411127, reversing changes made to 8727c44213b2c25c0aebfa6fa12b25c322abada7. --- doc/conf.py | 2 +- metric_learn/itml.py | 2 +- metric_learn/lmnn.py | 2 +- metric_learn/lsml.py | 2 +- metric_learn/mmc.py | 2 +- setup.py | 16 +++------------- test/metric_learn_test.py | 15 +++++++-------- 7 files changed, 15 insertions(+), 26 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index dff9ce47..1c8beeab 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -15,7 +15,7 @@ # General information about the project. project = u'metric-learn' -copyright = u'2015-2018, CJ Carey and Yuan Tang' +copyright = u'2015-2017, CJ Carey and Yuan Tang' author = u'CJ Carey and Yuan Tang' version = '0.4.0' release = '0.4.0' diff --git a/metric_learn/itml.py b/metric_learn/itml.py index adc018d6..3c3f3d43 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -197,7 +197,7 @@ def fit(self, X, y, random_state=np.random): warnings.warn('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.4 and will be' 'removed in 0.5', DeprecationWarning) - X, y = check_X_y(X, y, ensure_min_samples=2) + X, y = check_X_y(X, y) num_constraints = self.num_constraints if num_constraints is None: num_classes = len(np.unique(y)) diff --git a/metric_learn/lmnn.py b/metric_learn/lmnn.py index d1a41a33..f58bc00a 100644 --- a/metric_learn/lmnn.py +++ b/metric_learn/lmnn.py @@ -52,7 +52,7 @@ def transformer(self): class python_LMNN(_base_LMNN): def _process_inputs(self, X, labels): - self.X_ = check_array(X, dtype=float, ensure_min_samples=2) + self.X_ = check_array(X, dtype=float) num_pts, num_dims = self.X_.shape unique_labels, self.label_inds_ = np.unique(labels, return_inverse=True) if len(self.label_inds_) != num_pts: diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 8569cc65..1e8e2aff 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -184,7 +184,7 @@ def fit(self, X, y, random_state=np.random): warnings.warn('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.4 and will be' 'removed in 0.5', DeprecationWarning) - X, y = check_X_y(X, y, ensure_min_samples=2) + X, y = check_X_y(X, y) num_constraints = self.num_constraints if num_constraints is None: num_classes = len(np.unique(y)) diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 908b8e28..4e76a75d 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -439,7 +439,7 @@ def fit(self, X, y, random_state=np.random): warnings.warn('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.4 and will be' 'removed in 0.5', DeprecationWarning) - X, y = check_X_y(X, y, ensure_min_samples=2) + X, y = check_X_y(X, y) num_constraints = self.num_constraints if num_constraints is None: num_classes = len(np.unique(y)) diff --git a/setup.py b/setup.py index 168fbcb6..34fedd76 100755 --- a/setup.py +++ b/setup.py @@ -2,23 +2,17 @@ # -*- coding: utf-8 -*- from setuptools import setup import os -import io version = {} -with io.open(os.path.join('metric_learn', '_version.py')) as fp: +with open(os.path.join('metric_learn', '_version.py')) as fp: exec(fp.read(), version) -# Get the long description from README.md -with io.open('README.rst', encoding='utf-8') as f: - long_description = f.read() - setup(name='metric-learn', version=version['__version__'], description='Python implementations of metric learning algorithms', - long_description=long_description, author=['CJ Carey', 'Yuan Tang'], author_email='ccarey@cs.umass.edu', - url='http://github.com/metric-learn/metric-learn', + url='http://github.com/all-umass/metric-learn', license='MIT', classifiers=[ 'Development Status :: 4 - Beta', @@ -46,9 +40,5 @@ 'Information Theoretic Metric Learning', 'Sparse Determinant Metric Learning', 'Least Squares Metric Learning', - 'Neighborhood Components Analysis', - 'Local Fisher Discriminant Analysis', - 'Relative Components Analysis', - 'Mahalanobis Metric for Clustering', - 'Metric Learning for Kernel Regression' + 'Neighborhood Components Analysis' ]) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index e9390930..d0fdf4b3 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -44,7 +44,7 @@ def test_iris(self): csep = class_separation(cov.transform(), self.iris_labels) # deterministic result - self.assertAlmostEqual(csep, 0.72981476) + self.assertAlmostEqual(csep, 0.73068122) class TestLSML(MetricTestCase): @@ -166,7 +166,7 @@ def test_iris(self): nca = NCA(max_iter=(100000//n), num_dims=2, tol=1e-9) nca.fit(self.iris_points, self.iris_labels) csep = class_separation(nca.transform(), self.iris_labels) - self.assertLess(csep, 0.20) + self.assertLess(csep, 0.15) def test_finite_differences(self): """Test gradient of loss function @@ -352,17 +352,16 @@ def test_iris(self): # Full metric mmc = MMC(convergence_threshold=0.01) mmc.fit(self.iris_points, [a,b,c,d]) - expected = [[ 0.000514, 0.000868, -0.001195, -0.001703], - [ 0.000868, 0.001468, -0.002021, -0.002879], - [-0.001195, -0.002021, 0.002782, 0.003964], - [-0.001703, -0.002879, 0.003964, 0.005648]] + expected = [[+0.00046504, +0.00083371, -0.00111959, -0.00165265], + [+0.00083371, +0.00149466, -0.00200719, -0.00296284], + [-0.00111959, -0.00200719, +0.00269546, +0.00397881], + [-0.00165265, -0.00296284, +0.00397881, +0.00587320]] assert_array_almost_equal(expected, mmc.metric(), decimal=6) # Diagonal metric mmc = MMC(diagonal=True) mmc.fit(self.iris_points, [a,b,c,d]) - expected = [0, 0, 1.210220, 1.228596] - + expected = [0, 0, 1.21045968, 1.22552608] assert_array_almost_equal(np.diag(expected), mmc.metric(), decimal=6) # Supervised Full From 6691c1b39b2c781b22ff14ed892f65cd9ed86323 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 2 Jan 2019 16:17:54 +0100 Subject: [PATCH 06/14] Revert "Revert "MAINT: put deprecation for version 0.5.0"" This reverts commit bc1eb32f95d0aea84ba84d60e6d320d5e809e4fe. --- metric_learn/itml.py | 6 +++--- metric_learn/lsml.py | 6 +++--- metric_learn/mmc.py | 6 +++--- metric_learn/sdml.py | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 3c3f3d43..cfbbe9b1 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -159,9 +159,9 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, max_iter : int, optional convergence_threshold : float, optional num_labeled : Not used - .. deprecated:: 0.4.0 - `num_labeled` was deprecated in version 0.4.0 and will - be removed in 0.5.0. + .. deprecated:: 0.5.0 + `num_labeled` was deprecated in version 0.5.0 and will + be removed in 0.6.0. num_constraints: int, optional number of constraints to generate bounds : list (pos,neg) pairs, optional diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 1e8e2aff..c32a7cb2 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -150,9 +150,9 @@ def __init__(self, tol=1e-3, max_iter=1000, prior=None, prior : (d x d) matrix, optional guess at a metric [default: covariance(X)] num_labeled : Not used - .. deprecated:: 0.4.0 - `num_labeled` was deprecated in version 0.4.0 and will - be removed in 0.5.0. + .. deprecated:: 0.5.0 + `num_labeled` was deprecated in version 0.5.0 and will + be removed in 0.6.0. num_constraints: int, optional number of constraints to generate weights : (m,) array of floats, optional diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 4e76a75d..490e55b1 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -399,9 +399,9 @@ def __init__(self, max_iter=100, max_proj=10000, convergence_threshold=1e-6, max_proj : int, optional convergence_threshold : float, optional num_labeled : Not used - .. deprecated:: 0.4.0 - `num_labeled` was deprecated in version 0.4.0 and will - be removed in 0.5.0. + .. deprecated:: 0.5.0 + `num_labeled` was deprecated in version 0.5.0 and will + be removed in 0.6.0. num_constraints: int, optional number of constraints to generate A0 : (d x d) matrix, optional diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 7c119f49..7fd48036 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -99,9 +99,9 @@ def __init__(self, balance_param=0.5, sparsity_param=0.01, use_cov=True, use_cov : bool, optional controls prior matrix, will use the identity if use_cov=False num_labeled : Not used - .. deprecated:: 0.4.0 - `num_labeled` was deprecated in version 0.4.0 and will - be removed in 0.5.0. + .. deprecated:: 0.5.0 + `num_labeled` was deprecated in version 0.5.0 and will + be removed in 0.6.0. num_constraints : int, optional number of constraints to generate verbose : bool, optional From 83fef14418b6b1d7f9288a5abdfae55719524122 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 2 Jan 2019 16:33:10 +0100 Subject: [PATCH 07/14] FIX string representation test wrongly merged --- test/test_base_metric.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test/test_base_metric.py b/test/test_base_metric.py index 73c0e043..fdea2949 100644 --- a/test/test_base_metric.py +++ b/test/test_base_metric.py @@ -52,10 +52,9 @@ def test_sdml(self): "SDML(balance_param=0.5, preprocessor=None, " "sparsity_param=0.01, use_cov=True,\n verbose=False)") self.assertEqual(str(metric_learn.SDML_Supervised()), """ -SDML_Supervised(balance_param=0.5, num_constraints=None, num_labeled=inf, - num_labeled='deprecated', preprocessor=None, sparsity_param=0.01, - use_cov=True, - verbose=False) +SDML_Supervised(balance_param=0.5, num_constraints=None, + num_labeled='deprecated', preprocessor=None, sparsity_param=0.01, + use_cov=True, verbose=False) """.strip('\n')) def test_rca(self): From d177a0264ba10c3a082ad1c53ca17f0d57e0b5d5 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 2 Jan 2019 16:48:29 +0100 Subject: [PATCH 08/14] git revert d6bd0d4 --- doc/conf.py | 2 +- metric_learn/itml.py | 2 +- setup.py | 16 ++++-- test/metric_learn_test.py | 106 +++++++++++++++++++++++++++++++++++++- 4 files changed, 120 insertions(+), 6 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index efa9c6ee..ed476edd 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -16,7 +16,7 @@ # General information about the project. project = u'metric-learn' -copyright = u'2015-2017, CJ Carey and Yuan Tang' +copyright = u'2015-2018, CJ Carey and Yuan Tang' author = u'CJ Carey and Yuan Tang' version = '0.4.0' release = '0.4.0' diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 69cc0b93..85e50ef3 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -229,7 +229,7 @@ def fit(self, X, y, random_state=np.random): warnings.warn('"num_labeled" parameter is not used.' ' It has been deprecated in version 0.4 and will be' 'removed in 0.5', DeprecationWarning) - X, y = self._prepare_inputs(X, y) + X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: num_classes = len(np.unique(y)) diff --git a/setup.py b/setup.py index 34fedd76..168fbcb6 100755 --- a/setup.py +++ b/setup.py @@ -2,17 +2,23 @@ # -*- coding: utf-8 -*- from setuptools import setup import os +import io version = {} -with open(os.path.join('metric_learn', '_version.py')) as fp: +with io.open(os.path.join('metric_learn', '_version.py')) as fp: exec(fp.read(), version) +# Get the long description from README.md +with io.open('README.rst', encoding='utf-8') as f: + long_description = f.read() + setup(name='metric-learn', version=version['__version__'], description='Python implementations of metric learning algorithms', + long_description=long_description, author=['CJ Carey', 'Yuan Tang'], author_email='ccarey@cs.umass.edu', - url='http://github.com/all-umass/metric-learn', + url='http://github.com/metric-learn/metric-learn', license='MIT', classifiers=[ 'Development Status :: 4 - Beta', @@ -40,5 +46,9 @@ 'Information Theoretic Metric Learning', 'Sparse Determinant Metric Learning', 'Least Squares Metric Learning', - 'Neighborhood Components Analysis' + 'Neighborhood Components Analysis', + 'Local Fisher Discriminant Analysis', + 'Relative Components Analysis', + 'Mahalanobis Metric for Clustering', + 'Metric Learning for Kernel Regression' ]) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 6eb7cdbd..22599905 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -46,7 +46,7 @@ def test_iris(self): csep = class_separation(cov.transform(self.iris_points), self.iris_labels) # deterministic result - self.assertAlmostEqual(csep, 0.73068122) + self.assertAlmostEqual(csep, 0.72981476) class TestLSML(MetricTestCase): @@ -171,6 +171,110 @@ def test_iris(self): csep = class_separation(nca.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.20) + def test_finite_differences(self): + """Test gradient of loss function + + Assert that the gradient is almost equal to its finite differences + approximation. + """ + # Initialize the transformation `M`, as well as `X` and `y` and `NCA` + X, y = make_classification() + M = np.random.randn(np.random.randint(1, X.shape[1] + 1), X.shape[1]) + mask = y[:, np.newaxis] == y[np.newaxis, :] + nca = NCA() + nca.n_iter_ = 0 + + def fun(M): + return nca._loss_grad_lbfgs(M, X, mask)[0] + + def grad(M): + return nca._loss_grad_lbfgs(M, X, mask)[1].ravel() + + # compute relative error + rel_diff = check_grad(fun, grad, M.ravel()) / np.linalg.norm(grad(M)) + np.testing.assert_almost_equal(rel_diff, 0., decimal=6) + + def test_simple_example(self): + """Test on a simple example. + + Puts four points in the input space where the opposite labels points are + next to each other. After transform the same labels points should be next + to each other. + + """ + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + nca = NCA(num_dims=2,) + nca.fit(X, y) + Xansformed = nca.transform(X) + np.testing.assert_equal(pairwise_distances(Xansformed).argsort()[:, 1], + np.array([2, 3, 0, 1])) + + def test_deprecation(self): + # test that the right deprecation message is thrown. + # TODO: remove in v.0.5 + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + nca = NCA(num_dims=2, learning_rate=0.01) + msg = ('"learning_rate" parameter is not used.' + ' It has been deprecated in version 0.4 and will be' + 'removed in 0.5') + assert_warns_message(DeprecationWarning, msg, nca.fit, X, y) + + def test_singleton_class(self): + X = self.iris_points + y = self.iris_labels + + # one singleton class: test fitting works + singleton_class = 1 + ind_singleton, = np.where(y == singleton_class) + y[ind_singleton] = 2 + y[ind_singleton[0]] = singleton_class + + nca = NCA(max_iter=30) + nca.fit(X, y) + + # One non-singleton class: test fitting works + ind_1, = np.where(y == 1) + ind_2, = np.where(y == 2) + y[ind_1] = 0 + y[ind_1[0]] = 1 + y[ind_2] = 0 + y[ind_2[0]] = 2 + + nca = NCA(max_iter=30) + nca.fit(X, y) + + # Only singleton classes: test fitting does nothing (the gradient + # must be null in this case, so the final matrix must stay like + # the initialization) + ind_0, = np.where(y == 0) + ind_1, = np.where(y == 1) + ind_2, = np.where(y == 2) + X = X[[ind_0[0], ind_1[0], ind_2[0]]] + y = y[[ind_0[0], ind_1[0], ind_2[0]]] + + EPS = np.finfo(float).eps + A = np.zeros((X.shape[1], X.shape[1])) + np.fill_diagonal(A, + 1. / (np.maximum(X.max(axis=0) - X.min(axis=0), EPS))) + nca = NCA(max_iter=30, num_dims=X.shape[1]) + nca.fit(X, y) + assert_array_equal(nca.A_, A) + + def test_one_class(self): + # if there is only one class the gradient is null, so the final matrix + # must stay like the initialization + X = self.iris_points[self.iris_labels == 0] + y = self.iris_labels[self.iris_labels == 0] + EPS = np.finfo(float).eps + A = np.zeros((X.shape[1], X.shape[1])) + np.fill_diagonal(A, + 1. / (np.maximum(X.max(axis=0) - X.min(axis=0), EPS))) + nca = NCA(max_iter=30, num_dims=X.shape[1]) + nca.fit(X, y) + assert_array_equal(nca.A_, A) + class TestLFDA(MetricTestCase): def test_iris(self): From 3169b74775251879e9ffef096528353d64eb1e05 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 2 Jan 2019 16:55:31 +0100 Subject: [PATCH 09/14] STY fix pep8 errors --- metric_learn/constraints.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metric_learn/constraints.py b/metric_learn/constraints.py index bb24a33d..c4ddcae8 100644 --- a/metric_learn/constraints.py +++ b/metric_learn/constraints.py @@ -89,6 +89,7 @@ def chunks(self, num_chunks=100, chunk_size=2, random_state=np.random): (num_chunks, chunk_size)) return chunks + def wrap_pairs(X, constraints): a = np.array(constraints[0]) b = np.array(constraints[1]) @@ -97,4 +98,4 @@ def wrap_pairs(X, constraints): constraints = np.vstack((np.column_stack((a, b)), np.column_stack((c, d)))) y = np.vstack([np.ones((len(a), 1)), - np.ones((len(c), 1))]) pairs = X[constraints] - return pairs, y \ No newline at end of file + return pairs, y From 33bce89af9a45d16a7d53ac60dbee65dc06df258 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 2 Jan 2019 16:58:44 +0100 Subject: [PATCH 10/14] STY: fix docstring indentation --- metric_learn/itml.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 85e50ef3..7a6bb731 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -189,8 +189,8 @@ def __init__(self, gamma=1., max_iter=1000, convergence_threshold=1e-3, convergence_threshold : float, optional num_labeled : Not used .. deprecated:: 0.5.0 - `num_labeled` was deprecated in version 0.5.0 and will - be removed in 0.6.0. + `num_labeled` was deprecated in version 0.5.0 and will + be removed in 0.6.0. num_constraints: int, optional number of constraints to generate bounds : list (pos,neg) pairs, optional From 73a13126be1a7dee4bcd0100a3b91eecb3183624 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 2 Jan 2019 17:29:49 +0100 Subject: [PATCH 11/14] FIX remove tests from NCA that are dealt with in #143 --- test/metric_learn_test.py | 93 --------------------------------------- 1 file changed, 93 deletions(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 22599905..3f7238a1 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -171,45 +171,6 @@ def test_iris(self): csep = class_separation(nca.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.20) - def test_finite_differences(self): - """Test gradient of loss function - - Assert that the gradient is almost equal to its finite differences - approximation. - """ - # Initialize the transformation `M`, as well as `X` and `y` and `NCA` - X, y = make_classification() - M = np.random.randn(np.random.randint(1, X.shape[1] + 1), X.shape[1]) - mask = y[:, np.newaxis] == y[np.newaxis, :] - nca = NCA() - nca.n_iter_ = 0 - - def fun(M): - return nca._loss_grad_lbfgs(M, X, mask)[0] - - def grad(M): - return nca._loss_grad_lbfgs(M, X, mask)[1].ravel() - - # compute relative error - rel_diff = check_grad(fun, grad, M.ravel()) / np.linalg.norm(grad(M)) - np.testing.assert_almost_equal(rel_diff, 0., decimal=6) - - def test_simple_example(self): - """Test on a simple example. - - Puts four points in the input space where the opposite labels points are - next to each other. After transform the same labels points should be next - to each other. - - """ - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - nca = NCA(num_dims=2,) - nca.fit(X, y) - Xansformed = nca.transform(X) - np.testing.assert_equal(pairwise_distances(Xansformed).argsort()[:, 1], - np.array([2, 3, 0, 1])) - def test_deprecation(self): # test that the right deprecation message is thrown. # TODO: remove in v.0.5 @@ -221,60 +182,6 @@ def test_deprecation(self): 'removed in 0.5') assert_warns_message(DeprecationWarning, msg, nca.fit, X, y) - def test_singleton_class(self): - X = self.iris_points - y = self.iris_labels - - # one singleton class: test fitting works - singleton_class = 1 - ind_singleton, = np.where(y == singleton_class) - y[ind_singleton] = 2 - y[ind_singleton[0]] = singleton_class - - nca = NCA(max_iter=30) - nca.fit(X, y) - - # One non-singleton class: test fitting works - ind_1, = np.where(y == 1) - ind_2, = np.where(y == 2) - y[ind_1] = 0 - y[ind_1[0]] = 1 - y[ind_2] = 0 - y[ind_2[0]] = 2 - - nca = NCA(max_iter=30) - nca.fit(X, y) - - # Only singleton classes: test fitting does nothing (the gradient - # must be null in this case, so the final matrix must stay like - # the initialization) - ind_0, = np.where(y == 0) - ind_1, = np.where(y == 1) - ind_2, = np.where(y == 2) - X = X[[ind_0[0], ind_1[0], ind_2[0]]] - y = y[[ind_0[0], ind_1[0], ind_2[0]]] - - EPS = np.finfo(float).eps - A = np.zeros((X.shape[1], X.shape[1])) - np.fill_diagonal(A, - 1. / (np.maximum(X.max(axis=0) - X.min(axis=0), EPS))) - nca = NCA(max_iter=30, num_dims=X.shape[1]) - nca.fit(X, y) - assert_array_equal(nca.A_, A) - - def test_one_class(self): - # if there is only one class the gradient is null, so the final matrix - # must stay like the initialization - X = self.iris_points[self.iris_labels == 0] - y = self.iris_labels[self.iris_labels == 0] - EPS = np.finfo(float).eps - A = np.zeros((X.shape[1], X.shape[1])) - np.fill_diagonal(A, - 1. / (np.maximum(X.max(axis=0) - X.min(axis=0), EPS))) - nca = NCA(max_iter=30, num_dims=X.shape[1]) - nca.fit(X, y) - assert_array_equal(nca.A_, A) - class TestLFDA(MetricTestCase): def test_iris(self): From 3a200cdd11cee20918eda269ee78a7a1004e2b0c Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 2 Jan 2019 17:32:42 +0100 Subject: [PATCH 12/14] FIX remove nca deprecation test because we remove totally learning rate in the merge #139 --- test/metric_learn_test.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index 3f7238a1..daf87fc6 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -171,17 +171,6 @@ def test_iris(self): csep = class_separation(nca.transform(self.iris_points), self.iris_labels) self.assertLess(csep, 0.20) - def test_deprecation(self): - # test that the right deprecation message is thrown. - # TODO: remove in v.0.5 - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - nca = NCA(num_dims=2, learning_rate=0.01) - msg = ('"learning_rate" parameter is not used.' - ' It has been deprecated in version 0.4 and will be' - 'removed in 0.5') - assert_warns_message(DeprecationWarning, msg, nca.fit, X, y) - class TestLFDA(MetricTestCase): def test_iris(self): From c7d56d13ac418bfd44af6d07879fe5ca697c804e Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 2 Jan 2019 17:40:12 +0100 Subject: [PATCH 13/14] FIX update version --- metric_learn/itml.py | 4 ++-- metric_learn/lsml.py | 4 ++-- metric_learn/mmc.py | 4 ++-- metric_learn/sdml.py | 4 ++-- test/metric_learn_test.py | 16 ++++++++-------- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/metric_learn/itml.py b/metric_learn/itml.py index 7a6bb731..8a251fe0 100644 --- a/metric_learn/itml.py +++ b/metric_learn/itml.py @@ -227,8 +227,8 @@ def fit(self, X, y, random_state=np.random): """ if self.num_labeled != 'deprecated': warnings.warn('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.4 and will be' - 'removed in 0.5', DeprecationWarning) + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0', DeprecationWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: diff --git a/metric_learn/lsml.py b/metric_learn/lsml.py index 35288afc..9090a431 100644 --- a/metric_learn/lsml.py +++ b/metric_learn/lsml.py @@ -226,8 +226,8 @@ def fit(self, X, y, random_state=np.random): """ if self.num_labeled != 'deprecated': warnings.warn('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.4 and will be' - 'removed in 0.5', DeprecationWarning) + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0', DeprecationWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: diff --git a/metric_learn/mmc.py b/metric_learn/mmc.py index 237d4e99..6d929d6e 100644 --- a/metric_learn/mmc.py +++ b/metric_learn/mmc.py @@ -446,8 +446,8 @@ def fit(self, X, y, random_state=np.random): """ if self.num_labeled != 'deprecated': warnings.warn('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.4 and will be' - 'removed in 0.5', DeprecationWarning) + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0', DeprecationWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: diff --git a/metric_learn/sdml.py b/metric_learn/sdml.py index 721dfaff..b1421736 100644 --- a/metric_learn/sdml.py +++ b/metric_learn/sdml.py @@ -167,8 +167,8 @@ def fit(self, X, y, random_state=np.random): """ if self.num_labeled != 'deprecated': warnings.warn('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.4 and will be' - 'removed in 0.5', DeprecationWarning) + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0', DeprecationWarning) X, y = self._prepare_inputs(X, y, ensure_min_samples=2) num_constraints = self.num_constraints if num_constraints is None: diff --git a/test/metric_learn_test.py b/test/metric_learn_test.py index daf87fc6..85cfcf77 100644 --- a/test/metric_learn_test.py +++ b/test/metric_learn_test.py @@ -64,8 +64,8 @@ def test_deprecation(self): y = np.array([1, 0, 1, 0]) lsml_supervised = LSML_Supervised(num_labeled=np.inf) msg = ('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.4 and will be' - 'removed in 0.5') + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0') assert_warns_message(DeprecationWarning, msg, lsml_supervised.fit, X, y) @@ -84,8 +84,8 @@ def test_deprecation(self): y = np.array([1, 0, 1, 0]) itml_supervised = ITML_Supervised(num_labeled=np.inf) msg = ('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.4 and will be' - 'removed in 0.5') + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0') assert_warns_message(DeprecationWarning, msg, itml_supervised.fit, X, y) @@ -150,8 +150,8 @@ def test_deprecation(self): y = np.array([1, 0, 1, 0]) sdml_supervised = SDML_Supervised(num_labeled=np.inf) msg = ('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.4 and will be' - 'removed in 0.5') + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0') assert_warns_message(DeprecationWarning, msg, sdml_supervised.fit, X, y) @@ -282,8 +282,8 @@ def test_deprecation(self): y = np.array([1, 0, 1, 0]) mmc_supervised = MMC_Supervised(num_labeled=np.inf) msg = ('"num_labeled" parameter is not used.' - ' It has been deprecated in version 0.4 and will be' - 'removed in 0.5') + ' It has been deprecated in version 0.5.0 and will be' + 'removed in 0.6.0') assert_warns_message(DeprecationWarning, msg, mmc_supervised.fit, X, y) From 38fb0211c64505dd708e28d980e519ca806cbc35 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Wed, 2 Jan 2019 17:52:47 +0100 Subject: [PATCH 14/14] Remove the use of random_subset --- test/test_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index de59e9ff..39c718ac 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -58,8 +58,7 @@ def build_data(): input_data, labels = load_iris(return_X_y=True) X, y = shuffle(input_data, labels, random_state=SEED) num_constraints = 50 - constraints = ( - Constraints.random_subset(y, random_state=check_random_state(SEED))) + constraints = Constraints(y) pairs = ( constraints .positive_negative_pairs(num_constraints, same_length=True,