scikit-learn-contrib · perimosocordiae · Mar 4, 2020 · Feb 13, 2020 · Feb 17, 2020 · Feb 18, 2020
diff --git a/doc/metric_learn.rst b/doc/metric_learn.rst
@@ -14,6 +14,7 @@ Base Classes
     metric_learn.Constraints
     metric_learn.base_metric.BaseMetricLearner
     metric_learn.base_metric._PairsClassifierMixin
+    metric_learn.base_metric._TripletsClassifierMixin
     metric_learn.base_metric._QuadrupletsClassifierMixin
 
 Supervised Learning Algorithms

diff --git a/doc/weakly_supervised.rst b/doc/weakly_supervised.rst
@@ -592,14 +592,122 @@ points, while constrains the sum of distances between dissimilar points:
         -with-side-information.pdf>`_. NIPS 2002
   .. [2] Adapted from Matlab code http://www.cs.cmu.edu/%7Eepxing/papers/Old_papers/code_Metric_online.tar.gz
 
+.. _learning_on_triplets:
+
+Learning on triplets
+====================
+
+Some metric learning algorithms learn on triplets of samples. In this case,
+one should provide the algorithm with `n_samples` triplets of points. The
+semantics of each triplet is that the first point should be closer to the
+second point than to the third one.
+
+Fitting
+-------
+Here is an example for fitting on triplets (see :ref:`fit_ws` for more
+details on the input data format and how to fit, in the general case of
+learning on tuples).
+
+>>> from metric_learn import SCML
+>>> triplets = np.array([[[1.2, 3.2], [2.3, 5.5], [2.1, 0.6]],
+...                      [[4.5, 2.3], [2.1, 2.3], [7.3, 3.4]]])
+>>> scml = SCML(random_state=42)
+>>> scml.fit(triplets)
+SCML(beta=1e-5, B=None, max_iter=100000, verbose=False,
+    preprocessor=None, random_state=42)
+
+Or alternatively (using a preprocessor):
+
+>>> X = np.array([[1.2, 3.2],
+...               [2.3, 5.5],
+...               [2.1, 0.6],
+...               [4.5, 2.3],
+...               [2.1, 2.3],
+...               [7.3, 3.4]])
+>>> triplets_indices = np.array([[0, 1, 2], [3, 4, 5]])
+>>> scml = SCML(preprocessor=X, random_state=42)
+>>> scml.fit(triplets_indices)
+SCML(beta=1e-5, B=None, max_iter=100000, verbose=False,
+   preprocessor=array([[1.2, 3.2],
+       [2.3, 5.5],
+       [2.1, 0.6],
+       [4.5, 2.3],
+       [2.1, 2.3],
+       [7.3, 3.4]]),
+    random_state=42)
+
+
+Here, we want to learn a metric that, for each of the two
+`triplets`, will make the first point closer to the
+second point than to the third one.
+
+.. _triplets_predicting:
+
+Prediction
+----------
+
+When a triplets learner is fitted, it is also able to predict, for an
+upcoming triplet, whether the first point is closer to the second point 
+than to the third one (+1), or not (-1).
+
+>>> triplets_test = np.array(
+... [[[5.6, 5.3], [2.2, 2.1], [1.2, 3.4]],
+...  [[6.0, 4.2], [4.3, 1.2], [0.1, 7.8]]])
+>>> scml.predict(triplets_test)
+array([-1.,  1.])
+
+.. _triplets_scoring:
+
+Scoring
+-------
+
+Triplet metric learners can also return a `decision_function` for a set of triplets,
+which corresponds to the distance between the first and last points minus the distance
+between the first two points of the triplet (the higher the value, the more similar
+the first point is to the second point compared to the last one). This "score"
+can be interpreted as a measure of the likelihood of a +1 prediction for this
+triplet.
+
+>>> scml.decision_function(triplets_test)
+array([-1.75700306,  4.98982131])
+
+In the above example, for the first triplet in `triplets_test`, the first 
+point is predicted less similar to the second point than to the last point
+(they are further away in the transformed space).
+
+Unlike pairs learners, triplets learners do not accept a `y` when fitting: we
+assume that the ordering of points within triplets is such that the training triplets
+are all positive. Therefore, it is not possible to use scikit-learn scoring functions
+(such as 'f1_score') for triplets learners.
+
+However, triplets learners do have a default scoring function, which will
+basically return the accuracy score on a given test set, i.e. the proportion
+of triplets that have the right predicted ordering.
+
+>>> scml.score(triplets_test)
+0.5
+
+.. note::
+   See :ref:`fit_ws` for more details on metric learner functions that are
+   not specific to learning on triplets, like `transform`, `score_pairs`,
+   `get_metric` and `get_mahalanobis_matrix`.
+
+
+
+
+Algorithms
+----------
+
 
 .. _learning_on_quadruplets:
 
 Learning on quadruplets
 =======================
 
 Some metric learning algorithms learn on quadruplets of samples. In this case,
-one should provide the algorithm with `n_samples` quadruplets of points. Th
+one should provide the algorithm with `n_samples` quadruplets of points. The
 semantic of each quadruplet is that the first two points should be closer
 together than the last two points.
 
@@ -666,14 +774,12 @@ array([-1.,  1.])
 Scoring
 -------
 
-Quadruplet metric learners can also
-return a `decision_function` for a set of pairs. This is basically the "score"
-which sign will be taken to find the prediction for the pair, which
-corresponds to the difference between the distance between the two last points,
-and the distance between the two last points of the quadruplet (higher
-score means the two last points are more likely to be more dissimilar than
-the two first points (i.e. more likely to have a +1 prediction since it's
-the right ordering)).
+Quadruplet metric learners can also return a `decision_function` for a set of
+quadruplets, which corresponds to the distance between the second pair of points minus
+the distance between the first pair of points of the quadruplet (the higher the value,
+the more similar the first pair is compared to the last pair).
+This "score" can be interpreted as a measure of the likelihood of a +1 prediction
+for this quadruplet.
 
 >>> lsml.decision_function(quadruplets_test)
 array([-1.75700306,  4.98982131])
@@ -682,17 +788,10 @@ In the above example, for the first quadruplet in `quadruplets_test`, the
 two first points are predicted less similar than the two last points (they
 are further away in the transformed space).
 
-Unlike for pairs learners, quadruplets learners don't allow to give a `y`
-when fitting, which does not allow to use scikit-learn scoring functions
-like:
-
->>> from sklearn.model_selection import cross_val_score
->>> cross_val_score(lsml, quadruplets, scoring='f1_score')  # this won't work
-
-(This is actually intentional, for more details
-about that, see
-`this comment <https://github.com/scikit-learn-contrib/metric-learn/pull/168#pullrequestreview-203730742>`_
-on github.)
+Like triplet learners, quadruplets learners do not accept a `y` when fitting: we
+assume that the ordering of points within quadruplets is such that the training
+quadruplets are all positive. Therefore, it is not possible to use scikit-learn scoring
+functions (such as 'f1_score') for quadruplets learners.
 
 However, quadruplets learners do have a default scoring function, which will
 basically return the accuracy score on a given test set, i.e. the proportion

diff --git a/metric_learn/base_metric.py b/metric_learn/base_metric.py
@@ -589,6 +589,90 @@ def _validate_calibration_params(strategy='accuracy', min_rate=None,
                          'Got {} instead.'.format(type(beta)))
 
 
+class _TripletsClassifierMixin(BaseMetricLearner):
+  """Base class for triplets learners.
+  """
+
+  _tuple_size = 3  # number of points in a tuple, 3 for triplets
+
+  def predict(self, triplets):
+    """Predicts the ordering between sample distances in input triplets.
+
+    For each triplet, returns 1 if the first element is closer to the second
+    than to the last, -1 if not (and 0 if the decision function is exactly 0).
+
+    Parameters
+    ----------
+    triplets : array-like, shape=(n_triplets, 3, n_features) or (n_triplets, 3)
+      3D array of triplets to predict, with each row corresponding to three
+      points, or 2D array of indices of triplets if the metric learner
+      uses a preprocessor.
+
+    Returns
+    -------
+    prediction : `numpy.ndarray` of floats, shape=(n_constraints,)
+      Predictions of the ordering of pairs, for each triplet.
+    """
+    return np.sign(self.decision_function(triplets))
+
+  def decision_function(self, triplets):
+    """Predicts differences between sample distances in input triplets.
+
+    For each triplet (X_a, X_b, X_c) in the samples, computes the learned
+    distance of the second pair (X_a, X_c) minus the
+    learned distance of the first pair (X_a, X_b). The higher it is, the more
+    probable it is that the pairs in the triplets are presented in the right
+    order, i.e. that the label of the triplet is 1. The lower it is, the more
+    probable it is that the label of the triplet is -1.
+
+    Parameters
+    ----------
+    triplets : array-like, shape=(n_triplets, 3, n_features) or \
+                  (n_triplets, 3)
+      3D array of triplets to predict, with each row corresponding to three
+      points, or 2D array of indices of triplets if the metric learner
+      uses a preprocessor.
+
+    Returns
+    -------
+    decision_function : `numpy.ndarray` of floats, shape=(n_constraints,)
+      Metric differences.
+    """
+    check_is_fitted(self, 'preprocessor_')
+    triplets = check_input(triplets, type_of_inputs='tuples',
+                           preprocessor=self.preprocessor_,
+                           estimator=self, tuple_size=self._tuple_size)
+    return (self.score_pairs(triplets[:, [0, 2]]) -
+            self.score_pairs(triplets[:, :2]))
+
+  def score(self, triplets):
+    """Computes score on input triplets.
+
+    Returns the accuracy score of the following classification task: a triplet
+    (X_a, X_b, X_c) is correctly classified if the predicted similarity between
+    the first pair (X_a, X_b) is higher than that of the second pair (X_a, X_c).
+
+    Parameters
+    ----------
+    triplets : array-like, shape=(n_triplets, 3, n_features) or \
+                  (n_triplets, 3)
+      3D array of triplets to score, with each row corresponding to three
+      points, or 2D array of indices of triplets if the metric learner
+      uses a preprocessor.
+
+    Returns
+    -------
+    score : float
+      The triplets score.
+    """
+    # Since the prediction is a vector of values in {-1, +1}, we need to
+    # rescale them to {0, 1} to compute the accuracy using the mean: then 1
+    # means a correctly classified result (pairs are in the right order), and
+    # 0 an incorrectly classified result (pairs are in the wrong order). A
+    # prediction of 0 (np.sign of an exactly-zero score) counts as 0.5.
+    return self.predict(triplets).mean() / 2 + 0.5
+
+
 class _QuadrupletsClassifierMixin(BaseMetricLearner):
   """Base class for quadruplets learners.
   """
@@ -614,10 +698,6 @@ def predict(self, quadruplets):
     prediction : `numpy.ndarray` of floats, shape=(n_constraints,)
       Predictions of the ordering of pairs, for each quadruplet.
     """
-    check_is_fitted(self, 'preprocessor_')
-    quadruplets = check_input(quadruplets, type_of_inputs='tuples',
-                              preprocessor=self.preprocessor_,
-                              estimator=self, tuple_size=self._tuple_size)
     return np.sign(self.decision_function(quadruplets))
 
   def decision_function(self, quadruplets):