Resolve SentenceTransformer resetting devices after moving a SetFitModel (#283)

tomaarsen · web-flow · commit e1a5375aa89c · 2023-02-06T15:29:01.000+01:00
* Update the SentenceTransformer target device when moving SetFitModel

* Add regression test for moving SetFitModel

* Parametrize regression test for different head types
diff --git a/src/setfit/modeling.py b/src/setfit/modeling.py
@@ -445,6 +445,9 @@ def to(self, device: Union[str, torch.device]) -> "SetFitModel":
         Returns:
             SetFitModel: Returns the original model, but now on the desired device.
         """
+        # Note that we must also set _target_device, or any SentenceTransformer.fit() call will reset
+        # the body location
+        self.model_body._target_device = device if isinstance(device, torch.device) else torch.device(device)
         self.model_body = self.model_body.to(device)
 
         if self.has_differentiable_head:
diff --git a/tests/test_modeling.py b/tests/test_modeling.py
@@ -1,6 +1,7 @@
 from unittest import TestCase
 
 import numpy as np
+import pytest
 import torch
 from datasets import load_dataset
 from sentence_transformers import SentenceTransformer
@@ -12,6 +13,9 @@
 from setfit.modeling import MODEL_HEAD_NAME, sentence_pairs_generation, sentence_pairs_generation_multilabel
 
 
+torch_cuda_available = pytest.mark.skipif(not torch.cuda.is_available(), reason="PyTorch must be compiled with CUDA")
+
+
 def test_sentence_pairs_generation():
     sentences = np.array(["sent 1", "sent 2", "sent 3"])
     labels = np.array(["label 1", "label 2", "label 3"])
@@ -255,3 +259,20 @@ def test_to_torch_head():
         model.to(device)
         assert model.model_body.device == device
         assert model.model_head.device == device
+
+
+@torch_cuda_available
+@pytest.mark.parametrize("use_differentiable_head", [True, False])
+def test_to_sentence_transformer_device_reset(use_differentiable_head):
+    # This should initialize SentenceTransformer() without a specific device
+    # which sets the model to CUDA iff CUDA is available.
+    model = SetFitModel.from_pretrained(
+        "sentence-transformers/paraphrase-albert-small-v2", use_differentiable_head=use_differentiable_head
+    )
+    # If we move the entire model to CPU, we expect it to stay on CPU forever,
+    # even after encoding or fitting.
+    model.to("cpu")
+    assert model.model_body.device == torch.device("cpu")
+
+    model.model_body.encode("This is a test sample to encode")
+    assert model.model_body.device == torch.device("cpu")