Commit 7b80c5b

csauper authored and facebook-github-bot committed
stop stripping first character from output string (#1351)
Summary:
Pull Request resolved: #1351

Previously the first character of the target was stripped as an assumed SOS token, but that does not actually seem to be the case with current LLMs. Keep all tokens; callers can instead exclude special tokens explicitly via the new skip_tokens argument.

Reviewed By: craymichael

Differential Revision: D62775617

fbshipit-source-id: 9a0edd83318ac39654a1526020111991005fa4f3
1 parent 70619a6 commit 7b80c5b

3 files changed: +165 −22 lines
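To make the change concrete, here is a minimal usage sketch of the skip_tokens argument introduced below. It is not part of this commit: the gpt2 checkpoint, the prompt, and the target string are illustrative assumptions, and it presumes FeatureAblation, LLMAttribution, and TextTokenInput are importable from captum.attr as in recent Captum releases.

    from transformers import AutoModelForCausalLM, AutoTokenizer
    from captum.attr import FeatureAblation, LLMAttribution, TextTokenInput

    # Illustrative setup: any causal LM should do; gpt2 is just a small example choice.
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    model = AutoModelForCausalLM.from_pretrained("gpt2")

    llm_attr = LLMAttribution(FeatureAblation(model), tokenizer)
    inp = TextTokenInput("The capital of France is", tokenizer)

    # Before this commit the first target token was silently dropped as an
    # assumed SOS token. Now every target token is kept; special tokens must
    # be excluded explicitly via skip_tokens (token ids or token strings).
    res = llm_attr.attribute(
        inp,
        target=" Paris",
        skip_tokens=[tokenizer.eos_token_id],  # gpt2 has no SOS; shown only for illustration
    )
    print(res.seq_attr, res.output_tokens)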

captum/attr/_core/llm_attr.py

Lines changed: 46 additions & 12 deletions
@@ -383,6 +383,7 @@ def attribute(
         self,
         inp: InterpretableInput,
         target: Union[str, torch.Tensor, None] = None,
+        skip_tokens: Union[List[int], List[str], None] = None,
         num_trials: int = 1,
         gen_args: Optional[Dict[str, Any]] = None,
         use_cached_outputs: bool = True,
@@ -397,6 +398,12 @@ def attribute(
                 which attributions are computed. If None, it uses the model
                 to generate the target based on the input and gen_args.
                 Default: None
+            skip_tokens (List[int] or List[str], optional): the tokens to skip in the
+                the output's interpretable representation. Use this argument to
+                define uninterested tokens, commonly like special tokens, e.g.,
+                sos, and unk. It can be a list of strings of the tokens or a list
+                of integers of the token ids.
+                Default: None
             num_trials (int, optional): number of trials to run. Return is the average
                 attribibutions over all the trials.
                 Defaults: 1.
@@ -433,13 +440,23 @@ def attribute(
             target_tokens = output_tokens[0][model_inp.size(1) :]
         else:
             assert gen_args is None, "gen_args must be None when target is given"
+            # Encode skip tokens
+            if skip_tokens:
+                if isinstance(skip_tokens[0], str):
+                    skip_tokens = cast(List[str], skip_tokens)
+                    skip_tokens = self.tokenizer.convert_tokens_to_ids(skip_tokens)
+            else:
+                skip_tokens = []

-            if type(target) is str:
-                # exclude sos
-                target_tokens = self.tokenizer.encode(target)[1:]
-                target_tokens = torch.tensor(target_tokens)
-            elif type(target) is torch.Tensor:
-                target_tokens = target
+            if isinstance(target, str):
+                encoded = self.tokenizer.encode(target)
+                target_tokens = torch.tensor(
+                    [token for token in encoded if token not in skip_tokens]
+                )
+            elif isinstance(target, torch.Tensor):
+                target_tokens = target[
+                    ~torch.isin(target, torch.tensor(skip_tokens, device=target.device))
+                ]
             else:
                 raise TypeError(
                     "target must either be str or Tensor, but the type of target is "
@@ -562,6 +579,7 @@ def attribute(
         self,
         inp: InterpretableInput,
         target: Union[str, torch.Tensor, None] = None,
+        skip_tokens: Union[List[int], List[str], None] = None,
         gen_args: Optional[Dict[str, Any]] = None,
         **kwargs: Any,
     ) -> LLMAttributionResult:
@@ -572,6 +590,12 @@ def attribute(
                 which attributions are computed. If None, it uses the model
                 to generate the target based on the input and gen_args.
                 Default: None
+            skip_tokens (List[int] or List[str], optional): the tokens to skip in the
+                the output's interpretable representation. Use this argument to
+                define uninterested tokens, commonly like special tokens, e.g.,
+                sos, and unk. It can be a list of strings of the tokens or a list
+                of integers of the token ids.
+                Default: None
             gen_args (dict, optional): arguments for generating the target. Only used if
                 target is not given. When None, the default arguments are used,
                 {"max_new_tokens": 25, "do_sample": False,
@@ -605,13 +629,23 @@ def attribute(
             target_tokens = output_tokens[0][model_inp.size(1) :]
         else:
             assert gen_args is None, "gen_args must be None when target is given"
+            # Encode skip tokens
+            if skip_tokens:
+                if isinstance(skip_tokens[0], str):
+                    skip_tokens = cast(List[str], skip_tokens)
+                    skip_tokens = self.tokenizer.convert_tokens_to_ids(skip_tokens)
+            else:
+                skip_tokens = []

-            if type(target) is str:
-                # exclude sos
-                target_tokens = self.tokenizer.encode(target)[1:]
-                target_tokens = torch.tensor(target_tokens)
-            elif type(target) is torch.Tensor:
-                target_tokens = target
+            if isinstance(target, str):
+                encoded = self.tokenizer.encode(target)
+                target_tokens = torch.tensor(
+                    [token for token in encoded if token not in skip_tokens]
+                )
+            elif isinstance(target, torch.Tensor):
+                target_tokens = target[
+                    ~torch.isin(target, torch.tensor(skip_tokens, device=target.device))
+                ]
             else:
                 raise TypeError(
                     "target must either be str or Tensor, but the type of target is "

tests/attr/test_llm_attr.py

Lines changed: 102 additions & 4 deletions
@@ -95,7 +95,9 @@ def convert_tokens_to_ids(
         raise NotImplementedError

     def decode(self, token_ids: Tensor) -> str:
-        return " ".join(self.convert_ids_to_tokens(token_ids.tolist()))
+        tokens = self.convert_ids_to_tokens(token_ids.tolist())
+        # pyre-fixme[7]: Expected `str` but got `Union[List[str], str]`.
+        return tokens if isinstance(tokens, str) else " ".join(tokens)


 class Result(NamedTuple):
@@ -271,6 +273,7 @@ def test_llm_attr(
         res = llm_attr.attribute(
             inp,
             "m n o p q",
+            skip_tokens=[0],
             use_cached_outputs=self.use_cached_outputs,
             # pyre-fixme[6]: In call `LLMAttribution.attribute`,
             # for 4th positional argument, expected
@@ -330,7 +333,10 @@ def test_llm_attr_fa_log_prob(self) -> None:

         inp = TextTemplateInput("{} b {} {} e {}", ["a", "c", "d", "f"])
         res = llm_fa.attribute(
-            inp, "m n o p q", use_cached_outputs=self.use_cached_outputs
+            inp,
+            "m n o p q",
+            skip_tokens=[0],
+            use_cached_outputs=self.use_cached_outputs,
         )

         # With FeatureAblation, the seq attr in log_prob
@@ -385,6 +391,7 @@ def test_llm_attr_without_token(
         res = llm_fa.attribute(
             inp,
             "m n o p q",
+            skip_tokens=[0],
             use_cached_outputs=self.use_cached_outputs,
             # pyre-fixme[6]: In call `LLMAttribution.attribute`,
             # for 4th positional argument, expected
@@ -416,6 +423,52 @@ def test_futures_not_implemented(self) -> None:
         attributions = llm_fa.attribute_future()
         self.assertEqual(attributions, None)

+    def test_llm_attr_with_no_skip_tokens(self) -> None:
+        llm = DummyLLM()
+        llm.to(self.device)
+        tokenizer = DummyTokenizer()
+        fa = FeatureAblation(llm)
+        llm_fa = LLMAttribution(fa, tokenizer)
+
+        inp = TextTokenInput("a b c", tokenizer)
+        res = llm_fa.attribute(
+            inp,
+            "m n o p q",
+            use_cached_outputs=self.use_cached_outputs,
+        )
+
+        # 5 output tokens, 4 input tokens including sos
+        self.assertEqual(res.seq_attr.shape, (4,))
+        assert res.token_attr is not None  # make pyre/mypy happy
+        self.assertIsNotNone(res.token_attr)
+        token_attr = res.token_attr
+        self.assertEqual(token_attr.shape, (6, 4))  # type: ignore
+        self.assertEqual(res.input_tokens, ["<sos>", "a", "b", "c"])
+        self.assertEqual(res.output_tokens, ["<sos>", "m", "n", "o", "p", "q"])
+
+    def test_llm_attr_with_skip_tensor_target(self) -> None:
+        llm = DummyLLM()
+        llm.to(self.device)
+        tokenizer = DummyTokenizer()
+        fa = FeatureAblation(llm)
+        llm_fa = LLMAttribution(fa, tokenizer)
+
+        inp = TextTokenInput("a b c", tokenizer)
+        res = llm_fa.attribute(
+            inp,
+            torch.tensor(tokenizer.encode("m n o p q")),
+            skip_tokens=[0],
+        )
+
+        # 5 output tokens, 4 input tokens including sos
+        self.assertEqual(res.seq_attr.shape, (4,))
+        assert res.token_attr is not None  # make pyre/mypy happy
+        self.assertIsNotNone(res.token_attr)
+        token_attr = res.token_attr
+        self.assertEqual(token_attr.shape, (5, 4))  # type: ignore
+        self.assertEqual(res.input_tokens, ["<sos>", "a", "b", "c"])
+        self.assertEqual(res.output_tokens, ["m", "n", "o", "p", "q"])
+

 @parameterized_class(
     ("device",), [("cpu",), ("cuda",)] if torch.cuda.is_available() else [("cpu",)]
@@ -448,7 +501,7 @@ def test_llm_attr(
         )

         inp = TextTokenInput("a b c", tokenizer)
-        res = llm_attr.attribute(inp, "m n o p q", **attr_kws)
+        res = llm_attr.attribute(inp, "m n o p q", skip_tokens=[0], **attr_kws)

         # 5 output tokens, 4 input tokens including sos
         self.assertEqual(res.seq_attr.shape, (4,))
@@ -523,7 +576,7 @@ def test_llm_attr_with_skip_tokens(
         )

         inp = TextTokenInput("a b c", tokenizer, skip_tokens=[0])
-        res = llm_attr.attribute(inp, "m n o p q", **attr_kws)
+        res = llm_attr.attribute(inp, "m n o p q", skip_tokens=[0], **attr_kws)

         # 5 output tokens, 4 input tokens including sos
         self.assertEqual(res.seq_attr.shape, (3,))
@@ -537,3 +590,48 @@ def test_llm_attr_with_skip_tokens(
         self.assertEqual(res.seq_attr.device.type, self.device)
         assert res.token_attr is not None  # make pyre/mypy happy
         self.assertEqual(token_attr.device.type, self.device)  # type: ignore
+
+    def test_llm_attr_with_no_skip_tokens(self) -> None:
+        llm = DummyLLM()
+        llm.to(self.device)
+        tokenizer = DummyTokenizer()
+        attr = LayerIntegratedGradients(llm, llm.emb)  # type: ignore[call-arg]
+        llm_attr = LLMGradientAttribution(attr, tokenizer)
+
+        attr_kws: Dict[str, Any] = {}
+        inp = TextTokenInput("a b c", tokenizer)
+        res = llm_attr.attribute(inp, "m n o p q", **attr_kws)
+
+        # 5 output tokens, 4 input tokens including sos
+        self.assertEqual(res.seq_attr.shape, (4,))
+        assert res.token_attr is not None  # make pyre/mypy happy
+        self.assertIsNotNone(res.token_attr)
+        token_attr = res.token_attr
+        self.assertEqual(token_attr.shape, (6, 4))  # type: ignore
+        self.assertEqual(res.input_tokens, ["<sos>", "a", "b", "c"])
+        self.assertEqual(res.output_tokens, ["<sos>", "m", "n", "o", "p", "q"])
+
+    def test_llm_attr_with_skip_tensor_target(self) -> None:
+        llm = DummyLLM()
+        llm.to(self.device)
+        tokenizer = DummyTokenizer()
+        attr = LayerIntegratedGradients(llm, llm.emb)  # type: ignore[call-arg]
+        llm_attr = LLMGradientAttribution(attr, tokenizer)
+
+        attr_kws: Dict[str, Any] = {}
+        inp = TextTokenInput("a b c", tokenizer)
+        res = llm_attr.attribute(
+            inp,
+            torch.tensor(tokenizer.encode("m n o p q")),
+            skip_tokens=[0],
+            **attr_kws,
+        )
+
+        # 5 output tokens, 4 input tokens including sos
+        self.assertEqual(res.seq_attr.shape, (4,))
+        assert res.token_attr is not None  # make pyre/mypy happy
+        self.assertIsNotNone(res.token_attr)
+        token_attr = res.token_attr
+        self.assertEqual(token_attr.shape, (5, 4))  # type: ignore
+        self.assertEqual(res.input_tokens, ["<sos>", "a", "b", "c"])
+        self.assertEqual(res.output_tokens, ["m", "n", "o", "p", "q"])

tests/attr/test_llm_attr_gpu.py

Lines changed: 17 additions & 6 deletions
@@ -84,7 +84,9 @@ def convert_tokens_to_ids(
         raise NotImplementedError

     def decode(self, token_ids: Tensor) -> str:
-        return " ".join(self.convert_ids_to_tokens(token_ids.tolist()))
+        tokens = self.convert_ids_to_tokens(token_ids.tolist())
+        # pyre-fixme[7]: Expected `str` but got `Union[List[str], str]`.
+        return tokens if isinstance(tokens, str) else " ".join(tokens)


 class Result(NamedTuple):
@@ -195,7 +197,10 @@ def test_llm_attr_gpu(self, AttrClass: Type[PerturbationAttribution]) -> None:

         inp = TextTemplateInput("{} b {} {} e {}", ["a", "c", "d", "f"])
         res = llm_attr.attribute(
-            inp, "m n o p q", use_cached_outputs=self.use_cached_outputs
+            inp,
+            "m n o p q",
+            skip_tokens=[0],
+            use_cached_outputs=self.use_cached_outputs,
         )
         self.assertEqual(res.seq_attr.shape, (4,))
         self.assertEqual(cast(Tensor, res.token_attr).shape, (5, 4))
@@ -234,7 +239,10 @@ def test_llm_attr_fa_log_prob_gpu(self) -> None:

         inp = TextTemplateInput("{} b {} {} e {}", ["a", "c", "d", "f"])
         res = llm_fa.attribute(
-            inp, "m n o p q", use_cached_outputs=self.use_cached_outputs
+            inp,
+            "m n o p q",
+            skip_tokens=[0],
+            use_cached_outputs=self.use_cached_outputs,
         )

         # With FeatureAblation, the seq attr in log_prob
@@ -253,7 +261,10 @@ def test_llm_attr_without_token_gpu(

         inp = TextTemplateInput("{} b {} {} e {}", ["a", "c", "d", "f"])
         res = llm_fa.attribute(
-            inp, "m n o p q", use_cached_outputs=self.use_cached_outputs
+            inp,
+            "m n o p q",
+            skip_tokens=[0],
+            use_cached_outputs=self.use_cached_outputs,
         )

         self.assertEqual(res.seq_attr.shape, (4,))
@@ -280,7 +291,7 @@ def test_llm_attr(self) -> None:
         llm_attr = LLMGradientAttribution(attr, tokenizer)

         inp = TextTokenInput("a b c", tokenizer)
-        res = llm_attr.attribute(inp, "m n o p q")
+        res = llm_attr.attribute(inp, "m n o p q", skip_tokens=[0])
         # 5 output tokens, 4 input tokens including sos
         self.assertEqual(res.seq_attr.shape, (4,))
         assert res.token_attr is not None  # make pyre/mypy happy
@@ -324,7 +335,7 @@ def test_llm_attr_with_skip_tokens(self) -> None:
         llm_attr = LLMGradientAttribution(attr, tokenizer)

         inp = TextTokenInput("a b c", tokenizer, skip_tokens=[0])
-        res = llm_attr.attribute(inp, "m n o p q")
+        res = llm_attr.attribute(inp, "m n o p q", skip_tokens=[0])

         # 5 output tokens, 4 input tokens including sos
         self.assertEqual(res.seq_attr.shape, (3,))
