Skip to content

Commit cd06184

Browse files
sanchit-gandhi authored and ArthurZucker committed
[whisper] fix short-form output type (#32178)
* [whisper] fix short-form output type
* add test
* make style
* update long-form tests
* fixes
* last fix
* finalise test
1 parent 38d94bf commit cd06184

File tree

2 files changed

+29
-5
lines changed

2 files changed

+29
-5
lines changed

src/transformers/models/whisper/generation_whisper.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,7 @@ def generate(
498498

499499
# 3. Make sure generation config is correctly set
500500
# Make sure the generation config is correctly set depending on whether timestamps are to be returned or not
501-
self._set_return_outputs(
501+
return_dict_in_generate = self._set_return_outputs(
502502
return_dict_in_generate=return_dict_in_generate,
503503
return_token_timestamps=return_token_timestamps,
504504
logprob_threshold=logprob_threshold,
@@ -732,7 +732,7 @@ def generate(
732732
else:
733733
outputs = sequences
734734

735-
if generation_config.return_dict_in_generate:
735+
if return_dict_in_generate and generation_config.return_dict_in_generate:
736736
dict_outputs = self._stack_split_outputs(seek_outputs, model_output_type, sequences.device, kwargs)
737737

738738
if num_return_sequences > 1:
@@ -1109,18 +1109,20 @@ def _maybe_warn_unused_inputs(
11091109
def _set_return_outputs(return_dict_in_generate, return_token_timestamps, logprob_threshold, generation_config):
11101110
if return_dict_in_generate is None:
11111111
return_dict_in_generate = generation_config.return_dict_in_generate
1112+
else:
1113+
generation_config.return_dict_in_generate = return_dict_in_generate
11121114

11131115
generation_config.return_token_timestamps = return_token_timestamps
11141116
if return_token_timestamps:
1115-
return_dict_in_generate = True
1117+
generation_config.return_dict_in_generate = True
11161118
generation_config.output_attentions = True
11171119
generation_config.output_scores = True
11181120

11191121
if logprob_threshold is not None:
1120-
return_dict_in_generate = True
1122+
generation_config.return_dict_in_generate = True
11211123
generation_config.output_scores = True
11221124

1123-
generation_config.return_dict_in_generate = return_dict_in_generate
1125+
return return_dict_in_generate
11241126

11251127
def _set_return_timestamps(self, return_timestamps, is_shortform, generation_config):
11261128
if not is_shortform:

tests/models/whisper/test_modeling_whisper.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import numpy as np
2727
import pytest
2828
from huggingface_hub import hf_hub_download
29+
from parameterized import parameterized
2930

3031
import transformers
3132
from transformers import WhisperConfig
@@ -72,6 +73,7 @@
7273
BeamSearchEncoderDecoderOutput,
7374
GenerateBeamDecoderOnlyOutput,
7475
GenerateBeamEncoderDecoderOutput,
76+
GenerateEncoderDecoderOutput,
7577
PhrasalConstraint,
7678
)
7779
from transformers.generation.logits_process import LogitsProcessor
@@ -1820,6 +1822,26 @@ def test_custom_4d_attention_mask(self):
18201822
normalized_1 = torch.nn.functional.softmax(out_shared_prefix_last_tokens)
18211823
torch.testing.assert_close(normalized_0, normalized_1, rtol=1e-3, atol=1e-4)
18221824

1825+
@parameterized.expand([(True,), (False,)])
1826+
def test_generate_output_type(self, return_dict_in_generate):
1827+
expected_output_type = GenerateEncoderDecoderOutput if return_dict_in_generate else torch.Tensor
1828+
for model_class in self.all_generative_model_classes:
1829+
config, inputs = self.model_tester.prepare_config_and_inputs()
1830+
model = model_class(config).to(torch_device).eval()
1831+
1832+
# short-form generation without fallback
1833+
pred_ids = model.generate(**inputs, return_dict_in_generate=return_dict_in_generate)
1834+
assert isinstance(pred_ids, expected_output_type)
1835+
1836+
# short-form generation with fallback
1837+
pred_ids = model.generate(
1838+
**inputs,
1839+
logprob_threshold=-1.0,
1840+
temperature=[0.0, 0.1],
1841+
return_dict_in_generate=return_dict_in_generate,
1842+
)
1843+
assert isinstance(pred_ids, expected_output_type)
1844+
18231845

18241846
@require_torch
18251847
@require_torchaudio

0 commit comments

Comments (0)