Skip to content

Conversation

@huseinzol05
Copy link

Quick example:

from transformers import AutoProcessor, WhisperForConditionalGeneration
from cut_cross_entropy.transformers import cce_patch
import requests
from datasets import Audio

# Demo: run one forward pass of a CCE-patched Whisper model on a sample clip.
SAMPLE_RATE = 16000
audio_feature = Audio(sampling_rate=SAMPLE_RATE)

processor = AutoProcessor.from_pretrained('openai/whisper-small')
model = WhisperForConditionalGeneration.from_pretrained('openai/whisper-small')
# Swap the model's cross-entropy loss for cut-cross-entropy.
model = cce_patch(model)
_ = model.cuda()

# Download a sample wav and decode it into a float waveform at 16 kHz.
response = requests.get('https://github.com/mesolitica/malaya-speech/raw/master/speech/7021-79759-0004.wav')
input_str = '<|en|><|0.0|> Without going to any such extreme as this, we can easily see on reflection how vast an influence on the ideas and conceptions,<|10.6|><|11.04|> as well as on the principles of action in mature years, must be exerted by the nature and character of the images which<|19.84|><|19.84|>the period of infancy and childhood impress upon mind.<|24.14|>'
waveform = audio_feature.decode_example(audio_feature.encode_example(response.content))['array']

# Tokenize the transcript, then shift by one position to build the
# teacher-forcing pair: decoder inputs drop the last token, labels drop the first.
token_ids = processor.tokenizer(input_str, add_special_tokens=False, return_tensors='pt').to('cuda').input_ids
decoder_input_ids = token_ids[:, :-1]
labels = token_ids[:, 1:]

features = processor(
    [waveform],
    return_tensors='pt',
    sampling_rate=processor.feature_extractor.sampling_rate,
    device='cuda',
)
batch = features.to('cuda')
batch["labels"] = labels
batch["decoder_input_ids"] = decoder_input_ids
model(**batch)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant