Skip to content

Commit 003a0cf

Browse files
zspo and sgugger authored
Fix some docs what layerdrop does (#23691)
* Fix some docs what layerdrop does * Update src/transformers/models/data2vec/configuration_data2vec_audio.py Co-authored-by: Sylvain Gugger <[email protected]> * Fix more docs --------- Co-authored-by: Sylvain Gugger <[email protected]>
1 parent 357f281 commit 003a0cf

36 files changed

+68
-44
lines changed

examples/research_projects/bert-loses-patience/pabee/modeling_pabee_albert.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ def forward(
253253
254254
Returns:
255255
:obj:`tuple(torch.FloatTensor)` comprising various elements depending on the configuration (:class:`~transformers.AlbertConfig`) and inputs:
256-
loss: (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``:
256+
loss (`optional`, returned when ``labels`` is provided) ``torch.FloatTensor`` of shape ``(1,)``:
257257
Classification (or regression if config.num_labels==1) loss.
258258
logits ``torch.FloatTensor`` of shape ``(batch_size, config.num_labels)``
259259
Classification (or regression if config.num_labels==1) scores (before SoftMax).

src/transformers/generation/logits_process.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -678,7 +678,7 @@ class PrefixConstrainedLogitsProcessor(LogitsProcessor):
678678
generation. See [Autoregressive Entity Retrieval](https://arxiv.org/abs/2010.00904) for more information.
679679
680680
Args:
681-
prefix_allowed_tokens_fn: (`Callable[[int, torch.Tensor], List[int]]`):
681+
prefix_allowed_tokens_fn (`Callable[[int, torch.Tensor], List[int]]`):
682682
This function constrains the beam search to allowed tokens only at each step. This function takes 2
683683
arguments `inputs_ids` and the batch ID `batch_id`. It has to return a list with the allowed tokens for the
684684
next generation step conditioned on the previously generated tokens `inputs_ids` and the batch ID

src/transformers/modeling_outputs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1522,7 +1522,7 @@ class Seq2SeqTSModelOutput(ModelOutput):
15221522
scale (`torch.FloatTensor` of shape `(batch_size,)` or `(batch_size, input_size)`, *optional*):
15231523
Scaling values of each time series' context window which is used to give the model inputs of the same
15241524
magnitude and then used to rescale back to the original magnitude.
1525-
static_features: (`torch.FloatTensor` of shape `(batch_size, feature size)`, *optional*):
1525+
static_features (`torch.FloatTensor` of shape `(batch_size, feature size)`, *optional*):
15261526
Static features of each time series in a batch which are copied to the covariates at inference time.
15271527
"""
15281528

@@ -1593,7 +1593,7 @@ class Seq2SeqTSPredictionOutput(ModelOutput):
15931593
scale (`torch.FloatTensor` of shape `(batch_size,)` or `(batch_size, input_size)`, *optional*):
15941594
Scaling values of each time series' context window which is used to give the model inputs of the same
15951595
magnitude and then used to rescale back to the original magnitude.
1596-
static_features: (`torch.FloatTensor` of shape `(batch_size, feature size)`, *optional*):
1596+
static_features (`torch.FloatTensor` of shape `(batch_size, feature size)`, *optional*):
15971597
Static features of each time series in a batch which are copied to the covariates at inference time.
15981598
"""
15991599

src/transformers/modeling_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -912,7 +912,7 @@ def get_head_mask(
912912
The mask indicating if we should keep the heads or not (1.0 for keep, 0.0 for discard).
913913
num_hidden_layers (`int`):
914914
The number of hidden layers in the model.
915-
is_attention_chunked: (`bool`, *optional*, defaults to `False`):
915+
is_attention_chunked (`bool`, *optional*, defaults to `False`):
916916
Whether or not the attention scores are computed by chunks.
917917
918918
Returns:

src/transformers/models/align/configuration_align.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ class AlignVisionConfig(PretrainedConfig):
184184
List of output channel sizes to be used in each block for convolutional layers.
185185
depthwise_padding (`List[int]`, *optional*, defaults to `[]`):
186186
List of block indices with square padding.
187-
strides: (`List[int]`, *optional*, defaults to `[1, 2, 2, 2, 1, 2, 1]`):
187+
strides (`List[int]`, *optional*, defaults to `[1, 2, 2, 2, 1, 2, 1]`):
188188
List of stride sizes to be used in each block for convolutional layers.
189189
num_block_repeats (`List[int]`, *optional*, defaults to `[1, 2, 2, 3, 3, 4, 1]`):
190190
List of the number of times each block is to be repeated.

src/transformers/models/blip/modeling_blip_text.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -613,7 +613,7 @@ def get_extended_attention_mask(
613613
Mask with ones indicating tokens to attend to, zeros for tokens to ignore.
614614
input_shape (`Tuple[int]`):
615615
The shape of the input to the model.
616-
device: (`torch.device`):
616+
device (`torch.device`):
617617
The device of the input to the model.
618618
619619
Returns:

src/transformers/models/blip/modeling_tf_blip_text.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -633,7 +633,7 @@ def get_extended_attention_mask(
633633
Mask with ones indicating tokens to attend to, zeros for tokens to ignore.
634634
input_shape (`Tuple[int]`):
635635
The shape of the input to the model.
636-
is_decoder: (`bool`):
636+
is_decoder (`bool`):
637637
Whether the model is used as a decoder.
638638
639639
Returns:

src/transformers/models/blip_2/modeling_blip_2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1059,7 +1059,7 @@ def get_extended_attention_mask(
10591059
Mask with ones indicating tokens to attend to, zeros for tokens to ignore.
10601060
input_shape (`Tuple[int]`):
10611061
The shape of the input to the model.
1062-
device: (`torch.device`):
1062+
device (`torch.device`):
10631063
The device of the input to the model.
10641064
10651065
Returns:

src/transformers/models/bloom/modeling_bloom.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ def _merge_heads(self, x: torch.Tensor) -> torch.Tensor:
256256
Merge heads together over the last dimension
257257
258258
Args:
259-
x: (`torch.tensor`, *required*): [batch_size * num_heads, seq_length, head_dim]
259+
x (`torch.tensor`, *required*): [batch_size * num_heads, seq_length, head_dim]
260260
261261
Returns:
262262
torch.tensor: [batch_size, seq_length, num_heads * head_dim]

src/transformers/models/data2vec/configuration_data2vec_audio.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ class Data2VecAudioConfig(PretrainedConfig):
6262
The dropout ratio for the attention probabilities.
6363
final_dropout (`float`, *optional*, defaults to 0.1):
6464
The dropout probability for the final projection layer of [`Data2VecAudioForCTC`].
65+
layerdrop (`float`, *optional*, defaults to 0.1):
66+
The LayerDrop probability. See the [LayerDrop paper](https://arxiv.org/abs/1909.11556) for more
67+
details.
6568
initializer_range (`float`, *optional*, defaults to 0.02):
6669
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
6770
layer_norm_eps (`float`, *optional*, defaults to 1e-12):

0 commit comments

Comments
 (0)