class FullyConnected(nn.Module):
    """
    Args:
-       in_features: Number of input features
-       hidden_size: Internal hidden unit size.
+       n_feature: Number of input features
+       n_hidden: Internal hidden unit size.
    """

    def __init__(self,
-                in_features: int,
-                hidden_size: int,
+                n_feature: int,
+                n_hidden: int,
                 dropout: float,
                 relu_max_clip: int = 20) -> None:
        super(FullyConnected, self).__init__()
-       self.fc = nn.Linear(in_features, hidden_size, bias=True)
+       self.fc = nn.Linear(n_feature, n_hidden, bias=True)
        self.relu_max_clip = relu_max_clip
        self.dropout = dropout
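This commit does not touch FullyConnected.forward, but for reference, here is a minimal sketch of how a layer with these attributes could apply the clipped ReLU and dropout (the hardtanh clipping and functional dropout below are illustrative assumptions, not part of the diff):

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Affine projection from n_feature to n_hidden
        x = self.fc(x)
        # ReLU clipped at relu_max_clip (assumed clipping scheme)
        x = torch.nn.functional.hardtanh(torch.nn.functional.relu(x), 0, self.relu_max_clip)
        # Apply dropout only when a non-zero rate is configured
        if self.dropout:
            x = torch.nn.functional.dropout(x, self.dropout, self.training)
        return x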
@@ -37,32 +37,32 @@ class DeepSpeech(nn.Module):
    <https://arxiv.org/abs/1412.5567> paper.

    Args:
-       in_features: Number of input features
-       hidden_size: Internal hidden unit size.
-       num_classes: Number of output classes
+       n_feature: Number of input features
+       n_hidden: Internal hidden unit size.
+       n_class: Number of output classes
    """

    def __init__(self,
-                in_features: int,
-                hidden_size: int = 2048,
-                num_classes: int = 40,
+                n_feature: int,
+                n_hidden: int = 2048,
+                n_class: int = 40,
                 dropout: float = 0.0) -> None:
        super(DeepSpeech, self).__init__()
-       self.hidden_size = hidden_size
-       self.fc1 = FullyConnected(in_features, hidden_size, dropout)
-       self.fc2 = FullyConnected(hidden_size, hidden_size, dropout)
-       self.fc3 = FullyConnected(hidden_size, hidden_size, dropout)
+       self.n_hidden = n_hidden
+       self.fc1 = FullyConnected(n_feature, n_hidden, dropout)
+       self.fc2 = FullyConnected(n_hidden, n_hidden, dropout)
+       self.fc3 = FullyConnected(n_hidden, n_hidden, dropout)
        self.bi_rnn = nn.RNN(
-           hidden_size, hidden_size, num_layers=1, nonlinearity='relu', bidirectional=True)
-       self.fc4 = FullyConnected(hidden_size, hidden_size, dropout)
-       self.out = nn.Linear(hidden_size, num_classes)
+           n_hidden, n_hidden, num_layers=1, nonlinearity='relu', bidirectional=True)
+       self.fc4 = FullyConnected(n_hidden, n_hidden, dropout)
+       self.out = nn.Linear(n_hidden, n_class)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Args:
-           x (torch.Tensor): Tensor of dimension (batch_size, num_channels, input_length, num_features).
+           x (torch.Tensor): Tensor of dimension (batch, channel, time, feature).

        Returns:
-           Tensor: Predictor tensor of dimension (batch_size, input_length, number_of_classes).
+           Tensor: Predictor tensor of dimension (batch, time, class).
        """
        # N x C x T x F
        x = self.fc1(x)
@@ -77,14 +77,14 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
        # T x N x H
        x, _ = self.bi_rnn(x)
        # The fifth (non-recurrent) layer takes both the forward and backward units as inputs
-       x = x[:, :, :self.hidden_size] + x[:, :, self.hidden_size:]
+       x = x[:, :, :self.n_hidden] + x[:, :, self.n_hidden:]
        # T x N x H
        x = self.fc4(x)
        # T x N x H
        x = self.out(x)
-       # T x N x num_classes
+       # T x N x n_class
        x = x.permute(1, 0, 2)
-       # N x T x num_classes
+       # N x T x n_class
        x = torch.nn.functional.log_softmax(x, dim=2)
-       # T x N x num_classes
+       # N x T x n_class
        return x
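As a quick sanity check of the renamed interface, a usage sketch (the feature size, batch size, and sequence length here are arbitrary; the expected output shape follows the docstring above and assumes the elided middle of forward squeezes the channel dimension before the recurrent layer, as in the upstream torchaudio implementation):

    import torch

    model = DeepSpeech(n_feature=80, n_hidden=2048, n_class=40, dropout=0.0)
    x = torch.randn(4, 1, 100, 80)   # (batch, channel, time, feature)
    out = model(x)
    print(out.shape)                 # expected: torch.Size([4, 100, 40]), i.e. (batch, time, class)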