
Commit 953fb1f

Fixed comments, and fixed discrepancies to match torch perplexity
1 parent 18ec76d

2 files changed (+20, -20 lines)


word_language_model/main.py

Lines changed: 8 additions & 8 deletions
@@ -23,7 +23,7 @@
 parser.add_argument('-nhid'     , type=int,   default=200 , help='Number of hidden units per layer.' )
 parser.add_argument('-nlayers'  , type=int,   default=2   , help='Number of layers.' )
 # Optimization parameters.
-parser.add_argument('-lr'       , type=float, default=20  , help='Initial learning rate.' )
+parser.add_argument('-lr'       , type=float, default=1   , help='Initial learning rate.' )
 parser.add_argument('-clip'     , type=float, default=0.5 , help='Gradient clipping.' )
 parser.add_argument('-maxepoch' , type=int,   default=6   , help='Upper epoch limit.' )
 parser.add_argument('-batchsize', type=int,   default=20  , help='Batch size.' )
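The lower default interacts with the clipping flag right below it: if the update is plain SGD (as the manual lr handling in this script suggests), clipping the total gradient norm to clip bounds each step's norm by lr * clip. A back-of-envelope check with the two defaults (hypothetical, not taken from an actual run):

    # with the gradient norm clipped to at most `clip`,
    # a vanilla SGD step moves the parameters by at most lr * clip in norm
    clip = 0.5
    for lr in (20, 1):            # old and new '-lr' defaults
        print(lr, lr * clip)      # 20 -> 10.0, 1 -> 0.5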
@@ -68,8 +68,6 @@ def batchify(data, bsz, bptt):
 # MAKE MODEL
 ###############################################################################
 
-initrange = 0.1
-
 class RNNModel(nn.Container):
     """A container module with an encoder, an RNN (one of several flavors),
     and a decoder. Runs one RNN step at a time.
@@ -96,11 +94,12 @@ def __init__(self, rnnType, ntoken, ninp, nhid, nlayers):
 
         # FIXME: is this better than the standard init? probably
         # FIXME: we need better reset_parameters methods in stdlib
+        initrange = 0.1
         self.encoder.weight.data.uniform_(-initrange, initrange)
         self.decoder.bias.data.fill_(0)
         self.decoder.weight.data.uniform_(-initrange, initrange)
 
-    def __call__(self, hidden, input):
+    def forward(self, hidden, input):
         emb = self.encoder(input)
         hidden, output = self.rnn(hidden, emb)
         decoded = self.decoder(output)
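Two distinct fixes share this hunk. Moving initrange into __init__ scopes the constant to the only place it is used (it was a module-level global, removed in the hunk above). Renaming __call__ to forward matters because the container base class owns __call__ and dispatches to forward; overriding __call__ directly bypasses that machinery. A minimal sketch of the dispatch pattern (a simplified stand-in, not the actual nn.Container source):

    class Module(object):
        def __call__(self, *args):
            # the framework's bookkeeping (hooks etc.) lives here
            return self.forward(*args)

        def forward(self, *args):
            raise NotImplementedError

    class Doubler(Module):
        def forward(self, x):   # subclasses override forward, never __call__
            return 2 * x

    print(Doubler()(21))        # 42 -- __call__ routes through forward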
@@ -130,9 +129,9 @@ def evaluate(model, data, criterion):
     # Loop over validation data.
     for i in range(0, data.size(0) - 1):
         hidden, output = model(hidden, Variable(data[i], requires_grad=False))
-        loss += criterion(output, Variable(data[i+1], requires_grad=False)).data[0]
+        loss += criterion(output, Variable(data[i+1], requires_grad=False)).data
 
-    return loss / data.size(0)
+    return loss[0] / data.size(0)
 
 # simple gradient clipping, using the total norm of the gradient
 def clipGradient(model, clip):
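Here criterion(...).data is a one-element tensor and [0] pulls out the Python float; the change accumulates the tensor directly and converts only once at the return instead of on every step. A small sketch of the two accumulation styles (illustrative numbers):

    import torch

    step_losses = [torch.Tensor([0.9]), torch.Tensor([0.7]), torch.Tensor([0.4])]

    total_float = 0.0
    for l in step_losses:          # old style: tensor -> float each iteration
        total_float += l[0]

    total_tensor = torch.zeros(1)
    for l in step_losses:          # new style: stay in tensor land
        total_tensor += l
    assert abs(total_tensor[0] - total_float) < 1e-6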
@@ -193,7 +192,8 @@ def repackageHidden(h):
         print(
             ('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.6f} | ms/batch {:5.2f} | '
             + 'train loss {:5.2f} | train ppl {:8.2f}').format(
-                epoch, i / bptt, train.size(0) / bptt, lr, elapsed * 1000 / reportinterval,
+                epoch, i / bptt, train.size(0) / bptt, lr,
+                elapsed * 1000 / reportinterval * bptt,
                 cur_loss, math.exp(cur_loss)
         ))
         total_loss = 0
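The ms/batch figure was off by a factor of bptt: the loop index i advances by bptt per batch, so a report interval measured in the units of i (as the multiplier suggests) spans reportinterval / bptt batches, not reportinterval of them. A quick arithmetic check with made-up numbers:

    elapsed = 2.0           # seconds since the last report (hypothetical)
    reportinterval = 200    # in the same units as i, which steps by bptt
    bptt = 20

    batches = reportinterval / bptt                  # 10 batches per report
    ms_per_batch = elapsed * 1000 / batches          # 200.0 ms
    assert ms_per_batch == elapsed * 1000 / reportinterval * bptt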
@@ -204,7 +204,7 @@ def repackageHidden(h):
     # ps = pstats.Stats(pr, stream=s).sort_stats("time")
     # ps.print_stats()
     # print(s.getvalue())
-    # val_loss = evaluate(model, valid, criterion)
+    val_loss = evaluate(model, valid, criterion)
 
     print(
         '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | valid ppl {:8.2f}'.format(
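With the call re-enabled (it had apparently been commented out alongside the profiling scaffolding above it), validation runs at every epoch boundary, and the reported perplexity is just the exponential of the average per-token loss:

    import math

    val_loss = 4.6                # hypothetical average validation loss
    print(math.exp(val_loss))     # ~99.48, the 'valid ppl' column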

word_language_model/rnn_modules.py

Lines changed: 12 additions & 12 deletions
@@ -12,8 +12,8 @@ class RNN(nn.Container):
 
     def __init__(self, ninp, nhid):
         super(RNN, self).__init__(
-            i2h=nn.Linear(ninp, nhid),
-            h2h=nn.Linear(nhid, nhid),
+            i2h=nn.Linear(ninp, nhid, bias=False),
+            h2h=nn.Linear(nhid, nhid, bias=False),
             sigmoid=nn.Sigmoid(),
         )
         self.ninp = ninp
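Since every recurrence here computes i2h(input) + h2h(hidden), two bias vectors collapse into a single effective bias anyway; dropping both (here and in LSTM/GRU below) is presumably part of matching the reference torch implementation named in the commit title. The redundancy is easy to verify with plain tensor math (hypothetical shapes):

    import torch

    torch.manual_seed(0)
    ninp, nhid, bsz = 10, 20, 5
    x, h = torch.randn(bsz, ninp), torch.randn(bsz, nhid)
    W_ih, b_ih = torch.randn(nhid, ninp), torch.randn(nhid)
    W_hh, b_hh = torch.randn(nhid, nhid), torch.randn(nhid)

    two_biases = (x @ W_ih.t() + b_ih) + (h @ W_hh.t() + b_hh)
    one_bias = x @ W_ih.t() + h @ W_hh.t() + (b_ih + b_hh)
    assert torch.allclose(two_biases, one_bias)   # the sum sees one bias either way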
@@ -31,15 +31,15 @@ class LSTM(nn.Container):
 
     def __init__(self, ninp, nhid):
         super(LSTM, self).__init__(
-            i2h=nn.Linear(ninp, 4 * nhid),
-            h2h=nn.Linear(nhid, 4 * nhid),
+            i2h=nn.Linear(ninp, 4 * nhid, bias=False),
+            h2h=nn.Linear(nhid, 4 * nhid, bias=False),
             sigmoid=nn.Sigmoid(),
             tanh=nn.Tanh(),
         )
         self.ninp = ninp
         self.nhid = nhid
 
-    def __call__(self, hidden, input):
+    def forward(self, hidden, input):
         c, h = hidden
         gates = self.h2h(h) + self.i2h(input)
         gates = gates.view(input.size(0), 4, self.nhid).transpose(0, 1)
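The view/transpose in that last context line is the gate-splitting idiom both LSTM and GRU rely on: the fused linear output is (batch, 4 * nhid) with each row laid out as four contiguous per-gate chunks, so viewing to (batch, 4, nhid) and transposing yields (4, batch, nhid), whose leading index selects one gate across the whole batch. A shape check (the gate names are illustrative, not the source's):

    import torch

    bsz, nhid = 5, 20
    fused = torch.randn(bsz, 4 * nhid)                  # i2h(input) + h2h(h)
    gates = fused.view(bsz, 4, nhid).transpose(0, 1)    # -> (4, bsz, nhid)
    ingate, forgetgate, cellgate, outgate = gates       # one (bsz, nhid) slab each
    assert ingate.size() == (bsz, nhid)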
@@ -63,23 +63,23 @@ class GRU(nn.Container):
 
     def __init__(self, ninp, nhid):
         super(GRU, self).__init__(
-            i2h=nn.Linear(ninp, 3 * nhid),
-            h2h=nn.Linear(nhid, 3 * nhid),
+            i2h=nn.Linear(ninp, 3 * nhid, bias=False),
+            h2h=nn.Linear(nhid, 3 * nhid, bias=False),
             sigmoid=nn.Sigmoid(),
             tanh=nn.Tanh(),
         )
         self.ninp = ninp
         self.nhid = nhid
 
-    def __call__(self, hidden, input):
-        gi = i2h(input).view(3, input.size(0), self.nhid).transpose(0, 1)
-        gh = h2h(hidden).view(3, input.size(0), self.nhid).transpose(0, 1)
+    def forward(self, hidden, input):
+        gi = self.i2h(input).view(input.size(0), 3, self.nhid).transpose(0, 1)
+        gh = self.h2h(hidden).view(input.size(0), 3, self.nhid).transpose(0, 1)
 
         resetgate = self.sigmoid(gi[0] + gh[0])
         updategate = self.sigmoid(gi[1] + gh[1])
 
         output = self.tanh(gi[2] + resetgate * gh[2])
-        nexth = hidden + updategate * (output - h)
+        nexth = hidden + updategate * (output - hidden)
 
         return nexth, output
 
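Beyond the forward rename, this hunk fixes three genuine bugs: i2h and h2h were called without self. (a NameError at runtime), h was referenced where only hidden exists in this scope, and the view ran (3, batch, nhid) over memory that is laid out batch-major, scrambling samples across gates. The corrected update rule is the standard GRU step, h' = h + z * (candidate - h), i.e. (1 - z) * h + z * candidate. The layout bug is worth seeing concretely (toy numbers):

    import torch

    bsz, nhid = 4, 3
    # a Linear with out_features=3*nhid emits (bsz, 3*nhid):
    # each ROW is one sample's [gate0 | gate1 | gate2] chunks
    flat = torch.arange(bsz * 3 * nhid).float().view(bsz, 3 * nhid)

    fixed = flat.view(bsz, 3, nhid).transpose(0, 1)  # (3, bsz, nhid) per-gate slabs
    buggy = flat.view(3, bsz, nhid)                  # reinterprets rows wholesale

    # fixed[0] is gate 0 for every sample; buggy[0] is just the first
    # bsz*nhid numbers, mixing different gates of the first samples
    assert not torch.equal(fixed[0], buggy[0])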
@@ -97,7 +97,7 @@ def __init__(self, rnnClass, ninp, nhid, nlayers):
             self.layers += [layer]
             self.add_module('layer' + str(i), layer)
 
-    def __call__(self, hidden, input):
+    def forward(self, hidden, input):
         output = input
         new_hidden = [None] * self.nlayers
         for i in range(self.nlayers):
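For reference, the loop this renamed forward drives: each layer consumes the previous layer's output together with its own hidden state, and the container returns the per-layer hidden states plus the top layer's output. A self-contained sketch of that pattern with a stand-in layer (not the repo's classes):

    def stacked_step(layers, hidden, input):
        output = input
        new_hidden = [None] * len(layers)
        for i, layer in enumerate(layers):
            # each layer returns (new_hidden_state, output), like the modules above
            new_hidden[i], output = layer(hidden[i], output)
        return new_hidden, output

    # stand-in layer: hidden state counts steps, output doubles on the way up
    layer = lambda h, x: (h + 1, x * 2)
    hidden, out = stacked_step([layer, layer], [0, 0], 1)
    print(hidden, out)   # [1, 1] 4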
