
Commit 7359eef

committed
Adjusted some aspects for training
1 parent 5ab308f commit 7359eef

File tree

2 files changed (+30 lines, -22 lines)


Models/Dataset.py

Lines changed: 3 additions & 2 deletions
@@ -39,7 +39,7 @@ class MyDataset(Dataset):
     # sample is the associated <Sample> object
     # dirty is a boolean flag: it means this sample was already returned by get_fraction_of_dataset, i.e. someone external has already taken it.
 
-    def __init__(self, directory_of_data:str = None, already_computed_dataframe: pd.DataFrame = None, state: DatasetState = DatasetState.Raw):
+    def __init__(self, directory_of_data:str = None, percentage:int = 100, already_computed_dataframe: pd.DataFrame = None, state: DatasetState = DatasetState.Raw):
         """Create a new dataset from source files
 
         Args:
@@ -55,7 +55,8 @@ def __init__(self, directory_of_data:str = None, already_computed_dataframe: pd.
         if not os.path.isdir(directory_of_data):
             raise ValueError(f"{directory_of_data} is not a directory!")
 
-        _temp_dataset = pd.read_csv(f"{directory_of_data}/results.csv", sep="|", skipinitialspace=True)[["image_name","comment"]].iloc[0:1000,:]
+        _temp_dataset = pd.read_csv(f"{directory_of_data}/results.csv", sep="|", skipinitialspace=True)[["image_name","comment"]]
+        _temp_dataset = _temp_dataset.head(int(len(_temp_dataset)*(percentage/100)))
         samples = _temp_dataset.apply(lambda row: Sample(int(row.name)+1, f"{directory_of_data}/images/{row.image_name}", row.comment), axis=1)
 
         self.dataset: pd.DataFrame = pd.DataFrame(list(zip([i for i in range(len(samples))], samples, [False for _ in range(len(samples))])), columns=["id_sample","sample","dirty"])
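
For reference, a minimal self-contained sketch of the new percentage-based trimming in isolation, using a toy DataFrame in place of results.csv (column names match the ones selected above; the percentage value is only an example):

import pandas as pd

# Toy stand-in for results.csv: one image file name and one caption per row.
_temp_dataset = pd.DataFrame({
    "image_name": [f"img_{i}.jpg" for i in range(10)],
    "comment": [f"caption {i}" for i in range(10)],
})

percentage = 20                        # keep the first 20% of the rows
_temp_dataset = _temp_dataset.head(int(len(_temp_dataset) * (percentage / 100)))
print(len(_temp_dataset))              # 2

Note that head() keeps the first rows in file order, so the subset is deterministic rather than a random sample.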

Models/NeuralNet.py

Lines changed: 27 additions & 20 deletions
@@ -7,7 +7,7 @@
 class EncoderCNN(nn.Module):
     def __init__(self, embed_size):
         super(EncoderCNN, self).__init__()
-        resnet = torch.hub.load('pytorch/vision:v0.6.0', 'resnet50', pretrained=True)
+        resnet = models.resnet50(pretrained=True)
         for param in resnet.parameters():
             param.requires_grad_(False)
 
@@ -16,7 +16,8 @@ def __init__(self, embed_size):
         self.embed = nn.Linear(resnet.fc.in_features, embed_size) # attach a linear layer ()
 
     def forward(self, images):
-        features = self.resnet(images)
+        with torch.no_grad():
+            features = self.resnet(images)
         features = features.reshape(features.size(0), -1)
         features = self.embed(features)
         return features
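
For context, a hedged sketch of the frozen-backbone pattern the encoder now follows: torchvision's pretrained ResNet-50 with gradients disabled and a small trainable linear embedding on top. How EncoderCNN builds self.resnet is not shown in this hunk, so the nn.Sequential construction below is an assumption; embed_size and the dummy batch are examples only.

import torch
import torch.nn as nn
from torchvision import models

embed_size = 50                                             # example value, matching EncoderCNN(50) below
resnet = models.resnet50(pretrained=True)                   # pretrained backbone, as in the diff
for param in resnet.parameters():
    param.requires_grad_(False)                             # freeze every backbone weight

backbone = nn.Sequential(*list(resnet.children())[:-1])     # assumption: self.resnet is ResNet-50 minus its fc head
embed = nn.Linear(resnet.fc.in_features, embed_size)        # the only trainable part (2048 -> embed_size)

images = torch.randn(2, 3, 224, 224)                        # dummy mini-batch
with torch.no_grad():                                       # no autograd graph through the frozen backbone
    features = backbone(images)                             # (2, 2048, 1, 1)
features = features.reshape(features.size(0), -1)           # (2, 2048)
features = embed(features)                                  # (2, embed_size); gradients flow only through embed

Since the backbone parameters already have requires_grad=False and the input images typically do not require gradients, the torch.no_grad() wrapper is largely a belt-and-braces guard against building an autograd graph through the backbone.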
@@ -60,7 +61,8 @@ def forward(self, features, captions,caption_lengths):
 
         # Initialize the hidden state
         batch_size = features.shape[0] # features is of shape (batch_size, embed_size)
-
+        self.hidden = self.init_hidden(batch_size)
+
         # Create embedded word vectors for each word in the captions
         embeddings = self.word_embeddings(captions) # embeddings new shape : (batch_size, captions length -1, embed_size)
 
@@ -82,14 +84,16 @@ def sample(self, features, states=None):
         sampled_ids = []
         inputs = features.unsqueeze(1)
         inputs = inputs.reshape((1,1,inputs.shape[0]))
-        for _ in range(30):
-            hiddens, states = self.lstm(inputs, states)        # hiddens: (batch_size, 1, hidden_size)
-            outputs = self.linear(hiddens.squeeze(1))          # outputs: (batch_size, vocab_size)
-            _, predicted = outputs.max(1)                      # predicted: (batch_size)
-            sampled_ids.append(predicted)
-            inputs = self.word_embeddings(predicted)           # inputs: (batch_size, embed_size)
-            inputs = inputs.unsqueeze(1)                       # inputs: (batch_size, 1, embed_size)
-        sampled_ids = torch.stack(sampled_ids, 1)              # sampled_ids: (batch_size, max_seq_length)
+        self.init_hidden(1)
+        with torch.no_grad():
+            for _ in range(30):
+                hiddens, states = self.lstm(inputs, states)    # hiddens: (batch_size, 1, hidden_size)
+                outputs = self.linear(hiddens.squeeze(1))      # outputs: (batch_size, vocab_size)
+                _, predicted = outputs.max(1)                  # predicted: (batch_size)
+                sampled_ids.append(predicted)
+                inputs = self.word_embeddings(predicted)       # inputs: (batch_size, embed_size)
+                inputs = inputs.unsqueeze(1)                   # inputs: (batch_size, 1, embed_size)
+            sampled_ids = torch.stack(sampled_ids, 1)          # sampled_ids: (batch_size, max_seq_length)
         return sampled_ids
 
     def save(self, file_name):
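
As a side note, the torch.stack call at the end of the loop is what turns the list of per-step predictions into a single id tensor; a tiny self-contained illustration with toy token ids (not the project's vocabulary):

import torch

# Three greedy steps with batch_size = 1: each step produces a (1,) tensor of token ids.
step_predictions = [torch.tensor([5]), torch.tensor([12]), torch.tensor([3])]
sampled_ids = torch.stack(step_predictions, 1)   # shape (1, 3): one caption of three token ids
print(sampled_ids)                               # tensor([[ 5, 12,  3]])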
@@ -112,7 +116,7 @@ def train(train_set, validation_set, lr, epochs, vocabulary):
     best_epoch = -1 # the epoch in which the best accuracy above was computed
 
     encoder = EncoderCNN(50)
-    decoder = DecoderRNN(100,0,len(v_enriched.word2id.keys()),v_enriched.embeddings)
+    decoder = DecoderRNN(2048,0,len(v_enriched.word2id.keys()),v_enriched.embeddings)
 
     # ensuring the classifier is in 'train' mode (pytorch)
     decoder.train()
@@ -129,9 +133,9 @@ def train(train_set, validation_set, lr, epochs, vocabulary):
         epoch_num_train_examples = 0
 
         for images,captions,captions_length in train_set:
-            # zeroing the memory areas that were storing previously computed gradients
             decoder.zero_grad()
-            encoder.zero_grad()
+            encoder.zero_grad()
+            # zeroing the memory areas that were storing previously computed gradients
             batch_num_train_examples = images.shape[0] # mini-batch size (it might be different from 'batch_size')
             epoch_num_train_examples += batch_num_train_examples
@@ -147,8 +151,10 @@ def train(train_set, validation_set, lr, epochs, vocabulary):
             targets = pack_padded_sequence(captions, captions_length, batch_first=True)[0]
 
             # computing the loss function
-            loss = criterion(outputs, targets)
-
+            try:
+                loss = criterion(outputs, targets)
+            except Exception as ex:
+                print(ex)
             # computing gradients and updating the network weights
             loss.backward() # computing gradients
             optimizer.step() # updating weights
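
One note on this change: the except branch only prints the error, so on a failing batch loss is either still undefined (first iteration) or holds a graph that backward() has already consumed, and the loss.backward() that follows is likely to raise as well. A hedged alternative sketch (not what the commit does) skips the offending mini-batch instead; mini_batches is a hypothetical iterable standing in for the surrounding loop, and criterion and optimizer are assumed to exist as in the diff:

for outputs, targets in mini_batches:      # hypothetical stand-in for the mini-batch loop
    try:
        loss = criterion(outputs, targets)
    except Exception as ex:                # e.g. a shape mismatch in this batch
        print(ex)
        continue                           # skip the mini-batch: nothing to backpropagate
    loss.backward()                        # computing gradients
    optimizer.step()                       # updating weights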
@@ -157,6 +163,7 @@ def train(train_set, validation_set, lr, epochs, vocabulary):
             torch.save(decoder.state_dict(),".saved/decoder.pt")
             features = encoder(images)
             caption = decoder.sample(features[0])
+            print(v_enriched.rev_translate(captions))
             print(v_enriched.rev_translate(caption))
             # computing the performance of the net on the current training mini-batch
             # with torch.no_grad(): # keeping these operations out of those for which we will compute the gradient
@@ -198,14 +205,14 @@ def train(train_set, validation_set, lr, epochs, vocabulary):
     from PreProcess import PreProcess
     from Dataset import MyDataset
     from torch.utils.data import DataLoader
-    ds = MyDataset("./dataset/flickr30k_images/flickr30k_images")
-    df = ds.get_fraction_of_dataset(percentage=10)
+    ds = MyDataset("./dataset", percentage=2)
+    df = ds.get_fraction_of_dataset(percentage=100)
 
     # use dataloader facilities which requires a preprocessed dataset
     v = Vocabulary(verbose=True)
     df_pre_processed,v_enriched = PreProcess.DatasetForTraining.process(dataset=df,vocabulary=v)
 
-    dataloader = DataLoader(df, batch_size=30,
-                            shuffle=False, num_workers=0, collate_fn=df.pack_minibatch)
+    dataloader = DataLoader(df, batch_size=10,
+                            shuffle=True, num_workers=0, collate_fn=df.pack_minibatch)
 
     train(dataloader, dataloader, 1e-2, 10, v_enriched)
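
A quick note on the combined effect of the new driver settings: MyDataset("./dataset", percentage=2) keeps only the first 2% of results.csv, and get_fraction_of_dataset(percentage=100) then appears to hand all of that subset to the DataLoader, so roughly 2% of the caption file is used overall. A hedged arithmetic sketch with an example row count (the real size of results.csv is not shown in this diff):

# Effective number of rows used for training under the new settings (example numbers only).
rows_in_csv = 150_000                                      # assumption: placeholder row count
constructor_pct = 2                                        # MyDataset(..., percentage=2)
fraction_pct = 100                                         # ds.get_fraction_of_dataset(percentage=100)

rows_loaded = int(rows_in_csv * constructor_pct / 100)     # 3000 rows kept by head()
rows_used = int(rows_loaded * fraction_pct / 100)          # all 3000 of them reach the DataLoader
print(rows_loaded, rows_used)                              # 3000 3000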
