Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -119,3 +119,4 @@ dmypy.json
dataset/*
.saved/*
*.Identifier
*.zip
15 changes: 15 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover over an existing attribute to view its description.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: File corrente",
"type": "python",
"request": "launch",
"program": "/home/christian/Documenti/GitHub/Image-Captioning/v1/NeuralNet.py",
"console": "integratedTerminal"
}
]
}
1 change: 1 addition & 0 deletions light_version/Dataset.py → bck_old/Dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ def pack_minibatch_evaluation(self, data):
images = torch.stack(images, 0)

caption_lengths = [len(caption) for caption in captions]
captions
captions = nn.utils.rnn.pad_sequence(captions, padding_value=0, batch_first=True)
return images,captions.type(torch.LongTensor),caption_lengths

23 changes: 7 additions & 16 deletions light_version/NeuralNet.py → bck_old/NeuralNet.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,46 +9,37 @@

device = "cuda:0"
class EncoderCNN(nn.Module):
    """Image encoder: a frozen pretrained ResNet-50 followed by a linear projection.

    The ResNet's last child module (its fully connected layer) is dropped and
    replaced by a linear layer whose output width is ``embedding_size``.
    """

    def __init__(self, embedding_size):
        """Build the encoder.

        Args:
            embedding_size: Output width of the final linear projection.
        """
        super(EncoderCNN, self).__init__()
        resnet = models.resnet50(pretrained=True)
        # Freeze the backbone: none of the ResNet parameters receive gradients.
        for param in resnet.parameters():
            param.requires_grad_(False)

        modules = list(resnet.children())[:-1]  # remove last fc layer
        self.resnet = nn.Sequential(*modules)
        # Project from the width the removed fc layer expected to the
        # requested embedding width (previously hard-coded to 50).
        self.linear = nn.Linear(resnet.fc.in_features, embedding_size)

    def forward(self, images):
        """Encode a batch of images.

        Args:
            images: Batch of images accepted by the ResNet backbone.

        Returns:
            The output of the linear projection applied to the flattened
            backbone features, one row per input image.
        """
        features = self.resnet(images)
        # Flatten everything after the batch dimension; the result is still
        # backbone-sized here, not embedding-sized.
        features = features.reshape(features.size(0), -1)
        # (Batch Size, Embedding Dim.) only after this projection.
        features = self.linear(features)
        return features

class DecoderRNN(nn.Module):
def __init__(self, hidden_size, padding_index, vocab_size, embeddings ):
def __init__(self, hidden_size, padding_index, vocab_size, embeddings, embedding_size):
"""Set the hyper-parameters and build the layers."""
super(DecoderRNN, self).__init__()
# Keep track of hidden_size for initialization of hidden state
self.hidden_size = hidden_size

# Embedding layer that turns words into a vector of a specified size
self.word_embeddings = nn.Embedding.from_pretrained(embeddings, freeze=True, padding_idx = 0)
self.word_embeddings = nn.Embedding(vocab_size, embedding_size, padding_idx=padding_index)

# The LSTM takes embedded word vectors (of a specified size) as input
# and outputs hidden states of size hidden_dim
self.lstm = nn.LSTM(input_size=50, \
hidden_size=1024, # LSTM hidden units
num_layers=1, # number of LSTM layer
batch_first=True, # input & output will have batch size as 1st dimension
dropout=0, # Not applying dropout
bidirectional=False, # unidirectional LSTM
)
self.lstm_unit = torch.nn.LSTMCell(embedding_size, hidden_size)

# The linear layer that maps the hidden state output dimension
# to the number of words we want as output, vocab_size
self.linear_1 = nn.Linear(1024, vocab_size)
self.linear_1 = nn.Linear(hidden_size, vocab_size)

def init_hidden_state(self, encoder_out):
"""
Expand Down
File renamed without changes.
134 changes: 0 additions & 134 deletions heavy_version/Models/Dataset.py

This file was deleted.

1 change: 0 additions & 1 deletion heavy_version/Models/Interface/__init__.py

This file was deleted.

Loading