Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ Makefile~
*.npy
*.bz2
*#*
.idea
13 changes: 13 additions & 0 deletions examples/export_word2vec_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from glove import Glove
import argparse

# Convert binary model to standardized .vec format for compatibility
# Example command: python export_word2vec_format.py -i model.model -o model.vec
if __name__ == '__main__':
    # Set up command line parameters.
    # Both paths are mandatory: without them Glove.load / save_word2vec_format
    # would be called with None and fail with an unrelated traceback, so let
    # argparse reject the invocation with a usage message instead.
    parser = argparse.ArgumentParser(description='Export model to word2vec format')
    parser.add_argument("-i", "--input", type=str, required=True, help="input model")
    parser.add_argument("-o", "--output", type=str, required=True, help="output model")
    args = parser.parse_args()

    # Load the saved model and re-serialize it in word2vec text format.
    glove = Glove.load(args.input)
    glove.save_word2vec_format(args.output)
20 changes: 20 additions & 0 deletions glove/glove.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,26 @@ def save(self, filename):
savefile,
protocol=pickle.HIGHEST_PROTOCOL)

def save_word2vec_format(self, filename):
    """
    Serialize model to filename in word2vec .vec text format.

    The first line holds the number of rows and columns of
    `self.word_vectors`, separated by a single space. Each following
    line holds one word from `self.dictionary` followed by the
    space-separated components of the vector stored at that word's
    index.

    The file is always written as UTF-8, so words containing non-ASCII
    characters are saved the same way regardless of the platform's
    default encoding.
    """
    # Function-scope import: io.open accepts `encoding` on both
    # Python 2 and Python 3, unlike the Python 2 builtin open.
    import io

    def as_text(value):
        # On Python 2 a plain str is bytes, which io.open's text mode
        # rejects; on Python 3 str values pass through untouched.
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    (rows, cols) = self.word_vectors.shape

    if hasattr(self.dictionary, 'iteritems'):
        # Python 2 compat
        items_iterator = self.dictionary.iteritems()
    else:
        items_iterator = self.dictionary.items()

    with io.open(filename, 'w', encoding='utf-8') as savefile:
        savefile.write(as_text(str(rows) + " " + str(cols) + "\n"))

        for word, idx in items_iterator:
            # Build the line with a single join instead of repeated
            # string concatenation.
            fields = [as_text(word)]
            fields.extend(as_text(str(val_i)) for val_i in self.word_vectors[idx])
            savefile.write(u" ".join(fields) + u"\n")

@classmethod
def load(cls, filename):
"""
Expand Down