Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 31 additions & 1 deletion src/ccutil/tessdatamanager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,32 @@ bool TessdataManager::LoadArchiveFile(const char *filename) {
}
return result;
}

bool TessdataManager::SaveArchiveFile(const char *filename) const{
bool result = false;
archive *a = archive_write_new();
archive_entry *ae = archive_entry_new();
if (a != nullptr) {
archive_write_set_format_zip(a);
archive_write_open_filename(a, filename);
std::string filename_str = filename;
filename_str += ".";
archive_entry_set_filetype(ae, AE_IFREG);
archive_entry_set_perm(ae, 333);
for (unsigned i = 0; i < TESSDATA_NUM_ENTRIES; ++i) {
if (!entries_[i].empty()) {
archive_entry_set_pathname(ae, (filename_str + kTessdataFileSuffixes[i]).c_str());
archive_entry_set_size(ae, entries_[i].size());
archive_write_header(a, ae);
archive_write_data(a, &entries_[i][0], entries_[i].size());
}
}
result = archive_write_close(a) == ARCHIVE_OK;
archive_write_free(a);
return result;
}
return result;
}
#endif

bool TessdataManager::Init(const char *data_file_name) {
Expand Down Expand Up @@ -162,12 +188,16 @@ void TessdataManager::OverwriteEntry(TessdataType type, const char *data, int si

// Saves to the given filename.
bool TessdataManager::SaveFile(const char *filename, FileWriter writer) const {
// TODO: This method supports only the proprietary file format.
// TODO: This method supports only the proprietary file format.
ASSERT_HOST(is_loaded_);
std::vector<char> data;
Serialize(&data);
if (writer == nullptr) {
#if defined(HAVE_LIBARCHIVE)
return SaveArchiveFile(filename);
#else
return SaveDataToFile(data, filename);
#endif
} else {
return (*writer)(data, filename);
}
Expand Down
1 change: 1 addition & 0 deletions src/ccutil/tessdatamanager.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ class TESS_API TessdataManager {
private:
// Use libarchive.
// NOTE(review): presumably reads the components from the archive `filename`; body not visible here - confirm.
bool LoadArchiveFile(const char *filename);
// Writes each non-empty component as an entry of a zip archive at `filename`; returns true on success.
bool SaveArchiveFile(const char *filename) const;

/**
* Fills type with TessdataType of the tessdata component represented by the
Expand Down
17 changes: 17 additions & 0 deletions src/training/combine_tessdata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,19 @@ int main(int argc, char **argv) {
tprintf("Failed to write modified traineddata:%s!\n", argv[2]);
return EXIT_FAILURE;
}
} else if (argc == 3 && strcmp(argv[1], "-t") == 0) {
#if defined(HAVE_LIBARCHIVE)
if (!tm.Init(argv[2])) {
tprintf("Failed to read %s\n", argv[2]);
return EXIT_FAILURE;
}
if (!tm.SaveFile(argv[2], nullptr)) {
tprintf("Failed to tranform traineddata:%s!\n", argv[2]);
return EXIT_FAILURE;
}
#else
tprintf("Failed to load libarchive. Is tesseract compiled with libarchive support?\n");
#endif
} else if (argc == 3 && strcmp(argv[1], "-d") == 0) {
return list_components(tm, argv[2]);
} else if (argc == 3 && strcmp(argv[1], "-l") == 0) {
Expand Down Expand Up @@ -272,6 +285,10 @@ int main(int argc, char **argv) {
"Usage for compacting LSTM component to int:\n"
" %s -c traineddata_file\n",
argv[0]);
printf(
"Usage for transforming the proprietary .traineddata file to a zip archive:\n"
" %s -t traineddata_file\n",
argv[0]);
return EXIT_FAILURE;
}
tm.Directory();
Expand Down