diff --git a/cpp/own_dataset/CMakeLists.txt b/cpp/own_dataset/CMakeLists.txt new file mode 100644 index 0000000000..0a1a8021ff --- /dev/null +++ b/cpp/own_dataset/CMakeLists.txt @@ -0,0 +1,11 @@ +cmake_minimum_required(VERSION 3.1 FATAL_ERROR) +project(own_dataset) + +find_package(Torch REQUIRED) +find_package(OpenCV REQUIRED ) + + +add_executable(dataset dataset.cpp) +target_link_libraries(dataset "${TORCH_LIBRARIES}") +target_link_libraries(dataset "${OpenCV_LIBS}") +set_property(TARGET dataset PROPERTY CXX_STANDARD 11) diff --git a/cpp/own_dataset/README.md b/cpp/own_dataset/README.md new file mode 100644 index 0000000000..19bdc639a3 --- /dev/null +++ b/cpp/own_dataset/README.md @@ -0,0 +1,49 @@ +# Own Dataset Example with the PyTorch C++ Frontend + +This folder contains an example of creating your own image dataset for training a classification model using the PyTorch C++ frontend. + +The entire dataset code is contained in `dataset.cpp`. + +To build the code, run the following commands from your terminal. + +```bash +$ cd own_dataset +$ mkdir build +$ cd build +$ cmake -DCMAKE_PREFIX_PATH=/path/to/libtorch .. +$ make +``` + +where /path/to/libtorch should be the path to the unzipped LibTorch distribution, which you can get from the [PyTorch homepage](https://pytorch.org/get-started/locally/). + +Execute the compiled binary to test the dataset: + +```bash +$ ./dataset ../test_data/images/ ../test_data/labels.txt +input dim: 4 +target: 1 +[ Variable[CPUByteType]{1} ] +``` + +The ```test_data``` directory has the following structure. + +```bash +$ tree ../test_data/ +../test_data/ +|-- images +| |-- image1.jpg +| |-- image2.jpg +| `-- image3.jpg +`-- labels.txt + +1 directory, 4 files +``` + +The contents of the ```labels.txt``` have the following format. 
+
+```bash
+$ cat ../test_data/labels.txt
+image1.jpg,0
+image2.jpg,0
+image3.jpg,1
+```
diff --git a/cpp/own_dataset/dataset.cpp b/cpp/own_dataset/dataset.cpp
new file mode 100644
index 0000000000..f0afac7d4f
--- /dev/null
+++ b/cpp/own_dataset/dataset.cpp
@@ -0,0 +1,166 @@
+// NOTE(review): the header names were missing from the reviewed copy of this
+// patch; the list below is reconstructed from the identifiers the code uses.
+#include <torch/torch.h>
+
+#include <opencv2/opencv.hpp>
+
+#include <dirent.h>
+
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <exception>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <tuple>
+#include <vector>
+
+
+// Splits one "<name><delim><label>" record into its first two fields.
+//
+// Empty fields are discarded before the pair is formed and any field after
+// the second is ignored. Throws std::invalid_argument when the record has
+// fewer than two non-empty fields.
+std::tuple<std::string, std::string> split(const std::string &s, char delim) {
+  std::stringstream ss(s);
+  std::vector<std::string> elems;
+
+  std::string item;
+  while (std::getline(ss, item, delim)) {
+    if (!item.empty()) {
+      elems.push_back(item);
+    }
+  }
+  if (elems.size() < 2) {
+    throw std::invalid_argument("malformed label record: '" + s + "'");
+  }
+  // Return copies: the fields live in a vector local to this function.
+  return std::make_tuple(elems[0], elems[1]);
+}
+
+
+// Dataset of images stored in one directory and labelled by a text file.
+//
+// Every entry of the directory except "." and ".." is treated as a sample.
+// The label file holds whitespace-separated "<file name>,<label>" records
+// that map a file name to an integer class id.
+class ImageDataset : public torch::data::datasets::Dataset<ImageDataset> {
+ private:
+  std::string root;                   // image directory, ends with '/'
+  std::vector<std::string> files;     // sample file names, sorted
+  std::map<std::string, int> labels;  // file name -> class id
+
+ public:
+  // Lists the entries of `root` and loads the labels from `labelfile`.
+  // Throws std::runtime_error if either cannot be opened; exceptions from
+  // split() and std::stoi() propagate for malformed records.
+  explicit ImageDataset(const std::string &root, const std::string &labelfile) : root(root) {
+    // get() concatenates directory and file name, so make sure a separator
+    // sits between them.
+    if (!this->root.empty() && this->root.back() != '/') {
+      this->root += '/';
+    }
+
+    // get files
+    std::unique_ptr<DIR, int (*)(DIR *)> dir(opendir(root.c_str()), &closedir);
+    if (!dir) {
+      throw std::runtime_error("cannot open image directory: " + root);
+    }
+    while (dirent *entry = readdir(dir.get())) {
+      if (std::strcmp(entry->d_name, ".") == 0 || std::strcmp(entry->d_name, "..") == 0) continue;
+      files.push_back(entry->d_name);
+    }
+    // readdir() returns entries in no particular order; sort them so that an
+    // index always refers to the same file.
+    std::sort(files.begin(), files.end());
+
+    // get labels
+    std::ifstream fs(labelfile);
+    if (!fs) {
+      throw std::runtime_error("cannot open label file: " + labelfile);
+    }
+    std::string buf;
+    std::string fname, label;
+    while (fs >> buf) {
+      std::tie(fname, label) = split(buf, ',');
+      labels[fname] = std::stoi(label);
+    }
+  }
+
+  // Loads sample `index` as a float tensor of shape {1, 3, rows, cols}
+  // (channels in OpenCV's BGR order) plus a one-element byte tensor holding
+  // its label, so labels must fit in 0..255. Throws std::out_of_range for an
+  // invalid index or a file without a label, and std::runtime_error if the
+  // image cannot be read.
+  torch::data::Example<> get(size_t index) override {
+    const std::string &name = this->files.at(index);
+    int label = this->labels.at(name);
+    const std::string fname = this->root + name;
+
+    // Flag 1 requests a 3-channel color image.
+    cv::Mat image = cv::imread(fname, 1);
+    if (image.empty()) {
+      throw std::runtime_error("cannot read image: " + fname);
+    }
+    if (!image.isContinuous()) {
+      image = image.clone();  // from_blob() needs one dense buffer
+    }
+
+    // OpenCV stores pixels interleaved (rows x cols x channels), so wrap the
+    // buffer with that shape first and reorder afterwards.
+    std::vector<int64_t> sizes = {image.rows, image.cols, image.channels()};
+    at::Tensor tensor_image = torch::from_blob(image.data, at::IntList(sizes), at::ScalarType::Byte);
+    at::Tensor tensor_label = torch::tensor({label}, torch::dtype(torch::kUInt8));
+
+    // Channels-last -> channels-first, add the leading batch dimension, then
+    // convert to float. The conversion copies the pixels, so the result does
+    // not point into `image` after this function returns.
+    tensor_image = tensor_image.permute({2, 0, 1}).unsqueeze(0).toType(at::kFloat).contiguous();
+
+    return {tensor_image, tensor_label};
+  }
+
+  // Number of directory entries collected by the constructor.
+  at::optional<size_t> size() const override {
+    return this->files.size();
+  }
+};
+
+
+// Usage: dataset <image directory> <label file>
+// Loads the last sample of the dataset and prints its shape rank and label.
+int main(int argc, char **argv) {
+  if (argc < 3) {
+    std::cerr << "usage: " << (argc > 0 ? argv[0] : "dataset")
+              << " <image directory> <label file>" << std::endl;
+    return 1;
+  }
+  const std::string root = argv[1];
+  const std::string labelfile = argv[2];
+
+  try {
+    ImageDataset dataset(root, labelfile);
+
+    const size_t count = dataset.size().value();
+    if (count == 0) {
+      std::cerr << "no files found in " << root << std::endl;
+      return 1;
+    }
+
+    // Indices are zero-based, so the last valid one is count - 1.
+    auto batch = dataset.get(count - 1);
+
+    std::cout << "input dim: " << batch.data.dim() << std::endl;
+    std::cout << "target: " << batch.target << std::endl;
+  } catch (const std::exception &e) {
+    std::cerr << "error: " << e.what() << std::endl;
+    return 1;
+  }
+
+  return 0;
+}
diff --git a/cpp/own_dataset/test_data/images/image1.jpg b/cpp/own_dataset/test_data/images/image1.jpg
new file mode 100644
index 0000000000..e5b0c51b20
Binary files /dev/null and b/cpp/own_dataset/test_data/images/image1.jpg differ
diff --git a/cpp/own_dataset/test_data/images/image2.jpg b/cpp/own_dataset/test_data/images/image2.jpg
new file mode 100644
index 0000000000..d4c87d3051
Binary files /dev/null and b/cpp/own_dataset/test_data/images/image2.jpg differ
diff --git a/cpp/own_dataset/test_data/images/image3.jpg b/cpp/own_dataset/test_data/images/image3.jpg
new file mode 100644
index 0000000000..c0958a9ca8
Binary files /dev/null and b/cpp/own_dataset/test_data/images/image3.jpg differ
diff --git a/cpp/own_dataset/test_data/labels.txt b/cpp/own_dataset/test_data/labels.txt
new file mode 100644
index 0000000000..281ae605ed
--- /dev/null
+++ b/cpp/own_dataset/test_data/labels.txt
@@ -0,0 +1,3 @@
+image1.jpg,0
+image2.jpg,0
+image3.jpg,1