Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,14 @@ jobs:
./example_mt_search
./example_mt_filter
./example_mt_replace_deleted
./example_multivector_search
./example_epsilon_search
./searchKnnCloserFirst_test
./searchKnnWithFilter_test
./multiThreadLoad_test
./multiThread_replace_test
./test_updates
./test_updates update
./multivector_search_test
./epsilon_search_test
shell: bash
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ var/
.vscode/
.vs/
**.DS_Store
*.pyc
12 changes: 12 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ if(HNSWLIB_EXAMPLES)
add_executable(example_search examples/cpp/example_search.cpp)
target_link_libraries(example_search hnswlib)

add_executable(example_epsilon_search examples/cpp/example_epsilon_search.cpp)
target_link_libraries(example_epsilon_search hnswlib)

add_executable(example_multivector_search examples/cpp/example_multivector_search.cpp)
target_link_libraries(example_multivector_search hnswlib)

add_executable(example_filter examples/cpp/example_filter.cpp)
target_link_libraries(example_filter hnswlib)

Expand All @@ -73,6 +79,12 @@ if(HNSWLIB_EXAMPLES)
target_link_libraries(example_mt_replace_deleted hnswlib)

# tests
add_executable(multivector_search_test tests/cpp/multivector_search_test.cpp)
target_link_libraries(multivector_search_test hnswlib)

add_executable(epsilon_search_test tests/cpp/epsilon_search_test.cpp)
target_link_libraries(epsilon_search_test hnswlib)

add_executable(test_updates tests/cpp/updates_test.cpp)
target_link_libraries(test_updates hnswlib)

Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,8 @@ print("Recall for two batches:", np.mean(labels.reshape(-1) == np.arange(len(dat
* filtering during the search with a boolean function
* deleting the elements and reusing the memory of the deleted elements for newly added elements
* multithreaded usage
* multivector search
* epsilon search


### Bindings installation
Expand Down
6 changes: 5 additions & 1 deletion examples/cpp/EXAMPLES.md
Original file line number Diff line number Diff line change
Expand Up @@ -182,4 +182,8 @@ int main() {
Multithreaded examples:
* Creating index, inserting elements, searching [example_mt_search.cpp](example_mt_search.cpp)
* Filtering during the search with a boolean function [example_mt_filter.cpp](example_mt_filter.cpp)
* Reusing the memory of the deleted elements when new elements are being added [example_mt_replace_deleted.cpp](example_mt_replace_deleted.cpp)
* Reusing the memory of the deleted elements when new elements are being added [example_mt_replace_deleted.cpp](example_mt_replace_deleted.cpp)

More examples:
* Multivector search [example_multivector_search.cpp](example_multivector_search.cpp)
* Epsilon search [example_epsilon_search.cpp](example_epsilon_search.cpp)
66 changes: 66 additions & 0 deletions examples/cpp/example_epsilon_search.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#include "../../hnswlib/hnswlib.h"

typedef unsigned int docidtype;
typedef float dist_t;

int main() {
int dim = 16; // Dimension of the elements
int max_elements = 10000; // Maximum number of elements, should be known beforehand
int M = 16; // Tightly connected with internal dimensionality of the data
// strongly affects the memory consumption
int ef_construction = 200; // Controls index search speed/build speed tradeoff
int min_num_candidates = 100; // Minimum number of candidates to search in the epsilon region
// this parameter is similar to ef

int num_quries = 5;
float epsilon = 2.0; // Squared distance to query

// Initing index
hnswlib::L2Space space(dim);
hnswlib::HierarchicalNSW<dist_t>* alg_hnsw = new hnswlib::HierarchicalNSW<dist_t>(&space, max_elements, M, ef_construction);

// Generate random data
std::mt19937 rng;
rng.seed(47);
std::uniform_real_distribution<> distrib_real;

size_t data_point_size = space.get_data_size();
char* data = new char[data_point_size * max_elements];
for (int i = 0; i < max_elements; i++) {
char* point_data = data + i * data_point_size;
for (int j = 0; j < dim; j++) {
char* vec_data = point_data + j * sizeof(float);
float value = distrib_real(rng);
*(float*)vec_data = value;
}
}

// Add data to index
for (int i = 0; i < max_elements; i++) {
hnswlib::labeltype label = i;
char* point_data = data + i * data_point_size;
alg_hnsw->addPoint(point_data, label);
}

// Query random vectors
for (int i = 0; i < num_quries; i++) {
char* query_data = new char[data_point_size];
for (int j = 0; j < dim; j++) {
size_t offset = j * sizeof(float);
char* vec_data = query_data + offset;
float value = distrib_real(rng);
*(float*)vec_data = value;
}
std::cout << "Query #" << i << "\n";
hnswlib::EpsilonSearchStopCondition<dist_t> stop_condition(epsilon, min_num_candidates, max_elements);
std::vector<std::pair<float, hnswlib::labeltype>> result =
alg_hnsw->searchStopConditionClosest(query_data, stop_condition);
size_t num_vectors = result.size();
std::cout << "Found " << num_vectors << " vectors\n";
delete[] query_data;
}

delete[] data;
delete alg_hnsw;
return 0;
}
83 changes: 83 additions & 0 deletions examples/cpp/example_multivector_search.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#include "../../hnswlib/hnswlib.h"

typedef unsigned int docidtype;
typedef float dist_t;

int main() {
int dim = 16; // Dimension of the elements
int max_elements = 10000; // Maximum number of elements, should be known beforehand
int M = 16; // Tightly connected with internal dimensionality of the data
// strongly affects the memory consumption
int ef_construction = 200; // Controls index search speed/build speed tradeoff

int num_quries = 5;
int num_docs = 5; // Number of documents to search
int ef_collection = 6; // Number of candidate documents during the search
// Controlls the recall: higher ef leads to better accuracy, but slower search
docidtype min_doc_id = 0;
docidtype max_doc_id = 9;

// Initing index
hnswlib::MultiVectorL2Space<docidtype> space(dim);
hnswlib::HierarchicalNSW<dist_t>* alg_hnsw = new hnswlib::HierarchicalNSW<dist_t>(&space, max_elements, M, ef_construction);

// Generate random data
std::mt19937 rng;
rng.seed(47);
std::uniform_real_distribution<> distrib_real;
std::uniform_int_distribution<docidtype> distrib_docid(min_doc_id, max_doc_id);

size_t data_point_size = space.get_data_size();
char* data = new char[data_point_size * max_elements];
for (int i = 0; i < max_elements; i++) {
// set vector value
char* point_data = data + i * data_point_size;
for (int j = 0; j < dim; j++) {
char* vec_data = point_data + j * sizeof(float);
float value = distrib_real(rng);
*(float*)vec_data = value;
}
// set document id
docidtype doc_id = distrib_docid(rng);
space.set_doc_id(point_data, doc_id);
}

// Add data to index
std::unordered_map<hnswlib::labeltype, docidtype> label_docid_lookup;
for (int i = 0; i < max_elements; i++) {
hnswlib::labeltype label = i;
char* point_data = data + i * data_point_size;
alg_hnsw->addPoint(point_data, label);
label_docid_lookup[label] = space.get_doc_id(point_data);
}

// Query random vectors
size_t query_size = dim * sizeof(float);
for (int i = 0; i < num_quries; i++) {
char* query_data = new char[query_size];
for (int j = 0; j < dim; j++) {
size_t offset = j * sizeof(float);
char* vec_data = query_data + offset;
float value = distrib_real(rng);
*(float*)vec_data = value;
}
std::cout << "Query #" << i << "\n";
hnswlib::MultiVectorSearchStopCondition<docidtype, dist_t> stop_condition(space, num_docs, ef_collection);
std::vector<std::pair<float, hnswlib::labeltype>> result =
alg_hnsw->searchStopConditionClosest(query_data, stop_condition);
size_t num_vectors = result.size();

std::unordered_map<docidtype, size_t> doc_counter;
for (auto pair: result) {
hnswlib::labeltype label = pair.second;
docidtype doc_id = label_docid_lookup[label];
doc_counter[doc_id] += 1;
}
std::cout << "Found " << doc_counter.size() << " documents, " << num_vectors << " vectors\n";
delete[] query_data;
}

delete[] data;
delete alg_hnsw;
return 0;
}
Loading