diff --git a/DOCKER_README.md b/DOCKER_README.md index b7685bd4..a4ed1f92 100644 --- a/DOCKER_README.md +++ b/DOCKER_README.md @@ -58,7 +58,7 @@ docker run --rm redis/vector-db-benchmark:latest run.py --describe datasets # Basic Redis benchmark (requires local Redis) docker run --rm -v $(pwd)/results:/app/results --network=host \ redis/vector-db-benchmark:latest \ - run.py --host localhost --engines redis-default-simple --dataset random-100 + run.py --host localhost --engines redis-default-simple --datasets random-100 ``` ## Features @@ -78,12 +78,12 @@ docker run --rm -v $(pwd)/results:/app/results --network=host \ ### Redis 8.2 with RediSearch ```bash # Start Redis 8.2 with built-in vector support -docker run -d --name redis-test -p 6379:6379 redis:8.2-rc1-bookworm +docker run -d --name redis-test -p 6379:6379 redis:8.2-bookworm # Run benchmark docker run --rm -v $(pwd)/results:/app/results --network=host \ redis/vector-db-benchmark:latest \ - run.py --host localhost --engines redis-default-simple --dataset glove-25-angular + run.py --host localhost --engines redis-default-simple --datasets glove-25-angular ``` @@ -103,18 +103,18 @@ docker run --rm redis/vector-db-benchmark:latest run.py --describe engines # Quick test with small dataset docker run --rm -v $(pwd)/results:/app/results --network=host \ redis/vector-db-benchmark:latest \ - run.py --host localhost --engines redis-default-simple --dataset random-100 + run.py --host localhost --engines redis-default-simple --datasets random-100 # Comprehensive benchmark with multiple configurations docker run --rm -v $(pwd)/results:/app/results --network=host \ redis/vector-db-benchmark:latest \ - run.py --host localhost --engines "*redis*" --dataset glove-25-angular + run.py --host localhost --engines "*redis*" --datasets glove-25-angular # With Redis authentication docker run --rm -v $(pwd)/results:/app/results --network=host \ -e REDIS_AUTH=mypassword -e REDIS_USER=myuser \ redis/vector-db-benchmark:latest \ - 
run.py --host localhost --engines redis-default-simple --dataset random-100 + run.py --host localhost --engines redis-default-simple --datasets random-100 ``` ### Results Analysis diff --git a/README.md b/README.md index 7bd110f9..fce96fb4 100644 --- a/README.md +++ b/README.md @@ -112,13 +112,13 @@ For testing with Redis, start a Redis container first: ```bash # Start Redis container -docker run -d --name redis-test -p 6379:6379 redis:8.2-rc1-bookworm +docker run -d --name redis-test -p 6379:6379 redis:8.2-bookworm # Run benchmark against Redis docker run --rm -v $(pwd)/results:/app/results --network=host \ redis/vector-db-benchmark:latest \ - run.py --host localhost --engines redis-default-simple --dataset random-100 + run.py --host localhost --engines redis-default-simple --datasets random-100 # Or use the convenience script ./docker-run.sh -H localhost -e redis-default-simple -d random-100 @@ -221,14 +221,14 @@ Run the benchmark: ```bash # Basic usage examples -python run.py --engines redis-default-simple --dataset random-100 -python run.py --engines redis-default-simple --dataset glove-25-angular -python run.py --engines "*-m-16-*" --dataset "glove-*" +python run.py --engines redis-default-simple --datasets random-100 +python run.py --engines redis-default-simple --datasets glove-25-angular +python run.py --engines "*-m-16-*" --datasets "glove-*" # Docker usage (recommended) docker run --rm -v $(pwd)/results:/app/results --network=host \ redis/vector-db-benchmark:latest \ - run.py --host localhost --engines redis-default-simple --dataset random-100 + run.py --host localhost --engines redis-default-simple --datasets random-100 # Get help python run.py --help diff --git a/datasets/datasets.json b/datasets/datasets.json index c615fcef..75304c01 100644 --- a/datasets/datasets.json +++ b/datasets/datasets.json @@ -972,13 +972,53 @@ "vector_count": 100000, "description": "Image embeddings" }, + { + "name": "dbpedia-openai-1M-512-angular", + "vector_size": 512, + 
"distance": "cosine", + "type": "h5", + "path": "dbpedia-openai-1M-512-angular/dbpedia_openai_1M", + "link": "http://benchmarks.redislabs.s3.amazonaws.com/vecsim/dbpedia/dbpedia-openai-1M-text-embedding-3-large-512d.hdf5", + "vector_count": 1000000, + "description": "Knowledge embeddings" + }, + { + "name": "dbpedia-openai-1M-1024-angular", + "vector_size": 1024, + "distance": "cosine", + "type": "h5", + "path": "dbpedia-openai-1M-1024-angular/dbpedia_openai_1M", + "link": "http://benchmarks.redislabs.s3.amazonaws.com/vecsim/dbpedia/dbpedia-openai-1M-text-embedding-3-large-1024d.hdf5", + "vector_count": 1000000, + "description": "Knowledge embeddings" + }, { "name": "dbpedia-openai-1M-1536-angular", "vector_size": 1536, "distance": "cosine", - "type": "tar", + "type": "h5", "path": "dbpedia-openai-1M-1536-angular/dbpedia_openai_1M", - "link": "https://storage.googleapis.com/ann-filtered-benchmark/datasets/dbpedia_openai_1M.tgz", + "link": "http://benchmarks.redislabs.s3.amazonaws.com/vecsim/dbpedia/dbpedia-openai-1M-text-embedding-3-large-1536d.hdf5", + "vector_count": 1000000, + "description": "Knowledge embeddings" + }, + { + "name": "dbpedia-openai-1M-2048-angular", + "vector_size": 2048, + "distance": "cosine", + "type": "h5", + "path": "dbpedia-openai-1M-2048-angular/dbpedia_openai_1M", + "link": "http://benchmarks.redislabs.s3.amazonaws.com/vecsim/dbpedia/dbpedia-openai-1M-text-embedding-3-large-2048d.hdf5", + "vector_count": 1000000, + "description": "Knowledge embeddings" + }, + { + "name": "dbpedia-openai-1M-3072-angular", + "vector_size": 3072, + "distance": "cosine", + "type": "h5", + "path": "dbpedia-openai-1M-3072-angular/dbpedia_openai_1M", + "link": "http://benchmarks.redislabs.s3.amazonaws.com/vecsim/dbpedia/dbpedia-openai-1M-text-embedding-3-large-3072d.hdf5", "vector_count": 1000000, "description": "Knowledge embeddings" }, diff --git a/run.py b/run.py index 0f43ec68..0a1bd876 100644 --- a/run.py +++ b/run.py @@ -1,7 +1,11 @@ import fnmatch import 
traceback +import warnings from typing import List +# Suppress the pkg_resources deprecation warning emitted when stopit is imported; this filter must be registered before the import below +warnings.filterwarnings("ignore", message="pkg_resources is deprecated", category=UserWarning) + import stopit import typer