diff --git a/engine/clients/vectorsets/configure.py b/engine/clients/vectorsets/configure.py
index 5c5a06ae..95d111ba 100644
--- a/engine/clients/vectorsets/configure.py
+++ b/engine/clients/vectorsets/configure.py
@@ -20,7 +20,6 @@ def __init__(self, host, collection_params: dict, connection_params: dict):
         self.client = redis_constructor(
             host=host, port=REDIS_PORT, password=REDIS_AUTH, username=REDIS_USER
         )
-        self.client.flushall()
 
     def clean(self):
         conns = [self.client]
@@ -30,7 +29,6 @@ def clean(self):
                 for node in self.client.get_primaries()
             ]
         for conn in conns:
-            index = conn.ft()
             try:
                 conn.flushall()
             except redis.ResponseError as e:
diff --git a/engine/clients/vectorsets/upload.py b/engine/clients/vectorsets/upload.py
index ccd16dd9..aec62dfb 100644
--- a/engine/clients/vectorsets/upload.py
+++ b/engine/clients/vectorsets/upload.py
@@ -23,7 +23,16 @@ def init_client(cls, host, distance, connection_params, upload_params):
         cls.client = redis_constructor(
             host=host, port=REDIS_PORT, password=REDIS_AUTH, username=REDIS_USER
         )
+        # Separate connection with decode_responses=True so INFO replies come
+        # back as str (the upload connection stays binary for vector payloads).
+        cls.client_decode = redis_constructor(
+            host=host,
+            port=REDIS_PORT,
+            password=REDIS_AUTH,
+            username=REDIS_USER,
+            decode_responses=True,
+        )
         cls.upload_params = upload_params
+        cls._is_cluster = bool(REDIS_CLUSTER)
 
     @classmethod
     def upload_batch(
@@ -33,7 +42,7 @@ def upload_batch(
         hnsw_params = upload_params.get("hnsw_config")
         M = hnsw_params.get("M", 16)
         efc = hnsw_params.get("EF_CONSTRUCTION", 200)
-        quant = hnsw_params.get("quant")
+        quant = hnsw_params.get("quant", "NOQUANT")
 
         p = cls.client.pipeline(transaction=False)
         for i in range(len(ids)):
@@ -46,3 +55,21 @@
     @classmethod
     def post_upload(cls, _distance):
         return {}
+
+    @classmethod
+    def get_memory_usage(cls):
+        """Return total Redis used_memory in bytes, summed over all shards."""
+        used_memory = []
+        conns = [cls.client_decode]
+        if cls._is_cluster:
+            # In cluster mode, query each primary node individually.
+            conns = [
+                cls.client_decode.get_redis_connection(node)
+                for node in cls.client_decode.get_primaries()
+            ]
+        for conn in conns:
+            used_memory_shard = conn.info("memory")["used_memory"]
+            used_memory.append(used_memory_shard)
+
+        return {"used_memory": sum(used_memory),
+                "shards": len(used_memory)}
diff --git a/experiments/configurations/create-vectorsets.py b/experiments/configurations/create-vectorsets.py
new file mode 100644
index 00000000..70393e7e
--- /dev/null
+++ b/experiments/configurations/create-vectorsets.py
@@ -0,0 +1,43 @@
+import json
+
+# Sweep of HNSW build/search parameters for the Redis vectorsets engine.
+ms = [16]
+ef_constructs = [100]
+ef_runtimes = [40, 80]
+# qants = ["NOQUANT", "Q8", "BIN"]
+qants = ["NOQUANT"]
+configs = []
+topKs = [10]
+for m in ms:
+    for ef_construct in ef_constructs:
+        for quant in qants:
+            config = {
+                "name": f"redis-intel-vectorsets-m-{m}-ef-{ef_construct}-quant-{quant}",
+                "engine": "vectorsets",
+                "connection_params": {},
+                "collection_params": {},
+                "search_params": [],
+                "upload_params": {
+                    "parallel": 128,
+                    "hnsw_config": {
+                        "M": m,
+                        "EF_CONSTRUCTION": ef_construct,
+                        "quant": quant,
+                    },
+                },
+            }
+            for client in [1, 8]:
+                for top in topKs:
+                    for ef_runtime in ef_runtimes:
+                        test_config = {
+                            "top": top,
+                            "parallel": client,
+                            "search_params": {"ef": ef_runtime},
+                        }
+                        config["search_params"].append(test_config)
+            configs.append(config)
+# All generated configs go into a single JSON file.
+fname = "redis-intel-vectorsets.json"
+with open(fname, "w") as json_fd:
+    json.dump(configs, json_fd, indent=2)
+print(f"created {len(configs)} configs for {fname}.")