Skip to content

Commit fd59cd8

Browse files
authored
Add reporting memory usage to vectorsets upload, fix running vectorsets with --skip-search --skip-upload (#22)
* Add script for creating vectorsets configuration, do not flush database on init, make noquant the default
* Add topKs to the create-vectorsets script
* Add get_memory_usage function to vectorsets
* Add empty line
1 parent 7640df0 commit fd59cd8

File tree

3 files changed

+65
-3
lines changed

3 files changed

+65
-3
lines changed

engine/clients/vectorsets/configure.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ def __init__(self, host, collection_params: dict, connection_params: dict):
2020
self.client = redis_constructor(
2121
host=host, port=REDIS_PORT, password=REDIS_AUTH, username=REDIS_USER
2222
)
23-
self.client.flushall()
2423

2524
def clean(self):
2625
conns = [self.client]
@@ -30,7 +29,6 @@ def clean(self):
3029
for node in self.client.get_primaries()
3130
]
3231
for conn in conns:
33-
index = conn.ft()
3432
try:
3533
conn.flushall()
3634
except redis.ResponseError as e:

engine/clients/vectorsets/upload.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,15 @@ def init_client(cls, host, distance, connection_params, upload_params):
2323
cls.client = redis_constructor(
2424
host=host, port=REDIS_PORT, password=REDIS_AUTH, username=REDIS_USER
2525
)
26+
cls.client_decode = redis_constructor(
27+
host=host,
28+
port=REDIS_PORT,
29+
password=REDIS_AUTH,
30+
username=REDIS_USER,
31+
decode_responses=True,
32+
)
2633
cls.upload_params = upload_params
34+
cls._is_cluster = True if REDIS_CLUSTER else False
2735

2836
@classmethod
2937
def upload_batch(
@@ -33,7 +41,7 @@ def upload_batch(
3341
hnsw_params = upload_params.get("hnsw_config")
3442
M = hnsw_params.get("M", 16)
3543
efc = hnsw_params.get("EF_CONSTRUCTION", 200)
36-
quant = hnsw_params.get("quant")
44+
quant = hnsw_params.get("quant", "NOQUANT")
3745

3846
p = cls.client.pipeline(transaction=False)
3947
for i in range(len(ids)):
@@ -46,3 +54,18 @@ def upload_batch(
4654
@classmethod
4755
def post_upload(cls, _distance):
4856
return {}
57+
58+
def get_memory_usage(cls):
59+
used_memory = []
60+
conns = [cls.client_decode]
61+
if cls._is_cluster:
62+
conns = [
63+
cls.client_decode.get_redis_connection(node)
64+
for node in cls.client_decode.get_primaries()
65+
]
66+
for conn in conns:
67+
used_memory_shard = conn.info("memory")["used_memory"]
68+
used_memory.append(used_memory_shard)
69+
70+
return {"used_memory": sum(used_memory),
71+
"shards": len(used_memory)}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
"""Generate benchmark configuration files for the Redis vectorsets engine.

Builds one config per (M, EF_CONSTRUCTION, quant) combination, each with a
grid of search parameters over client counts, top-K values, and EF_RUNTIME,
and writes the result to ``redis-intel-vectorsets.json``.
"""
import json

# HNSW build-time hyperparameter grid.
ms = [16]
ef_constructs = [100]
ef_runtimes = [40, 80]
# quants = ["NOQUANT", "Q8", "BIN"]
quants = ["NOQUANT"]  # renamed from the 'qants' typo
configs = []
topKs = [10]

for m in ms:
    for ef_construct in ef_constructs:
        for quant in quants:
            config = {
                "name": f"redis-intel-vectorsets-m-{m}-ef-{ef_construct}-quant-{quant}",
                "engine": "vectorsets",
                "connection_params": {},
                "collection_params": {},
                "search_params": [],
                "upload_params": {
                    "parallel": 128,
                    "hnsw_config": {
                        "M": m,
                        "EF_CONSTRUCTION": ef_construct,
                        "quant": quant,
                    },
                },
            }
            # One search entry per (client count, top-K, EF_RUNTIME) combo.
            for client in [1, 8]:
                for top in topKs:
                    for ef_runtime in ef_runtimes:
                        config["search_params"].append(
                            {
                                "top": top,
                                "parallel": client,
                                "search_params": {"ef": ef_runtime},
                            }
                        )
            configs.append(config)

# Plain string: the original f-string had no placeholders (ruff F541).
fname = "redis-intel-vectorsets.json"
with open(fname, "w") as json_fd:
    json.dump(configs, json_fd, indent=2)
print(f"created {len(configs)} configs for {fname}.")

0 commit comments

Comments
 (0)