Skip to content

Commit fd59cd8

Browse files
authored
Add reporting memory usage to vectorsets upload, fix running vectorsets with --skip-search --skip-upload (#22)
* Add script for creating vectorsets configuration, do not flush database on init, make noquant the default
* Add topKs to the create-vectorsets script
* Add get_memory_usage function to vectorsets
* Add empty line
1 parent 7640df0 commit fd59cd8

File tree

3 files changed

+65
-3
lines changed

3 files changed

+65
-3
lines changed

engine/clients/vectorsets/configure.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ def __init__(self, host, collection_params: dict, connection_params: dict):
2020
self.client = redis_constructor(
2121
host=host, port=REDIS_PORT, password=REDIS_AUTH, username=REDIS_USER
2222
)
23-
self.client.flushall()
2423

2524
def clean(self):
2625
conns = [self.client]
@@ -30,7 +29,6 @@ def clean(self):
3029
for node in self.client.get_primaries()
3130
]
3231
for conn in conns:
33-
index = conn.ft()
3432
try:
3533
conn.flushall()
3634
except redis.ResponseError as e:

engine/clients/vectorsets/upload.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,15 @@ def init_client(cls, host, distance, connection_params, upload_params):
2323
cls.client = redis_constructor(
2424
host=host, port=REDIS_PORT, password=REDIS_AUTH, username=REDIS_USER
2525
)
26+
cls.client_decode = redis_constructor(
27+
host=host,
28+
port=REDIS_PORT,
29+
password=REDIS_AUTH,
30+
username=REDIS_USER,
31+
decode_responses=True,
32+
)
2633
cls.upload_params = upload_params
34+
cls._is_cluster = True if REDIS_CLUSTER else False
2735

2836
@classmethod
2937
def upload_batch(
@@ -33,7 +41,7 @@ def upload_batch(
3341
hnsw_params = upload_params.get("hnsw_config")
3442
M = hnsw_params.get("M", 16)
3543
efc = hnsw_params.get("EF_CONSTRUCTION", 200)
36-
quant = hnsw_params.get("quant")
44+
quant = hnsw_params.get("quant", "NOQUANT")
3745

3846
p = cls.client.pipeline(transaction=False)
3947
for i in range(len(ids)):
@@ -46,3 +54,18 @@ def upload_batch(
4654
@classmethod
4755
def post_upload(cls, _distance):
4856
return {}
57+
58+
def get_memory_usage(cls):
59+
used_memory = []
60+
conns = [cls.client_decode]
61+
if cls._is_cluster:
62+
conns = [
63+
cls.client_decode.get_redis_connection(node)
64+
for node in cls.client_decode.get_primaries()
65+
]
66+
for conn in conns:
67+
used_memory_shard = conn.info("memory")["used_memory"]
68+
used_memory.append(used_memory_shard)
69+
70+
return {"used_memory": sum(used_memory),
71+
"shards": len(used_memory)}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
"""Generate benchmark configuration files for the Redis vectorsets engine.

Builds one config per (M, EF_CONSTRUCTION, quant) combination, each with a
grid of search parameters over client counts, top-K values, and EF_RUNTIME,
and writes the result to ``redis-intel-vectorsets.json``.
"""
import json

# HNSW build-time hyperparameter grid.
ms = [16]
ef_constructs = [100]
ef_runtimes = [40, 80]
# quants = ["NOQUANT", "Q8", "BIN"]
quants = ["NOQUANT"]  # renamed from the 'qants' typo
configs = []
topKs = [10]

for m in ms:
    for ef_construct in ef_constructs:
        for quant in quants:
            config = {
                "name": f"redis-intel-vectorsets-m-{m}-ef-{ef_construct}-quant-{quant}",
                "engine": "vectorsets",
                "connection_params": {},
                "collection_params": {},
                "search_params": [],
                "upload_params": {
                    "parallel": 128,
                    "hnsw_config": {
                        "M": m,
                        "EF_CONSTRUCTION": ef_construct,
                        "quant": quant,
                    },
                },
            }
            # One search entry per (client count, top-K, EF_RUNTIME) combo.
            for client in [1, 8]:
                for top in topKs:
                    for ef_runtime in ef_runtimes:
                        config["search_params"].append(
                            {
                                "top": top,
                                "parallel": client,
                                "search_params": {"ef": ef_runtime},
                            }
                        )
            configs.append(config)

# Plain string: the original f-string had no placeholders (ruff F541).
fname = "redis-intel-vectorsets.json"
with open(fname, "w") as json_fd:
    json.dump(configs, json_fd, indent=2)
print(f"created {len(configs)} configs for {fname}.")

0 commit comments

Comments
 (0)