Skip to content

Add memory-usage reporting to the vectorsets upload phase; fix running vectorsets with --skip-search --skip-upload #22

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions engine/clients/vectorsets/configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ def __init__(self, host, collection_params: dict, connection_params: dict):
self.client = redis_constructor(
host=host, port=REDIS_PORT, password=REDIS_AUTH, username=REDIS_USER
)
self.client.flushall()

def clean(self):
conns = [self.client]
Expand All @@ -30,7 +29,6 @@ def clean(self):
for node in self.client.get_primaries()
]
for conn in conns:
index = conn.ft()
try:
conn.flushall()
except redis.ResponseError as e:
Expand Down
25 changes: 24 additions & 1 deletion engine/clients/vectorsets/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,15 @@ def init_client(cls, host, distance, connection_params, upload_params):
cls.client = redis_constructor(
host=host, port=REDIS_PORT, password=REDIS_AUTH, username=REDIS_USER
)
cls.client_decode = redis_constructor(
host=host,
port=REDIS_PORT,
password=REDIS_AUTH,
username=REDIS_USER,
decode_responses=True,
)
cls.upload_params = upload_params
cls._is_cluster = True if REDIS_CLUSTER else False

@classmethod
def upload_batch(
Expand All @@ -33,7 +41,7 @@ def upload_batch(
hnsw_params = upload_params.get("hnsw_config")
M = hnsw_params.get("M", 16)
efc = hnsw_params.get("EF_CONSTRUCTION", 200)
quant = hnsw_params.get("quant")
quant = hnsw_params.get("quant", "NOQUANT")

p = cls.client.pipeline(transaction=False)
for i in range(len(ids)):
Expand All @@ -46,3 +54,18 @@ def upload_batch(
@classmethod
def post_upload(cls, _distance):
    """No post-upload finalization is needed for vectorsets.

    Returns an empty stats dict so callers that merge post-upload
    metrics receive a valid (empty) mapping.
    """
    return dict()

@classmethod
def get_memory_usage(cls):
    """Report Redis memory consumption after the upload phase.

    Queries ``INFO memory`` on every primary shard — all primaries when
    running against a cluster, otherwise the single decoded connection —
    and sums the ``used_memory`` byte counts.

    Note: decorated with ``@classmethod`` for consistency with the other
    methods on this uploader (``init_client``, ``upload_batch``,
    ``post_upload``); the original took ``cls`` without the decorator.

    Returns:
        dict: ``{"used_memory": <total bytes across shards>,
                 "shards": <number of shards queried>}``
    """
    if cls._is_cluster:
        # One decode_responses connection per primary so every shard is
        # counted exactly once (replicas would double-count memory).
        conns = [
            cls.client_decode.get_redis_connection(node)
            for node in cls.client_decode.get_primaries()
        ]
    else:
        conns = [cls.client_decode]
    used_memory = [conn.info("memory")["used_memory"] for conn in conns]
    return {"used_memory": sum(used_memory), "shards": len(used_memory)}
41 changes: 41 additions & 0 deletions experiments/configurations/create-vectorsets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import json

# Grid of HNSW build parameters to sweep when generating benchmark configs.
ms = [16]
ef_constructs = [100]
ef_runtimes = [40, 80]
# Full quantization sweep would be ["NOQUANT", "Q8", "BIN"]; restricted for now.
quants = ["NOQUANT"]
configs = []
topKs = [10]
for m in ms:
    for ef_construct in ef_constructs:
        for quant in quants:
            # One benchmark configuration per (M, EF_CONSTRUCTION, quant) combo.
            config = {
                "name": f"redis-intel-vectorsets-m-{m}-ef-{ef_construct}-quant-{quant}",
                "engine": "vectorsets",
                "connection_params": {},
                "collection_params": {},
                "search_params": [],
                "upload_params": {
                    "parallel": 128,
                    "hnsw_config": {
                        "M": m,
                        "EF_CONSTRUCTION": ef_construct,
                        "quant": quant,
                    },
                },
            }
            # Cross product of client parallelism, top-K, and runtime EF
            # becomes the per-config list of search variations.
            for client in [1, 8]:
                for top in topKs:
                    for ef_runtime in ef_runtimes:
                        test_config = {
                            "top": top,
                            "parallel": client,
                            "search_params": {"ef": ef_runtime},
                        }
                        config["search_params"].append(test_config)
            configs.append(config)
# Plain string literal: the filename contains no placeholders, so no
# f-string prefix is needed (fixes an F541 lint warning).
fname = "redis-intel-vectorsets.json"
with open(fname, "w") as json_fd:
    json.dump(configs, json_fd, indent=2)
print(f"created {len(configs)} configs for {fname}.")