Skip to content

Commit e2fe39a

Browse files
feat!: Migrate Vector DB IDs to Vector Store IDs (breaking change) (#3253)
# What does this PR do?

This change migrates the VectorDB id generation to Vector Stores. This is a breaking change for **_some users_** who may have application code using the `vector_db_id` parameter in the request of the VectorDB protocol instead of the `VectorDB.identifier` in the response. By default we will now create a Vector Store every time we register a VectorDB. The caveat with this approach is that this maps the `vector_db_id` → `vector_store.name`. This is a reasonable tradeoff to transition users towards OpenAI Vector Stores. As an added benefit, registering VectorDBs will result in them appearing in the VectorStores admin UI.

### Why?

This PR makes the `POST` API call to `/v1/vector-dbs` swap the `vector_db_id` parameter in the **request body** into the VectorStore's name field and sets the `vector_db_id` to the generated vector store id (e.g., `vs_038247dd-4bbb-4dbb-a6be-d5ecfd46cfdb`). That means that users would have to do something like the following in their application code:

```python
res = client.vector_dbs.register(
    vector_db_id='my-vector-db-id',
    embedding_model='ollama/all-minilm:l6-v2',
    embedding_dimension=384,
)
vector_db_id = res.identifier
```

The rest of their code would then behave as before, including `VectorIO`'s insert protocol using `vector_db_id` in the request. An alternative implementation would be to just delete the `vector_db_id` parameter in `VectorDB`, but the end result would still require users to write `vector_db_id = res.identifier`, since `VectorStores.create()` generates the ID for you. So this approach felt like the easiest way to migrate users towards VectorStores (subsequent PRs will be added to trigger `files.create()` and `vector_stores.files.create()`).

## Test Plan

Unit tests and integration tests have been added.

Signed-off-by: Francisco Javier Arceo <[email protected]>
1 parent 64b2977 commit e2fe39a

File tree

4 files changed

+208
-48
lines changed

4 files changed

+208
-48
lines changed

llama_stack/core/routing_tables/vector_dbs.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ async def register_vector_db(
5252
provider_vector_db_id: str | None = None,
5353
vector_db_name: str | None = None,
5454
) -> VectorDB:
55-
provider_vector_db_id = provider_vector_db_id or vector_db_id
5655
if provider_id is None:
5756
if len(self.impls_by_provider_id) > 0:
5857
provider_id = list(self.impls_by_provider_id.keys())[0]
@@ -69,14 +68,33 @@ async def register_vector_db(
6968
raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding)
7069
if "embedding_dimension" not in model.metadata:
7170
raise ValueError(f"Model {embedding_model} does not have an embedding dimension")
71+
72+
provider = self.impls_by_provider_id[provider_id]
73+
logger.warning(
74+
"VectorDB is being deprecated in future releases in favor of VectorStore. Please migrate your usage accordingly."
75+
)
76+
vector_store = await provider.openai_create_vector_store(
77+
name=vector_db_name or vector_db_id,
78+
embedding_model=embedding_model,
79+
embedding_dimension=model.metadata["embedding_dimension"],
80+
provider_id=provider_id,
81+
provider_vector_db_id=provider_vector_db_id,
82+
)
83+
84+
vector_store_id = vector_store.id
85+
actual_provider_vector_db_id = provider_vector_db_id or vector_store_id
86+
logger.warning(
87+
f"Ignoring vector_db_id {vector_db_id} and using vector_store_id {vector_store_id} instead. Setting VectorDB {vector_db_id} to VectorDB.vector_db_name"
88+
)
89+
7290
vector_db_data = {
73-
"identifier": vector_db_id,
91+
"identifier": vector_store_id,
7492
"type": ResourceType.vector_db.value,
7593
"provider_id": provider_id,
76-
"provider_resource_id": provider_vector_db_id,
94+
"provider_resource_id": actual_provider_vector_db_id,
7795
"embedding_model": embedding_model,
7896
"embedding_dimension": model.metadata["embedding_dimension"],
79-
"vector_db_name": vector_db_name,
97+
"vector_db_name": vector_store.name,
8098
}
8199
vector_db = TypeAdapter(VectorDBWithOwner).validate_python(vector_db_data)
82100
await self.register_object(vector_db)

tests/integration/vector_io/test_vector_io.py

Lines changed: 45 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -47,34 +47,45 @@ def clear_registry():
4747

4848

4949
def test_vector_db_retrieve(client_with_empty_registry, embedding_model_id, embedding_dimension):
50-
# Register a memory bank first
51-
vector_db_id = "test_vector_db"
52-
client_with_empty_registry.vector_dbs.register(
53-
vector_db_id=vector_db_id,
50+
vector_db_name = "test_vector_db"
51+
register_response = client_with_empty_registry.vector_dbs.register(
52+
vector_db_id=vector_db_name,
5453
embedding_model=embedding_model_id,
5554
embedding_dimension=embedding_dimension,
5655
)
5756

57+
actual_vector_db_id = register_response.identifier
58+
5859
# Retrieve the memory bank and validate its properties
59-
response = client_with_empty_registry.vector_dbs.retrieve(vector_db_id=vector_db_id)
60+
response = client_with_empty_registry.vector_dbs.retrieve(vector_db_id=actual_vector_db_id)
6061
assert response is not None
61-
assert response.identifier == vector_db_id
62+
assert response.identifier == actual_vector_db_id
6263
assert response.embedding_model == embedding_model_id
63-
assert response.provider_resource_id == vector_db_id
64+
assert response.identifier.startswith("vs_")
6465

6566

6667
def test_vector_db_register(client_with_empty_registry, embedding_model_id, embedding_dimension):
67-
vector_db_id = "test_vector_db"
68-
client_with_empty_registry.vector_dbs.register(
69-
vector_db_id=vector_db_id,
68+
vector_db_name = "test_vector_db"
69+
response = client_with_empty_registry.vector_dbs.register(
70+
vector_db_id=vector_db_name,
7071
embedding_model=embedding_model_id,
7172
embedding_dimension=embedding_dimension,
7273
)
7374

75+
actual_vector_db_id = response.identifier
76+
assert actual_vector_db_id.startswith("vs_")
77+
assert actual_vector_db_id != vector_db_name
78+
7479
vector_dbs_after_register = [vector_db.identifier for vector_db in client_with_empty_registry.vector_dbs.list()]
75-
assert vector_dbs_after_register == [vector_db_id]
80+
assert vector_dbs_after_register == [actual_vector_db_id]
81+
82+
vector_stores = client_with_empty_registry.vector_stores.list()
83+
assert len(vector_stores.data) == 1
84+
vector_store = vector_stores.data[0]
85+
assert vector_store.id == actual_vector_db_id
86+
assert vector_store.name == vector_db_name
7687

77-
client_with_empty_registry.vector_dbs.unregister(vector_db_id=vector_db_id)
88+
client_with_empty_registry.vector_dbs.unregister(vector_db_id=actual_vector_db_id)
7889

7990
vector_dbs = [vector_db.identifier for vector_db in client_with_empty_registry.vector_dbs.list()]
8091
assert len(vector_dbs) == 0
@@ -91,20 +102,22 @@ def test_vector_db_register(client_with_empty_registry, embedding_model_id, embe
91102
],
92103
)
93104
def test_insert_chunks(client_with_empty_registry, embedding_model_id, embedding_dimension, sample_chunks, test_case):
94-
vector_db_id = "test_vector_db"
95-
client_with_empty_registry.vector_dbs.register(
96-
vector_db_id=vector_db_id,
105+
vector_db_name = "test_vector_db"
106+
register_response = client_with_empty_registry.vector_dbs.register(
107+
vector_db_id=vector_db_name,
97108
embedding_model=embedding_model_id,
98109
embedding_dimension=embedding_dimension,
99110
)
100111

112+
actual_vector_db_id = register_response.identifier
113+
101114
client_with_empty_registry.vector_io.insert(
102-
vector_db_id=vector_db_id,
115+
vector_db_id=actual_vector_db_id,
103116
chunks=sample_chunks,
104117
)
105118

106119
response = client_with_empty_registry.vector_io.query(
107-
vector_db_id=vector_db_id,
120+
vector_db_id=actual_vector_db_id,
108121
query="What is the capital of France?",
109122
)
110123
assert response is not None
@@ -113,7 +126,7 @@ def test_insert_chunks(client_with_empty_registry, embedding_model_id, embedding
113126

114127
query, expected_doc_id = test_case
115128
response = client_with_empty_registry.vector_io.query(
116-
vector_db_id=vector_db_id,
129+
vector_db_id=actual_vector_db_id,
117130
query=query,
118131
)
119132
assert response is not None
@@ -128,13 +141,15 @@ def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, e
128141
"remote::qdrant": {"score_threshold": -1.0},
129142
"inline::qdrant": {"score_threshold": -1.0},
130143
}
131-
vector_db_id = "test_precomputed_embeddings_db"
132-
client_with_empty_registry.vector_dbs.register(
133-
vector_db_id=vector_db_id,
144+
vector_db_name = "test_precomputed_embeddings_db"
145+
register_response = client_with_empty_registry.vector_dbs.register(
146+
vector_db_id=vector_db_name,
134147
embedding_model=embedding_model_id,
135148
embedding_dimension=embedding_dimension,
136149
)
137150

151+
actual_vector_db_id = register_response.identifier
152+
138153
chunks_with_embeddings = [
139154
Chunk(
140155
content="This is a test chunk with precomputed embedding.",
@@ -144,13 +159,13 @@ def test_insert_chunks_with_precomputed_embeddings(client_with_empty_registry, e
144159
]
145160

146161
client_with_empty_registry.vector_io.insert(
147-
vector_db_id=vector_db_id,
162+
vector_db_id=actual_vector_db_id,
148163
chunks=chunks_with_embeddings,
149164
)
150165

151166
provider = [p.provider_id for p in client_with_empty_registry.providers.list() if p.api == "vector_io"][0]
152167
response = client_with_empty_registry.vector_io.query(
153-
vector_db_id=vector_db_id,
168+
vector_db_id=actual_vector_db_id,
154169
query="precomputed embedding test",
155170
params=vector_io_provider_params_dict.get(provider, None),
156171
)
@@ -173,13 +188,15 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
173188
"remote::qdrant": {"score_threshold": 0.0},
174189
"inline::qdrant": {"score_threshold": 0.0},
175190
}
176-
vector_db_id = "test_precomputed_embeddings_db"
177-
client_with_empty_registry.vector_dbs.register(
178-
vector_db_id=vector_db_id,
191+
vector_db_name = "test_precomputed_embeddings_db"
192+
register_response = client_with_empty_registry.vector_dbs.register(
193+
vector_db_id=vector_db_name,
179194
embedding_model=embedding_model_id,
180195
embedding_dimension=embedding_dimension,
181196
)
182197

198+
actual_vector_db_id = register_response.identifier
199+
183200
chunks_with_embeddings = [
184201
Chunk(
185202
content="duplicate",
@@ -189,13 +206,13 @@ def test_query_returns_valid_object_when_identical_to_embedding_in_vdb(
189206
]
190207

191208
client_with_empty_registry.vector_io.insert(
192-
vector_db_id=vector_db_id,
209+
vector_db_id=actual_vector_db_id,
193210
chunks=chunks_with_embeddings,
194211
)
195212

196213
provider = [p.provider_id for p in client_with_empty_registry.providers.list() if p.api == "vector_io"][0]
197214
response = client_with_empty_registry.vector_io.query(
198-
vector_db_id=vector_db_id,
215+
vector_db_id=actual_vector_db_id,
199216
query="duplicate",
200217
params=vector_io_provider_params_dict.get(provider, None),
201218
)

tests/unit/distribution/routers/test_routing_tables.py

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,20 @@ async def register_vector_db(self, vector_db: VectorDB):
146146
async def unregister_vector_db(self, vector_db_id: str):
147147
return vector_db_id
148148

149+
async def openai_create_vector_store(self, **kwargs):
150+
import time
151+
import uuid
152+
153+
from llama_stack.apis.vector_io.vector_io import VectorStoreFileCounts, VectorStoreObject
154+
155+
vector_store_id = kwargs.get("provider_vector_db_id") or f"vs_{uuid.uuid4()}"
156+
return VectorStoreObject(
157+
id=vector_store_id,
158+
name=kwargs.get("name", vector_store_id),
159+
created_at=int(time.time()),
160+
file_counts=VectorStoreFileCounts(completed=0, cancelled=0, failed=0, in_progress=0, total=0),
161+
)
162+
149163

150164
async def test_models_routing_table(cached_disk_dist_registry):
151165
table = ModelsRoutingTable({"test_provider": InferenceImpl()}, cached_disk_dist_registry, {})
@@ -247,17 +261,21 @@ async def test_vectordbs_routing_table(cached_disk_dist_registry):
247261
)
248262

249263
# Register multiple vector databases and verify listing
250-
await table.register_vector_db(vector_db_id="test-vectordb", embedding_model="test_provider/test-model")
251-
await table.register_vector_db(vector_db_id="test-vectordb-2", embedding_model="test_provider/test-model")
264+
vdb1 = await table.register_vector_db(vector_db_id="test-vectordb", embedding_model="test_provider/test-model")
265+
vdb2 = await table.register_vector_db(vector_db_id="test-vectordb-2", embedding_model="test_provider/test-model")
252266
vector_dbs = await table.list_vector_dbs()
253267

254268
assert len(vector_dbs.data) == 2
255269
vector_db_ids = {v.identifier for v in vector_dbs.data}
256-
assert "test-vectordb" in vector_db_ids
257-
assert "test-vectordb-2" in vector_db_ids
270+
assert vdb1.identifier in vector_db_ids
271+
assert vdb2.identifier in vector_db_ids
272+
273+
# Verify they have UUID-based identifiers
274+
assert vdb1.identifier.startswith("vs_")
275+
assert vdb2.identifier.startswith("vs_")
258276

259-
await table.unregister_vector_db(vector_db_id="test-vectordb")
260-
await table.unregister_vector_db(vector_db_id="test-vectordb-2")
277+
await table.unregister_vector_db(vector_db_id=vdb1.identifier)
278+
await table.unregister_vector_db(vector_db_id=vdb2.identifier)
261279

262280
vector_dbs = await table.list_vector_dbs()
263281
assert len(vector_dbs.data) == 0

0 commit comments

Comments
 (0)