Skip to content

Commit fb0d120

Browse files
feat: Add CockroachDB vector database support (#630)
- UUID PRIMARY KEY for distributed write performance
- Connection pooling with 100+ base connections
- Comprehensive retry logic for transient errors (40001, 40003)
- C-SPANN vector index with tunable parameters
- CLI integration with full parameter support

Co-authored-by: Min Tian <[email protected]>
1 parent 538b884 commit fb0d120

File tree

9 files changed

+1009
-1
lines changed

9 files changed

+1009
-1
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ All the database client supported
5252
| redis | `pip install vectordb-bench[redis]` |
5353
| memorydb | `pip install vectordb-bench[memorydb]` |
5454
| chromadb | `pip install vectordb-bench[chromadb]` |
55+
| cockroachdb | `pip install vectordb-bench[cockroachdb]` |
5556
| awsopensearch | `pip install vectordb-bench[opensearch]` |
5657
| aliyun_opensearch | `pip install vectordb-bench[aliyun_opensearch]` |
5758
| mongodb | `pip install vectordb-bench[mongodb]` |
@@ -520,7 +521,7 @@ Now we can only run one task at the same time.
520521
### Code Structure
521522
![image](https://github.com/zilliztech/VectorDBBench/assets/105927039/8c06512e-5419-4381-b084-9c93aed59639)
522523
### Client
523-
Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant Cloud, Weaviate Cloud, PgVector, Redis, Chroma, etc. Stay tuned for more options, as we are consistently working on extending our reach to other systems.
524+
Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant Cloud, Weaviate Cloud, PgVector, Redis, Chroma, CockroachDB, etc. Stay tuned for more options, as we are consistently working on extending our reach to other systems.
524525
### Benchmark Cases
525526
We've developed lots of comprehensive benchmark cases to test vector databases' various capabilities, each designed to give you a different piece of the puzzle. These cases are categorized into four main types:
526527
#### Capacity Case

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ aliyun_opensearch = [ "alibabacloud_ha3engine_vector" ]
9595
mongodb = [ "pymongo" ]
9696
mariadb = [ "mariadb" ]
9797
tidb = [ "PyMySQL" ]
98+
cockroachdb = [ "psycopg[binary,pool]", "pgvector" ]
9899
clickhouse = [ "clickhouse-connect" ]
99100
vespa = [ "pyvespa" ]
100101
lancedb = [ "lancedb" ]

tests/test_cockroachdb.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
"""
2+
Tests for CockroachDB vector database client.
3+
4+
Assumes CockroachDB is running on localhost:26257.
5+
6+
To start CockroachDB locally:
7+
cockroach start-single-node --insecure --listen-addr=localhost:26257
8+
"""
9+
10+
import logging
11+
12+
import numpy as np
13+
14+
from vectordb_bench.models import DB
15+
16+
log = logging.getLogger(__name__)
17+
18+
19+
class TestCockroachDB:
    """Integration tests for the CockroachDB vector client.

    Both tests connect to the database described by their ``config`` dict
    (localhost:26257, user ``root``, database ``defaultdb``), so a running
    CockroachDB instance is required.
    """

    def test_insert_and_search(self):
        """Insert random vectors and check that a stored vector is its own top hit."""
        assert DB.CockroachDB.value == "CockroachDB"

        dbcls = DB.CockroachDB.init_cls

        # Connection config (matches your local CockroachDB instance)
        config = {
            "host": "localhost",
            "port": 26257,
            "user_name": "root",
            "password": "",
            "db_name": "defaultdb",
            "table_name": "test_cockroachdb",
        }

        # Note: sslmode=disable is handled in the client's connect_config options

        dim = 128
        count = 1000

        # Initialize CockroachDB client
        cockroachdb = dbcls(
            dim=dim,
            db_config=config,
            db_case_config=None,
            collection_name="test_cockroachdb",
            drop_old=True,
        )

        # One vectorised draw instead of count * dim scalar calls; .tolist()
        # yields plain Python floats, the same element type as before.
        embeddings = np.random.random((count, dim)).tolist()

        # Test insert
        with cockroachdb.init():
            res = cockroachdb.insert_embeddings(embeddings=embeddings, metadata=list(range(count)))

            assert res[0] == count, f"Insert count mismatch: {res[0]} != {count}"
            assert res[1] is None, f"Insert failed with error: {res[1]}"

        # Test search
        with cockroachdb.init():
            test_id = np.random.randint(count)
            q = embeddings[test_id]

            res = cockroachdb.search_embedding(query=q, k=10)

            assert len(res) > 0, "Search returned no results"
            assert res[0] == int(test_id), f"Top result {res[0]} != query id {test_id}"

        log.info("CockroachDB insert and search test passed")

    def test_search_with_filter(self):
        """Search with an integer filter and check every hit respects the threshold."""
        from vectordb_bench.backend.filter import IntFilter

        assert DB.CockroachDB.value == "CockroachDB"

        dbcls = DB.CockroachDB.init_cls

        config = {
            "host": "localhost",
            "port": 26257,
            "user_name": "root",
            "password": "",
            "db_name": "defaultdb",
            "table_name": "test_cockroachdb_filter",
        }

        dim = 128
        count = 1000
        filter_value = 0.9

        cockroachdb = dbcls(
            dim=dim,
            db_config=config,
            db_case_config=None,
            collection_name="test_cockroachdb_filter",
            drop_old=True,
        )

        embeddings = np.random.random((count, dim)).tolist()

        # Insert data
        with cockroachdb.init():
            res = cockroachdb.insert_embeddings(embeddings=embeddings, metadata=list(range(count)))
            assert res[0] == count, f"Insert count mismatch: {res[0]} != {count}"
            assert res[1] is None, f"Insert failed with error: {res[1]}"

        # Search with filter
        with cockroachdb.init():
            # Threshold id derived from the filter rate; the query vector is
            # picked from the ids at or above it.
            filter_id = int(count * filter_value)
            test_id = np.random.randint(filter_id, count)
            q = embeddings[test_id]

            filters = IntFilter(int_value=filter_id, filter_rate=filter_value)
            cockroachdb.prepare_filter(filters)

            res = cockroachdb.search_embedding(query=q, k=10)

            assert len(res) > 0, "Filtered search returned no results"
            assert res[0] == int(test_id), f"Top result {res[0]} != query id {test_id}"

            # Verify all results are >= filter_id
            for result_id in res:
                assert int(result_id) >= filter_id, f"Result {result_id} < filter threshold {filter_id}"

        log.info("CockroachDB filter test passed")

vectordb_bench/backend/clients/__init__.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ class DB(Enum):
4545
AliyunOpenSearch = "AliyunOpenSearch"
4646
MongoDB = "MongoDB"
4747
TiDB = "TiDB"
48+
CockroachDB = "CockroachDB"
4849
Clickhouse = "Clickhouse"
4950
Vespa = "Vespa"
5051
LanceDB = "LanceDB"
@@ -178,6 +179,10 @@ def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901, PLR0915
178179

179180
return TiDB
180181

182+
if self == DB.CockroachDB:
183+
from .cockroachdb.cockroachdb import CockroachDB
184+
185+
return CockroachDB
181186
if self == DB.Doris:
182187
from .doris.doris import Doris
183188

@@ -344,6 +349,10 @@ def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912, C901, PLR0915
344349

345350
return TiDBConfig
346351

352+
if self == DB.CockroachDB:
353+
from .cockroachdb.config import CockroachDBConfig
354+
355+
return CockroachDBConfig
347356
if self == DB.Doris:
348357
from .doris.config import DorisConfig
349358

@@ -491,6 +500,11 @@ def case_config_cls( # noqa: C901, PLR0911, PLR0912, PLR0915
491500

492501
return TiDBIndexConfig
493502

503+
if self == DB.CockroachDB:
504+
from .cockroachdb.config import _cockroachdb_case_config
505+
506+
return _cockroachdb_case_config.get(index_type)
507+
494508
if self == DB.Vespa:
495509
from .vespa.config import VespaHNSWConfig
496510

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
"""CLI parameter definitions for CockroachDB."""
2+
3+
from typing import Annotated, Unpack
4+
5+
import click
6+
from pydantic import SecretStr
7+
8+
from vectordb_bench.backend.clients import DB
9+
10+
from ....cli.cli import (
11+
CommonTypedDict,
12+
cli,
13+
click_parameter_decorators_from_typed_dict,
14+
get_custom_case_config,
15+
run,
16+
)
17+
18+
19+
class CockroachDBTypedDict(CommonTypedDict):
    """CLI options accepted by the CockroachDB benchmark command.

    Every field pairs the Python type of the parsed value with the
    ``click.option`` that exposes it on the command line.
    """

    # --- connection settings -------------------------------------------------
    user_name: Annotated[
        str,
        click.option(
            "--user-name",
            type=str,
            default="root",
            show_default=True,
            help="CockroachDB username",
        ),
    ]
    password: Annotated[
        str,
        click.option(
            "--password",
            type=str,
            default="",
            show_default=False,
            help="CockroachDB password",
        ),
    ]
    host: Annotated[
        str,
        click.option(
            "--host",
            type=str,
            required=True,
            help="CockroachDB host",
        ),
    ]
    port: Annotated[
        int,
        click.option(
            "--port",
            type=int,
            default=26257,
            show_default=True,
            help="CockroachDB port",
        ),
    ]
    db_name: Annotated[
        str,
        click.option(
            "--db-name",
            type=str,
            required=True,
            help="Database name",
        ),
    ]

    # --- vector index / search tuning ----------------------------------------
    min_partition_size: Annotated[
        int | None,
        click.option(
            "--min-partition-size",
            type=int,
            default=16,
            show_default=True,
            help="Minimum vectors per partition (default: 16, range: 1-1024)",
        ),
    ]
    max_partition_size: Annotated[
        int | None,
        click.option(
            "--max-partition-size",
            type=int,
            default=128,
            show_default=True,
            help="Maximum vectors per partition (default: 128, range: 4x min-4096)",
        ),
    ]
    vector_search_beam_size: Annotated[
        int | None,
        click.option(
            "--vector-search-beam-size",
            type=int,
            default=32,
            show_default=True,
            help="Partitions explored during search (default: 32)",
        ),
    ]
72+
73+
74+
@cli.command()
@click_parameter_decorators_from_typed_dict(CockroachDBTypedDict)
def CockroachDB(
    **parameters: Unpack[CockroachDBTypedDict],
):
    """Run CockroachDB vector benchmark."""
    # Imported lazily, inside the command body rather than at module level.
    from vectordb_bench.backend.clients.api import MetricType

    from .config import CockroachDBConfig, CockroachDBVectorIndexConfig

    parameters["custom_case"] = get_custom_case_config(parameters)

    # Resolve the metric: fall back to COSINE when none was supplied, and
    # turn a plain string into the enum member; anything else passes through.
    requested_metric = parameters.get("metric_type")
    if requested_metric is None:
        metric_type = MetricType.COSINE
    elif isinstance(requested_metric, str):
        metric_type = MetricType(requested_metric)
    else:
        metric_type = requested_metric

    # An empty password is forwarded as None rather than an empty secret.
    raw_password = parameters["password"]
    password = SecretStr(raw_password) if raw_password else None

    connection_config = CockroachDBConfig(
        db_label=parameters["db_label"],
        user_name=SecretStr(parameters["user_name"]),
        password=password,
        host=parameters["host"],
        port=parameters["port"],
        db_name=parameters["db_name"],
    )

    index_config = CockroachDBVectorIndexConfig(
        metric_type=metric_type,
        min_partition_size=parameters.get("min_partition_size", 16),
        max_partition_size=parameters.get("max_partition_size", 128),
        vector_search_beam_size=parameters.get("vector_search_beam_size", 32),
    )

    run(
        db=DB.CockroachDB,
        db_config=connection_config,
        db_case_config=index_config,
        **parameters,
    )

0 commit comments

Comments
 (0)