diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index f26c4858b..c38add1ba 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -20,12 +20,12 @@ concurrency: env: WEAVIATE_125: 1.25.34 WEAVIATE_126: 1.26.17 - WEAVIATE_127: 1.27.14 - WEAVIATE_128: 1.28.8 - WEAVIATE_129: 1.29.8 - WEAVIATE_130: 1.30.9 + WEAVIATE_127: 1.27.27 + WEAVIATE_128: 1.28.16 + WEAVIATE_129: 1.29.9 + WEAVIATE_130: 1.30.11 WEAVIATE_131: 1.31.5 - WEAVIATE_132: 1.32.0-rc.0-6eb89d6.amd64 + WEAVIATE_132: 1.32.0-rc.1-098c594 jobs: lint-and-format: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 57a480d17..4b897b920 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -49,4 +49,4 @@ repos: language: node pass_filenames: false types: [python] - additional_dependencies: [pyright@1.1.398] + additional_dependencies: [pyright@1.1.400] diff --git a/docs/conf.py b/docs/conf.py index 2b267a2f9..775892d59 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -6,7 +6,6 @@ import contextlib import os import sys - from importlib.metadata import PackageNotFoundError from importlib.metadata import version as version_func diff --git a/integration/conftest.py b/integration/conftest.py index 01e44e2c2..a1014a871 100644 --- a/integration/conftest.py +++ b/integration/conftest.py @@ -28,6 +28,7 @@ _ReferencePropertyBase, _ReplicationConfigCreate, _RerankerProvider, + _VectorConfigCreate, _VectorIndexConfigCreate, _VectorizerConfigCreate, ) @@ -58,6 +59,9 @@ def __call__( vector_index_config: Optional[_VectorIndexConfigCreate] = None, description: Optional[str] = None, reranker_config: Optional[_RerankerProvider] = None, + vector_config: Optional[ + Optional[Union[_VectorConfigCreate, List[_VectorConfigCreate]]] + ] = None, ) -> Collection[Any, Any]: """Typing for fixture.""" ... 
@@ -129,6 +133,9 @@ def _factory( vector_index_config: Optional[_VectorIndexConfigCreate] = None, description: Optional[str] = None, reranker_config: Optional[_RerankerProvider] = None, + vector_config: Optional[ + Optional[Union[_VectorConfigCreate, List[_VectorConfigCreate]]] + ] = None, ) -> Collection[Any, Any]: try: nonlocal client_fixture, name_fixtures, call_counter # noqa: F824 @@ -148,7 +155,8 @@ def _factory( collection: Collection[Any, Any] = client_fixture.collections.create( name=name_fixture, description=description, - vectorizer_config=vectorizer_config or Configure.Vectorizer.none(), + vectorizer_config=vectorizer_config + or (Configure.Vectorizer.none() if vector_config is None else None), properties=properties, references=references, inverted_index_config=inverted_index_config, @@ -159,6 +167,7 @@ def _factory( replication_config=replication_config, vector_index_config=vector_index_config, reranker_config=reranker_config, + vector_config=vector_config, ) return collection except Exception as e: @@ -315,6 +324,9 @@ def __call__( vectorizer_config: Optional[ Union[_VectorizerConfigCreate, List[_NamedVectorConfigCreate]] ] = None, + vector_config: Optional[ + Optional[Union[_VectorConfigCreate, List[_VectorConfigCreate]]] + ] = None, ) -> Collection[Any, Any]: """Typing for fixture.""" ... 
@@ -329,17 +341,18 @@ def _factory( vectorizer_config: Optional[ Union[_VectorizerConfigCreate, List[_NamedVectorConfigCreate]] ] = None, + vector_config: Optional[ + Optional[Union[_VectorConfigCreate, List[_VectorConfigCreate]]] + ] = None, ) -> Collection[Any, Any]: api_key = os.environ.get("OPENAI_APIKEY") if api_key is None: pytest.skip("No OpenAI API key found.") - if vectorizer_config is None: - vectorizer_config = Configure.Vectorizer.none() - collection = collection_factory( name=name, - vectorizer_config=vectorizer_config or Configure.Vectorizer.none(), + vectorizer_config=vectorizer_config, + vector_config=vector_config or Configure.Vectors.self_provided(), properties=[ Property(name="text", data_type=DataType.TEXT), Property(name="content", data_type=DataType.TEXT), diff --git a/integration/test_client.py b/integration/test_client.py index cb9bda516..cd368d794 100644 --- a/integration/test_client.py +++ b/integration/test_client.py @@ -1,4 +1,4 @@ -from typing import Callable, Generator, Tuple, Union +from typing import Callable, Generator, Optional, Tuple, Union import pytest from _pytest.fixtures import SubRequest @@ -28,7 +28,7 @@ @pytest.fixture(scope="module") def client_factory() -> Generator[Callable[[int, int], weaviate.WeaviateClient], None, None]: - client: weaviate.WeaviateClient = None + client: Optional[weaviate.WeaviateClient] = None def maker(http: int, grpc: int) -> weaviate.WeaviateClient: nonlocal client @@ -593,7 +593,7 @@ async def test_async_client_with_extra_options() -> None: def test_client_error_for_wcs_without_auth() -> None: with pytest.raises(weaviate.exceptions.AuthenticationFailedError) as e: - weaviate.connect_to_wcs(cluster_url=WCS_URL, auth_credentials=None) + weaviate.connect_to_wcs(cluster_url=WCS_URL, auth_credentials=None) # pyright: ignore assert "wvc.init.Auth.api_key" in e.value.message diff --git a/integration/test_collection.py b/integration/test_collection.py index 06d08a426..561dbd597 100644 --- 
a/integration/test_collection.py +++ b/integration/test_collection.py @@ -7,7 +7,11 @@ import pytest -from integration.conftest import CollectionFactory, CollectionFactoryGet, _sanitize_collection_name +from integration.conftest import ( + CollectionFactory, + CollectionFactoryGet, + _sanitize_collection_name, +) from integration.constants import WEAVIATE_LOGO_NEW_ENCODED, WEAVIATE_LOGO_OLD_ENCODED from weaviate.collections.classes.batch import ErrorObject from weaviate.collections.classes.config import ( @@ -40,7 +44,11 @@ ReferenceToMulti, _CrossReference, ) -from weaviate.collections.classes.types import PhoneNumber, WeaviateProperties, _PhoneNumber +from weaviate.collections.classes.types import ( + PhoneNumber, + WeaviateProperties, + _PhoneNumber, +) from weaviate.exceptions import ( UnexpectedStatusCodeError, WeaviateInsertInvalidPropertyError, @@ -71,7 +79,7 @@ class TestInsert(TypedDict): dummy = collection_factory( properties=[Property(name="Name", data_type=DataType.TEXT)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) collection = collection_factory_get(dummy.name, TestInsert) uuid = collection.data.insert(properties=TestInsert(name="some name")) @@ -87,7 +95,7 @@ def test_insert_with_dict_generic( ) -> None: dummy = collection_factory( properties=[Property(name="Name", data_type=DataType.TEXT)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) collection = collection_factory_get(dummy.name, Dict[str, str]) uuid = collection.data.insert(properties={"name": "some name"}) @@ -100,7 +108,7 @@ def test_insert_with_dict_generic( def test_insert_with_no_generic(collection_factory: CollectionFactory) -> None: collection = collection_factory( properties=[Property(name="Name", data_type=DataType.TEXT)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) uuid = collection.data.insert(properties={"name": 
"some name"}) objects = collection.query.fetch_objects() @@ -112,7 +120,7 @@ def test_insert_with_no_generic(collection_factory: CollectionFactory) -> None: def test_insert_with_consistency_level(collection_factory: CollectionFactory) -> None: collection = collection_factory( properties=[Property(name="Name", data_type=DataType.TEXT)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ).with_consistency_level(ConsistencyLevel.ALL) uuid = collection.data.insert(properties={"name": "some name"}) objects = collection.query.fetch_objects() @@ -134,7 +142,7 @@ def __init__(self) -> None: def test_delete_by_id(collection_factory: CollectionFactory) -> None: collection = collection_factory( - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) uuid = collection.data.insert(properties={}) @@ -145,7 +153,7 @@ def test_delete_by_id(collection_factory: CollectionFactory) -> None: def test_delete_by_id_consistency_level(collection_factory: CollectionFactory) -> None: collection = collection_factory( - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ).with_consistency_level(ConsistencyLevel.ALL) uuid = collection.data.insert(properties={}) @@ -224,7 +232,7 @@ def test_insert_many( ) -> None: collection = collection_factory( properties=[Property(name="Name", data_type=DataType.TEXT)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) if not should_error: ret = collection.data.insert_many(objects) @@ -258,7 +266,7 @@ def test_insert_many_all_error( ) -> None: collection = collection_factory( properties=[Property(name="Name", data_type=DataType.TEXT)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), multi_tenancy_config=Configure.multi_tenancy(True), ) with pytest.raises(WeaviateInsertManyAllFailedError) as e: @@ -278,7 +286,7 @@ 
class TestInsertManyWithTypedDict(TypedDict): dummy = collection_factory( properties=[Property(name="Name", data_type=DataType.TEXT)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) collection = collection_factory_get(dummy.name, TestInsertManyWithTypedDict) ret = collection.data.insert_many( @@ -297,7 +305,7 @@ class TestInsertManyWithTypedDict(TypedDict): def test_insert_many_with_refs(collection_factory: CollectionFactory) -> None: ref_collection = collection_factory( - name="target", vectorizer_config=Configure.Vectorizer.none() + name="target", vector_config=Configure.Vectors.self_provided() ) uuid_to1 = ref_collection.data.insert(properties={}) uuid_to2 = ref_collection.data.insert(properties={}) @@ -306,7 +314,7 @@ def test_insert_many_with_refs(collection_factory: CollectionFactory) -> None: name="source", properties=[Property(name="Name", data_type=DataType.TEXT)], references=[ReferenceProperty(name="ref_single", target_collection=ref_collection.name)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) collection.config.add_reference( ReferenceProperty.MultiTarget( @@ -377,7 +385,7 @@ def test_insert_many_with_refs(collection_factory: CollectionFactory) -> None: def test_insert_many_error(collection_factory: CollectionFactory) -> None: collection = collection_factory( properties=[Property(name="Name", data_type=DataType.TEXT)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) ret = collection.data.insert_many( [ @@ -403,7 +411,7 @@ def test_insert_many_error(collection_factory: CollectionFactory) -> None: def test_replace(collection_factory: CollectionFactory) -> None: collection = collection_factory( properties=[Property(name="Name", data_type=DataType.TEXT)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) uuid = 
collection.data.insert(properties={"name": "some name"}) collection.data.replace(properties={"name": "other name"}, uuid=uuid) @@ -413,7 +421,7 @@ def test_replace(collection_factory: CollectionFactory) -> None: @pytest.mark.parametrize("to_uuids", [UUID3, [UUID3]]) def test_replace_with_refs(collection_factory: CollectionFactory, to_uuids: UUIDS) -> None: ref_collection = collection_factory( - name="target", vectorizer_config=Configure.Vectorizer.none() + name="target", vector_config=Configure.Vectors.self_provided() ) ref_collection.data.insert(properties={}, uuid=UUID1) ref_collection.data.insert(properties={}, uuid=UUID2) @@ -423,7 +431,7 @@ def test_replace_with_refs(collection_factory: CollectionFactory, to_uuids: UUID name="source", properties=[Property(name="Name", data_type=DataType.TEXT)], references=[ReferenceProperty(name="ref", target_collection=ref_collection.name)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) uuid = collection.data.insert( properties={"name": "some name"}, @@ -453,7 +461,7 @@ def test_replace_with_refs(collection_factory: CollectionFactory, to_uuids: UUID def test_replace_overwrites_vector(collection_factory: CollectionFactory) -> None: collection = collection_factory( properties=[Property(name="Name", data_type=DataType.TEXT)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) uuid = collection.data.insert(properties={"name": "some name"}, vector=[1, 2, 3]) obj = collection.query.fetch_object_by_id(uuid, include_vector=True) @@ -463,7 +471,10 @@ def test_replace_overwrites_vector(collection_factory: CollectionFactory) -> Non collection.data.replace(properties={"name": "other name"}, uuid=uuid) obj = collection.query.fetch_object_by_id(uuid, include_vector=True) assert obj.properties["name"] == "other name" - assert "default" not in obj.vector + if collection._connection._weaviate_version.is_lower_than(1, 27, 0): + assert 
len(obj.vector["default"]) == 0 + else: + assert "default" not in obj.vector collection.data.replace(properties={"name": "real name"}, uuid=uuid, vector=[2, 3, 4]) obj = collection.query.fetch_object_by_id(uuid, include_vector=True) @@ -474,7 +485,7 @@ def test_replace_overwrites_vector(collection_factory: CollectionFactory) -> Non @pytest.mark.parametrize("to_uuids", [UUID3, [UUID3]]) def test_update_with_refs(collection_factory: CollectionFactory, to_uuids: UUIDS) -> None: ref_collection = collection_factory( - name="target", vectorizer_config=Configure.Vectorizer.none() + name="target", vector_config=Configure.Vectors.self_provided() ) ref_collection.data.insert(properties={}, uuid=UUID1) ref_collection.data.insert(properties={}, uuid=UUID2) @@ -484,7 +495,7 @@ def test_update_with_refs(collection_factory: CollectionFactory, to_uuids: UUIDS name="source", properties=[Property(name="Name", data_type=DataType.TEXT)], references=[ReferenceProperty(name="ref", target_collection=ref_collection.name)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) uuid = collection.data.insert( properties={"name": "some name"}, @@ -528,7 +539,7 @@ def test_types(collection_factory: CollectionFactory, data_type: DataType, value name = "name" collection = collection_factory( properties=[Property(name=name, data_type=data_type)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) uuid_object = collection.data.insert(properties={name: value}) @@ -545,7 +556,7 @@ def test_types(collection_factory: CollectionFactory, data_type: DataType, value def test_bm25(collection_factory: CollectionFactory) -> None: collection = collection_factory( properties=[Property(name="Name", data_type=DataType.TEXT)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) res = collection.data.insert_many( @@ -562,7 +573,7 @@ def test_bm25(collection_factory: 
CollectionFactory) -> None: def test_bm25_group_by(collection_factory: CollectionFactory) -> None: collection = collection_factory( properties=[Property(name="Name", data_type=DataType.TEXT)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) res = collection.data.insert_many( @@ -590,7 +601,7 @@ def test_bm25_group_by(collection_factory: CollectionFactory) -> None: def test_bm25_limit(collection_factory: CollectionFactory, limit: int) -> None: collection = collection_factory( properties=[Property(name="Name", data_type=DataType.TEXT, tokenization=Tokenization.WORD)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) res = collection.data.insert_many( @@ -608,7 +619,7 @@ def test_bm25_limit(collection_factory: CollectionFactory, limit: int) -> None: def test_bm25_offset(collection_factory: CollectionFactory, offset: int, expected: int) -> None: collection = collection_factory( properties=[Property(name="Name", data_type=DataType.TEXT, tokenization=Tokenization.WORD)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) res = collection.data.insert_many( @@ -627,7 +638,7 @@ def test_bm25_offset(collection_factory: CollectionFactory, offset: int, expecte def test_fetch_objects_offset(collection_factory: CollectionFactory, offset: int) -> None: collection = collection_factory( properties=[Property(name="Name", data_type=DataType.TEXT)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) nr_objects = 5 @@ -642,7 +653,7 @@ def test_fetch_objects_offset(collection_factory: CollectionFactory, offset: int def test_fetch_objects_limit(collection_factory: CollectionFactory, limit: int) -> None: collection = collection_factory( properties=[Property(name="Name", data_type=DataType.TEXT)], - vectorizer_config=Configure.Vectorizer.none(), + 
vector_config=Configure.Vectors.self_provided(), ) for i in range(5): collection.data.insert({"Name": str(i)}) @@ -653,7 +664,7 @@ def test_fetch_objects_limit(collection_factory: CollectionFactory, limit: int) def test_search_after(collection_factory: CollectionFactory) -> None: collection = collection_factory( properties=[Property(name="Name", data_type=DataType.TEXT)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) nr_objects = 10 @@ -669,7 +680,7 @@ def test_search_after(collection_factory: CollectionFactory) -> None: def test_auto_limit(collection_factory: CollectionFactory) -> None: collection = collection_factory( properties=[Property(name="Name", data_type=DataType.TEXT)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), inverted_index_config=Configure.inverted_index(), ) for _ in range(4): @@ -702,7 +713,7 @@ def test_query_properties(collection_factory: CollectionFactory) -> None: Property(name="Name", data_type=DataType.TEXT), Property(name="Age", data_type=DataType.INT), ], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) collection.data.insert({"Name": "rain", "Age": 1}) collection.data.insert({"Name": "sun", "Age": 2}) @@ -728,9 +739,7 @@ def test_query_properties(collection_factory: CollectionFactory) -> None: def test_near_object(collection_factory: CollectionFactory) -> None: collection = collection_factory( properties=[Property(name="Name", data_type=DataType.TEXT)], - vectorizer_config=Configure.Vectorizer.text2vec_contextionary( - vectorize_collection_name=False - ), + vector_config=Configure.Vectors.text2vec_contextionary(vectorize_collection_name=False), ) uuid_banana = collection.data.insert({"Name": "Banana"}) collection.data.insert({"Name": "Fruit"}) @@ -756,9 +765,7 @@ def test_near_object(collection_factory: CollectionFactory) -> None: def test_near_object_limit(collection_factory: 
CollectionFactory) -> None: collection = collection_factory( properties=[Property(name="Name", data_type=DataType.TEXT)], - vectorizer_config=Configure.Vectorizer.text2vec_contextionary( - vectorize_collection_name=False - ), + vector_config=Configure.Vectors.text2vec_contextionary(vectorize_collection_name=False), ) uuid_banana = collection.data.insert({"Name": "Banana"}) uuid_fruit = collection.data.insert({"Name": "Fruit"}) @@ -776,9 +783,7 @@ def test_near_object_limit(collection_factory: CollectionFactory) -> None: def test_near_object_offset(collection_factory: CollectionFactory) -> None: collection = collection_factory( properties=[Property(name="Name", data_type=DataType.TEXT)], - vectorizer_config=Configure.Vectorizer.text2vec_contextionary( - vectorize_collection_name=False - ), + vector_config=Configure.Vectors.text2vec_contextionary(vectorize_collection_name=False), ) uuid_banana = collection.data.insert({"Name": "Banana"}) uuid_fruit = collection.data.insert({"Name": "Fruit"}) @@ -798,9 +803,7 @@ def test_near_object_group_by_argument(collection_factory: CollectionFactory) -> Property(name="Name", data_type=DataType.TEXT), Property(name="Count", data_type=DataType.INT), ], - vectorizer_config=Configure.Vectorizer.text2vec_contextionary( - vectorize_collection_name=False - ), + vector_config=Configure.Vectors.text2vec_contextionary(vectorize_collection_name=False), ) uuid_banana1 = collection.data.insert({"Name": "Banana", "Count": 51}) collection.data.insert({"Name": "Banana", "Count": 72}) @@ -820,14 +823,14 @@ def test_near_object_group_by_argument(collection_factory: CollectionFactory) -> assert len(ret.objects) == 4 assert ret.objects[0].belongs_to_group == "Banana" assert ret.objects[1].belongs_to_group == "Banana" - assert ret.objects[2].belongs_to_group == "car" - assert ret.objects[3].belongs_to_group == "Mountain" + assert ret.objects[2].belongs_to_group == "Mountain" + assert ret.objects[3].belongs_to_group == "car" def 
test_multi_searches(collection_factory: CollectionFactory) -> None: collection = collection_factory( properties=[Property(name="name", data_type=DataType.TEXT)], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) collection.data.insert(properties={"name": "word"}) @@ -854,7 +857,7 @@ def test_multi_searches(collection_factory: CollectionFactory) -> None: def test_fetch_objects_with_limit(collection_factory: CollectionFactory) -> None: collection = collection_factory( - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), properties=[Property(name="name", data_type=DataType.TEXT)], ) @@ -867,7 +870,7 @@ def test_fetch_objects_with_limit(collection_factory: CollectionFactory) -> None def test_add_property(collection_factory: CollectionFactory) -> None: collection = collection_factory( - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), properties=[Property(name="name", data_type=DataType.TEXT)], ) uuid1 = collection.data.insert({"name": "first"}) @@ -882,9 +885,7 @@ def test_add_property(collection_factory: CollectionFactory) -> None: def test_add_property_with_vectorizer(collection_factory: CollectionFactory) -> None: collection = collection_factory( - vectorizer_config=Configure.Vectorizer.text2vec_contextionary( - vectorize_collection_name=False - ), + vector_config=Configure.Vectors.text2vec_contextionary(vectorize_collection_name=False), properties=[Property(name="name", data_type=DataType.TEXT)], ) collection.config.add_property( @@ -924,7 +925,7 @@ def test_add_property_with_vectorizer(collection_factory: CollectionFactory) -> def test_add_reference(collection_factory: CollectionFactory) -> None: collection = collection_factory( - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), properties=[Property(name="name", data_type=DataType.TEXT)], ) uuid1 = 
collection.data.insert({"name": "first"}) @@ -946,7 +947,7 @@ def test_add_reference(collection_factory: CollectionFactory) -> None: def test_collection_config_get(collection_factory: CollectionFactory) -> None: collection = collection_factory( - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), properties=[ Property(name="name", data_type=DataType.TEXT), Property(name="age", data_type=DataType.INT), @@ -959,7 +960,9 @@ def test_collection_config_get(collection_factory: CollectionFactory) -> None: assert config.properties[0].data_type == DataType.TEXT assert config.properties[1].name == "age" assert config.properties[1].data_type == DataType.INT - assert config.vectorizer == Vectorizers.NONE + assert config.vectorizer is None + assert config.vector_config is not None + assert config.vector_config["default"].vectorizer.vectorizer == Vectorizers.NONE @pytest.mark.parametrize("return_properties", [None, [], ["name"], False, True]) @@ -991,7 +994,7 @@ def test_return_properties_metadata_references_combos( include_vector: bool, ) -> None: collection = collection_factory( - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), properties=[ Property(name="name", data_type=DataType.TEXT), Property(name="age", data_type=DataType.INT), @@ -1067,7 +1070,7 @@ def test_insert_date_property( collection_factory: CollectionFactory, hours: int, minutes: int, sign: int ) -> None: collection = collection_factory( - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), properties=[Property(name="date", data_type=DataType.DATE)], ) @@ -1088,7 +1091,7 @@ def test_insert_date_property( def test_exist(collection_factory: CollectionFactory) -> None: collection = collection_factory( - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) uuid1 = collection.data.insert({}) @@ -1101,7 +1104,7 @@ def 
test_return_list_properties(collection_factory: CollectionFactory) -> None: name_small = "TestReturnList" collection = collection_factory( name=name_small, - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), properties=[ Property(name="ints", data_type=DataType.INT_ARRAY), Property(name="floats", data_type=DataType.NUMBER_ARRAY), @@ -1143,9 +1146,7 @@ def test_near_text( return_properties: Optional[PROPERTIES], ) -> None: collection = collection_factory( - vectorizer_config=Configure.Vectorizer.text2vec_contextionary( - vectorize_collection_name=False - ), + vector_config=Configure.Vectors.text2vec_contextionary(vectorize_collection_name=False), properties=[Property(name="value", data_type=DataType.TEXT)], ) @@ -1176,7 +1177,7 @@ def test_near_text( def test_near_text_error(collection_factory: CollectionFactory) -> None: collection = collection_factory( - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) with pytest.raises(ValueError): @@ -1184,10 +1185,14 @@ def test_near_text_error(collection_factory: CollectionFactory) -> None: def test_near_text_group_by_argument(collection_factory: CollectionFactory) -> None: + dummy = collection_factory("dummy") + if dummy._connection._weaviate_version.is_lower_than(1, 27, 0): + pytest.skip( + 'Vectors not passed to GroupByHit in servers for versions < 1.27.0. Therefore `assert "default" in ret.objects[i].vector` fails always.' 
+ ) + collection = collection_factory( - vectorizer_config=Configure.Vectorizer.text2vec_contextionary( - vectorize_collection_name=False - ), + vector_config=Configure.Vectors.text2vec_contextionary(vectorize_collection_name=False), properties=[Property(name="value", data_type=DataType.TEXT)], ) @@ -1222,9 +1227,7 @@ def test_near_text_group_by_argument(collection_factory: CollectionFactory) -> N def test_near_text_limit(collection_factory: CollectionFactory) -> None: collection = collection_factory( - vectorizer_config=Configure.Vectorizer.text2vec_contextionary( - vectorize_collection_name=False - ), + vector_config=Configure.Vectors.text2vec_contextionary(vectorize_collection_name=False), properties=[Property(name="value", data_type=DataType.TEXT)], ) @@ -1252,9 +1255,7 @@ def test_near_text_limit(collection_factory: CollectionFactory) -> None: def test_near_text_offset(collection_factory: CollectionFactory) -> None: collection = collection_factory( - vectorizer_config=Configure.Vectorizer.text2vec_contextionary( - vectorize_collection_name=False - ), + vector_config=Configure.Vectors.text2vec_contextionary(vectorize_collection_name=False), properties=[Property(name="value", data_type=DataType.TEXT)], ) @@ -1302,7 +1303,7 @@ def test_near_image( certainty: Optional[float], ) -> None: collection = collection_factory( - vectorizer_config=Configure.Vectorizer.img2vec_neural(image_fields=["imageProp"]), + vector_config=Configure.Vectors.img2vec_neural(image_fields=["imageProp"]), properties=[ Property(name="imageProp", data_type=DataType.BLOB), ], @@ -1359,7 +1360,7 @@ def test_near_media( certainty: Optional[float], ) -> None: collection = collection_factory( - vectorizer_config=Configure.Vectorizer.img2vec_neural(image_fields=["imageProp"]), + vector_config=Configure.Vectors.img2vec_neural(image_fields=["imageProp"]), properties=[ Property(name="imageProp", data_type=DataType.BLOB), ], @@ -1407,7 +1408,7 @@ def 
test_return_properties_with_query_specific_typed_dict( collection_factory: CollectionFactory, which_case: int ) -> None: collection = collection_factory( - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), properties=[ Property(name="int_", data_type=DataType.INT), Property(name="ints", data_type=DataType.INT_ARRAY), @@ -1469,7 +1470,7 @@ class _Data(TypedDict): ints: List[int] collection = collection_factory( - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), properties=[ Property(name="int_", data_type=DataType.INT), Property(name="ints", data_type=DataType.INT_ARRAY), @@ -1494,7 +1495,7 @@ class _Data(TypedDict): int_: int collection = collection_factory( - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), properties=[ Property(name="int_", data_type=DataType.INT), Property(name="ints", data_type=DataType.INT_ARRAY), @@ -1515,7 +1516,7 @@ class _Data(TypedDict): def test_batch_with_arrays(collection_factory: CollectionFactory) -> None: collection = collection_factory( - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), properties=[ Property(name="texts", data_type=DataType.TEXT_ARRAY), Property(name="ints", data_type=DataType.INT_ARRAY), @@ -1581,7 +1582,7 @@ def test_sort( expected: List[int], ) -> None: collection = collection_factory( - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), properties=[ Property(name="age", data_type=DataType.INT), Property(name="name", data_type=DataType.TEXT), @@ -1607,7 +1608,7 @@ def test_sort( def test_optional_ref_returns(collection_factory: CollectionFactory) -> None: ref_collection = collection_factory( name="target", - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), properties=[Property(name="text", data_type=DataType.TEXT)], ) uuid_to1 
= ref_collection.data.insert(properties={"text": "ref text"}) @@ -1617,7 +1618,7 @@ def test_optional_ref_returns(collection_factory: CollectionFactory) -> None: references=[ ReferenceProperty(name="ref", target_collection=ref_collection.name), ], - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), ) collection.data.insert({}, references={"ref": uuid_to1}) @@ -1636,7 +1637,7 @@ def test_return_properties_with_type_hint_generic( value: str, ) -> None: dummy = collection_factory( - vectorizer_config=Configure.Vectorizer.none(), + vector_config=Configure.Vectors.self_provided(), properties=[ Property(name="name", data_type=DataType.TEXT), ], @@ -1738,9 +1739,7 @@ def test_none_query_hybrid_bm25(collection_factory: CollectionFactory) -> None: properties=[ Property(name="text", data_type=DataType.TEXT), ], - vectorizer_config=Configure.Vectorizer.text2vec_contextionary( - vectorize_collection_name=False - ), + vector_config=Configure.Vectors.text2vec_contextionary(vectorize_collection_name=False), ) collection.data.insert({"text": "banana"}) diff --git a/integration/test_collection_aggregate.py b/integration/test_collection_aggregate.py index af417c8d7..32ea03505 100644 --- a/integration/test_collection_aggregate.py +++ b/integration/test_collection_aggregate.py @@ -17,7 +17,12 @@ GroupByAggregate, Metrics, ) -from weaviate.collections.classes.config import Configure, DataType, Property, ReferenceProperty +from weaviate.collections.classes.config import ( + Configure, + DataType, + Property, + ReferenceProperty, +) from weaviate.collections.classes.filters import Filter, _Filters from weaviate.collections.classes.grpc import Move from weaviate.collections.classes.tenants import Tenant diff --git a/integration/test_collection_config.py b/integration/test_collection_config.py index 25db46e07..d2f62819d 100644 --- a/integration/test_collection_config.py +++ b/integration/test_collection_config.py @@ -5,7 +5,11 @@ import 
weaviate import weaviate.classes as wvc -from integration.conftest import CollectionFactory, OpenAICollection, _sanitize_collection_name +from integration.conftest import ( + CollectionFactory, + OpenAICollection, + _sanitize_collection_name, +) from weaviate.collections.classes.config import ( _BQConfig, _CollectionConfig, @@ -1278,6 +1282,30 @@ def test_named_vectors_export_and_import( client.collections.delete(name) +@pytest.mark.parametrize("source_properties", [None, ["text"]]) +def test_vectors_export_and_import( + collection_factory: CollectionFactory, source_properties: Optional[List[str]] +) -> None: + collection = collection_factory( + properties=[Property(name="text", data_type=DataType.TEXT)], + vector_config=Configure.Vectors.text2vec_contextionary( + vectorize_collection_name=False, + source_properties=source_properties, + ), + ) + config = collection.config.get() + + name = f"TestCollectionConfigExportAndRecreate_{collection.name}" + config.name = name + with weaviate.connect_to_local() as client: + client.collections.delete(name) + client.collections.create_from_config(config) + new = client.collections.use(name).config.get() + assert config == new + assert config.to_dict() == new.to_dict() + client.collections.delete(name) + + @pytest.mark.parametrize("source_properties", [None, ["text"]]) def test_named_vectors_export_and_import_dict( collection_factory: CollectionFactory, source_properties: Optional[List[str]] @@ -1305,6 +1333,30 @@ def test_named_vectors_export_and_import_dict( client.collections.delete(name) +@pytest.mark.parametrize("source_properties", [None, ["text"]]) +def test_vectors_export_and_import_dict( + collection_factory: CollectionFactory, source_properties: Optional[List[str]] +) -> None: + collection = collection_factory( + properties=[Property(name="text", data_type=DataType.TEXT)], + vector_config=Configure.Vectors.text2vec_contextionary( + vectorize_collection_name=False, + source_properties=source_properties, + ), + ) + 
config = collection.config.get() + + name = f"TestCollectionConfigExportAndRecreateDict_{collection.name}" + config.name = name + with weaviate.connect_to_local() as client: + client.collections.delete(name) + client.collections.create_from_dict(config.to_dict()) + new = client.collections.use(name).config.get() + assert config == new + assert config.to_dict() == new.to_dict() + client.collections.delete(name) + + @pytest.mark.parametrize("index_range_filters", [True, False]) def test_range_filters(collection_factory: CollectionFactory, index_range_filters: bool) -> None: collection_dummy = collection_factory("dummy") @@ -1397,13 +1449,10 @@ def test_config_multi_vector_enabled( collection = collection_factory( ports=(8086, 50057), properties=[Property(name="name", data_type=DataType.TEXT)], - vectorizer_config=[ - Configure.NamedVectors.text2colbert_jinaai( + vector_config=[ + Configure.MultiVectors.text2vec_jinaai( name="vec", vectorize_collection_name=False, - vector_index_config=Configure.VectorIndex.hnsw( - multi_vector=Configure.VectorIndex.MultiVector.multi_vector() - ), ) ], ) @@ -1447,15 +1496,11 @@ def test_config_muvera_enabled( collection = collection_factory( ports=(8086, 50057), properties=[Property(name="name", data_type=DataType.TEXT)], - vectorizer_config=[ - Configure.NamedVectors.text2colbert_jinaai( + vector_config=[ + Configure.MultiVectors.text2vec_jinaai( name="vec", vectorize_collection_name=False, - vector_index_config=Configure.VectorIndex.hnsw( - multi_vector=Configure.VectorIndex.MultiVector.multi_vector( - encoding=Configure.VectorIndex.MultiVector.Encoding.muvera() - ) - ), + encoding=Configure.VectorIndex.MultiVector.Encoding.muvera(), ) ], ) @@ -1480,13 +1525,10 @@ def test_config_muvera_disabled( collection = collection_factory( ports=(8086, 50057), properties=[Property(name="name", data_type=DataType.TEXT)], - vectorizer_config=[ - Configure.NamedVectors.text2colbert_jinaai( + vector_config=[ + 
Configure.MultiVectors.text2vec_jinaai( name="vec", vectorize_collection_name=False, - vector_index_config=Configure.VectorIndex.hnsw( - multi_vector=Configure.VectorIndex.MultiVector.multi_vector() - ), ) ], ) diff --git a/integration/test_tenants.py b/integration/test_tenants.py index 8a2bd37a8..7665517e6 100644 --- a/integration/test_tenants.py +++ b/integration/test_tenants.py @@ -19,7 +19,10 @@ TenantCreate, ) from weaviate.collections.tenants import TenantCreateInputType -from weaviate.exceptions import WeaviateInvalidInputError, WeaviateUnsupportedFeatureError +from weaviate.exceptions import ( + WeaviateInvalidInputError, + WeaviateUnsupportedFeatureError, +) def test_shards_on_tenants( diff --git a/integration/test_named_vectors.py b/integration/test_vectors.py similarity index 84% rename from integration/test_named_vectors.py rename to integration/test_vectors.py index b7888f605..35538ad30 100644 --- a/integration/test_named_vectors.py +++ b/integration/test_vectors.py @@ -16,7 +16,10 @@ _VectorIndexConfigHNSW, ) from weaviate.collections.classes.data import DataObject -from weaviate.collections.classes.grpc import _ListOfVectorsQuery, _MultiTargetVectorJoin +from weaviate.collections.classes.grpc import ( + _ListOfVectorsQuery, + _MultiTargetVectorJoin, +) from weaviate.exceptions import WeaviateInvalidInputError, WeaviateQueryError from weaviate.types import INCLUDE_VECTOR @@ -33,23 +36,23 @@ def test_create_named_vectors( wvc.config.Property(name="title", data_type=wvc.config.DataType.TEXT), wvc.config.Property(name="content", data_type=wvc.config.DataType.TEXT), ], - vectorizer_config=[ - wvc.config.Configure.NamedVectors.text2vec_contextionary( - "title", source_properties=["title"], vectorize_collection_name=False + vector_config=[ + wvc.config.Configure.Vectors.text2vec_contextionary( + name="title", source_properties=["title"], vectorize_collection_name=False ), - wvc.config.Configure.NamedVectors.text2vec_contextionary( + 
wvc.config.Configure.Vectors.text2vec_contextionary( name="content", source_properties=["content"], vectorize_collection_name=False ), - wvc.config.Configure.NamedVectors.text2vec_contextionary( + wvc.config.Configure.Vectors.text2vec_contextionary( name="All", vectorize_collection_name=False ), - wvc.config.Configure.NamedVectors.text2vec_contextionary( + wvc.config.Configure.Vectors.text2vec_contextionary( name="AllExplicit", source_properties=["title", "content"], vectorize_collection_name=False, ), - wvc.config.Configure.NamedVectors.none(name="bringYourOwn"), - wvc.config.Configure.NamedVectors.none(name="bringYourOwn2"), + wvc.config.Configure.Vectors.self_provided(name="bringYourOwn"), + wvc.config.Configure.Vectors.self_provided(name="bringYourOwn2"), ], ) @@ -85,11 +88,11 @@ def test_insert_many_add(collection_factory: CollectionFactory) -> None: wvc.config.Property(name="title", data_type=wvc.config.DataType.TEXT), wvc.config.Property(name="content", data_type=wvc.config.DataType.TEXT), ], - vectorizer_config=[ - wvc.config.Configure.NamedVectors.text2vec_contextionary( - "title", source_properties=["title"], vectorize_collection_name=False + vector_config=[ + wvc.config.Configure.Vectors.text2vec_contextionary( + name="title", source_properties=["title"], vectorize_collection_name=False ), - wvc.config.Configure.NamedVectors.none(name="bringYourOwn"), + wvc.config.Configure.Vectors.self_provided(name="bringYourOwn"), ], ) @@ -114,9 +117,7 @@ def test_update(collection_factory: CollectionFactory) -> None: wvc.config.Property(name="title", data_type=wvc.config.DataType.TEXT), wvc.config.Property(name="content", data_type=wvc.config.DataType.TEXT), ], - vectorizer_config=[ - wvc.config.Configure.NamedVectors.none(name="bringYourOwn"), - ], + vector_config=wvc.config.Configure.Vectors.self_provided(name="bringYourOwn"), ) uuid = collection.data.insert( @@ -145,9 +146,7 @@ def test_replace(collection_factory: CollectionFactory) -> None: 
wvc.config.Property(name="title", data_type=wvc.config.DataType.TEXT), wvc.config.Property(name="content", data_type=wvc.config.DataType.TEXT), ], - vectorizer_config=[ - wvc.config.Configure.NamedVectors.none(name="bringYourOwn"), - ], + vector_config=wvc.config.Configure.Vectors.self_provided(name="bringYourOwn"), ) uuid = collection.data.insert( @@ -177,11 +176,11 @@ def test_query(collection_factory: CollectionFactory) -> None: wvc.config.Property(name="title", data_type=wvc.config.DataType.TEXT), wvc.config.Property(name="content", data_type=wvc.config.DataType.TEXT), ], - vectorizer_config=[ - wvc.config.Configure.NamedVectors.text2vec_contextionary( - "title", source_properties=["title"], vectorize_collection_name=False + vector_config=[ + wvc.config.Configure.Vectors.text2vec_contextionary( + name="title", source_properties=["title"], vectorize_collection_name=False ), - wvc.config.Configure.NamedVectors.text2vec_contextionary( + wvc.config.Configure.Vectors.text2vec_contextionary( name="content", source_properties=["content"], vectorize_collection_name=False ), ], @@ -207,11 +206,11 @@ def test_generate(openai_collection: OpenAICollection) -> None: if collection._connection._weaviate_version.is_lower_than(1, 24, 0): pytest.skip("Named vectors are not supported in versions lower than 1.24.0") collection = openai_collection( - vectorizer_config=[ - wvc.config.Configure.NamedVectors.text2vec_openai( - "text", source_properties=["text"], vectorize_collection_name=False + vector_config=[ + wvc.config.Configure.Vectors.text2vec_openai( + name="text", source_properties=["text"], vectorize_collection_name=False ), - wvc.config.Configure.NamedVectors.text2vec_openai( + wvc.config.Configure.Vectors.text2vec_openai( name="content", source_properties=["content"], vectorize_collection_name=False ), ], @@ -252,11 +251,11 @@ def test_batch_add(collection_factory: CollectionFactory) -> None: wvc.config.Property(name="title", data_type=wvc.config.DataType.TEXT), 
wvc.config.Property(name="content", data_type=wvc.config.DataType.TEXT), ], - vectorizer_config=[ - wvc.config.Configure.NamedVectors.text2vec_contextionary( - "title", source_properties=["title"], vectorize_collection_name=False + vector_config=[ + wvc.config.Configure.Vectors.text2vec_contextionary( + name="title", source_properties=["title"], vectorize_collection_name=False ), - wvc.config.Configure.NamedVectors.none(name="bringYourOwn"), + wvc.config.Configure.Vectors.self_provided(name="bringYourOwn"), ], ) uuid1 = uuid.uuid4() @@ -279,21 +278,21 @@ def test_named_vector_with_index_config(collection_factory: CollectionFactory) - wvc.config.Property(name="title", data_type=wvc.config.DataType.TEXT), wvc.config.Property(name="second", data_type=wvc.config.DataType.TEXT), ], - vectorizer_config=[ - wvc.config.Configure.NamedVectors.text2vec_contextionary( - "title", + vector_config=[ + wvc.config.Configure.Vectors.text2vec_contextionary( + name="title", source_properties=["title"], vectorize_collection_name=False, vector_index_config=wvc.config.Configure.VectorIndex.flat( distance_metric=wvc.config.VectorDistances.HAMMING, - quantizer=wvc.config.Configure.VectorIndex.Quantizer.bq(rescore_limit=10), ), + quantizer=wvc.config.Configure.VectorIndex.Quantizer.bq(rescore_limit=10), ), - wvc.config.Configure.NamedVectors.none( - "custom", + wvc.config.Configure.Vectors.self_provided( + name="custom", ), - wvc.config.Configure.NamedVectors.text2vec_contextionary( - "default", + wvc.config.Configure.Vectors.text2vec_contextionary( + name="default", vectorize_collection_name=False, # needed as contextionary cant handle "_" in collection names ), ], @@ -334,15 +333,15 @@ def test_aggregation(collection_factory: CollectionFactory) -> None: wvc.config.Property(name="second", data_type=wvc.config.DataType.TEXT), wvc.config.Property(name="number", data_type=wvc.config.DataType.INT), ], - # 
vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_contextionary(vectorize_collection_name=False), - vectorizer_config=[ - wvc.config.Configure.NamedVectors.text2vec_contextionary( - "first", + # vector_config=wvc.config.Configure.Vectorizer.text2vec_contextionary(vectorize_collection_name=False), + vector_config=[ + wvc.config.Configure.Vectors.text2vec_contextionary( + name="first", source_properties=["first"], vectorize_collection_name=False, ), - wvc.config.Configure.NamedVectors.text2vec_contextionary( - "second", + wvc.config.Configure.Vectors.text2vec_contextionary( + name="second", source_properties=["second"], vectorize_collection_name=False, ), @@ -408,14 +407,14 @@ def test_update_to_enable_quantizer_on_specific_named_vector( wvc.config.Property(name="first", data_type=wvc.config.DataType.TEXT), wvc.config.Property(name="second", data_type=wvc.config.DataType.TEXT), ], - vectorizer_config=[ - wvc.config.Configure.NamedVectors.text2vec_contextionary( - "first", + vector_config=[ + wvc.config.Configure.Vectors.text2vec_contextionary( + name="first", source_properties=["first"], vectorize_collection_name=False, ), - wvc.config.Configure.NamedVectors.text2vec_contextionary( - "second", + wvc.config.Configure.Vectors.text2vec_contextionary( + name="second", source_properties=["second"], vectorize_collection_name=False, ), @@ -429,8 +428,8 @@ def test_update_to_enable_quantizer_on_specific_named_vector( assert config.vector_config["second"].vector_index_config.quantizer is None collection.config.update( - vectorizer_config=[ - wvc.config.Reconfigure.NamedVectors.update( + vector_config=[ + wvc.config.Reconfigure.Vectors.update( name="second", vector_index_config=wvc.config.Reconfigure.VectorIndex.hnsw( quantizer=wvc.config.Reconfigure.VectorIndex.Quantizer.pq() @@ -458,13 +457,13 @@ def test_update_to_enable_quantizer_on_specific_named_vector( # wvc.config.Property(name="first", data_type=wvc.config.DataType.TEXT), # wvc.config.Property(name="second", 
data_type=wvc.config.DataType.TEXT), # ], -# vectorizer_config=[ -# wvc.config.Configure.NamedVectors.text2vec_contextionary( +# vector_config=[ +# wvc.config.Configure.Vectors.text2vec_contextionary( # "first", # source_properties=["first"], # vectorize_collection_name=False, # ), -# wvc.config.Configure.NamedVectors.text2vec_contextionary( +# wvc.config.Configure.Vectors.text2vec_contextionary( # "second", # source_properties=["second"], # vectorize_collection_name=False, @@ -483,8 +482,8 @@ def test_update_to_enable_quantizer_on_specific_named_vector( # with pytest.raises(WeaviateInvalidInputError): # collection.config.update( -# vectorizer_config=[ -# wvc.config.Reconfigure.NamedVectors.update( +# vector_config=[ +# wvc.config.ReConfigure.Vectors.update( # name="second", # vector_index_config=wvc.config.Reconfigure.VectorIndex.hnsw( # quantizer=wvc.config.Reconfigure.VectorIndex.Quantizer.bq() @@ -497,12 +496,12 @@ def test_update_to_enable_quantizer_on_specific_named_vector( def test_duplicate_named_vectors(collection_factory: CollectionFactory) -> None: with pytest.raises(WeaviateInvalidInputError) as e: collection_factory( - vectorizer_config=[ - wvc.config.Configure.NamedVectors.text2vec_contextionary( - "title", source_properties=["title"], vectorize_collection_name=False + vector_config=[ + wvc.config.Configure.Vectors.text2vec_contextionary( + name="title", source_properties=["title"], vectorize_collection_name=False ), - wvc.config.Configure.NamedVectors.text2vec_contextionary( - "title", source_properties=["content"], vectorize_collection_name=False + wvc.config.Configure.Vectors.text2vec_contextionary( + name="title", source_properties=["content"], vectorize_collection_name=False ), ], ) @@ -529,9 +528,9 @@ def test_named_vector_multi_target( collection = collection_factory( properties=[], - vectorizer_config=[ - wvc.config.Configure.NamedVectors.none("first"), - wvc.config.Configure.NamedVectors.none("second"), + vector_config=[ + 
wvc.config.Configure.Vectors.self_provided(name="first"), + wvc.config.Configure.Vectors.self_provided(name="second"), ], ) @@ -549,9 +548,9 @@ def test_named_vector_multi_target_vector_per_target(collection_factory: Collect collection = collection_factory( properties=[], - vectorizer_config=[ - wvc.config.Configure.NamedVectors.none("first"), - wvc.config.Configure.NamedVectors.none("second"), + vector_config=[ + wvc.config.Configure.Vectors.self_provided(name="first"), + wvc.config.Configure.Vectors.self_provided(name="second"), ], ) @@ -571,9 +570,9 @@ def test_multi_query_error_no_target_vector(collection_factory: CollectionFactor collection = collection_factory( properties=[], - vectorizer_config=[ - wvc.config.Configure.NamedVectors.none("first"), - wvc.config.Configure.NamedVectors.none("second"), + vector_config=[ + wvc.config.Configure.Vectors.self_provided(name="first"), + wvc.config.Configure.Vectors.self_provided(name="second"), ], ) @@ -582,7 +581,7 @@ def test_multi_query_error_no_target_vector(collection_factory: CollectionFactor with pytest.raises(WeaviateInvalidInputError): collection.query.near_vector([[1.0, 0.0], [1.0, 0.0, 0.0]]) with pytest.raises(WeaviateInvalidInputError): - collection.query.near_vector([[[1.0, 0.0], [1.0, 0.0]], [1.0, 0.0, 0.0]]) + collection.query.near_vector([[[1.0, 0.0], [1.0, 0.0]], [1.0, 0.0, 0.0]]) # type: ignore else: # throws an error in the server instead as implicit multi vector is understood now as using multi-vectors with pytest.raises(WeaviateQueryError): @@ -618,9 +617,9 @@ def test_same_target_vector_multiple_input( collection = collection_factory( properties=[], - vectorizer_config=[ - wvc.config.Configure.NamedVectors.none("first"), - wvc.config.Configure.NamedVectors.none("second"), + vector_config=[ + wvc.config.Configure.Vectors.self_provided(name="first"), + wvc.config.Configure.Vectors.self_provided(name="second"), ], ) @@ -670,9 +669,9 @@ def test_same_target_vector_multiple_input_combinations( 
collection = collection_factory( properties=[], - vectorizer_config=[ - wvc.config.Configure.NamedVectors.none("first"), - wvc.config.Configure.NamedVectors.none("second"), + vector_config=[ + wvc.config.Configure.Vectors.self_provided(name="first"), + wvc.config.Configure.Vectors.self_provided(name="second"), ], ) @@ -696,9 +695,9 @@ def test_deprecated_syntax(collection_factory: CollectionFactory): collection = collection_factory( properties=[], - vectorizer_config=[ - wvc.config.Configure.NamedVectors.none("first"), - wvc.config.Configure.NamedVectors.none("second"), + vector_config=[ + wvc.config.Configure.Vectors.self_provided(name="first"), + wvc.config.Configure.Vectors.self_provided(name="second"), ], ) @@ -707,7 +706,7 @@ def test_deprecated_syntax(collection_factory: CollectionFactory): with pytest.raises(WeaviateInvalidInputError) as e: collection.query.near_vector( - [[0.0, 1.0], [[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]], + [[0.0, 1.0], [[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]]], # # type: ignore target_vector=["first", "second", "second"], return_metadata=wvc.query.MetadataQuery.full(), ) @@ -727,8 +726,7 @@ def test_deprecated_syntax(collection_factory: CollectionFactory): [ (False, {}), (["bringYourOwn1"], {"bringYourOwn1": [0, 1, 2]}), - # TODO: to be uncommented when https://github.com/weaviate/weaviate/issues/6279 is resolved - # (True, {"bringYourOwn1": [0, 1, 2], "bringYourOwn2": [3, 4, 5]}) + (True, {"bringYourOwn1": [0, 1, 2], "bringYourOwn2": [3, 4, 5]}), ], ) def test_include_vector_on_references( @@ -736,8 +734,10 @@ def test_include_vector_on_references( ) -> None: """Test include vector on reference.""" dummy = collection_factory() - if dummy._connection._weaviate_version.is_lower_than(1, 24, 0): - pytest.skip("Named vectorizers are only supported in Weaviate v1.24.0 and higher.") + if dummy._connection._weaviate_version.is_lower_than(1, 26, 0): + pytest.skip( + "https://github.com/weaviate/weaviate/issues/6279 was resolved in >1.26.0 only." 
+ ) ref_collection = collection_factory( name="Target", @@ -777,16 +777,14 @@ def test_colbert_vectors_byov(collection_factory: CollectionFactory) -> None: data_type=wvc.config.DataType.TEXT, ) ], - vectorizer_config=[ - wvc.config.Configure.NamedVectors.none( + vector_config=[ + wvc.config.Configure.MultiVectors.self_provided( name="colbert", - vector_index_config=wvc.config.Configure.VectorIndex.hnsw( - multi_vector=wvc.config.Configure.VectorIndex.MultiVector.multi_vector( - aggregation=wvc.config.MultiVectorAggregation.MAX_SIM - ) + multi_vector_config=wvc.config.Configure.VectorIndex.MultiVector.multi_vector( + aggregation=wvc.config.MultiVectorAggregation.MAX_SIM ), ), - wvc.config.Configure.NamedVectors.none( + wvc.config.Configure.Vectors.self_provided( name="regular", ), ], @@ -880,8 +878,8 @@ def test_colbert_vectors_jinaai(collection_factory: CollectionFactory) -> None: data_type=wvc.config.DataType.TEXT, ) ], - vectorizer_config=[ - wvc.config.Configure.NamedVectors.text2colbert_jinaai( + vector_config=[ + wvc.config.Configure.MultiVectors.text2vec_jinaai( name="colbert", ) ], diff --git a/journey_tests/journeys.py b/journey_tests/journeys.py index 87152db40..9777c67a1 100644 --- a/journey_tests/journeys.py +++ b/journey_tests/journeys.py @@ -1,6 +1,11 @@ from typing import List, cast -from weaviate import WeaviateAsyncClient, WeaviateClient, connect_to_local, use_async_with_local +from weaviate import ( + WeaviateAsyncClient, + WeaviateClient, + connect_to_local, + use_async_with_local, +) from weaviate.classes.config import DataType, Property diff --git a/test/collection/test_config.py b/test/collection/test_config.py index 0cdec831f..ff4b78703 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -15,11 +15,20 @@ _VectorizerConfigCreate, ) from weaviate.collections.classes.config_named_vectors import _NamedVectorConfigCreate -from weaviate.collections.classes.config_vectorizers import Multi2VecField, VectorDistances 
+from weaviate.collections.classes.config_vectorizers import ( + Multi2VecField, + VectorDistances, +) DEFAULTS = { - "vectorizer": "none", - "vectorIndexType": "hnsw", + "vectorConfig": { + "default": { + "vectorIndexType": "hnsw", + "vectorizer": { + "none": {}, + }, + } + } } @@ -674,7 +683,7 @@ def test_config_with_default_vectorizer( ) -> None: config = _CollectionConfigCreate(name="test", vectorizer_config=vectorizer_config) assert config._to_dict() == { - **DEFAULTS, + "vectorIndexType": "hnsw", "vectorizer": vectorizer_config.vectorizer.value, "class": "Test", "moduleConfig": expected, @@ -783,7 +792,7 @@ def test_config_with_vectorizer_and_properties( name="test", properties=properties, vectorizer_config=vectorizer_config ) assert config._to_dict() == { - **DEFAULTS, + "vectorIndexType": "hnsw", "vectorizer": vectorizer_config.vectorizer.value, "class": "Test", "properties": expected_props, @@ -1057,7 +1066,6 @@ def test_config_with_generative( config = _CollectionConfigCreate(name="test", generative_config=generative_config) assert config._to_dict() == { **DEFAULTS, - "vectorizer": "none", "class": "Test", "moduleConfig": expected_mc, } @@ -1137,7 +1145,6 @@ def test_config_with_reranker( config = _CollectionConfigCreate(name="test", reranker_config=reranker_config) assert config._to_dict() == { **DEFAULTS, - "vectorizer": "none", "class": "Test", "moduleConfig": expected_mc, } diff --git a/weaviate/classes/config.py b/weaviate/classes/config.py index f6d0dcc04..651818de3 100644 --- a/weaviate/classes/config.py +++ b/weaviate/classes/config.py @@ -3,7 +3,6 @@ ConsistencyLevel, DataType, GenerativeSearches, - MultiVectorAggregation, PQEncoderDistribution, PQEncoderType, Property, @@ -15,7 +14,10 @@ Tokenization, VectorDistances, ) -from weaviate.collections.classes.config_vector_index import VectorFilterStrategy +from weaviate.collections.classes.config_vector_index import ( + MultiVectorAggregation, + VectorFilterStrategy, +) from 
weaviate.collections.classes.config_vectorizers import Multi2VecField, Vectorizers from weaviate.connect.integrations import Integrations diff --git a/weaviate/collections/classes/config.py b/weaviate/collections/classes/config.py index 84c539131..f5333fc47 100644 --- a/weaviate/collections/classes/config.py +++ b/weaviate/collections/classes/config.py @@ -32,19 +32,19 @@ _NamedVectorsUpdate, ) from weaviate.collections.classes.config_vector_index import ( + PQEncoderDistribution, + PQEncoderType, VectorFilterStrategy, - _EncodingConfigCreate, - _MultiVectorConfigCreate, - _MuveraConfigCreate, - _QuantizerConfigCreate, + _BQConfigUpdate, + _PQConfigUpdate, + _PQEncoderConfigUpdate, + _RQConfigUpdate, + _SQConfigUpdate, + _VectorIndex, _VectorIndexConfigCreate, - _VectorIndexConfigDynamicCreate, _VectorIndexConfigDynamicUpdate, - _VectorIndexConfigFlatCreate, _VectorIndexConfigFlatUpdate, - _VectorIndexConfigHNSWCreate, _VectorIndexConfigHNSWUpdate, - _VectorIndexConfigSkipCreate, _VectorIndexConfigUpdate, ) from weaviate.collections.classes.config_vector_index import ( @@ -61,6 +61,13 @@ from weaviate.collections.classes.config_vectorizers import ( Vectorizers as VectorizersAlias, ) +from weaviate.collections.classes.config_vectors import ( + _MultiVectors, + _VectorConfigCreate, + _VectorConfigUpdate, + _Vectors, + _VectorsUpdate, +) from weaviate.exceptions import WeaviateInvalidInputError from weaviate.str_enum import BaseEnum from weaviate.util import _capitalize_first_letter @@ -240,140 +247,6 @@ class ReplicationDeletionStrategy(str, BaseEnum): TIME_BASED_RESOLUTION = "TimeBasedResolution" -class PQEncoderType(str, BaseEnum): - """Type of the PQ encoder. - - Attributes: - KMEANS: K-means encoder. - TILE: Tile encoder. - """ - - KMEANS = "kmeans" - TILE = "tile" - - -class PQEncoderDistribution(str, BaseEnum): - """Distribution of the PQ encoder. - - Attributes: - LOG_NORMAL: Log-normal distribution. - NORMAL: Normal distribution. 
- """ - - LOG_NORMAL = "log-normal" - NORMAL = "normal" - - -class MultiVectorAggregation(str, BaseEnum): - """Aggregation type to use for multivector indices. - - Attributes: - MAX_SIM: Maximum similarity. - """ - - MAX_SIM = "maxSim" - - -class _PQEncoderConfigCreate(_ConfigCreateModel): - type_: Optional[PQEncoderType] = Field(serialization_alias="type") - distribution: Optional[PQEncoderDistribution] - - -class _PQEncoderConfigUpdate(_ConfigUpdateModel): - type_: Optional[PQEncoderType] - distribution: Optional[PQEncoderDistribution] - - def merge_with_existing(self, schema: Dict[str, Any]) -> Dict[str, Any]: - """Must be done manually since Pydantic does not work well with type and type_. - - Errors shadowing type occur if we want to use type as a field name. - """ - if self.type_ is not None: - schema["type"] = str(self.type_.value) - if self.distribution is not None: - schema["distribution"] = str(self.distribution.value) - return schema - - -class _PQConfigCreate(_QuantizerConfigCreate): - bitCompression: Optional[bool] = Field(default=None) - centroids: Optional[int] - encoder: _PQEncoderConfigCreate - segments: Optional[int] - trainingLimit: Optional[int] - - @staticmethod - def quantizer_name() -> str: - return "pq" - - -class _BQConfigCreate(_QuantizerConfigCreate): - cache: Optional[bool] - rescoreLimit: Optional[int] - - @staticmethod - def quantizer_name() -> str: - return "bq" - - -class _SQConfigCreate(_QuantizerConfigCreate): - cache: Optional[bool] - rescoreLimit: Optional[int] - trainingLimit: Optional[int] - - @staticmethod - def quantizer_name() -> str: - return "sq" - - -class _RQConfigCreate(_QuantizerConfigCreate): - bits: Optional[int] - - @staticmethod - def quantizer_name() -> str: - return "rq" - - -class _PQConfigUpdate(_QuantizerConfigUpdate): - bitCompression: Optional[bool] = Field(default=None) - centroids: Optional[int] - enabled: Optional[bool] - segments: Optional[int] - trainingLimit: Optional[int] - encoder: 
Optional[_PQEncoderConfigUpdate] - - @staticmethod - def quantizer_name() -> str: - return "pq" - - -class _BQConfigUpdate(_QuantizerConfigUpdate): - enabled: Optional[bool] - rescoreLimit: Optional[int] - - @staticmethod - def quantizer_name() -> str: - return "bq" - - -class _SQConfigUpdate(_QuantizerConfigUpdate): - enabled: Optional[bool] - rescoreLimit: Optional[int] - trainingLimit: Optional[int] - - @staticmethod - def quantizer_name() -> str: - return "sq" - - -class _RQConfigUpdate(_QuantizerConfigUpdate): - enabled: Optional[bool] - - @staticmethod - def quantizer_name() -> str: - return "rq" - - class _ShardingConfigCreate(_ConfigCreateModel): virtualPerPhysical: Optional[int] desiredCount: Optional[int] @@ -1228,12 +1101,33 @@ class _CollectionConfigUpdate(_ConfigUpdateModel): vectorizerConfig: Optional[Union[_VectorIndexConfigUpdate, List[_NamedVectorConfigUpdate]]] = ( Field(default=None, alias="vectorizer_config") ) + vectorConfig: Optional[Union[_VectorConfigUpdate, List[_VectorConfigUpdate]]] = Field( + default=None, alias="vector_config" + ) multiTenancyConfig: Optional[_MultiTenancyConfigUpdate] = Field( default=None, alias="multi_tenancy_config" ) generativeConfig: Optional[_GenerativeProvider] = Field(default=None, alias="generative_config") rerankerConfig: Optional[_RerankerProvider] = Field(default=None, alias="reranker_config") + @field_validator("vectorConfig", mode="before") + def mutual_exclusivity( + cls, + v: Optional[Union[_VectorConfigUpdate, List[_VectorConfigUpdate]]], + info: ValidationInfo, + ): + if v is None: + return v + if info.data["vectorizerConfig"] is not None: + raise ValueError( + "Cannot specify vectorizerConfig when also specifying vectorConfig. Please use one or the other." + ) + if info.data["vectorIndexConfig"] is not None: + raise ValueError( + "Cannot specify vectorIndexConfig when also specifying vectorConfig. Please use one or the other." 
+ ) + return v + def __check_quantizers( self, quantizer: Optional[_QuantizerConfigUpdate], @@ -1358,6 +1252,29 @@ def merge_with_existing(self, schema: Dict[str, Any]) -> Dict[str, Any]: schema["vectorConfig"][vc.name]["vectorIndexType"] = ( vc.vectorIndexConfig.vector_index_type() ) + if self.vectorConfig is not None: + vcs = ( + [self.vectorConfig] + if isinstance(self.vectorConfig, _VectorConfigUpdate) + else self.vectorConfig + ) + for vc in vcs: + if vc.name not in schema["vectorConfig"]: + raise WeaviateInvalidInputError( + f"Vector config with name {vc.name} does not exist in the existing vector config" + ) + self.__check_quantizers( + vc.vectorIndexConfig.quantizer, + schema["vectorConfig"][vc.name]["vectorIndexConfig"], + ) + schema["vectorConfig"][vc.name]["vectorIndexConfig"] = ( + vc.vectorIndexConfig.merge_with_existing( + schema["vectorConfig"][vc.name]["vectorIndexConfig"] + ) + ) + schema["vectorConfig"][vc.name]["vectorIndexType"] = ( + vc.vectorIndexConfig.vector_index_type() + ) return schema @staticmethod @@ -1972,8 +1889,13 @@ class _CollectionConfigCreate(_ConfigCreateModel): vectorIndexConfig: Optional[_VectorIndexConfigCreate] = Field( default=None, alias="vector_index_config" ) - vectorizerConfig: Optional[Union[_VectorizerConfigCreate, List[_NamedVectorConfigCreate]]] = ( - Field(default=_Vectorizer.none(), alias="vectorizer_config") + vectorizerConfig: Union[_VectorizerConfigCreate, List[_NamedVectorConfigCreate], None] = Field( + default=None, alias="vectorizer_config" + ) + vectorConfig: Union[_VectorConfigCreate, List[_VectorConfigCreate], None] = Field( + default=None, + alias="vector_config", + validate_default=True, ) generativeSearch: Optional[_GenerativeProvider] = Field(default=None, alias="generative_config") rerankerConfig: Optional[_RerankerProvider] = Field(default=None, alias="reranker_config") @@ -1981,13 +1903,13 @@ class _CollectionConfigCreate(_ConfigCreateModel): def model_post_init(self, __context: Any) -> None: 
self.name = _capitalize_first_letter(self.name) - @field_validator("vectorizerConfig", mode="after") + @field_validator("vectorizerConfig", "vectorConfig", mode="after") @classmethod def validate_vector_names( cls, - v: Union[_VectorizerConfigCreate, List[_NamedVectorConfigCreate]], + v: Union[_VectorizerConfigCreate, _NamedVectorConfigCreate, List[_NamedVectorConfigCreate]], info: ValidationInfo, - ) -> Union[_VectorizerConfigCreate, List[_NamedVectorConfigCreate]]: + ) -> Union[_VectorizerConfigCreate, _NamedVectorConfigCreate, List[_NamedVectorConfigCreate]]: if isinstance(v, list): names = [vc.name for vc in v] if len(names) != len(set(names)): @@ -1995,6 +1917,19 @@ def validate_vector_names( raise ValueError(f"Vector config names must be unique. Found duplicates: {dups}") return v + @field_validator("vectorConfig", mode="after") + @classmethod + def inject_vector_config_none( + cls, + v: Union[_VectorConfigCreate, List[_VectorConfigCreate], None], + info: ValidationInfo, + ) -> Union[_VectorConfigCreate, List[_VectorConfigCreate], None]: + if v is None and info.data["vectorizerConfig"] is None: + return _VectorConfigCreate( + name="default", vectorizer=_VectorizerConfigCreate(vectorizer=Vectorizers.NONE) + ) + return v + @staticmethod def __add_to_module_config( return_dict: Dict[str, Any], addition_key: str, addition_val: Dict[str, Any] @@ -2024,6 +1959,8 @@ def _to_dict(self) -> Dict[str, Any]: elif isinstance(val, _VectorIndexConfigCreate): ret_dict["vectorIndexType"] = val.vector_index_type().value ret_dict[cls_field] = val._to_dict() + elif isinstance(val, _VectorConfigCreate): + ret_dict["vectorConfig"] = {val.name or "default": val._to_dict()} elif ( isinstance(val, list) and len(val) > 0 @@ -2031,7 +1968,19 @@ def _to_dict(self) -> Dict[str, Any]: ): val = cast(List[_NamedVectorConfigCreate], val) ret_dict["vectorConfig"] = {item.name: item._to_dict() for item in val} - + elif ( + isinstance(val, list) + and len(val) > 0 + and all(isinstance(item, 
_VectorConfigCreate) for item in val) + ): + val = cast(List[_VectorConfigCreate], val) + ret_dict["vectorConfig"] = {} + for item in val: + if item.name is None: + raise WeaviateInvalidInputError( + "Vector config name must be set when specifying multiple vectors" + ) + ret_dict["vectorConfig"][item.name] = item._to_dict() else: assert isinstance(val, _ConfigCreateModel) ret_dict[cls_field] = val._to_dict() @@ -2078,212 +2027,6 @@ def __add_props( ret_dict["properties"] = existing_props -class _VectorIndexMultivectorEncoding: - @staticmethod - def muvera( - ksim: Optional[int] = None, - dprojections: Optional[int] = None, - repetitions: Optional[int] = None, - ) -> _EncodingConfigCreate: - return _MuveraConfigCreate( - enabled=True, - ksim=ksim, - dprojections=dprojections, - repetitions=repetitions, - ) - - -class _VectorIndexMultiVector: - Encoding = _VectorIndexMultivectorEncoding - - @staticmethod - def multi_vector( - encoding: Optional[_EncodingConfigCreate] = None, - aggregation: Optional[MultiVectorAggregation] = None, - ) -> _MultiVectorConfigCreate: - return _MultiVectorConfigCreate( - encoding=encoding if encoding is not None else None, - aggregation=aggregation.value if aggregation is not None else None, - ) - - -class _VectorIndexQuantizer: - @staticmethod - def pq( - bit_compression: Optional[bool] = None, - centroids: Optional[int] = None, - encoder_distribution: Optional[PQEncoderDistribution] = None, - encoder_type: Optional[PQEncoderType] = None, - segments: Optional[int] = None, - training_limit: Optional[int] = None, - ) -> _PQConfigCreate: - """Create a `_PQConfigCreate` object to be used when defining the product quantization (PQ) configuration of Weaviate. - - Use this method when defining the `quantizer` argument in the `vector_index` configuration. - - Args: - See [the docs](https://weaviate.io/developers/weaviate/concepts/vector-index#hnsw-with-compression) for a more detailed view! 
- """ # noqa: D417 (missing argument descriptions in the docstring) - if bit_compression is not None: - _Warnings.bit_compression_in_pq_config() - return _PQConfigCreate( - centroids=centroids, - segments=segments, - trainingLimit=training_limit, - encoder=_PQEncoderConfigCreate(type_=encoder_type, distribution=encoder_distribution), - ) - - @staticmethod - def bq( - cache: Optional[bool] = None, - rescore_limit: Optional[int] = None, - ) -> _BQConfigCreate: - """Create a `_BQConfigCreate` object to be used when defining the binary quantization (BQ) configuration of Weaviate. - - Use this method when defining the `quantizer` argument in the `vector_index` configuration. Note that the arguments have no effect for HNSW. - - Args: - See [the docs](https://weaviate.io/developers/weaviate/concepts/vector-index#binary-quantization) for a more detailed view! - """ # noqa: D417 (missing argument descriptions in the docstring) - return _BQConfigCreate( - cache=cache, - rescoreLimit=rescore_limit, - ) - - @staticmethod - def sq( - cache: Optional[bool] = None, - rescore_limit: Optional[int] = None, - training_limit: Optional[int] = None, - ) -> _SQConfigCreate: - """Create a `_SQConfigCreate` object to be used when defining the scalar quantization (SQ) configuration of Weaviate. - - Use this method when defining the `quantizer` argument in the `vector_index` configuration. Note that the arguments have no effect for HNSW. - - Args: - See [the docs](https://weaviate.io/developers/weaviate/concepts/vector-index#binary-quantization) for a more detailed view! - """ # noqa: D417 (missing argument descriptions in the docstring) - return _SQConfigCreate( - cache=cache, - rescoreLimit=rescore_limit, - trainingLimit=training_limit, - ) - - @staticmethod - def rq( - bits: Optional[int] = None, - ) -> _RQConfigCreate: - """Create a `_RQConfigCreate` object to be used when defining the Rotational quantization (RQ) configuration of Weaviate. 
- - Use this method when defining the `quantizer` argument in the `vector_index` configuration. Note that the arguments have no effect for HNSW. - - Arguments: - See [the docs](https://weaviate.io/developers/weaviate/concepts/vector-index) for a more detailed view! - """ # noqa: D417 (missing argument descriptions in the docstring) - return _RQConfigCreate( - bits=bits, - ) - - -class _VectorIndex: - MultiVector = _VectorIndexMultiVector - Quantizer = _VectorIndexQuantizer - - @staticmethod - def none() -> _VectorIndexConfigSkipCreate: - """Create a `_VectorIndexConfigSkipCreate` object to be used when configuring Weaviate to not index your vectors. - - Use this method when defining the `vector_index_config` argument in `collections.create()`. - """ - return _VectorIndexConfigSkipCreate( - distance=None, - quantizer=None, - multivector=None, - ) - - @staticmethod - def hnsw( - cleanup_interval_seconds: Optional[int] = None, - distance_metric: Optional[VectorDistances] = None, - dynamic_ef_factor: Optional[int] = None, - dynamic_ef_max: Optional[int] = None, - dynamic_ef_min: Optional[int] = None, - ef: Optional[int] = None, - ef_construction: Optional[int] = None, - filter_strategy: Optional[VectorFilterStrategy] = None, - flat_search_cutoff: Optional[int] = None, - max_connections: Optional[int] = None, - vector_cache_max_objects: Optional[int] = None, - quantizer: Optional[_QuantizerConfigCreate] = None, - multi_vector: Optional[_MultiVectorConfigCreate] = None, - ) -> _VectorIndexConfigHNSWCreate: - """Create a `_VectorIndexConfigHNSWCreate` object to be used when defining the HNSW vector index configuration of Weaviate. - - Use this method when defining the `vector_index_config` argument in `collections.create()`. - - Args: - See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#how-to-configure-hnsw) for a more detailed view! 
- """ # noqa: D417 (missing argument descriptions in the docstring) - return _VectorIndexConfigHNSWCreate( - cleanupIntervalSeconds=cleanup_interval_seconds, - distance=distance_metric, - dynamicEfMin=dynamic_ef_min, - dynamicEfMax=dynamic_ef_max, - dynamicEfFactor=dynamic_ef_factor, - efConstruction=ef_construction, - ef=ef, - filterStrategy=filter_strategy, - flatSearchCutoff=flat_search_cutoff, - maxConnections=max_connections, - vectorCacheMaxObjects=vector_cache_max_objects, - quantizer=quantizer, - multivector=multi_vector, - ) - - @staticmethod - def flat( - distance_metric: Optional[VectorDistances] = None, - vector_cache_max_objects: Optional[int] = None, - quantizer: Optional[_BQConfigCreate] = None, - ) -> _VectorIndexConfigFlatCreate: - """Create a `_VectorIndexConfigFlatCreate` object to be used when defining the FLAT vector index configuration of Weaviate. - - Use this method when defining the `vector_index_config` argument in `collections.create()`. - - Args: - See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#how-to-configure-hnsw) for a more detailed view! - """ # noqa: D417 (missing argument descriptions in the docstring) - return _VectorIndexConfigFlatCreate( - distance=distance_metric, - vectorCacheMaxObjects=vector_cache_max_objects, - quantizer=quantizer, - multivector=None, - ) - - @staticmethod - def dynamic( - distance_metric: Optional[VectorDistances] = None, - threshold: Optional[int] = None, - hnsw: Optional[_VectorIndexConfigHNSWCreate] = None, - flat: Optional[_VectorIndexConfigFlatCreate] = None, - ) -> _VectorIndexConfigDynamicCreate: - """Create a `_VectorIndexConfigDynamicCreate` object to be used when defining the DYNAMIC vector index configuration of Weaviate. - - Use this method when defining the `vector_index_config` argument in `collections.create()`. - - Args: - See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#how-to-configure-hnsw) for a more detailed view! 
- """ # noqa: D417 (missing argument descriptions in the docstring) - return _VectorIndexConfigDynamicCreate( - distance=distance_metric, - threshold=threshold, - hnsw=hnsw, - flat=flat, - quantizer=None, - multivector=None, - ) - - class Configure: """Use this factory class to generate the correct object for use when using the `collections.create()` method. E.g., `.multi_tenancy()` will return a `MultiTenancyConfigCreate` object to be used in the `multi_tenancy_config` argument. @@ -2296,6 +2039,8 @@ class Configure: Vectorizer = _Vectorizer VectorIndex = _VectorIndex NamedVectors = _NamedVectors + Vectors = _Vectors + MultiVectors = _MultiVectors @staticmethod def inverted_index( @@ -2567,6 +2312,7 @@ class Reconfigure: """ NamedVectors = _NamedVectorsUpdate + Vectors = _VectorsUpdate VectorIndex = _VectorIndexUpdate Generative = _Generative # config is the same for create and update Reranker = _Reranker # config is the same for create and update diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py index 9bd4a3bf9..bda64035d 100644 --- a/weaviate/collections/classes/config_named_vectors.py +++ b/weaviate/collections/classes/config_named_vectors.py @@ -66,18 +66,6 @@ from ...warnings import _Warnings -class _NamedVectorizerConfigCreate(_ConfigCreateModel): - vectorizer: Vectorizers - properties: Optional[List[str]] = Field(default=None, min_length=1, alias="source_properties") - - def _to_dict(self) -> Dict[str, Any]: - return self._to_vectorizer_dict(self.vectorizer, super()._to_dict()) - - @staticmethod - def _to_vectorizer_dict(vectorizer: Vectorizers, values: Dict[str, Any]) -> Dict[str, Any]: - return {str(vectorizer.value): values} - - class _NamedVectorConfigCreate(_ConfigCreateModel): name: str properties: Optional[List[str]] = Field(default=None, min_length=1, alias="source_properties") diff --git a/weaviate/collections/classes/config_vector_index.py 
b/weaviate/collections/classes/config_vector_index.py index 89ac4cd7e..ff4664240 100644 --- a/weaviate/collections/classes/config_vector_index.py +++ b/weaviate/collections/classes/config_vector_index.py @@ -1,8 +1,9 @@ from abc import abstractmethod from enum import Enum -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, overload from pydantic import Field +from typing_extensions import deprecated from weaviate.collections.classes.config_base import ( _ConfigCreateModel, @@ -11,6 +12,8 @@ _QuantizerConfigUpdate, ) from weaviate.collections.classes.config_vectorizers import VectorDistances +from weaviate.str_enum import BaseEnum +from weaviate.warnings import _Warnings class VectorFilterStrategy(str, Enum): @@ -42,7 +45,7 @@ class _MultiVectorConfigCreateBase(_ConfigCreateModel): enabled: bool = Field(default=True) -class _EncodingConfigCreate(_MultiVectorConfigCreateBase): +class _MultiVectorEncodingConfigCreate(_MultiVectorConfigCreateBase): enabled: bool = Field(default=True) @staticmethod @@ -50,7 +53,7 @@ class _EncodingConfigCreate(_MultiVectorConfigCreateBase): def encoding_name() -> str: ... -class _MuveraConfigCreate(_EncodingConfigCreate): +class _MuveraConfigCreate(_MultiVectorEncodingConfigCreate): ksim: Optional[int] dprojections: Optional[int] repetitions: Optional[int] @@ -61,7 +64,7 @@ def encoding_name() -> str: class _MultiVectorConfigCreate(_MultiVectorConfigCreateBase): - encoding: Optional[_EncodingConfigCreate] = Field(exclude=True) + encoding: Optional[_MultiVectorEncodingConfigCreate] = Field(exclude=True) aggregation: Optional[str] @@ -180,3 +183,404 @@ class _VectorIndexConfigDynamicUpdate(_VectorIndexConfigUpdate): @staticmethod def vector_index_type() -> VectorIndexType: return VectorIndexType.DYNAMIC + + +class PQEncoderType(str, BaseEnum): + """Type of the PQ encoder. + + Attributes: + KMEANS: K-means encoder. + TILE: Tile encoder. 
+ """ + + KMEANS = "kmeans" + TILE = "tile" + + +class PQEncoderDistribution(str, BaseEnum): + """Distribution of the PQ encoder. + + Attributes: + LOG_NORMAL: Log-normal distribution. + NORMAL: Normal distribution. + """ + + LOG_NORMAL = "log-normal" + NORMAL = "normal" + + +class MultiVectorAggregation(str, BaseEnum): + """Aggregation type to use for multivector indices. + + Attributes: + MAX_SIM: Maximum similarity. + """ + + MAX_SIM = "maxSim" + + +class _PQEncoderConfigCreate(_ConfigCreateModel): + type_: Optional[PQEncoderType] = Field(serialization_alias="type") + distribution: Optional[PQEncoderDistribution] + + +class _PQEncoderConfigUpdate(_ConfigUpdateModel): + type_: Optional[PQEncoderType] + distribution: Optional[PQEncoderDistribution] + + def merge_with_existing(self, schema: Dict[str, Any]) -> Dict[str, Any]: + """Must be done manually since Pydantic does not work well with type and type_. + + Errors shadowing type occur if we want to use type as a field name. + """ + if self.type_ is not None: + schema["type"] = str(self.type_.value) + if self.distribution is not None: + schema["distribution"] = str(self.distribution.value) + return schema + + +class _PQConfigCreate(_QuantizerConfigCreate): + bitCompression: Optional[bool] = Field(default=None) + centroids: Optional[int] + encoder: _PQEncoderConfigCreate + segments: Optional[int] + trainingLimit: Optional[int] + + @staticmethod + def quantizer_name() -> str: + return "pq" + + +class _BQConfigCreate(_QuantizerConfigCreate): + cache: Optional[bool] + rescoreLimit: Optional[int] + + @staticmethod + def quantizer_name() -> str: + return "bq" + + +class _SQConfigCreate(_QuantizerConfigCreate): + cache: Optional[bool] + rescoreLimit: Optional[int] + trainingLimit: Optional[int] + + @staticmethod + def quantizer_name() -> str: + return "sq" + + +class _RQConfigCreate(_QuantizerConfigCreate): + bits: Optional[int] + + @staticmethod + def quantizer_name() -> str: + return "rq" + + +class 
_PQConfigUpdate(_QuantizerConfigUpdate): + bitCompression: Optional[bool] = Field(default=None) + centroids: Optional[int] + enabled: Optional[bool] + segments: Optional[int] + trainingLimit: Optional[int] + encoder: Optional[_PQEncoderConfigUpdate] + + @staticmethod + def quantizer_name() -> str: + return "pq" + + +class _BQConfigUpdate(_QuantizerConfigUpdate): + enabled: Optional[bool] + rescoreLimit: Optional[int] + + @staticmethod + def quantizer_name() -> str: + return "bq" + + +class _RQConfigUpdate(_QuantizerConfigUpdate): + enabled: Optional[bool] + + @staticmethod + def quantizer_name() -> str: + return "rq" + + +class _SQConfigUpdate(_QuantizerConfigUpdate): + enabled: Optional[bool] + rescoreLimit: Optional[int] + trainingLimit: Optional[int] + + @staticmethod + def quantizer_name() -> str: + return "sq" + + +class _VectorIndexMultivectorEncoding: + @staticmethod + def muvera( + ksim: Optional[int] = None, + dprojections: Optional[int] = None, + repetitions: Optional[int] = None, + ) -> _MultiVectorEncodingConfigCreate: + return _MuveraConfigCreate( + enabled=True, + ksim=ksim, + dprojections=dprojections, + repetitions=repetitions, + ) + + +class _VectorIndexMultiVector: + Encoding = _VectorIndexMultivectorEncoding + + @deprecated( + 'Using the "encoding" argument is deprecated. Instead, specify it at the top-level when creating your `vector_config`' + ) + @overload + @staticmethod + def multi_vector( + encoding: _MultiVectorEncodingConfigCreate, + aggregation: Optional[MultiVectorAggregation] = None, + ) -> _MultiVectorConfigCreate: ... + + @overload + @staticmethod + def multi_vector( + encoding: Optional[_MultiVectorEncodingConfigCreate] = None, + aggregation: Optional[MultiVectorAggregation] = None, + ) -> _MultiVectorConfigCreate: ... 
+ + @staticmethod + def multi_vector( + encoding: Optional[_MultiVectorEncodingConfigCreate] = None, + aggregation: Optional[MultiVectorAggregation] = None, + ) -> _MultiVectorConfigCreate: + if encoding is not None: + _Warnings.encoding_in_multi_vector_config() + return _MultiVectorConfigCreate( + encoding=encoding if encoding is not None else None, + aggregation=aggregation.value if aggregation is not None else None, + ) + + +class _VectorIndexQuantizer: + @staticmethod + def pq( + bit_compression: Optional[bool] = None, + centroids: Optional[int] = None, + encoder_distribution: Optional[PQEncoderDistribution] = None, + encoder_type: Optional[PQEncoderType] = None, + segments: Optional[int] = None, + training_limit: Optional[int] = None, + ) -> _PQConfigCreate: + """Create a `_PQConfigCreate` object to be used when defining the product quantization (PQ) configuration of Weaviate. + + Use this method when defining the `quantizer` argument in the `vector_index` configuration. + + Args: + See [the docs](https://weaviate.io/developers/weaviate/concepts/vector-index#hnsw-with-compression) for a more detailed view! + """ # noqa: D417 (missing argument descriptions in the docstring) + if bit_compression is not None: + _Warnings.bit_compression_in_pq_config() + return _PQConfigCreate( + centroids=centroids, + segments=segments, + trainingLimit=training_limit, + encoder=_PQEncoderConfigCreate(type_=encoder_type, distribution=encoder_distribution), + ) + + @staticmethod + def bq( + cache: Optional[bool] = None, + rescore_limit: Optional[int] = None, + ) -> _BQConfigCreate: + """Create a `_BQConfigCreate` object to be used when defining the binary quantization (BQ) configuration of Weaviate. + + Use this method when defining the `quantizer` argument in the `vector_index` configuration. Note that the arguments have no effect for HNSW. + + Args: + See [the docs](https://weaviate.io/developers/weaviate/concepts/vector-index#binary-quantization) for a more detailed view! 
+ """ # noqa: D417 (missing argument descriptions in the docstring) + return _BQConfigCreate( + cache=cache, + rescoreLimit=rescore_limit, + ) + + @staticmethod + def sq( + cache: Optional[bool] = None, + rescore_limit: Optional[int] = None, + training_limit: Optional[int] = None, + ) -> _SQConfigCreate: + """Create a `_SQConfigCreate` object to be used when defining the scalar quantization (SQ) configuration of Weaviate. + + Use this method when defining the `quantizer` argument in the `vector_index` configuration. Note that the arguments have no effect for HNSW. + + Args: + See [the docs](https://weaviate.io/developers/weaviate/concepts/vector-index#binary-quantization) for a more detailed view! + """ # noqa: D417 (missing argument descriptions in the docstring) + return _SQConfigCreate( + cache=cache, + rescoreLimit=rescore_limit, + trainingLimit=training_limit, + ) + + @staticmethod + def rq( + bits: Optional[int] = None, + ) -> _RQConfigCreate: + """Create a `_RQConfigCreate` object to be used when defining the Rotational quantization (RQ) configuration of Weaviate. + + Use this method when defining the `quantizer` argument in the `vector_index` configuration. Note that the arguments have no effect for HNSW. + + Arguments: + See [the docs](https://weaviate.io/developers/weaviate/concepts/vector-index) for a more detailed view! + """ # noqa: D417 (missing argument descriptions in the docstring) + return _RQConfigCreate( + bits=bits, + ) + + +class _VectorIndex: + MultiVector = _VectorIndexMultiVector + Quantizer = _VectorIndexQuantizer + + @staticmethod + def none() -> _VectorIndexConfigSkipCreate: + """Create a `_VectorIndexConfigSkipCreate` object to be used when configuring Weaviate to not index your vectors. + + Use this method when defining the `vector_index_config` argument in `collections.create()`. 
+ """ + return _VectorIndexConfigSkipCreate( + distance=None, + quantizer=None, + multivector=None, + ) + + @overload + @staticmethod + @deprecated( + 'Using the "multi_vector" argument is deprecated. Instead, specify it at the top-level in `multi_vector_index_config` when creating your `vector_config` with `MultiVectors.module()`' + ) + def hnsw( + cleanup_interval_seconds: Optional[int] = None, + distance_metric: Optional[VectorDistances] = None, + dynamic_ef_factor: Optional[int] = None, + dynamic_ef_max: Optional[int] = None, + dynamic_ef_min: Optional[int] = None, + ef: Optional[int] = None, + ef_construction: Optional[int] = None, + filter_strategy: Optional[VectorFilterStrategy] = None, + flat_search_cutoff: Optional[int] = None, + max_connections: Optional[int] = None, + vector_cache_max_objects: Optional[int] = None, + *, + quantizer: Optional[_QuantizerConfigCreate] = None, + multi_vector: _MultiVectorConfigCreate, + ) -> _VectorIndexConfigHNSWCreate: ... + + @overload + @staticmethod + def hnsw( + cleanup_interval_seconds: Optional[int] = None, + distance_metric: Optional[VectorDistances] = None, + dynamic_ef_factor: Optional[int] = None, + dynamic_ef_max: Optional[int] = None, + dynamic_ef_min: Optional[int] = None, + ef: Optional[int] = None, + ef_construction: Optional[int] = None, + filter_strategy: Optional[VectorFilterStrategy] = None, + flat_search_cutoff: Optional[int] = None, + max_connections: Optional[int] = None, + vector_cache_max_objects: Optional[int] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + multi_vector: Optional[_MultiVectorConfigCreate] = None, + ) -> _VectorIndexConfigHNSWCreate: ... 
+ + @staticmethod + def hnsw( + cleanup_interval_seconds: Optional[int] = None, + distance_metric: Optional[VectorDistances] = None, + dynamic_ef_factor: Optional[int] = None, + dynamic_ef_max: Optional[int] = None, + dynamic_ef_min: Optional[int] = None, + ef: Optional[int] = None, + ef_construction: Optional[int] = None, + filter_strategy: Optional[VectorFilterStrategy] = None, + flat_search_cutoff: Optional[int] = None, + max_connections: Optional[int] = None, + vector_cache_max_objects: Optional[int] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + multi_vector: Optional[_MultiVectorConfigCreate] = None, + ) -> _VectorIndexConfigHNSWCreate: + """Create a `_VectorIndexConfigHNSWCreate` object to be used when defining the HNSW vector index configuration of Weaviate. + + Use this method when defining the `vector_index_config` argument in `collections.create()`. + + Args: + See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#how-to-configure-hnsw) for a more detailed view! + """ # noqa: D417 (missing argument descriptions in the docstring) + if multi_vector is not None: + _Warnings.multi_vector_in_hnsw_config() + return _VectorIndexConfigHNSWCreate( + cleanupIntervalSeconds=cleanup_interval_seconds, + distance=distance_metric, + dynamicEfMin=dynamic_ef_min, + dynamicEfMax=dynamic_ef_max, + dynamicEfFactor=dynamic_ef_factor, + efConstruction=ef_construction, + ef=ef, + filterStrategy=filter_strategy, + flatSearchCutoff=flat_search_cutoff, + maxConnections=max_connections, + vectorCacheMaxObjects=vector_cache_max_objects, + quantizer=quantizer, + multivector=multi_vector, + ) + + @staticmethod + def flat( + distance_metric: Optional[VectorDistances] = None, + vector_cache_max_objects: Optional[int] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + ) -> _VectorIndexConfigFlatCreate: + """Create a `_VectorIndexConfigFlatCreate` object to be used when defining the FLAT vector index configuration of Weaviate. 
+ + Use this method when defining the `vector_index_config` argument in `collections.create()`. + + Args: + See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#how-to-configure-hnsw) for a more detailed view! + """ # noqa: D417 (missing argument descriptions in the docstring) + return _VectorIndexConfigFlatCreate( + distance=distance_metric, + vectorCacheMaxObjects=vector_cache_max_objects, + quantizer=quantizer, + multivector=None, + ) + + @staticmethod + def dynamic( + distance_metric: Optional[VectorDistances] = None, + threshold: Optional[int] = None, + hnsw: Optional[_VectorIndexConfigHNSWCreate] = None, + flat: Optional[_VectorIndexConfigFlatCreate] = None, + ) -> _VectorIndexConfigDynamicCreate: + """Create a `_VectorIndexConfigDynamicCreate` object to be used when defining the DYNAMIC vector index configuration of Weaviate. + + Use this method when defining the `vector_index_config` argument in `collections.create()`. + + Args: + See [the docs](https://weaviate.io/developers/weaviate/configuration/indexes#how-to-configure-hnsw) for a more detailed view! 
+ """ # noqa: D417 (missing argument descriptions in the docstring) + return _VectorIndexConfigDynamicCreate( + distance=distance_metric, + threshold=threshold, + hnsw=hnsw, + flat=flat, + quantizer=None, + multivector=None, + ) diff --git a/weaviate/collections/classes/config_vectors.py b/weaviate/collections/classes/config_vectors.py new file mode 100644 index 000000000..5354d6cd7 --- /dev/null +++ b/weaviate/collections/classes/config_vectors.py @@ -0,0 +1,1372 @@ +from typing import Any, Dict, List, Literal, Optional, Union + +from pydantic import AnyHttpUrl, Field + +from weaviate.collections.classes.config_base import ( + _ConfigCreateModel, + _ConfigUpdateModel, + _EnumLikeStr, +) +from weaviate.collections.classes.config_vector_index import ( + VectorIndexType, + _MultiVectorConfigCreate, + _MultiVectorEncodingConfigCreate, + _QuantizerConfigCreate, + _VectorIndexConfigCreate, + _VectorIndexConfigDynamicCreate, + _VectorIndexConfigDynamicUpdate, + _VectorIndexConfigFlatCreate, + _VectorIndexConfigFlatUpdate, + _VectorIndexConfigHNSWCreate, + _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigUpdate, +) +from weaviate.collections.classes.config_vectorizers import ( + AWSModel, + AWSService, + CohereModel, + CohereMultimodalModel, + CohereTruncation, + JinaModel, + JinaMultimodalModel, + Multi2VecField, + OpenAIModel, + OpenAIType, + Vectorizers, + VoyageModel, + VoyageMultimodalModel, + WeaviateModel, + _Img2VecNeuralConfig, + _map_multi2vec_fields, + _Multi2VecBindConfig, + _Multi2VecClipConfig, + _Multi2VecCohereConfig, + _Multi2VecGoogleConfig, + _Multi2VecJinaConfig, + _Multi2VecNvidiaConfig, + _Multi2VecVoyageaiConfig, + _Ref2VecCentroidConfig, + _Text2ColbertJinaAIConfig, + _Text2VecAWSConfig, + _Text2VecAzureOpenAIConfig, + _Text2VecCohereConfig, + _Text2VecContextionaryConfig, + _Text2VecDatabricksConfig, + _Text2VecGoogleConfig, + _Text2VecGPT4AllConfig, + _Text2VecHuggingFaceConfig, + _Text2VecJinaConfig, + _Text2VecMistralConfig, + 
_Text2VecNvidiaConfig, + _Text2VecOllamaConfig, + _Text2VecOpenAIConfig, + _Text2VecTransformersConfig, + _Text2VecVoyageConfig, + _Text2VecWeaviateConfig, + _VectorizerConfigCreate, + _VectorizerCustomConfig, +) + + +class _VectorConfigCreate(_ConfigCreateModel): + name: Optional[str] + properties: Optional[List[str]] = Field(default=None, min_length=1, alias="source_properties") + vectorizer: _VectorizerConfigCreate + vectorIndexType: VectorIndexType = Field(default=VectorIndexType.HNSW, exclude=True) + vectorIndexConfig: Optional[_VectorIndexConfigCreate] = Field( + default=None, alias="vector_index_config" + ) + + def _to_dict(self) -> Dict[str, Any]: + ret_dict: Dict[str, Any] = self.__parse_vectorizer() + if self.vectorIndexConfig is not None: + ret_dict["vectorIndexType"] = self.vectorIndexConfig.vector_index_type().value + ret_dict["vectorIndexConfig"] = self.vectorIndexConfig._to_dict() + else: + ret_dict["vectorIndexType"] = self.vectorIndexType.value + return ret_dict + + def __parse_vectorizer(self) -> Dict[str, Any]: + vectorizer_options = self.vectorizer._to_dict() + if self.properties is not None: + vectorizer_options["properties"] = self.properties + return {"vectorizer": {self.vectorizer.vectorizer.value: vectorizer_options}} + + +class _VectorConfigUpdate(_ConfigUpdateModel): + name: str + vectorIndexConfig: _VectorIndexConfigUpdate = Field(..., alias="vector_index_config") + + +class _IndexWrappers: + @staticmethod + def __hnsw( + *, + quantizer: Optional[_QuantizerConfigCreate] = None, + multivector: Optional[_MultiVectorConfigCreate] = None, + ) -> _VectorIndexConfigHNSWCreate: + return _VectorIndexConfigHNSWCreate( + cleanupIntervalSeconds=None, + distance=None, + dynamicEfMin=None, + dynamicEfMax=None, + dynamicEfFactor=None, + efConstruction=None, + ef=None, + filterStrategy=None, + flatSearchCutoff=None, + maxConnections=None, + vectorCacheMaxObjects=None, + quantizer=quantizer, + multivector=multivector, + ) + + @staticmethod + def 
__flat(*, quantizer: Optional[_QuantizerConfigCreate]) -> _VectorIndexConfigFlatCreate: + return _VectorIndexConfigFlatCreate( + distance=None, + vectorCacheMaxObjects=None, + quantizer=quantizer, + multivector=None, + ) + + @staticmethod + def single( + vector_index_config: Optional[_VectorIndexConfigCreate], + quantizer: Optional[_QuantizerConfigCreate], + ) -> Optional[_VectorIndexConfigCreate]: + if quantizer is not None: + if vector_index_config is None: + vector_index_config = _IndexWrappers.__hnsw(quantizer=quantizer) + else: + if isinstance(vector_index_config, _VectorIndexConfigDynamicCreate): + if vector_index_config.hnsw is None: + vector_index_config.hnsw = _IndexWrappers.__hnsw(quantizer=quantizer) + else: + vector_index_config.hnsw.quantizer = quantizer + if vector_index_config.flat is None: + vector_index_config.flat = _IndexWrappers.__flat(quantizer=quantizer) + else: + vector_index_config.flat.quantizer = quantizer + else: + vector_index_config.quantizer = quantizer + return vector_index_config + + @staticmethod + def multi( + vector_index_config: Optional[_VectorIndexConfigCreate], + quantizer: Optional[_QuantizerConfigCreate], + multi_vector_config: Optional[_MultiVectorConfigCreate], + encoding: Optional[_MultiVectorEncodingConfigCreate], + ) -> Optional[_VectorIndexConfigCreate]: + if multi_vector_config is None: + multi_vector_config = _MultiVectorConfigCreate(aggregation=None, encoding=None) + if encoding is not None: + multi_vector_config.encoding = encoding + if vector_index_config is None: + vector_index_config = _IndexWrappers.__hnsw(multivector=multi_vector_config) + else: + vector_index_config.multivector = multi_vector_config + return _IndexWrappers.single(vector_index_config, quantizer) + + +class _MultiVectors: + @staticmethod + def self_provided( + *, + name: Optional[str] = None, + encoding: Optional[_MultiVectorEncodingConfigCreate] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + multi_vector_config: 
Optional[_MultiVectorConfigCreate] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + ): + """Create a multi-vector using no vectorizer. You will need to provide the vectors yourself. + + Args: + name: The name of the vector. + encoding: The type of multi-vector encoding to use in the vector index. Defaults to `None`, which uses the server-defined default. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + multi_vector_config: The configuration for the multi-vector index. Use `wvc.config.Configure.VectorIndex.MultiVector` to create a multi-vector configuration. None by default + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + """ + return _VectorConfigCreate( + name=name, + vectorizer=_VectorizerConfigCreate(vectorizer=Vectorizers.NONE), + vector_index_config=_IndexWrappers.multi( + vector_index_config, quantizer, multi_vector_config, encoding + ), + ) + + @staticmethod + def text2vec_jinaai( + *, + name: Optional[str] = None, + encoding: Optional[_MultiVectorEncodingConfigCreate] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + dimensions: Optional[int] = None, + model: Optional[str] = None, + source_properties: Optional[List[str]] = None, + multi_vector_config: Optional[_MultiVectorConfigCreate] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a multi-vector using the `text2colbert-jinaai` module. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/colbert) for detailed usage. + + Args: + name: The name of the vector. + encoding: The type of multi-vector encoding to use in the vector index. Defaults to `None`, which uses the server-defined default. + quantizer: The quantizer to use for the vector index. 
If not provided, no quantization will be applied. + dimensions: Number of dimensions. Applicable to v3 OpenAI models only. Defaults to `None`, which uses the server-defined default. + model: The model to use. Defaults to `None`, which uses the server-defined default. + encoding: The type of multi-vector encoding to use in the vector index. Defaults to `None`, which uses the server-defined default. + source_properties: Which properties should be included when vectorizing. By default all text properties are included. + multi_vector_config: The configuration for the multi-vector index. Use `wvc.config.Configure.VectorIndex.MultiVector` to create a multi-vector configuration. None by default + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. + """ + return _VectorConfigCreate( + name=name, + source_properties=source_properties, + vector_index_config=_IndexWrappers.multi( + vector_index_config, quantizer, multi_vector_config, encoding + ), + vectorizer=_Text2ColbertJinaAIConfig( + model=model, dimensions=dimensions, vectorizeClassName=vectorize_collection_name + ), + ) + + +class _Vectors: + @staticmethod + def self_provided( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + ): + """Create a vector using no vectorizer. You will need to provide the vectors yourself. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. 
None by default + """ + return _VectorConfigCreate( + name=name, + vectorizer=_VectorizerConfigCreate(vectorizer=Vectorizers.NONE), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def custom( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + module_name: str, + module_config: Optional[Dict[str, Any]] = None, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + ) -> _VectorConfigCreate: + """Create a vector using a custom module that is not currently supported by the SDK. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + module_name: The name of the custom module to use. + module_config: The configuration of the custom module to use. + source_properties: Which properties should be included when vectorizing. By default all text properties are included. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + """ + return _VectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_VectorizerCustomConfig( + vectorizer=_EnumLikeStr(module_name), module_config=module_config + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def text2vec_cohere( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + base_url: Optional[AnyHttpUrl] = None, + model: Optional[Union[CohereModel, str]] = None, + truncate: Optional[CohereTruncation] = None, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `text2vec-cohere` module. 
+ + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/cohere/embeddings) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. + model: The model to use. Defaults to `None`, which uses the server-defined default. + truncate: The truncation strategy to use. Defaults to `None`, which uses the server-defined default. + source_properties: Which properties should be included when vectorizing. By default all text properties are included. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. + + Raises: + pydantic.ValidationError: If `model` is not a valid value from the `CohereModel` type or if `truncate` is not a valid value from the `CohereTruncation` type. 
+ """ + return _VectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_Text2VecCohereConfig( + baseURL=base_url, + model=model, + truncate=truncate, + vectorizeClassName=vectorize_collection_name, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def multi2vec_cohere( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + base_url: Optional[AnyHttpUrl] = None, + image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + model: Optional[Union[CohereMultimodalModel, str]] = None, + text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + truncate: Optional[CohereTruncation] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `multi2vec-cohere` module. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/cohere/embeddings-multimodal) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. + image_fields: The image fields to use in vectorization. + model: The model to use. Defaults to `None`, which uses the server-defined default. + text_fields: The text fields to use in vectorization. + truncate: The truncation strategy to use. Defaults to `None`, which uses the server-defined default. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. 
+ + Raises: + pydantic.ValidationError: If `model` is not a valid value from the `CohereMultimodalModel` type or if `truncate` is not a valid value from the `CohereTruncation` type. + """ + return _VectorConfigCreate( + name=name, + vectorizer=_Multi2VecCohereConfig( + baseURL=base_url, + model=model, + truncate=truncate, + vectorizeClassName=vectorize_collection_name, + imageFields=_map_multi2vec_fields(image_fields), + textFields=_map_multi2vec_fields(text_fields), + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def text2vec_contextionary( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `text2vec-contextionary` module. + + See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/text2vec-contextionary) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + source_properties: Which properties should be included when vectorizing. By default all text properties are included. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. 
+ """ + return _VectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_Text2VecContextionaryConfig( + vectorizeClassName=vectorize_collection_name, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def text2vec_databricks( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + endpoint: str, + instruction: Optional[str] = None, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `text2vec-databricks` module. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/databricks/embeddings) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + endpoint: The endpoint to use. + instruction: The instruction strategy to use. Defaults to `None`, which uses the server-defined default. + source_properties: Which properties should be included when vectorizing. By default all text properties are included. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. 
+ """ + return _VectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_Text2VecDatabricksConfig( + endpoint=endpoint, + instruction=instruction, + vectorizeClassName=vectorize_collection_name, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def text2vec_mistral( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + base_url: Optional[AnyHttpUrl] = None, + model: Optional[str] = None, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `text2vec-mistral` module. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/mistral/embeddings) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. + model: The model to use. Defaults to `None`, which uses the server-defined default. + source_properties: Which properties should be included when vectorizing. By default all text properties are included. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. 
+ """ + return _VectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_Text2VecMistralConfig( + baseURL=base_url, + model=model, + vectorizeClassName=vectorize_collection_name, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def text2vec_ollama( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + api_endpoint: Optional[str] = None, + model: Optional[str] = None, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `text2vec-ollama` module. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/ollama/embeddings) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + api_endpoint: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. + Docker users may need to specify an alias, such as `http://host.docker.internal:11434` so that the container can access the host machine. + model: The model to use. Defaults to `None`, which uses the server-defined default. + source_properties: Which properties should be included when vectorizing. By default all text properties are included. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. 
+ + + """ + return _VectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_Text2VecOllamaConfig( + apiEndpoint=api_endpoint, + model=model, + vectorizeClassName=vectorize_collection_name, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def text2vec_openai( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + base_url: Optional[AnyHttpUrl] = None, + dimensions: Optional[int] = None, + model: Optional[Union[OpenAIModel, str]] = None, + model_version: Optional[str] = None, + type_: Optional[OpenAIType] = None, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `text2vec-openai` module. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/openai/embeddings) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. + dimensions: Number of dimensions. Applicable to v3 OpenAI models only. Defaults to `None`, which uses the server-defined default. + model: The model to use. Defaults to `None`, which uses the server-defined default. + model_version: The model version to use. Defaults to `None`, which uses the server-defined default. + type_: The type of model to use. Defaults to `None`, which uses the server-defined default. + source_properties: Which properties should be included when vectorizing. By default all text properties are included. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. 
None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. + + Raises: + pydantic.ValidationError: If `type_` is not a valid value from the `OpenAIType` type. + """ + return _VectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_Text2VecOpenAIConfig( + baseURL=base_url, + model=model, + modelVersion=model_version, + type_=type_, + vectorizeClassName=vectorize_collection_name, + dimensions=dimensions, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def text2vec_aws( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + endpoint: Optional[str] = None, + model: Optional[Union[AWSModel, str]] = None, + region: str, + service: Union[AWSService, str] = "bedrock", + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `text2vec-aws` module. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/aws/embeddings) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + endpoint: The endpoint to use. Defaults to `None`, which uses the server-defined default. + model: The model to use. + region: The AWS region to run the model from, REQUIRED. + service: The AWS service to use. Defaults to `bedrock`. + source_properties: Which properties should be included when vectorizing. By default all text properties are included. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. 
+ """ + return _VectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_Text2VecAWSConfig( + model=model, + endpoint=endpoint, + region=region, + service=service, + vectorizeClassName=vectorize_collection_name, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def img2vec_neural( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + image_fields: List[str], + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + ) -> _VectorConfigCreate: + """Create a vector using the `img2vec-neural` module. + + See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/img2vec-neural) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + image_fields: The image fields to use. This is a required field and must match the property fields of the collection that are defined as `DataType.BLOB`. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + + Raises: + pydantic.ValidationError: If `image_fields` is not a `list`. 
+ """ + return _VectorConfigCreate( + name=name, + vectorizer=_Img2VecNeuralConfig(imageFields=image_fields), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def multi2vec_clip( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + inference_url: Optional[str] = None, + image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `multi2vec-clip` module. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/transformers/embeddings-multimodal) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + inference_url: The inference url to use where API requests should go. Defaults to `None`, which uses the server-defined default. + image_fields: The image fields to use in vectorization. + text_fields: The text fields to use in vectorization. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. 
+ + """ + return _VectorConfigCreate( + name=name, + vectorizer=_Multi2VecClipConfig( + imageFields=_map_multi2vec_fields(image_fields), + textFields=_map_multi2vec_fields(text_fields), + vectorizeClassName=vectorize_collection_name, + inferenceUrl=inference_url, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def multi2vec_google( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + dimensions: Optional[int] = None, + image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + location: str, + model_id: Optional[str] = None, + project_id: str, + text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + video_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + video_interval_seconds: Optional[int] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `multi2vec-google` module. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings-multimodal) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + dimensions: The number of dimensions to use. Defaults to `None`, which uses the server-defined default. + image_fields: The image fields to use in vectorization. + location: Where the model runs. REQUIRED. + model_id: The model ID to use. Defaults to `None`, which uses the server-defined default. + project_id: The project ID to use, REQUIRED. + text_fields: The text fields to use in vectorization. + video_fields: The video fields to use in vectorization. + video_interval_seconds: Length of a video interval. Defaults to `None`, which uses the server-defined default. + vector_index_config: The configuration for Weaviate's vector index. 
Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. + """ + return _VectorConfigCreate( + name=name, + vectorizer=_Multi2VecGoogleConfig( + projectId=project_id, + location=location, + imageFields=_map_multi2vec_fields(image_fields), + textFields=_map_multi2vec_fields(text_fields), + videoFields=_map_multi2vec_fields(video_fields), + dimensions=dimensions, + modelId=model_id, + videoIntervalSeconds=video_interval_seconds, + vectorizeClassName=vectorize_collection_name, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def multi2vec_bind( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + audio_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + depth_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + imu_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + thermal_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + video_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `multi2vec-bind` module. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/imagebind/embeddings-multimodal) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + audio_fields: The audio fields to use in vectorization. + depth_fields: The depth fields to use in vectorization. + image_fields: The image fields to use in vectorization. 
+ imu_fields: The IMU fields to use in vectorization. + text_fields: The text fields to use in vectorization. + thermal_fields: The thermal fields to use in vectorization. + video_fields: The video fields to use in vectorization. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. + """ + return _VectorConfigCreate( + name=name, + vectorizer=_Multi2VecBindConfig( + audioFields=_map_multi2vec_fields(audio_fields), + depthFields=_map_multi2vec_fields(depth_fields), + imageFields=_map_multi2vec_fields(image_fields), + IMUFields=_map_multi2vec_fields(imu_fields), + textFields=_map_multi2vec_fields(text_fields), + thermalFields=_map_multi2vec_fields(thermal_fields), + videoFields=_map_multi2vec_fields(video_fields), + vectorizeClassName=vectorize_collection_name, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def multi2vec_voyageai( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + base_url: Optional[AnyHttpUrl] = None, + image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + model: Optional[Union[VoyageMultimodalModel, str]] = None, + output_encoding: Optional[str] = None, + text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + truncation: Optional[bool] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `multi2vec-voyageai` module. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings-multimodal) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. 
+ base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. + image_fields: The image fields to use in vectorization. + model: The model to use. Defaults to `None`, which uses the server-defined default. + output_encoding: The output encoding to use. Defaults to `None`, which uses the server-defined default. + text_fields: The text fields to use in vectorization. + truncation: The truncation strategy to use. Defaults to `None`, which uses the server-defined default. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. + + Raises: + pydantic.ValidationError: If `model` is not a valid value from the `VoyageMultimodalModel` type. + """ + return _VectorConfigCreate( + name=name, + vectorizer=_Multi2VecVoyageaiConfig( + baseURL=base_url, + model=model, + truncation=truncation, + output_encoding=output_encoding, + vectorizeClassName=vectorize_collection_name, + imageFields=_map_multi2vec_fields(image_fields), + textFields=_map_multi2vec_fields(text_fields), + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def multi2vec_nvidia( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + base_url: Optional[AnyHttpUrl] = None, + image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + model: Optional[str] = None, + output_encoding: Optional[str] = None, + text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + truncation: Optional[bool] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `multi2vec-nvidia` module. 
+ + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/nvidia/embeddings-multimodal) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. + image_fields: The image fields to use in vectorization. + model: The model to use. Defaults to `None`, which uses the server-defined default. + output_encoding: The output encoding to use. Defaults to `None`, which uses the server-defined default. + text_fields: The text fields to use in vectorization. + truncation: The truncation strategy to use. Defaults to `None`, which uses the server-defined default. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. + + Raises: + pydantic.ValidationError: If `model` is not a valid value from the `NvidiaMultimodalModel` type. + """ + return _VectorConfigCreate( + name=name, + vectorizer=_Multi2VecNvidiaConfig( + baseURL=base_url, + model=model, + truncation=truncation, + output_encoding=output_encoding, + vectorizeClassName=vectorize_collection_name, + imageFields=_map_multi2vec_fields(image_fields), + textFields=_map_multi2vec_fields(text_fields), + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def ref2vec_centroid( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + method: Literal["mean"] = "mean", + reference_properties: List[str], + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + ) -> _VectorConfigCreate: + """Create a vector using the `ref2vec-centroid` module. 
+ + See the [documentation](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules/ref2vec-centroid) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + method: The method to use. Defaults to `mean`. + reference_properties: The reference properties to use in vectorization, REQUIRED. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + """ + return _VectorConfigCreate( + name=name, + vectorizer=_Ref2VecCentroidConfig( + referenceProperties=reference_properties, + method=method, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def text2vec_azure_openai( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + base_url: Optional[AnyHttpUrl] = None, + deployment_id: str, + dimensions: Optional[int] = None, + model: Optional[str] = None, + resource_name: str, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `text2vec-openai` module running with Azure. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/openai-azure/embeddings) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. + deployment_id: The deployment ID to use, REQUIRED. + dimensions: The dimensionality of the vectors. Defaults to `None`, which uses the server-defined default. + model: The model to use. 
Defaults to `None`, which uses the server-defined default. + resource_name: The resource name to use, REQUIRED. + source_properties: Which properties should be included when vectorizing. By default all text properties are included. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. + """ + return _VectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_Text2VecAzureOpenAIConfig( + baseURL=base_url, + dimensions=dimensions, + model=model, + resourceName=resource_name, + deploymentId=deployment_id, + vectorizeClassName=vectorize_collection_name, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def text2vec_gpt4all( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `text2vec-gpt4all` module. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/gpt4all/embeddings) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + source_properties: Which properties should be included when vectorizing. By default all text properties are included. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. 
+ """ + return _VectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_Text2VecGPT4AllConfig( + vectorizeClassName=vectorize_collection_name, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def text2vec_huggingface( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + endpoint_url: Optional[AnyHttpUrl] = None, + model: Optional[str] = None, + passage_model: Optional[str] = None, + query_model: Optional[str] = None, + wait_for_model: Optional[bool] = None, + use_gpu: Optional[bool] = None, + use_cache: Optional[bool] = None, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `text2vec-huggingface` module. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/huggingface/embeddings) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + endpoint_url: The endpoint URL to use. Defaults to `None`, which uses the server-defined default. + model: The model to use. Defaults to `None`, which uses the server-defined default. + passage_model: The passage model to use. Defaults to `None`, which uses the server-defined default. + query_model: The query model to use. Defaults to `None`, which uses the server-defined default. + wait_for_model: Whether to wait for the model to be loaded. Defaults to `None`, which uses the server-defined default. + use_gpu: Whether to use the GPU. Defaults to `None`, which uses the server-defined default. + use_cache: Whether to use the cache. Defaults to `None`, which uses the server-defined default. + source_properties: Which properties should be included when vectorizing. 
By default all text properties are included. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. + + Raises: + pydantic.ValidationError: If the arguments passed to the function are invalid. + It is important to note that some of these variables are mutually exclusive. + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/huggingface/embeddings#vectorizer-parameters) for more details. + """ + return _VectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_Text2VecHuggingFaceConfig( + model=model, + passageModel=passage_model, + queryModel=query_model, + endpointURL=endpoint_url, + waitForModel=wait_for_model, + useGPU=use_gpu, + useCache=use_cache, + vectorizeClassName=vectorize_collection_name, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def text2vec_google( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + api_endpoint: Optional[str] = None, + model_id: Optional[str] = None, + project_id: str, + title_property: Optional[str] = None, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `text2vec-google` model. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + api_endpoint: The API endpoint to use without a leading scheme such as `http://`.
Defaults to `None`, which uses the server-defined default. + model_id: The model ID to use. Defaults to `None`, which uses the server-defined default. + project_id: The project ID to use, REQUIRED. + title_property: The Weaviate property name for the `gecko-002` or `gecko-003` model to use as the title. + source_properties: Which properties should be included when vectorizing. By default all text properties are included. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. + + Raises: + pydantic.ValidationError: If `api_endpoint` is not a valid URL. + """ + return _VectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_Text2VecGoogleConfig( + projectId=project_id, + apiEndpoint=api_endpoint, + modelId=model_id, + vectorizeClassName=vectorize_collection_name, + titleProperty=title_property, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def text2vec_google_aistudio( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + model_id: Optional[str] = None, + title_property: Optional[str] = None, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `text2vec-google` model. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/google/embeddings) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + model_id: The model ID to use. Defaults to `None`, which uses the server-defined default.
+ title_property: The Weaviate property name for the `gecko-002` or `gecko-003` model to use as the title. + source_properties: Which properties should be included when vectorizing. By default all text properties are included. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. + + Raises: + pydantic.ValidationError: If the arguments passed to the function are invalid. + """ + return _VectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_Text2VecGoogleConfig( + projectId=None, + apiEndpoint="generativelanguage.googleapis.com", + modelId=model_id, + vectorizeClassName=vectorize_collection_name, + titleProperty=title_property, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def text2vec_transformers( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + inference_url: Optional[str] = None, + passage_inference_url: Optional[str] = None, + pooling_strategy: Literal["masked_mean", "cls"] = "masked_mean", + query_inference_url: Optional[str] = None, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `text2vec-transformers` module. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/transformers/embeddings) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + inference_url: The inferenceUrl to use where API requests should go. You can use either this OR passage/query_inference_url. Defaults to `None`, which uses the server-defined default.
+ passage_inference_url: The inferenceUrl to use where passage API requests should go. You can use either this and query_inference_url OR inference_url. Defaults to `None`, which uses the server-defined default. + pooling_strategy: The pooling strategy to use. Defaults to `masked_mean`. + query_inference_url: The inferenceUrl to use where query API requests should go. You can use either this and passage_inference_url OR inference_url. Defaults to `None`, which uses the server-defined default. + source_properties: Which properties should be included when vectorizing. By default all text properties are included. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. + """ + return _VectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_Text2VecTransformersConfig( + poolingStrategy=pooling_strategy, + vectorizeClassName=vectorize_collection_name, + inferenceUrl=inference_url, + passageInferenceUrl=passage_inference_url, + queryInferenceUrl=query_inference_url, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def text2vec_jinaai( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + base_url: Optional[str] = None, + dimensions: Optional[int] = None, + model: Optional[Union[JinaModel, str]] = None, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `text2vec-jinaai` module. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings) for detailed usage. + + Args: + name: The name of the vector. 
+ quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + base_url: The base URL to send the vectorization requests to. Defaults to `None`, which uses the server-defined default. + dimensions: The number of dimensions for the generated embeddings. Defaults to `None`, which uses the server-defined default. + model: The model to use. Defaults to `None`, which uses the server-defined default. + source_properties: Which properties should be included when vectorizing. By default all text properties are included. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. + """ + return _VectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_Text2VecJinaConfig( + baseURL=base_url, + dimensions=dimensions, + model=model, + vectorizeClassName=vectorize_collection_name, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def multi2vec_jinaai( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + base_url: Optional[AnyHttpUrl] = None, + dimensions: Optional[int] = None, + image_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + model: Optional[Union[JinaMultimodalModel, str]] = None, + text_fields: Optional[Union[List[str], List[Multi2VecField]]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `multi2vec-jinaai` module. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings-multimodal) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. 
If not provided, no quantization will be applied. + base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. + dimensions: The number of dimensions for the generated embeddings (only available for some models). Defaults to `None`, which uses the server-defined default. + image_fields: The image fields to use in vectorization. + model: The model to use. Defaults to `None`, which uses the server-defined default. + text_fields: The text fields to use in vectorization. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. + + Raises: + pydantic.ValidationError: If `model` is not a valid value from the `JinaMultimodalModel` type. + """ + return _VectorConfigCreate( + name=name, + vectorizer=_Multi2VecJinaConfig( + baseURL=base_url, + model=model, + dimensions=dimensions, + vectorizeClassName=vectorize_collection_name, + imageFields=_map_multi2vec_fields(image_fields), + textFields=_map_multi2vec_fields(text_fields), + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def text2vec_voyageai( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + base_url: Optional[str] = None, + model: Optional[Union[VoyageModel, str]] = None, + truncate: Optional[bool] = None, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `text2vec-voyageai` module. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. 
If not provided, no quantization will be applied. + base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. + model: The model to use. Defaults to `None`, which uses the server-defined default. + See the + [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings#available-models) for more details. + truncate: Whether to truncate the input texts to fit within the context length. Defaults to `None`, which uses the server-defined default. + source_properties: Which properties should be included when vectorizing. By default all text properties are included. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. + """ + return _VectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_Text2VecVoyageConfig( + model=model, + vectorizeClassName=vectorize_collection_name, + baseURL=base_url, + truncate=truncate, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod + def text2vec_weaviate( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + base_url: Optional[str] = None, + dimensions: Optional[int] = None, + model: Optional[Union[WeaviateModel, str]] = None, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + return _VectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_Text2VecWeaviateConfig( + model=model, + vectorizeClassName=vectorize_collection_name, + baseURL=base_url, + dimensions=dimensions, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + @staticmethod 
+ def text2vec_nvidia( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + base_url: Optional[str] = None, + model: Optional[str] = None, + truncate: Optional[bool] = None, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using the `text2vec-nvidia` module. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/nvidia/embeddings) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. + source_properties: Which properties should be included when vectorizing. By default all text properties are included. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. + model: The model to use. Defaults to `None`, which uses the server-defined default. + See the + [documentation](https://weaviate.io/developers/weaviate/model-providers/nvidia/embeddings#available-models) for more details. + + truncate: Whether to truncate the input texts to fit within the context length. Defaults to `None`, which uses the server-defined default. 
+ """ + return _VectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_Text2VecNvidiaConfig( + model=model, + vectorizeClassName=vectorize_collection_name, + baseURL=base_url, + truncate=truncate, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + + +class _VectorsUpdate: + @staticmethod + def update( + *, + name: Optional[str] = None, + vector_index_config: Union[ + _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigFlatUpdate, + _VectorIndexConfigDynamicUpdate, + ], + ) -> _VectorConfigUpdate: + """Update the vector index configuration of a vector. + + This is the only update operation allowed currently. If you wish to change the vectorization configuration itself, you will have to + recreate the collection with the new configuration. + + Args: + name: The name of the vector. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Reconfigure.VectorIndex` to create a vector index configuration. `None` by default + """ + return _VectorConfigUpdate( + name=name or "default", + vector_index_config=vector_index_config, + ) diff --git a/weaviate/collections/classes/grpc.py b/weaviate/collections/classes/grpc.py index 0a593ac27..03f43e56a 100644 --- a/weaviate/collections/classes/grpc.py +++ b/weaviate/collections/classes/grpc.py @@ -290,9 +290,9 @@ def and_() -> BM25OperatorOptions: OneDimensionalVectorType = Sequence[NUMBER] -"""Represents a one-dimensional vector, e.g. one produced by `text2vec-jinaai`""" +"""Represents a one-dimensional vector, e.g. one produced by the `Configure.Vectors.text2vec_jinaai()` module""" TwoDimensionalVectorType = Sequence[Sequence[NUMBER]] -"""Represents a two-dimensional vector, e.g. one produced by `text2colbert-jinaai""" +"""Represents a two-dimensional vector, e.g. 
one produced by the `Configure.MultiVectors.text2vec_jinaai()` module""" PrimitiveVectorType = Union[OneDimensionalVectorType, TwoDimensionalVectorType] diff --git a/weaviate/collections/collections/async_.pyi b/weaviate/collections/collections/async_.pyi index 71de2ab4b..d8d7e1521 100644 --- a/weaviate/collections/collections/async_.pyi +++ b/weaviate/collections/collections/async_.pyi @@ -1,5 +1,7 @@ from typing import Dict, List, Literal, Optional, Sequence, Type, Union, overload +from typing_extensions import deprecated + from weaviate.collections.classes.config import ( CollectionConfig, CollectionConfigSimple, @@ -12,6 +14,7 @@ from weaviate.collections.classes.config import ( _ReplicationConfigCreate, _RerankerProvider, _ShardingConfigCreate, + _VectorConfigCreate, _VectorIndexConfigCreate, _VectorizerConfigCreate, ) @@ -24,6 +27,57 @@ from weaviate.collections.collections.base import _CollectionsBase from weaviate.connect.v4 import ConnectionAsync class _CollectionsAsync(_CollectionsBase[ConnectionAsync]): + @overload + @deprecated( + 'Using the "vector_index_config" argument is deprecated. Instead, define the vector index for each specific vectorizer supplied to the "vector_config" argument.'
+ ) + async def create( + self, + name: str, + *, + description: Optional[str] = None, + generative_config: Optional[_GenerativeProvider] = None, + inverted_index_config: Optional[_InvertedIndexConfigCreate] = None, + multi_tenancy_config: Optional[_MultiTenancyConfigCreate] = None, + properties: Optional[Sequence[Property]] = None, + references: Optional[List[_ReferencePropertyBase]] = None, + replication_config: Optional[_ReplicationConfigCreate] = None, + reranker_config: Optional[_RerankerProvider] = None, + sharding_config: Optional[_ShardingConfigCreate] = None, + vector_index_config: _VectorIndexConfigCreate, + vectorizer_config: Optional[ + Union[_VectorizerConfigCreate, List[_NamedVectorConfigCreate]] + ] = None, + vector_config: Optional[Union[_VectorConfigCreate, List[_VectorConfigCreate]]] = None, + data_model_properties: Optional[Type[Properties]] = None, + data_model_references: Optional[Type[References]] = None, + skip_argument_validation: bool = False, + ) -> CollectionAsync[Properties, References]: ... + @overload + @deprecated( + 'Using the "vectorizer_config" argument is deprecated. Instead, use the "vector_config" argument.'
+ ) + async def create( + self, + name: str, + *, + description: Optional[str] = None, + generative_config: Optional[_GenerativeProvider] = None, + inverted_index_config: Optional[_InvertedIndexConfigCreate] = None, + multi_tenancy_config: Optional[_MultiTenancyConfigCreate] = None, + properties: Optional[Sequence[Property]] = None, + references: Optional[List[_ReferencePropertyBase]] = None, + replication_config: Optional[_ReplicationConfigCreate] = None, + reranker_config: Optional[_RerankerProvider] = None, + sharding_config: Optional[_ShardingConfigCreate] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorizer_config: Union[_VectorizerConfigCreate, List[_NamedVectorConfigCreate]], + vector_config: Optional[Union[_VectorConfigCreate, List[_VectorConfigCreate]]] = None, + data_model_properties: Optional[Type[Properties]] = None, + data_model_references: Optional[Type[References]] = None, + skip_argument_validation: bool = False, + ) -> CollectionAsync[Properties, References]: ... 
+ @overload async def create( self, name: str, @@ -41,6 +95,7 @@ class _CollectionsAsync(_CollectionsBase[ConnectionAsync]): vectorizer_config: Optional[ Union[_VectorizerConfigCreate, List[_NamedVectorConfigCreate]] ] = None, + vector_config: Optional[Union[_VectorConfigCreate, List[_VectorConfigCreate]]] = None, data_model_properties: Optional[Type[Properties]] = None, data_model_references: Optional[Type[References]] = None, skip_argument_validation: bool = False, diff --git a/weaviate/collections/collections/executor.py b/weaviate/collections/collections/executor.py index 4f0a49d27..61b243cd6 100644 --- a/weaviate/collections/collections/executor.py +++ b/weaviate/collections/collections/executor.py @@ -27,6 +27,7 @@ _ReplicationConfigCreate, _RerankerProvider, _ShardingConfigCreate, + _VectorConfigCreate, _VectorIndexConfigCreate, _VectorizerConfigCreate, ) @@ -51,6 +52,7 @@ from weaviate.exceptions import WeaviateInvalidInputError from weaviate.util import _capitalize_first_letter, _decode_json_response_dict from weaviate.validator import _validate_input, _ValidateArgument +from weaviate.warnings import _Warnings CollectionType = TypeVar("CollectionType", Collection, CollectionAsync) @@ -159,6 +161,7 @@ def create( vectorizer_config: Optional[ Union[_VectorizerConfigCreate, List[_NamedVectorConfigCreate]] ] = None, + vector_config: Optional[Union[_VectorConfigCreate, List[_VectorConfigCreate]]] = None, data_model_properties: Optional[Type[Properties]] = None, data_model_references: Optional[Type[References]] = None, skip_argument_validation: bool = False, @@ -190,8 +193,9 @@ def create( references: The references of the objects in the collection. replication_config: The configuration for Weaviate's replication strategy. sharding_config: The configuration for Weaviate's sharding strategy. - vector_index_config: The configuration for Weaviate's default vector index. 
- vectorizer_config: The configuration for Weaviate's default vectorizer or a list of named vectorizers. + vector_index_config (DEPRECATED use `vector_config`): The configuration for Weaviate's default vector index. + vectorizer_config (DEPRECATED use `vector_config`): The configuration for Weaviate's default vectorizer or a list of named vectorizers. + vector_config: The configuration(s) for the vectorizer(s) to use for the collection. data_model_properties: The generic class that you want to use to represent the properties of objects in this collection. See the `get` method for more information. data_model_references: The generic class that you want to use to represent the references of objects in this collection. See the `get` method for more information. skip_argument_validation: If arguments to functions such as near_vector should be validated. Disable this if you need to squeeze out some extra performance. @@ -208,6 +212,10 @@ def create( raise WeaviateInvalidInputError( "Named vectorizers are only supported in Weaviate v1.24.0 and higher" ) + if vectorizer_config is not None: + _Warnings.vectorizer_config_in_config_create() + if vector_index_config is not None: + _Warnings.vector_index_config_in_config_create() try: config = _CollectionConfigCreate( description=description, @@ -221,6 +229,7 @@ def create( reranker_config=reranker_config, sharding_config=sharding_config, vectorizer_config=vectorizer_config, + vector_config=vector_config, vector_index_config=vector_index_config, ) except ValidationError as e: diff --git a/weaviate/collections/collections/sync.pyi b/weaviate/collections/collections/sync.pyi index 90ba29344..a6d92e313 100644 --- a/weaviate/collections/collections/sync.pyi +++ b/weaviate/collections/collections/sync.pyi @@ -1,5 +1,7 @@ from typing import Dict, List, Literal, Optional, Sequence, Type, Union, overload +from typing_extensions import deprecated + from weaviate.collections.classes.config import ( CollectionConfig, 
CollectionConfigSimple, @@ -12,6 +14,7 @@ from weaviate.collections.classes.config import ( _ReplicationConfigCreate, _RerankerProvider, _ShardingConfigCreate, + _VectorConfigCreate, _VectorIndexConfigCreate, _VectorizerConfigCreate, ) @@ -24,6 +27,57 @@ from weaviate.collections.collections.base import _CollectionsBase from weaviate.connect.v4 import ConnectionSync class _Collections(_CollectionsBase[ConnectionSync]): + @overload + @deprecated( + 'Using the "vector_index_config" argument is deprecated. Instead, define the vector index for each specific vectorizer supplied to the "vector_config" argument.' + ) + def create( + self, + name: str, + *, + description: Optional[str] = None, + generative_config: Optional[_GenerativeProvider] = None, + inverted_index_config: Optional[_InvertedIndexConfigCreate] = None, + multi_tenancy_config: Optional[_MultiTenancyConfigCreate] = None, + properties: Optional[Sequence[Property]] = None, + references: Optional[List[_ReferencePropertyBase]] = None, + replication_config: Optional[_ReplicationConfigCreate] = None, + reranker_config: Optional[_RerankerProvider] = None, + sharding_config: Optional[_ShardingConfigCreate] = None, + vector_index_config: _VectorIndexConfigCreate, + vectorizer_config: Optional[ + Union[_VectorizerConfigCreate, List[_NamedVectorConfigCreate]] + ] = None, + vector_config: Optional[Union[_VectorConfigCreate, List[_VectorConfigCreate]]] = None, + data_model_properties: Optional[Type[Properties]] = None, + data_model_references: Optional[Type[References]] = None, + skip_argument_validation: bool = False, + ) -> Collection[Properties, References]: ... + @overload + @deprecated( + 'Using the "vectorizer_config" argument is deprecated. Instead, use the "vector_config" argument.' 
+ ) + def create( + self, + name: str, + *, + description: Optional[str] = None, + generative_config: Optional[_GenerativeProvider] = None, + inverted_index_config: Optional[_InvertedIndexConfigCreate] = None, + multi_tenancy_config: Optional[_MultiTenancyConfigCreate] = None, + properties: Optional[Sequence[Property]] = None, + references: Optional[List[_ReferencePropertyBase]] = None, + replication_config: Optional[_ReplicationConfigCreate] = None, + reranker_config: Optional[_RerankerProvider] = None, + sharding_config: Optional[_ShardingConfigCreate] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorizer_config: Union[_VectorizerConfigCreate, List[_NamedVectorConfigCreate]], + vector_config: Optional[Union[_VectorConfigCreate, List[_VectorConfigCreate]]] = None, + data_model_properties: Optional[Type[Properties]] = None, + data_model_references: Optional[Type[References]] = None, + skip_argument_validation: bool = False, + ) -> Collection[Properties, References]: ... 
+ @overload def create( self, name: str, @@ -41,6 +95,7 @@ class _Collections(_CollectionsBase[ConnectionSync]): vectorizer_config: Optional[ Union[_VectorizerConfigCreate, List[_NamedVectorConfigCreate]] ] = None, + vector_config: Optional[Union[_VectorConfigCreate, List[_VectorConfigCreate]]] = None, data_model_properties: Optional[Type[Properties]] = None, data_model_references: Optional[Type[References]] = None, skip_argument_validation: bool = False, diff --git a/weaviate/collections/config/async_.pyi b/weaviate/collections/config/async_.pyi index 4e0871f35..8ada581a9 100644 --- a/weaviate/collections/config/async_.pyi +++ b/weaviate/collections/config/async_.pyi @@ -15,6 +15,7 @@ from weaviate.collections.classes.config import ( _ReferencePropertyMultiTarget, _ReplicationConfigUpdate, _RerankerProvider, + _VectorConfigUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigHNSWUpdate, ) @@ -51,6 +52,7 @@ class _ConfigCollectionAsync(_ConfigCollectionExecutor[ConnectionAsync]): List[_NamedVectorConfigUpdate], ] ] = None, + vector_config: Optional[Union[_VectorConfigUpdate, List[_VectorConfigUpdate]]] = None, generative_config: Optional[_GenerativeProvider] = None, reranker_config: Optional[_RerankerProvider] = None, ) -> None: ... 
diff --git a/weaviate/collections/config/executor.py b/weaviate/collections/config/executor.py index ede4b23b9..2dbf2c302 100644 --- a/weaviate/collections/config/executor.py +++ b/weaviate/collections/config/executor.py @@ -33,6 +33,7 @@ _ReplicationConfigUpdate, _RerankerProvider, _ShardStatus, + _VectorConfigUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigHNSWUpdate, ) @@ -141,6 +142,7 @@ def update( List[_NamedVectorConfigUpdate], ] ] = None, + vector_config: Optional[Union[_VectorConfigUpdate, List[_VectorConfigUpdate]]] = None, generative_config: Optional[_GenerativeProvider] = None, reranker_config: Optional[_RerankerProvider] = None, ) -> executor.Result[None]: @@ -153,9 +155,12 @@ def update( inverted_index_config: Configuration for the inverted index. Use `Reconfigure.inverted_index` to generate one. replication_config: Configuration for the replication. Use `Reconfigure.replication` to generate one. reranker_config: Configuration for the reranker. Use `Reconfigure.replication` to generate one. - vector_index_config`: DEPRECATED USE `vectorizer_config` INSTEAD. Configuration for the vector index of the default single vector. Use `Reconfigure.vector_index` to generate one. + vector_index_config (DEPRECATED use `vector_config`): Configuration for the vector index of the default single vector. Use `Reconfigure.vector_index` to generate one. vectorizer_config: Configurations for the vector index (or indices) of your collection. - Use `Reconfigure.vector_index` if there is only one vectorizer and `Reconfigure.NamedVectors` if you have many named vectors to generate them. + Use `Reconfigure.vector_index` if using legacy vectorization and `Reconfigure.NamedVectors` if you have many named vectors to generate them. + Using this argument with a list of `Reconfigure.NamedVectors` is **DEPRECATED**. Use the `vector_config` argument instead in such a case. + vector_config: Configuration for the vector index (or indices) of your collection. 
+ Use `Reconfigure.Vectors` for both single and multiple vectorizers. Supply a list to update many vectorizers at once. multi_tenancy_config: Configuration for multi-tenancy settings. Use `Reconfigure.multi_tenancy` to generate one. Only `auto_tenant_creation` is supported. @@ -171,6 +176,15 @@ def update( """ if vector_index_config is not None: _Warnings.vector_index_config_in_config_update() + if vectorizer_config is not None and not isinstance( + vectorizer_config, + ( + _VectorIndexConfigHNSWUpdate, + _VectorIndexConfigFlatUpdate, + _VectorIndexConfigDynamicUpdate, + ), + ): + _Warnings.vectorizer_config_in_config_update() try: config = _CollectionConfigUpdate( description=description, @@ -182,6 +196,7 @@ def update( multi_tenancy_config=multi_tenancy_config, generative_config=generative_config, reranker_config=reranker_config, + vector_config=vector_config, ) except ValidationError as e: raise WeaviateInvalidInputError("Invalid collection config update parameters.") from e @@ -236,7 +251,10 @@ def resp(schema: Dict[str, Any]) -> executor.Result[None]: vector_config: Dict[str, Any] = schema.get("vectorConfig", {}) if len(vector_config) > 0: - obj["vectorConfig"] = {key: modconf for key in vector_config.keys()} + obj["moduleConfig"] = { + list(conf["vectorizer"].keys()).pop(): modconf + for conf in vector_config.values() + } def inner_resp(res: Response) -> None: return None diff --git a/weaviate/collections/config/sync.pyi b/weaviate/collections/config/sync.pyi index cac324f48..02157b03d 100644 --- a/weaviate/collections/config/sync.pyi +++ b/weaviate/collections/config/sync.pyi @@ -15,6 +15,7 @@ from weaviate.collections.classes.config import ( _ReferencePropertyMultiTarget, _ReplicationConfigUpdate, _RerankerProvider, + _VectorConfigUpdate, _VectorIndexConfigFlatUpdate, _VectorIndexConfigHNSWUpdate, ) @@ -49,6 +50,7 @@ class _ConfigCollection(_ConfigCollectionExecutor[ConnectionSync]): List[_NamedVectorConfigUpdate], ] ] = None, + vector_config: 
Optional[Union[_VectorConfigUpdate, List[_VectorConfigUpdate]]] = None, generative_config: Optional[_GenerativeProvider] = None, reranker_config: Optional[_RerankerProvider] = None, ) -> None: ... diff --git a/weaviate/collections/queries/base_executor.py b/weaviate/collections/queries/base_executor.py index c03675784..e12d3e53d 100644 --- a/weaviate/collections/queries/base_executor.py +++ b/weaviate/collections/queries/base_executor.py @@ -156,7 +156,8 @@ def __extract_vector_for_object( return {} if len(add_props.vector_bytes) > 0: - return {"default": _ByteOps.decode_float32s(add_props.vector_bytes)} + vec = _ByteOps.decode_float32s(add_props.vector_bytes) + return {"default": vec} vecs: Dict[str, Union[List[float], List[List[float]]]] = {} for vec in add_props.vectors: diff --git a/weaviate/connect/authentication.py b/weaviate/connect/authentication.py index 391cd8be3..b7ca53996 100644 --- a/weaviate/connect/authentication.py +++ b/weaviate/connect/authentication.py @@ -3,7 +3,10 @@ from typing import Awaitable, Callable, Dict, List, Optional, Union import httpx -from authlib.integrations.httpx_client import AsyncOAuth2Client, OAuth2Client # type: ignore +from authlib.integrations.httpx_client import ( # type: ignore + AsyncOAuth2Client, + OAuth2Client, +) from weaviate.auth import ( AuthBearerToken, diff --git a/weaviate/warnings.py b/weaviate/warnings.py index afbde7f39..5a2fd9277 100644 --- a/weaviate/warnings.py +++ b/weaviate/warnings.py @@ -134,7 +134,7 @@ def palm_to_google_gen() -> None: def vector_index_config_in_config_update() -> None: warnings.warn( message="""Dep017: You are using the `vector_index_config` argument in the `collection.config.update()` method, which is deprecated. - Use the `vectorizer_config` argument instead. + Use the `vector_config` argument instead. 
""", category=DeprecationWarning, stacklevel=1, @@ -181,6 +181,56 @@ def oidc_with_wcd_deprecated() -> None: stacklevel=1, ) + @staticmethod + def vectorizer_config_in_config_update() -> None: + warnings.warn( + message="""Dep023: You are using the `vectorizer_config` argument in the `collection.config.update()` method with a collection with named vectors, which is deprecated. + Use the `vector_config` argument instead. + """, + category=DeprecationWarning, + stacklevel=1, + ) + + @staticmethod + def vectorizer_config_in_config_create() -> None: + warnings.warn( + message="""Dep024: You are using the `vectorizer_config` argument in `collection.config.create()`, which is deprecated. + Use the `vector_config` argument instead. + """, + category=DeprecationWarning, + stacklevel=1, + ) + + @staticmethod + def vector_index_config_in_config_create() -> None: + warnings.warn( + message="""Dep025: You are using the `vector_index_config` argument in `collection.config.create()`, which is deprecated. + Use the `vector_config` argument instead defining `vector_index_config` as a sub-argument. + """, + category=DeprecationWarning, + stacklevel=1, + ) + + @staticmethod + def encoding_in_multi_vector_config() -> None: + warnings.warn( + message="""Dep026: You are using the `encoding` argument in `Configure.VectorIndex.MultiVectors.multi_vector()`, which is deprecated. + Use the `encoding` argument inside `Configure.MultiVectors.module()` instead. + """, + category=DeprecationWarning, + stacklevel=1, + ) + + @staticmethod + def multi_vector_in_hnsw_config() -> None: + warnings.warn( + message="""Dep027: You are using the `multi_vector` argument in `Configure.VectorIndex.hnsw()`, which is deprecated. + Use the `multi_vector` argument inside `Configure.MultiVectors.module()` instead. + """, + category=DeprecationWarning, + stacklevel=1, + ) + @staticmethod def datetime_insertion_with_no_specified_timezone(date: datetime) -> None: warnings.warn(