From 024d1f4db708c2dc651cb365486bf6ae889a39fc Mon Sep 17 00:00:00 2001 From: Justin Cechmanek Date: Wed, 5 Jun 2024 16:43:59 -0700 Subject: [PATCH 1/8] adds wrapper class for custom vectorizers --- redisvl/utils/vectorize/__init__.py | 2 + redisvl/utils/vectorize/text/custom.py | 294 +++++++++++++++++++++++++ 2 files changed, 296 insertions(+) create mode 100644 redisvl/utils/vectorize/text/custom.py diff --git a/redisvl/utils/vectorize/__init__.py b/redisvl/utils/vectorize/__init__.py index ea9d7bee..89fd63be 100644 --- a/redisvl/utils/vectorize/__init__.py +++ b/redisvl/utils/vectorize/__init__.py @@ -1,6 +1,7 @@ from redisvl.utils.vectorize.base import BaseVectorizer from redisvl.utils.vectorize.text.azureopenai import AzureOpenAITextVectorizer from redisvl.utils.vectorize.text.cohere import CohereTextVectorizer +from redisvl.utils.vectorize.text.custom import CustomTextVectorizer from redisvl.utils.vectorize.text.huggingface import HFTextVectorizer from redisvl.utils.vectorize.text.openai import OpenAITextVectorizer from redisvl.utils.vectorize.text.vertexai import VertexAITextVectorizer @@ -12,4 +13,5 @@ "OpenAITextVectorizer", "VertexAITextVectorizer", "AzureOpenAITextVectorizer", + "CustomTextVectorizer", ] diff --git a/redisvl/utils/vectorize/text/custom.py b/redisvl/utils/vectorize/text/custom.py new file mode 100644 index 00000000..4546dd10 --- /dev/null +++ b/redisvl/utils/vectorize/text/custom.py @@ -0,0 +1,294 @@ +import os +from typing import Any, Callable, Dict, List, Optional + +from pydantic.v1 import PrivateAttr + +from redisvl.utils.vectorize.base import BaseVectorizer + + +class CustomTextVectorizer(BaseVectorizer): + """The CustomTextVectorizer class wraps a user-defined vectorizer to create + embeddings for text data. + + This vectorizer is designed to accept a provided callable text vectorizer and + provides a class definition to allow for compatibility with RedisVL. + + The vectorizer may support both synchronous and asynchronous operations which + allows for batch processing of texts, but at a minimum only syncronous embedding + is required to satisfy the 'embed()' method. + + .. code-block:: python + + # Synchronous embedding of a single text + vectorizer = CustomTextVectorizer( + embed = my_vectorizer.generate_embedding + ) + embedding = vectorizer.embed("Hello, world!") + + # Asynchronous batch embedding of multiple texts + embeddings = await vectorizer.aembed_many( + ["Hello, world!", "How are you?"], + batch_size=2 + ) + + """ + + _embed_func: Callable = PrivateAttr() + _embed_many_func: Optional[Callable] = PrivateAttr() + _aembed_func: Optional[Callable] = PrivateAttr() + _aembed_many_func: Optional[Callable] = PrivateAttr() + + def __init__( + self, + embed: Callable, + embed_many: Optional[Callable] = None, + aembed: Optional[Callable] = None, + aembed_many: Optional[Callable] = None, + model: str = "custom_vectorizer", + ): + """Initialize the Custom vectorizer. + + Args: + embed (Optional[Callable]) a Callable function that accepts a list of string object and return a list containing lists of floats. Defaults to None. + + : + embed_many (Optional[Callable)]: a Callable function that accepts a list of string object and return a list containing lists of floats. Defaults to None. + aembed: Optional[Callable] = None, + aembed_many: Optional[Callable] = None, + + Raises: + ValueError if neither embed and embed_many are provided + """ + + self._validate_embed(embed) + self._embed_func = embed + if embed_many: + self._validate_embed_many(embed_many) + self._embed_many_func = embed_many + + if aembed: + self._validate_aembed(aembed) + self._aembed_func = aembed + if aembed_many: + self._validate_aembed_many(aembed_many) + self._aembed_many_func = aembed_many + + super().__init__(model=model, dims=self._set_model_dims()) + + def _validate_embed(self, func: Callable): + # calling the func with dummy input and validating that it returns a vector + try: + test_str = "this is a test sentence" + candidate_vector = func(test_str) + if type(candidate_vector) != list or type(candidate_vector[0]) != float: + raise ValueError( + f"Candidate function for embed() does not have the correct return type. Please provide a function with with return type List[float]" + ) + except TypeError: + raise TypeError(f"{func} is not a callable object") + + def _validate_embed_many(self, func: Callable): + # calling the func with dummy input and validating that it returns a list of vectors + try: + test_strs = ["first test sentence", "second test sentence"] + candidate_vectors = func(test_strs) + if ( + type(candidate_vectors) != list + or type(candidate_vectors[0]) != list + or type(candidate_vectors[0][0]) != float + ): + raise ValueError( + f"Candidate function for embed_many does not have the correct return type. Please provide a function with with return type List[List[float]]" + ) + except TypeError: + raise TypeError(f"{func} is not a callable object") + + def _validate_aembed(self, func: Callable): + # calling the func with dummy input and validating that it returns a vector + import asyncio + + try: + test_str = "this is a test sentence" + loop = asyncio.get_event_loop() + candidate_vector = loop.run_until_complete(func(test_str)) + if type(candidate_vector) != list or type(candidate_vector[0]) != float: + raise ValueError( + f"Candidate function for aembed() does not have the correct return type. Please provide a function with with return type List[float]" + ) + except TypeError: + raise TypeError(f"{func} is not a callable object") + + def _validate_aembed_many(self, func: Callable): + # calling the func with dummy input and validating that it returns a list of floats + import asyncio + + try: + test_strs = ["first test sentence", "second test sentence"] + loop = asyncio.get_event_loop() + candidate_vectors = loop.run_until_complete(func(test_strs)) + if ( + type(candidate_vectors) != list + or type(candidate_vectors[0]) != list + or type(candidate_vectors[0][0]) != float + ): + raise ValueError( + f"Candidate function for aembed_many does not have the correct return type. Please provide a function with with return type List[List[float]]" + ) + except TypeError: + raise TypeError(f"{func} is not a callable object") + + def _set_model_dims(self) -> int: + try: + test_string = "dimension test" + embedding = self._embed_func(test_string) + except Exception as e: # pylint: disable=broad-except + raise ValueError( + f"Error in checking model dimensions. Attempted to embed '{test_string}'. :{str(e)}" + ) + return len(embedding) + + def embed( + self, + text: str, + preprocess: Optional[Callable] = None, + as_buffer: bool = False, + **kwargs, + ) -> List[float]: + """Embed a chunk of text using the provided function. + + Args: + text (str): Chunk of text to embed. + preprocess (Optional[Callable], optional): Optional preprocessing callable to + perform before vectorization. Defaults to None. + as_buffer (bool, optional): Whether to convert the raw embedding + to a byte string. Defaults to False. + + Returns: + List[float]: Embedding. + + Raises: + TypeError: If the wrong input type is passed in for the text. + """ + if not isinstance(text, str): + raise TypeError("Must pass in a str value to embed.") + + if preprocess: + text = preprocess(text) + else: + result = self._embed_func(text, **kwargs) + return self._process_embedding(result, as_buffer) + + def embed_many( + self, + texts: List[str], + preprocess: Optional[Callable] = None, + batch_size: int = 10, + as_buffer: bool = False, + **kwargs, + ) -> List[List[float]]: + """Embed many chunks of texts using the provided function. + + Args: + texts (List[str]): List of text chunks to embed. + preprocess (Optional[Callable], optional): Optional preprocessing + callable to perform before vectorization. Defaults to None. + batch_size (int, optional): Batch size of texts to use when creating + embeddings. Defaults to 10. + as_buffer (bool, optional): Whether to convert the raw embedding + to a byte string. Defaults to False. + + Returns: + List[List[float]]: List of embeddings. + + Raises: + TypeError: If the wrong input type is passed in for the text. + NotImplementedError: if embed_many was not passed to constructor. + """ + if not isinstance(texts, list): + raise TypeError("Must pass in a list of str values to embed.") + if len(texts) > 0 and not isinstance(texts[0], str): + raise TypeError("Must pass in a list of str values to embed.") + + if not self._embed_many_func: + raise NotImplementedError + + embeddings: List = [] + for batch in self.batchify(texts, batch_size, preprocess): + results = self._embed_many_func(batch, **kwargs) + embeddings += [self._process_embedding(r, as_buffer) for r in results] + return embeddings + + async def aembed( + self, + text: str, + preprocess: Optional[Callable] = None, + as_buffer: bool = False, + **kwargs, + ) -> List[float]: + """Asynchronously embed a chunk of text. + + Args: + text (str): Chunk of text to embed. + preprocess (Optional[Callable], optional): Optional preprocessing callable to + perform before vectorization. Defaults to None. + as_buffer (bool, optional): Whether to convert the raw embedding + to a byte string. Defaults to False. + + Returns: + List[float]: Embedding. + + Raises: + TypeError: If the wrong input type is passed in for the text. + NotImplementedError: if aembed was not passed to constructor. + """ + if not isinstance(text, str): + raise TypeError("Must pass in a str value to embed.") + + if not self._aembed_func: + raise NotImplementedError + + if preprocess: + text = preprocess(text) + else: + result = await self._aembed_func(text, **kwargs) + return self._process_embedding(result, as_buffer) + + async def aembed_many( + self, + texts: List[str], + preprocess: Optional[Callable] = None, + batch_size: int = 1000, + as_buffer: bool = False, + **kwargs, + ) -> List[List[float]]: + """Asynchronously embed many chunks of texts. + + Args: + texts (List[str]): List of text chunks to embed. + preprocess (Optional[Callable], optional): Optional preprocessing callable to + perform before vectorization. Defaults to None. + batch_size (int, optional): Batch size of texts to use when creating + embeddings. Defaults to 10. + as_buffer (bool, optional): Whether to convert the raw embedding + to a byte string. Defaults to False. + + Returns: + List[List[float]]: List of embeddings. + + Raises: + TypeError: If the wrong input type is passed in for the text. + NotImplementedError: If aembed_many was not passed to constructor. + """ + if not isinstance(texts, list): + raise TypeError("Must pass in a list of str values to embed.") + if len(texts) > 0 and not isinstance(texts[0], str): + raise TypeError("Must pass in a list of str values to embed.") + + if not self._aembed_many_func: + raise NotImplementedError + + embeddings: List = [] + for batch in self.batchify(texts, batch_size, preprocess): + results = await self._aembed_many_func(batch, **kwargs) + embeddings += [self._process_embedding(r, as_buffer) for r in results] + return embeddings From 4b2f21d758c9e49b1b1cb23ec8736de063a5d76e Mon Sep 17 00:00:00 2001 From: Justin Cechmanek Date: Wed, 5 Jun 2024 16:45:06 -0700 Subject: [PATCH 2/8] fixes typo in comments --- redisvl/utils/vectorize/text/openai.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/redisvl/utils/vectorize/text/openai.py b/redisvl/utils/vectorize/text/openai.py index b5d2070c..c1c9d7e8 100644 --- a/redisvl/utils/vectorize/text/openai.py +++ b/redisvl/utils/vectorize/text/openai.py @@ -134,7 +134,7 @@ def embed_many( List[List[float]]: List of embeddings. Raises: - TypeError: If the wrong input type is passed in for the test. + TypeError: If the wrong input type is passed in for the text. """ if not isinstance(texts, list): raise TypeError("Must pass in a list of str values to embed.") @@ -174,7 +174,7 @@ def embed( List[float]: Embedding. Raises: - TypeError: If the wrong input type is passed in for the test. + TypeError: If the wrong input type is passed in for the text. """ if not isinstance(text, str): raise TypeError("Must pass in a str value to embed.") @@ -212,7 +212,7 @@ async def aembed_many( List[List[float]]: List of embeddings. Raises: - TypeError: If the wrong input type is passed in for the test. + TypeError: If the wrong input type is passed in for the text. """ if not isinstance(texts, list): raise TypeError("Must pass in a list of str values to embed.") @@ -254,7 +254,7 @@ async def aembed( List[float]: Embedding. Raises: - TypeError: If the wrong input type is passed in for the test. + TypeError: If the wrong input type is passed in for the text. """ if not isinstance(text, str): raise TypeError("Must pass in a str value to embed.") From c1aa18bcbf81921af7104cc92d8fc921af1fca49 Mon Sep 17 00:00:00 2001 From: Justin Cechmanek Date: Wed, 5 Jun 2024 16:45:52 -0700 Subject: [PATCH 3/8] adds tests for custom text vectorizer --- tests/integration/test_vectorizers.py | 159 +++++++++++++++++++++++++- 1 file changed, 158 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_vectorizers.py b/tests/integration/test_vectorizers.py index 23952c65..36a78e1b 100644 --- a/tests/integration/test_vectorizers.py +++ b/tests/integration/test_vectorizers.py @@ -5,6 +5,7 @@ from redisvl.utils.vectorize import ( AzureOpenAITextVectorizer, CohereTextVectorizer, + CustomTextVectorizer, HFTextVectorizer, OpenAITextVectorizer, VertexAITextVectorizer, @@ -25,6 +26,7 @@ def skip_vectorizer() -> bool: VertexAITextVectorizer, CohereTextVectorizer, AzureOpenAITextVectorizer, + CustomTextVectorizer, ] ) def vectorizer(request, skip_vectorizer): @@ -43,6 +45,32 @@ def vectorizer(request, skip_vectorizer): return request.param( model=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME", "text-embedding-ada-002") ) + elif request.param == CustomTextVectorizer: + + def embed(text): + return [1.1, 2.2, 3.3, 4.4] + + def embed_many(texts): + return [[1.1, 2.2, 3.3, 4.4]] * len(texts) + + return request.param(embed=embed, embed_many=embed_many) + + +@pytest.fixture +def custom_embed_func(): + def embed(text: str): + return [0.1, 0.2, 0.3] + + return embed + + +@pytest.fixture +def custom_embedder_class(): + class embedder: + def embed(self, text: str): + return [0.1, 0.2, 0.3] + + return embedder def test_vectorizer_embed(vectorizer): @@ -81,7 +109,120 @@ def test_vectorizer_bad_input(vectorizer): vectorizer.embed_many(42) -@pytest.fixture(params=[OpenAITextVectorizer]) +def test_custom_vectorizer_embed(custom_embedder_class, custom_embed_func): + # test we can pass a stand alone function as embedder callable + def my_embedder(text: str): + return [1.1, 2.2, 3.3, 4.4] + + custom_wrapper = CustomTextVectorizer(embed=my_embedder) + embedding = custom_wrapper.embed("This is a test sentence.") + assert embedding == [1.1, 2.2, 3.3, 4.4] + + # test we can pass an instance of a class method as embedder callable + class EmbedClass: + def embed_method(self, text: str): + return [5.0, 6.0, 7.0, 8.0] + + custom_wrapper = CustomTextVectorizer(embed=EmbedClass().embed_method) + embedding = custom_wrapper.embed("This is a test sentence.") + assert embedding == [5.0, 6.0, 7.0, 8.0] + + # test we can pass additional parameters and kwargs to embedding methods + def embedder_with_args(text: str, max_len=None): + return [1.1, 2.2, 3.3, 4.4][0:max_len] + + custom_wrapper = CustomTextVectorizer(embed=embedder_with_args) + embedding = custom_wrapper.embed("This is a test sentence.", max_len=4) + assert embedding == [1.1, 2.2, 3.3, 4.4] + embedding = custom_wrapper.embed("This is a test sentence.", max_len=2) + assert embedding == [1.1, 2.2] + + # test that correct error is raised if a non-callable is passed + with pytest.raises(TypeError): + bad_wrapper = CustomTextVectorizer(embed="hello") + + with pytest.raises(TypeError): + bad_wrapper = CustomTextVectorizer(embed=42) + + with pytest.raises(TypeError): + bad_wrapper = CustomTextVectorizer(embed={"foo": "bar"}) + + # test that correct error is raised if passed function has incorrect types + def bad_arg_type(value: int): + return [value] + + with pytest.raises(ValueError): + bad_wrapper = CustomTextVectorizer(embed=bad_arg_type) + + def bad_return_type(text: str) -> str: + return text + + with pytest.raises(ValueError): + bad_wrapper = CustomTextVectorizer(embed=bad_return_type) + + +def test_custom_vectorizer_embed_many(custom_embedder_class, custom_embed_func): + # test we can pass a stand alone function as embed_many callable + def my_embed_many(text_list): + return [[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]] + + custom_wrapper = CustomTextVectorizer(custom_embed_func, embed_many=my_embed_many) + embeddings = custom_wrapper.embed_many(["test one.", "test two"]) + assert embeddings == [[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]] + + # test we can pass a class method as embedder callable + class EmbedClass: + def embed_many_method(self, text_list): + return [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]] + + custom_wrapper = CustomTextVectorizer( + custom_embed_func, embed_many=EmbedClass().embed_many_method + ) + embeddings = custom_wrapper.embed_many(["test one.", "test two"]) + assert embeddings == [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]] + + # test we can pass additional parameters and kwargs to embedding methods + def embed_many_with_args(texts, param=True): + if param: + return [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]] + else: + return [[6.0, 5.0, 4.0], [3.0, 2.0, 1.0]] + + custom_wrapper = CustomTextVectorizer( + custom_embed_func, embed_many=embed_many_with_args + ) + embeddings = custom_wrapper.embed_many(["test one.", "test two"], param=True) + assert embeddings == [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]] + embeddings = custom_wrapper.embed_many(["test one.", "test two"], param=False) + assert embeddings == [[6.0, 5.0, 4.0], [3.0, 2.0, 1.0]] + + # test that correct error is raised if a non-callable is passed + with pytest.raises(TypeError): + bad_wrapper = CustomTextVectorizer(custom_embed_func, embed_many="hello") + + with pytest.raises(TypeError): + bad_wrapper = CustomTextVectorizer(custom_embed_func, embed_many=42) + + with pytest.raises(TypeError): + bad_wrapper = CustomTextVectorizer(custom_embed_func, embed_many={"foo": "bar"}) + + # test that correct error is raised if passed function has incorrect types + def bad_arg_type(value: int): + return [value] + + with pytest.raises(ValueError): + bad_wrapper = CustomTextVectorizer(custom_embed_func, embed_many=bad_arg_type) + + def bad_return_type(text: str) -> str: + return text + + with pytest.raises(ValueError): + bad_wrapper = CustomTextVectorizer( + custom_embed_func, embed_many=bad_return_type + ) + + +@pytest.fixture(params=[OpenAITextVectorizer, CustomTextVectorizer]) def avectorizer(request, skip_vectorizer): if skip_vectorizer: pytest.skip("Skipping vectorizer instantiation...") @@ -90,6 +231,22 @@ def avectorizer(request, skip_vectorizer): if request.param == OpenAITextVectorizer: return request.param() + # Here we use actual models for integration test + if request.param == CustomTextVectorizer: + + def embed_func(text): + return [1.1, 2.2, 3.3, 4.4] + + async def aembed_func(text): + return [1.1, 2.2, 3.3, 4.4] + + async def aembed_many_func(texts): + return [[1.1, 2.2, 3.3, 4.4]] * len(texts) + + return request.param( + embed=embed_func, aembed=aembed_func, aembed_many=aembed_many_func + ) + @pytest.mark.asyncio async def test_vectorizer_aembed(avectorizer): From d69f4ffe4f9d47fb79894f285b1ad0a5add0435b Mon Sep 17 00:00:00 2001 From: Justin Cechmanek Date: Wed, 5 Jun 2024 17:08:17 -0700 Subject: [PATCH 4/8] updates doc strings --- redisvl/utils/vectorize/text/custom.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/redisvl/utils/vectorize/text/custom.py b/redisvl/utils/vectorize/text/custom.py index 4546dd10..e558c735 100644 --- a/redisvl/utils/vectorize/text/custom.py +++ b/redisvl/utils/vectorize/text/custom.py @@ -7,7 +7,7 @@ class CustomTextVectorizer(BaseVectorizer): - """The CustomTextVectorizer class wraps a user-defined vectorizer to create + """The CustomTextVectorizer class wraps user-defined embeding methods to create embeddings for text data. This vectorizer is designed to accept a provided callable text vectorizer and @@ -49,15 +49,16 @@ def __init__( """Initialize the Custom vectorizer. Args: - embed (Optional[Callable]) a Callable function that accepts a list of string object and return a list containing lists of floats. Defaults to None. + embed (Callable) a Callable function that accepts a string object and return a list of floats. : - embed_many (Optional[Callable)]: a Callable function that accepts a list of string object and return a list containing lists of floats. Defaults to None. - aembed: Optional[Callable] = None, - aembed_many: Optional[Callable] = None, + embed_many (Optional[Callable)]: a Callable function that accepts a list of string objects and returns a list containing lists of floats. Defaults to None. + aembed: Optional[Callable] = an asyncronous Callable function that accepts a string object and returns a lists of floats. Defaults to None. + aembed_many: Optional[Callable] = an asyncronous Callable function that accepts a list of string objects and returns a list containing lists of floats. Defaults to None. Raises: - ValueError if neither embed and embed_many are provided + ValueError if any of the provided functions accept or return incorrect types. + TypeError if any of the provided functions are not Callable objects. """ self._validate_embed(embed) @@ -76,7 +77,7 @@ def __init__( super().__init__(model=model, dims=self._set_model_dims()) def _validate_embed(self, func: Callable): - # calling the func with dummy input and validating that it returns a vector + """calls the func with dummy input and validates that it returns a vector""" try: test_str = "this is a test sentence" candidate_vector = func(test_str) @@ -88,7 +89,7 @@ def _validate_embed(self, func: Callable): raise TypeError(f"{func} is not a callable object") def _validate_embed_many(self, func: Callable): - # calling the func with dummy input and validating that it returns a list of vectors + """calls the func with dummy input and validates that it returns a list of vectors""" try: test_strs = ["first test sentence", "second test sentence"] candidate_vectors = func(test_strs) @@ -104,7 +105,7 @@ def _validate_embed_many(self, func: Callable): raise TypeError(f"{func} is not a callable object") def _validate_aembed(self, func: Callable): - # calling the func with dummy input and validating that it returns a vector + """calls the func with dummy input and validates that it returns a vector""" import asyncio try: @@ -119,7 +120,7 @@ def _validate_aembed(self, func: Callable): raise TypeError(f"{func} is not a callable object") def _validate_aembed_many(self, func: Callable): - # calling the func with dummy input and validating that it returns a list of floats + """calls the func with dummy input and validates that it returns a list of vectors""" import asyncio try: From 5221ed9eb32730a8730e1c88e6b4de713531f2b9 Mon Sep 17 00:00:00 2001 From: Justin Cechmanek Date: Thu, 6 Jun 2024 09:49:44 -0700 Subject: [PATCH 5/8] refactors vectorizer tests --- tests/integration/test_vectorizers.py | 63 ++++++++++++--------------- 1 file changed, 27 insertions(+), 36 deletions(-) diff --git a/tests/integration/test_vectorizers.py b/tests/integration/test_vectorizers.py index 36a78e1b..0d6add1f 100644 --- a/tests/integration/test_vectorizers.py +++ b/tests/integration/test_vectorizers.py @@ -59,16 +59,28 @@ def embed_many(texts): @pytest.fixture def custom_embed_func(): def embed(text: str): - return [0.1, 0.2, 0.3] + return [1.1, 2.2, 3.3, 4.4] return embed @pytest.fixture -def custom_embedder_class(): +def custom_embed_class(): class embedder: def embed(self, text: str): - return [0.1, 0.2, 0.3] + return [1.1, 2.2, 3.3, 4.4] + + def embed_with_args(self, text: str, max_len=None): + return [1.1, 2.2, 3.3, 4.4][0:max_len] + + def embed_many(self, text_list): + return [[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]] + + def embed_many_with_args(self, texts, param=True): + if param: + return [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]] + else: + return [[6.0, 5.0, 4.0], [3.0, 2.0, 1.0]] return embedder @@ -109,29 +121,19 @@ def test_vectorizer_bad_input(vectorizer): vectorizer.embed_many(42) -def test_custom_vectorizer_embed(custom_embedder_class, custom_embed_func): +def test_custom_vectorizer_embed(custom_embed_class, custom_embed_func): # test we can pass a stand alone function as embedder callable - def my_embedder(text: str): - return [1.1, 2.2, 3.3, 4.4] - - custom_wrapper = CustomTextVectorizer(embed=my_embedder) + custom_wrapper = CustomTextVectorizer(embed=custom_embed_func) embedding = custom_wrapper.embed("This is a test sentence.") assert embedding == [1.1, 2.2, 3.3, 4.4] # test we can pass an instance of a class method as embedder callable - class EmbedClass: - def embed_method(self, text: str): - return [5.0, 6.0, 7.0, 8.0] - - custom_wrapper = CustomTextVectorizer(embed=EmbedClass().embed_method) + custom_wrapper = CustomTextVectorizer(embed=custom_embed_class().embed) embedding = custom_wrapper.embed("This is a test sentence.") - assert embedding == [5.0, 6.0, 7.0, 8.0] + assert embedding == [1.1, 2.2, 3.3, 4.4] # test we can pass additional parameters and kwargs to embedding methods - def embedder_with_args(text: str, max_len=None): - return [1.1, 2.2, 3.3, 4.4][0:max_len] - - custom_wrapper = CustomTextVectorizer(embed=embedder_with_args) + custom_wrapper = CustomTextVectorizer(embed=custom_embed_class().embed_with_args) embedding = custom_wrapper.embed("This is a test sentence.", max_len=4) assert embedding == [1.1, 2.2, 3.3, 4.4] embedding = custom_wrapper.embed("This is a test sentence.", max_len=2) @@ -161,35 +163,24 @@ def bad_return_type(text: str) -> str: bad_wrapper = CustomTextVectorizer(embed=bad_return_type) -def test_custom_vectorizer_embed_many(custom_embedder_class, custom_embed_func): +def test_custom_vectorizer_embed_many(custom_embed_class, custom_embed_func): # test we can pass a stand alone function as embed_many callable - def my_embed_many(text_list): - return [[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]] - - custom_wrapper = CustomTextVectorizer(custom_embed_func, embed_many=my_embed_many) + custom_wrapper = CustomTextVectorizer( + custom_embed_func, embed_many=custom_embed_class().embed_many + ) embeddings = custom_wrapper.embed_many(["test one.", "test two"]) assert embeddings == [[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]] # test we can pass a class method as embedder callable - class EmbedClass: - def embed_many_method(self, text_list): - return [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]] - custom_wrapper = CustomTextVectorizer( - custom_embed_func, embed_many=EmbedClass().embed_many_method + custom_embed_func, embed_many=custom_embed_class().embed_many ) embeddings = custom_wrapper.embed_many(["test one.", "test two"]) - assert embeddings == [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]] + assert embeddings == [[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]] # test we can pass additional parameters and kwargs to embedding methods - def embed_many_with_args(texts, param=True): - if param: - return [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]] - else: - return [[6.0, 5.0, 4.0], [3.0, 2.0, 1.0]] - custom_wrapper = CustomTextVectorizer( - custom_embed_func, embed_many=embed_many_with_args + custom_embed_func, embed_many=custom_embed_class().embed_many_with_args ) embeddings = custom_wrapper.embed_many(["test one.", "test two"], param=True) assert embeddings == [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]] From 7e42b696727f1c007ebc863e5aa04e42a9713781 Mon Sep 17 00:00:00 2001 From: Justin Cechmanek Date: Thu, 6 Jun 2024 11:16:18 -0700 Subject: [PATCH 6/8] updates notebook to demonstrate CustomTextVectorizer --- docs/user_guide/vectorizers_04.ipynb | 203 ++++++++++++++++--------- redisvl/utils/vectorize/text/custom.py | 18 +-- 2 files changed, 137 insertions(+), 84 deletions(-) diff --git a/docs/user_guide/vectorizers_04.ipynb b/docs/user_guide/vectorizers_04.ipynb index 2dde2bad..79f6c3bb 100644 --- a/docs/user_guide/vectorizers_04.ipynb +++ b/docs/user_guide/vectorizers_04.ipynb @@ -12,6 +12,7 @@ "2. HuggingFace\n", "3. Vertex AI\n", "4. Cohere\n", + "5. Bringing your own vectorizer\n", "\n", "Before running this notebook, be sure to\n", "1. Have installed ``redisvl`` and have that environment active for this notebook.\n", @@ -64,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -89,16 +90,16 @@ { "data": { "text/plain": [ - "[-0.001025049015879631,\n", - " -0.0030993607360869646,\n", - " 0.0024536605924367905,\n", - " -0.004484387580305338,\n", - " -0.010331203229725361,\n", - " 0.012700922787189484,\n", - " -0.005368996877223253,\n", - " -0.0029411641880869865,\n", - " -0.0070833307690918446,\n", - " -0.03386051580309868]" + "[-0.0010508307022973895,\n", + " -0.0031670420430600643,\n", + " 0.0023781107738614082,\n", + " -0.004539588466286659,\n", + " -0.010320774279534817,\n", + " 0.012868634425103664,\n", + " -0.0054513863287866116,\n", + " -0.002984359161928296,\n", + " -0.0072814482264220715,\n", + " -0.033704183995723724]" ] }, "execution_count": 3, @@ -128,16 +129,16 @@ { "data": { "text/plain": [ - "[-0.01747742109000683,\n", - " -5.228330701356754e-05,\n", - " 0.0013870716793462634,\n", - " -0.025637786835432053,\n", - " -0.01985435001552105,\n", - " 0.016117358580231667,\n", - " -0.0037306349258869886,\n", - " 0.0008945261361077428,\n", - " 0.006577865686267614,\n", - " -0.025091219693422318]" + "[-0.01749197021126747,\n", + " -5.238811718299985e-05,\n", + " 0.0013331907102838159,\n", + " -0.025576923042535782,\n", + " -0.019907286390662193,\n", + " 0.016106342896819115,\n", + " -0.003756451653316617,\n", + " 0.0009971122490242124,\n", + " 0.006661186460405588,\n", + " -0.024954024702310562]" ] }, "execution_count": 4, @@ -189,7 +190,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -202,34 +203,21 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Vector dimensions: 1536\n" + "ename": "ValueError", + "evalue": "AzureOpenAI API endpoint is required. Provide it in api_config or set the AZURE_OPENAI_ENDPOINT environment variable.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[7], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mredisvl\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mvectorize\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AzureOpenAITextVectorizer\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# create a vectorizer\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m az_oai \u001b[38;5;241m=\u001b[39m \u001b[43mAzureOpenAITextVectorizer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdeployment_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Must be your CUSTOM deployment name\u001b[39;49;00m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mapi_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mapi_key\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mapi_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mapi_version\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mapi_version\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mazure_endpoint\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mazure_endpoint\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m test \u001b[38;5;241m=\u001b[39m az_oai\u001b[38;5;241m.\u001b[39membed(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThis is a test sentence.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mVector dimensions: \u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mlen\u001b[39m(test))\n", + "File \u001b[0;32m~/Documents/redisvl/redisvl/utils/vectorize/text/azureopenai.py:70\u001b[0m, in \u001b[0;36mAzureOpenAITextVectorizer.__init__\u001b[0;34m(self, model, api_config)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 55\u001b[0m \u001b[38;5;28mself\u001b[39m, model: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext-embedding-ada-002\u001b[39m\u001b[38;5;124m\"\u001b[39m, api_config: Optional[Dict] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 56\u001b[0m ):\n\u001b[1;32m 57\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Initialize the AzureOpenAI vectorizer.\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \n\u001b[1;32m 59\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[38;5;124;03m ValueError: If the AzureOpenAI API key, version, or endpoint are not provided.\u001b[39;00m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 70\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initialize_clients\u001b[49m\u001b[43m(\u001b[49m\u001b[43mapi_config\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(model\u001b[38;5;241m=\u001b[39mmodel, dims\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_set_model_dims(model))\n", + "File \u001b[0;32m~/Documents/redisvl/redisvl/utils/vectorize/text/azureopenai.py:95\u001b[0m, in \u001b[0;36mAzureOpenAITextVectorizer._initialize_clients\u001b[0;34m(self, api_config)\u001b[0m\n\u001b[1;32m 88\u001b[0m azure_endpoint \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 89\u001b[0m api_config\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mazure_endpoint\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 90\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m api_config\n\u001b[1;32m 91\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m os\u001b[38;5;241m.\u001b[39mgetenv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAZURE_OPENAI_ENDPOINT\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 92\u001b[0m )\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m azure_endpoint:\n\u001b[0;32m---> 95\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 96\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAzureOpenAI API endpoint is required. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mProvide it in api_config or set the AZURE_OPENAI_ENDPOINT\u001b[39m\u001b[38;5;130;01m\\\u001b[39;00m\n\u001b[1;32m 98\u001b[0m \u001b[38;5;124m environment variable.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 99\u001b[0m )\n\u001b[1;32m 101\u001b[0m api_version \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 102\u001b[0m api_config\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mapi_version\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m api_config\n\u001b[1;32m 104\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m os\u001b[38;5;241m.\u001b[39mgetenv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOPENAI_API_VERSION\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 105\u001b[0m )\n\u001b[1;32m 107\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m api_version:\n", + "\u001b[0;31mValueError\u001b[0m: AzureOpenAI API endpoint is required. Provide it in api_config or set the AZURE_OPENAI_ENDPOINT environment variable." ] - }, - { - "data": { - "text/plain": [ - "[-0.0010088568087667227,\n", - " -0.003142790636047721,\n", - " 0.0024922797456383705,\n", - " -0.004522906616330147,\n", - " -0.010369433090090752,\n", - " 0.012739036232233047,\n", - " -0.005365503951907158,\n", - " -0.0029668458737432957,\n", - " -0.007141091860830784,\n", - " -0.03383301943540573]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -252,27 +240,19 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "[-0.017460526898503304,\n", - " -6.895032856846228e-05,\n", - " 0.0013909287517890334,\n", - " -0.025688467547297478,\n", - " -0.019813183695077896,\n", - " 0.016087085008621216,\n", - " -0.003729278687387705,\n", - " 0.0009211922879330814,\n", - " 0.006606514099985361,\n", - " -0.025128915905952454]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" + "ename": "NameError", + "evalue": "name 'az_oai' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[6], line 8\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Just like OpenAI, AzureOpenAI supports batching embeddings and asynchronous requests.\u001b[39;00m\n\u001b[1;32m 2\u001b[0m sentences \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThat is a happy dog\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThat is a happy person\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mToday is a sunny day\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 6\u001b[0m ]\n\u001b[0;32m----> 8\u001b[0m embeddings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[43maz_oai\u001b[49m\u001b[38;5;241m.\u001b[39maembed_many(sentences)\n\u001b[1;32m 9\u001b[0m embeddings[\u001b[38;5;241m0\u001b[39m][:\u001b[38;5;241m10\u001b[39m]\n", + "\u001b[0;31mNameError\u001b[0m: name 'az_oai' is not defined" + ] } ], "source": [ @@ -302,7 +282,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -349,7 +329,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -384,7 +364,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -438,7 +418,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -459,7 +439,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -500,6 +480,81 @@ "Learn more about using RedisVL and Cohere together through [this dedicated user guide](https://docs.cohere.com/docs/redis-and-cohere)." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom Vectorizers\n", + "\n", + "RedisVL supports the use of other vectorizers and provides a class to enable compatibility with any function that generates a vector or vectors from string data" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from redisvl.utils.vectorize import CustomTextVectorizer\n", + "\n", + "def generate_embeddings(text_input):\n", + " return [0.1] * 768\n", + "\n", + " \n", + "custom_vectorizer = CustomTextVectorizer(generate_embeddings)\n", + "\n", + "custom_vectorizer.embed(\"This is a test sentence.\")[:10]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10:40:28 redisvl.index.index INFO Index already exists, not overwriting.\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'id': 'llmcache:78bd2446a37a0c6ab62652af9b7e53845145c4471ea83ff9fb4280a528d36bbb',\n", + " 'vector_distance': '6.13927841187e-06',\n", + " 'prompt': 'this is a test prompt',\n", + " 'response': 'this is a test response',\n", + " 'prompt_vector': '================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================'}]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# this enables the use of custom vectorizers with other RedisVL components\n", + "from redisvl.extensions.llmcache import SemanticCache\n", + "\n", + "#cache = SemanticCache(vectorizer=custom_vectorizer)\n", + "cache = SemanticCache()\n", + "\n", + "cache.store(\"this is a test prompt\", \"this is a test response\")\n", + "cache.check(\"this is also a test prompt\")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -534,7 +589,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -552,7 +607,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -571,7 +626,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -600,7 +655,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -658,7 +713,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.12.2" }, "orig_nbformat": 4, "vscode": { diff --git a/redisvl/utils/vectorize/text/custom.py b/redisvl/utils/vectorize/text/custom.py index e558c735..7ccca839 100644 --- a/redisvl/utils/vectorize/text/custom.py +++ b/redisvl/utils/vectorize/text/custom.py @@ -48,17 +48,15 @@ def __init__( ): """Initialize the Custom vectorizer. - Args: - embed (Callable) a Callable function that accepts a string object and return a list of floats. - - : - embed_many (Optional[Callable)]: a Callable function that accepts a list of string objects and returns a list containing lists of floats. Defaults to None. - aembed: Optional[Callable] = an asyncronous Callable function that accepts a string object and returns a lists of floats. Defaults to None. - aembed_many: Optional[Callable] = an asyncronous Callable function that accepts a list of string objects and returns a list containing lists of floats. Defaults to None. + Args: + embed (Callable): a Callable function that accepts a string object and returns a list of floats. + embed_many (Optional[Callable)]: a Callable function that accepts a list of string objects and returns a list containing lists of floats. Defaults to None. + aembed (Optional[Callable]): an asyncronous Callable function that accepts a string object and returns a lists of floats. Defaults to None. + aembed_many (Optional[Callable]): an asyncronous Callable function that accepts a list of string objects and returns a list containing lists of floats. Defaults to None. - Raises: - ValueError if any of the provided functions accept or return incorrect types. - TypeError if any of the provided functions are not Callable objects. + Raises: + ValueError if any of the provided functions accept or return incorrect types. + TypeError if any of the provided functions are not Callable objects. """ self._validate_embed(embed) From 73dfbd7d16adddff72b50c4302a05e7633b560a8 Mon Sep 17 00:00:00 2001 From: Justin Cechmanek Date: Tue, 18 Jun 2024 11:08:20 -0700 Subject: [PATCH 7/8] removes error messages in vectorizers notebook --- docs/user_guide/vectorizers_04.ipynb | 113 ++++++++++++++++----------- 1 file changed, 69 insertions(+), 44 deletions(-) diff --git a/docs/user_guide/vectorizers_04.ipynb b/docs/user_guide/vectorizers_04.ipynb index 79f6c3bb..54d33055 100644 --- a/docs/user_guide/vectorizers_04.ipynb +++ b/docs/user_guide/vectorizers_04.ipynb @@ -65,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -190,7 +190,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -203,21 +203,34 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [ { - "ename": "ValueError", - "evalue": "AzureOpenAI API endpoint is required. Provide it in api_config or set the AZURE_OPENAI_ENDPOINT environment variable.", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[7], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mredisvl\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mvectorize\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AzureOpenAITextVectorizer\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# create a vectorizer\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m az_oai \u001b[38;5;241m=\u001b[39m \u001b[43mAzureOpenAITextVectorizer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdeployment_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Must be your CUSTOM deployment name\u001b[39;49;00m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mapi_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mapi_key\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mapi_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mapi_version\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mapi_version\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mazure_endpoint\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mazure_endpoint\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m test \u001b[38;5;241m=\u001b[39m az_oai\u001b[38;5;241m.\u001b[39membed(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThis is a test sentence.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mVector dimensions: \u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mlen\u001b[39m(test))\n", - "File \u001b[0;32m~/Documents/redisvl/redisvl/utils/vectorize/text/azureopenai.py:70\u001b[0m, in \u001b[0;36mAzureOpenAITextVectorizer.__init__\u001b[0;34m(self, model, api_config)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m 55\u001b[0m \u001b[38;5;28mself\u001b[39m, model: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext-embedding-ada-002\u001b[39m\u001b[38;5;124m\"\u001b[39m, api_config: Optional[Dict] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 56\u001b[0m ):\n\u001b[1;32m 57\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Initialize the AzureOpenAI vectorizer.\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \n\u001b[1;32m 59\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[38;5;124;03m ValueError: If the AzureOpenAI API key, version, or endpoint are not provided.\u001b[39;00m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 70\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initialize_clients\u001b[49m\u001b[43m(\u001b[49m\u001b[43mapi_config\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(model\u001b[38;5;241m=\u001b[39mmodel, dims\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_set_model_dims(model))\n", - "File \u001b[0;32m~/Documents/redisvl/redisvl/utils/vectorize/text/azureopenai.py:95\u001b[0m, in \u001b[0;36mAzureOpenAITextVectorizer._initialize_clients\u001b[0;34m(self, api_config)\u001b[0m\n\u001b[1;32m 88\u001b[0m azure_endpoint \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 89\u001b[0m api_config\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mazure_endpoint\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 90\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m api_config\n\u001b[1;32m 91\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m os\u001b[38;5;241m.\u001b[39mgetenv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAZURE_OPENAI_ENDPOINT\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 92\u001b[0m )\n\u001b[1;32m 94\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m azure_endpoint:\n\u001b[0;32m---> 95\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 96\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAzureOpenAI API endpoint is required. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mProvide it in api_config or set the AZURE_OPENAI_ENDPOINT\u001b[39m\u001b[38;5;130;01m\\\u001b[39;00m\n\u001b[1;32m 98\u001b[0m \u001b[38;5;124m environment variable.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 99\u001b[0m )\n\u001b[1;32m 101\u001b[0m api_version \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 102\u001b[0m api_config\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mapi_version\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m api_config\n\u001b[1;32m 104\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m os\u001b[38;5;241m.\u001b[39mgetenv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOPENAI_API_VERSION\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 105\u001b[0m )\n\u001b[1;32m 107\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m api_version:\n", - "\u001b[0;31mValueError\u001b[0m: AzureOpenAI API endpoint is required. Provide it in api_config or set the AZURE_OPENAI_ENDPOINT environment variable." + "name": "stdout", + "output_type": "stream", + "text": [ + "Vector dimensions: 1536\n" ] + }, + { + "data": { + "text/plain": [ + "[-0.0010088568087667227,\n", + " -0.003142790636047721,\n", + " 0.0024922797456383705,\n", + " -0.004522906616330147,\n", + " -0.010369433090090752,\n", + " 0.012739036232233047,\n", + " -0.005365503951907158,\n", + " -0.0029668458737432957,\n", + " -0.007141091860830784,\n", + " -0.03383301943540573]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -240,19 +253,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [ { - "ename": "NameError", - "evalue": "name 'az_oai' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[6], line 8\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Just like OpenAI, AzureOpenAI supports batching embeddings and asynchronous requests.\u001b[39;00m\n\u001b[1;32m 2\u001b[0m sentences \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 3\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThat is a happy dog\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThat is a happy person\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mToday is a sunny day\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 6\u001b[0m ]\n\u001b[0;32m----> 8\u001b[0m embeddings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[43maz_oai\u001b[49m\u001b[38;5;241m.\u001b[39maembed_many(sentences)\n\u001b[1;32m 9\u001b[0m embeddings[\u001b[38;5;241m0\u001b[39m][:\u001b[38;5;241m10\u001b[39m]\n", - "\u001b[0;31mNameError\u001b[0m: name 'az_oai' is not defined" - ] + "data": { + "text/plain": [ + "[-0.017460526898503304,\n", + " -6.895032856846228e-05,\n", + " 0.0013909287517890334,\n", + " -0.025688467547297478,\n", + " -0.019813183695077896,\n", + " 0.016087085008621216,\n", + " -0.003729278687387705,\n", + " 0.0009211922879330814,\n", + " 0.006606514099985361,\n", + " -0.025128915905952454]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -282,7 +303,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -329,7 +350,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -364,7 +385,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -418,7 +439,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -439,7 +460,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -491,16 +512,16 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]" + "[0.101, 0.101, 0.101, 0.101, 0.101, 0.101, 0.101, 0.101, 0.101, 0.101]" ] }, - "execution_count": 9, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -509,24 +530,30 @@ "from redisvl.utils.vectorize import CustomTextVectorizer\n", "\n", "def generate_embeddings(text_input):\n", - " return [0.1] * 768\n", + " return [0.101] * 768\n", "\n", - " \n", "custom_vectorizer = CustomTextVectorizer(generate_embeddings)\n", "\n", "custom_vectorizer.embed(\"This is a test sentence.\")[:10]" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This enables the use of custom vectorizers with other RedisVL components" + ] + }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "10:40:28 redisvl.index.index INFO Index already exists, not overwriting.\n" + "11:04:14 redisvl.index.index INFO Index already exists, not overwriting.\n" ] }, { @@ -536,20 +563,18 @@ " 'vector_distance': '6.13927841187e-06',\n", " 'prompt': 'this is a test prompt',\n", " 'response': 'this is a test response',\n", - " 'prompt_vector': '================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================'}]" + " 'prompt_vector': '\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17='}]" ] }, - "execution_count": 14, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# this enables the use of custom vectorizers with other RedisVL components\n", "from redisvl.extensions.llmcache import SemanticCache\n", "\n", - "#cache = SemanticCache(vectorizer=custom_vectorizer)\n", - "cache = SemanticCache()\n", + "cache = SemanticCache(vectorizer=custom_vectorizer)\n", "\n", "cache.store(\"this is a test prompt\", \"this is a test response\")\n", "cache.check(\"this is also a test prompt\")" @@ -589,7 +614,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -607,7 +632,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -626,7 +651,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -655,7 +680,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, "outputs": [ { From 24fd8fc4e6fd9797eb9b38ecc9135bf0933fbac3 Mon Sep 17 00:00:00 2001 From: Justin Cechmanek Date: Mon, 24 Jun 2024 13:58:16 -0700 Subject: [PATCH 8/8] formatting --- tests/integration/test_vectorizers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_vectorizers.py b/tests/integration/test_vectorizers.py index da39fd82..69d7ec62 100644 --- a/tests/integration/test_vectorizers.py +++ b/tests/integration/test_vectorizers.py @@ -217,7 +217,9 @@ def bad_return_type(text: str) -> str: ) -@pytest.fixture(params=[OpenAITextVectorizer, MistralAITextVectorizer, CustomTextVectorizer]) +@pytest.fixture( + params=[OpenAITextVectorizer, MistralAITextVectorizer, CustomTextVectorizer] +) def avectorizer(request, skip_vectorizer): if skip_vectorizer: pytest.skip("Skipping vectorizer instantiation...")