From 024d1f4db708c2dc651cb365486bf6ae889a39fc Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Wed, 5 Jun 2024 16:43:59 -0700
Subject: [PATCH 1/8] adds wrapper class for custom vectorizers

---
 redisvl/utils/vectorize/__init__.py    |   2 +
 redisvl/utils/vectorize/text/custom.py | 294 +++++++++++++++++++++++++
 2 files changed, 296 insertions(+)
 create mode 100644 redisvl/utils/vectorize/text/custom.py

diff --git a/redisvl/utils/vectorize/__init__.py b/redisvl/utils/vectorize/__init__.py
index ea9d7bee..89fd63be 100644
--- a/redisvl/utils/vectorize/__init__.py
+++ b/redisvl/utils/vectorize/__init__.py
@@ -1,6 +1,7 @@
 from redisvl.utils.vectorize.base import BaseVectorizer
 from redisvl.utils.vectorize.text.azureopenai import AzureOpenAITextVectorizer
 from redisvl.utils.vectorize.text.cohere import CohereTextVectorizer
+from redisvl.utils.vectorize.text.custom import CustomTextVectorizer
 from redisvl.utils.vectorize.text.huggingface import HFTextVectorizer
 from redisvl.utils.vectorize.text.openai import OpenAITextVectorizer
 from redisvl.utils.vectorize.text.vertexai import VertexAITextVectorizer
@@ -12,4 +13,5 @@
     "OpenAITextVectorizer",
     "VertexAITextVectorizer",
     "AzureOpenAITextVectorizer",
+    "CustomTextVectorizer",
 ]
diff --git a/redisvl/utils/vectorize/text/custom.py b/redisvl/utils/vectorize/text/custom.py
new file mode 100644
index 00000000..4546dd10
--- /dev/null
+++ b/redisvl/utils/vectorize/text/custom.py
@@ -0,0 +1,294 @@
+import os
+from typing import Any, Callable, Dict, List, Optional
+
+from pydantic.v1 import PrivateAttr
+
+from redisvl.utils.vectorize.base import BaseVectorizer
+
+
+class CustomTextVectorizer(BaseVectorizer):
+    """The CustomTextVectorizer class wraps a user-defined vectorizer to create
+    embeddings for text data.
+
+    This vectorizer is designed to accept a provided callable text vectorizer and
+    provides a class definition to allow for compatibility with RedisVL.
+
+    The vectorizer may support both synchronous and asynchronous operations which
+    allows for batch processing of texts, but at a minimum only synchronous embedding
+    is required to satisfy the 'embed()' method.
+
+    .. code-block:: python
+
+        # Synchronous embedding of a single text
+        vectorizer = CustomTextVectorizer(
+            embed = my_vectorizer.generate_embedding
+        )
+        embedding = vectorizer.embed("Hello, world!")
+
+        # Asynchronous batch embedding of multiple texts
+        embeddings = await vectorizer.aembed_many(
+            ["Hello, world!", "How are you?"],
+            batch_size=2
+        )
+
+    """
+
+    _embed_func: Callable = PrivateAttr()
+    _embed_many_func: Optional[Callable] = PrivateAttr()
+    _aembed_func: Optional[Callable] = PrivateAttr()
+    _aembed_many_func: Optional[Callable] = PrivateAttr()
+
+    def __init__(
+        self,
+        embed: Callable,
+        embed_many: Optional[Callable] = None,
+        aembed: Optional[Callable] = None,
+        aembed_many: Optional[Callable] = None,
+        model: str = "custom_vectorizer",
+    ):
+        """Initialize the Custom vectorizer.
+
+                Args:
+                    embed (Optional[Callable]) a Callable function that accepts a list of string object and return a list containing lists of floats. Defaults to None.
+
+        :
+                    embed_many (Optional[Callable)]: a Callable function that accepts a list of string object and return a list containing lists of floats. Defaults to None.
+                    aembed: Optional[Callable] = None,
+                    aembed_many: Optional[Callable] = None,
+
+                Raises:
+                    ValueError if neither embed and embed_many are provided
+        """
+
+        self._validate_embed(embed)
+        self._embed_func = embed
+        # Always store the optional callables (None when absent): the public methods read them before raising NotImplementedError.
+        if embed_many:
+            self._validate_embed_many(embed_many)
+        self._embed_many_func = embed_many
+        if aembed:
+            self._validate_aembed(aembed)
+        self._aembed_func = aembed
+        if aembed_many:
+            self._validate_aembed_many(aembed_many)
+        self._aembed_many_func = aembed_many
+
+        super().__init__(model=model, dims=self._set_model_dims())
+
+    def _validate_embed(self, func: Callable):
+        # calling the func with dummy input and validating that it returns a vector
+        try:
+            test_str = "this is a test sentence"
+            candidate_vector = func(test_str)
+            if type(candidate_vector) != list or type(candidate_vector[0]) != float:
+                raise ValueError(
+                    f"Candidate function for embed() does not have the correct return type. Please provide a function with with return type List[float]"
+                )
+        except TypeError:
+            raise TypeError(f"{func} is not a callable object")
+
+    def _validate_embed_many(self, func: Callable):
+        # calling the func with dummy input and validating that it returns a list of vectors
+        try:
+            test_strs = ["first test sentence", "second test sentence"]
+            candidate_vectors = func(test_strs)
+            if (
+                type(candidate_vectors) != list
+                or type(candidate_vectors[0]) != list
+                or type(candidate_vectors[0][0]) != float
+            ):
+                raise ValueError(
+                    f"Candidate function for embed_many does not have the correct return type. Please provide a function with with return type List[List[float]]"
+                )
+        except TypeError:
+            raise TypeError(f"{func} is not a callable object")
+
+    def _validate_aembed(self, func: Callable):
+        # calling the func with dummy input and validating that it returns a vector
+        import asyncio
+
+        try:
+            test_str = "this is a test sentence"
+            loop = asyncio.get_event_loop()
+            candidate_vector = loop.run_until_complete(func(test_str))
+            if type(candidate_vector) != list or type(candidate_vector[0]) != float:
+                raise ValueError(
+                    f"Candidate function for aembed() does not have the correct return type. Please provide a function with with return type List[float]"
+                )
+        except TypeError:
+            raise TypeError(f"{func} is not a callable object")
+
+    def _validate_aembed_many(self, func: Callable):
+        # calling the func with dummy input and validating that it returns a list of floats
+        import asyncio
+
+        try:
+            test_strs = ["first test sentence", "second test sentence"]
+            loop = asyncio.get_event_loop()
+            candidate_vectors = loop.run_until_complete(func(test_strs))
+            if (
+                type(candidate_vectors) != list
+                or type(candidate_vectors[0]) != list
+                or type(candidate_vectors[0][0]) != float
+            ):
+                raise ValueError(
+                    f"Candidate function for aembed_many does not have the correct return type. Please provide a function with with return type List[List[float]]"
+                )
+        except TypeError:
+            raise TypeError(f"{func} is not a callable object")
+
+    def _set_model_dims(self) -> int:
+        try:
+            test_string = "dimension test"
+            embedding = self._embed_func(test_string)
+        except Exception as e:  # pylint: disable=broad-except
+            raise ValueError(
+                f"Error in checking model dimensions. Attempted to embed '{test_string}'. :{str(e)}"
+            )
+        return len(embedding)
+
+    def embed(
+        self,
+        text: str,
+        preprocess: Optional[Callable] = None,
+        as_buffer: bool = False,
+        **kwargs,
+    ) -> List[float]:
+        """Embed a chunk of text using the provided function.
+
+        Args:
+            text (str): Chunk of text to embed.
+            preprocess (Optional[Callable], optional): Optional preprocessing callable to
+                perform before vectorization. Defaults to None.
+            as_buffer (bool, optional): Whether to convert the raw embedding
+                to a byte string. Defaults to False.
+
+        Returns:
+            List[float]: Embedding.
+
+        Raises:
+            TypeError: If the wrong input type is passed in for the text.
+        """
+        if not isinstance(text, str):
+            raise TypeError("Must pass in a str value to embed.")
+
+        if preprocess:
+            text = preprocess(text)
+        # Embed unconditionally: preprocessing only transforms the input text.
+        result = self._embed_func(text, **kwargs)
+        return self._process_embedding(result, as_buffer)
+
+    def embed_many(
+        self,
+        texts: List[str],
+        preprocess: Optional[Callable] = None,
+        batch_size: int = 10,
+        as_buffer: bool = False,
+        **kwargs,
+    ) -> List[List[float]]:
+        """Embed many chunks of texts using the provided function.
+
+        Args:
+            texts (List[str]): List of text chunks to embed.
+            preprocess (Optional[Callable], optional): Optional preprocessing
+                callable to perform before vectorization. Defaults to None.
+            batch_size (int, optional): Batch size of texts to use when creating
+                embeddings. Defaults to 10.
+            as_buffer (bool, optional): Whether to convert the raw embedding
+                to a byte string. Defaults to False.
+
+        Returns:
+            List[List[float]]: List of embeddings.
+
+        Raises:
+            TypeError: If the wrong input type is passed in for the text.
+            NotImplementedError: if embed_many was not passed to constructor.
+        """
+        if not isinstance(texts, list):
+            raise TypeError("Must pass in a list of str values to embed.")
+        if len(texts) > 0 and not isinstance(texts[0], str):
+            raise TypeError("Must pass in a list of str values to embed.")
+
+        if not self._embed_many_func:
+            raise NotImplementedError
+
+        embeddings: List = []
+        for batch in self.batchify(texts, batch_size, preprocess):
+            results = self._embed_many_func(batch, **kwargs)
+            embeddings += [self._process_embedding(r, as_buffer) for r in results]
+        return embeddings
+
+    async def aembed(
+        self,
+        text: str,
+        preprocess: Optional[Callable] = None,
+        as_buffer: bool = False,
+        **kwargs,
+    ) -> List[float]:
+        """Asynchronously embed a chunk of text.
+
+        Args:
+            text (str): Chunk of text to embed.
+            preprocess (Optional[Callable], optional): Optional preprocessing callable to
+                perform before vectorization. Defaults to None.
+            as_buffer (bool, optional): Whether to convert the raw embedding
+                to a byte string. Defaults to False.
+
+        Returns:
+            List[float]: Embedding.
+
+        Raises:
+            TypeError: If the wrong input type is passed in for the text.
+            NotImplementedError: if aembed was not passed to constructor.
+        """
+        if not isinstance(text, str):
+            raise TypeError("Must pass in a str value to embed.")
+
+        if not self._aembed_func:
+            raise NotImplementedError
+
+        if preprocess:
+            text = preprocess(text)
+        # Embed unconditionally: preprocessing only transforms the input text.
+        result = await self._aembed_func(text, **kwargs)
+        return self._process_embedding(result, as_buffer)
+
+    async def aembed_many(
+        self,
+        texts: List[str],
+        preprocess: Optional[Callable] = None,
+        batch_size: int = 1000,
+        as_buffer: bool = False,
+        **kwargs,
+    ) -> List[List[float]]:
+        """Asynchronously embed many chunks of texts.
+
+        Args:
+            texts (List[str]): List of text chunks to embed.
+            preprocess (Optional[Callable], optional): Optional preprocessing callable to
+                perform before vectorization. Defaults to None.
+            batch_size (int, optional): Batch size of texts to use when creating
+                embeddings. Defaults to 1000.
+            as_buffer (bool, optional): Whether to convert the raw embedding
+                to a byte string. Defaults to False.
+
+        Returns:
+            List[List[float]]: List of embeddings.
+
+        Raises:
+            TypeError: If the wrong input type is passed in for the text.
+            NotImplementedError: If aembed_many was not passed to constructor.
+        """
+        if not isinstance(texts, list):
+            raise TypeError("Must pass in a list of str values to embed.")
+        if len(texts) > 0 and not isinstance(texts[0], str):
+            raise TypeError("Must pass in a list of str values to embed.")
+
+        if not self._aembed_many_func:
+            raise NotImplementedError
+
+        embeddings: List = []
+        for batch in self.batchify(texts, batch_size, preprocess):
+            results = await self._aembed_many_func(batch, **kwargs)
+            embeddings += [self._process_embedding(r, as_buffer) for r in results]
+        return embeddings

From 4b2f21d758c9e49b1b1cb23ec8736de063a5d76e Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Wed, 5 Jun 2024 16:45:06 -0700
Subject: [PATCH 2/8] fixes typo in comments

---
 redisvl/utils/vectorize/text/openai.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/redisvl/utils/vectorize/text/openai.py b/redisvl/utils/vectorize/text/openai.py
index b5d2070c..c1c9d7e8 100644
--- a/redisvl/utils/vectorize/text/openai.py
+++ b/redisvl/utils/vectorize/text/openai.py
@@ -134,7 +134,7 @@ def embed_many(
             List[List[float]]: List of embeddings.
 
         Raises:
-            TypeError: If the wrong input type is passed in for the test.
+            TypeError: If the wrong input type is passed in for the text.
         """
         if not isinstance(texts, list):
             raise TypeError("Must pass in a list of str values to embed.")
@@ -174,7 +174,7 @@ def embed(
             List[float]: Embedding.
 
         Raises:
-            TypeError: If the wrong input type is passed in for the test.
+            TypeError: If the wrong input type is passed in for the text.
         """
         if not isinstance(text, str):
             raise TypeError("Must pass in a str value to embed.")
@@ -212,7 +212,7 @@ async def aembed_many(
             List[List[float]]: List of embeddings.
 
         Raises:
-            TypeError: If the wrong input type is passed in for the test.
+            TypeError: If the wrong input type is passed in for the text.
         """
         if not isinstance(texts, list):
             raise TypeError("Must pass in a list of str values to embed.")
@@ -254,7 +254,7 @@ async def aembed(
             List[float]: Embedding.
 
         Raises:
-            TypeError: If the wrong input type is passed in for the test.
+            TypeError: If the wrong input type is passed in for the text.
         """
         if not isinstance(text, str):
             raise TypeError("Must pass in a str value to embed.")

From c1aa18bcbf81921af7104cc92d8fc921af1fca49 Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Wed, 5 Jun 2024 16:45:52 -0700
Subject: [PATCH 3/8] adds tests for custom text vectorizer

---
 tests/integration/test_vectorizers.py | 159 +++++++++++++++++++++++++-
 1 file changed, 158 insertions(+), 1 deletion(-)

diff --git a/tests/integration/test_vectorizers.py b/tests/integration/test_vectorizers.py
index 23952c65..36a78e1b 100644
--- a/tests/integration/test_vectorizers.py
+++ b/tests/integration/test_vectorizers.py
@@ -5,6 +5,7 @@
 from redisvl.utils.vectorize import (
     AzureOpenAITextVectorizer,
     CohereTextVectorizer,
+    CustomTextVectorizer,
     HFTextVectorizer,
     OpenAITextVectorizer,
     VertexAITextVectorizer,
@@ -25,6 +26,7 @@ def skip_vectorizer() -> bool:
         VertexAITextVectorizer,
         CohereTextVectorizer,
         AzureOpenAITextVectorizer,
+        CustomTextVectorizer,
     ]
 )
 def vectorizer(request, skip_vectorizer):
@@ -43,6 +45,32 @@ def vectorizer(request, skip_vectorizer):
         return request.param(
             model=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME", "text-embedding-ada-002")
         )
+    elif request.param == CustomTextVectorizer:
+
+        def embed(text):
+            return [1.1, 2.2, 3.3, 4.4]
+
+        def embed_many(texts):
+            return [[1.1, 2.2, 3.3, 4.4]] * len(texts)
+
+        return request.param(embed=embed, embed_many=embed_many)
+
+
+@pytest.fixture
+def custom_embed_func():
+    def embed(text: str):
+        return [0.1, 0.2, 0.3]
+
+    return embed
+
+
+@pytest.fixture
+def custom_embedder_class():
+    class embedder:
+        def embed(self, text: str):
+            return [0.1, 0.2, 0.3]
+
+    return embedder
 
 
 def test_vectorizer_embed(vectorizer):
@@ -81,7 +109,120 @@ def test_vectorizer_bad_input(vectorizer):
         vectorizer.embed_many(42)
 
 
-@pytest.fixture(params=[OpenAITextVectorizer])
+def test_custom_vectorizer_embed(custom_embedder_class, custom_embed_func):
+    # test we can pass a stand alone function as embedder callable
+    def my_embedder(text: str):
+        return [1.1, 2.2, 3.3, 4.4]
+
+    custom_wrapper = CustomTextVectorizer(embed=my_embedder)
+    embedding = custom_wrapper.embed("This is a test sentence.")
+    assert embedding == [1.1, 2.2, 3.3, 4.4]
+
+    # test we can pass an instance of a class method as embedder callable
+    class EmbedClass:
+        def embed_method(self, text: str):
+            return [5.0, 6.0, 7.0, 8.0]
+
+    custom_wrapper = CustomTextVectorizer(embed=EmbedClass().embed_method)
+    embedding = custom_wrapper.embed("This is a test sentence.")
+    assert embedding == [5.0, 6.0, 7.0, 8.0]
+
+    # test we can pass additional parameters and kwargs to embedding methods
+    def embedder_with_args(text: str, max_len=None):
+        return [1.1, 2.2, 3.3, 4.4][0:max_len]
+
+    custom_wrapper = CustomTextVectorizer(embed=embedder_with_args)
+    embedding = custom_wrapper.embed("This is a test sentence.", max_len=4)
+    assert embedding == [1.1, 2.2, 3.3, 4.4]
+    embedding = custom_wrapper.embed("This is a test sentence.", max_len=2)
+    assert embedding == [1.1, 2.2]
+
+    # test that correct error is raised if a non-callable is passed
+    with pytest.raises(TypeError):
+        bad_wrapper = CustomTextVectorizer(embed="hello")
+
+    with pytest.raises(TypeError):
+        bad_wrapper = CustomTextVectorizer(embed=42)
+
+    with pytest.raises(TypeError):
+        bad_wrapper = CustomTextVectorizer(embed={"foo": "bar"})
+
+    # test that correct error is raised if passed function has incorrect types
+    def bad_arg_type(value: int):
+        return [value]
+
+    with pytest.raises(ValueError):
+        bad_wrapper = CustomTextVectorizer(embed=bad_arg_type)
+
+    def bad_return_type(text: str) -> str:
+        return text
+
+    with pytest.raises(ValueError):
+        bad_wrapper = CustomTextVectorizer(embed=bad_return_type)
+
+
+def test_custom_vectorizer_embed_many(custom_embedder_class, custom_embed_func):
+    # test we can pass a stand alone function as embed_many callable
+    def my_embed_many(text_list):
+        return [[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]]
+
+    custom_wrapper = CustomTextVectorizer(custom_embed_func, embed_many=my_embed_many)
+    embeddings = custom_wrapper.embed_many(["test one.", "test two"])
+    assert embeddings == [[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]]
+
+    # test we can pass a class method as embedder callable
+    class EmbedClass:
+        def embed_many_method(self, text_list):
+            return [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
+
+    custom_wrapper = CustomTextVectorizer(
+        custom_embed_func, embed_many=EmbedClass().embed_many_method
+    )
+    embeddings = custom_wrapper.embed_many(["test one.", "test two"])
+    assert embeddings == [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
+
+    # test we can pass additional parameters and kwargs to embedding methods
+    def embed_many_with_args(texts, param=True):
+        if param:
+            return [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
+        else:
+            return [[6.0, 5.0, 4.0], [3.0, 2.0, 1.0]]
+
+    custom_wrapper = CustomTextVectorizer(
+        custom_embed_func, embed_many=embed_many_with_args
+    )
+    embeddings = custom_wrapper.embed_many(["test one.", "test two"], param=True)
+    assert embeddings == [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
+    embeddings = custom_wrapper.embed_many(["test one.", "test two"], param=False)
+    assert embeddings == [[6.0, 5.0, 4.0], [3.0, 2.0, 1.0]]
+
+    # test that correct error is raised if a non-callable is passed
+    with pytest.raises(TypeError):
+        bad_wrapper = CustomTextVectorizer(custom_embed_func, embed_many="hello")
+
+    with pytest.raises(TypeError):
+        bad_wrapper = CustomTextVectorizer(custom_embed_func, embed_many=42)
+
+    with pytest.raises(TypeError):
+        bad_wrapper = CustomTextVectorizer(custom_embed_func, embed_many={"foo": "bar"})
+
+    # test that correct error is raised if passed function has incorrect types
+    def bad_arg_type(value: int):
+        return [value]
+
+    with pytest.raises(ValueError):
+        bad_wrapper = CustomTextVectorizer(custom_embed_func, embed_many=bad_arg_type)
+
+    def bad_return_type(text: str) -> str:
+        return text
+
+    with pytest.raises(ValueError):
+        bad_wrapper = CustomTextVectorizer(
+            custom_embed_func, embed_many=bad_return_type
+        )
+
+
+@pytest.fixture(params=[OpenAITextVectorizer, CustomTextVectorizer])
 def avectorizer(request, skip_vectorizer):
     if skip_vectorizer:
         pytest.skip("Skipping vectorizer instantiation...")
@@ -90,6 +231,22 @@ def avectorizer(request, skip_vectorizer):
     if request.param == OpenAITextVectorizer:
         return request.param()
 
+    # The custom vectorizer is exercised with fixed-output stub callables
+    if request.param == CustomTextVectorizer:
+
+        def embed_func(text):
+            return [1.1, 2.2, 3.3, 4.4]
+
+        async def aembed_func(text):
+            return [1.1, 2.2, 3.3, 4.4]
+
+        async def aembed_many_func(texts):
+            return [[1.1, 2.2, 3.3, 4.4]] * len(texts)
+
+        return request.param(
+            embed=embed_func, aembed=aembed_func, aembed_many=aembed_many_func
+        )
+
 
 @pytest.mark.asyncio
 async def test_vectorizer_aembed(avectorizer):

From d69f4ffe4f9d47fb79894f285b1ad0a5add0435b Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Wed, 5 Jun 2024 17:08:17 -0700
Subject: [PATCH 4/8] updates doc strings

---
 redisvl/utils/vectorize/text/custom.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/redisvl/utils/vectorize/text/custom.py b/redisvl/utils/vectorize/text/custom.py
index 4546dd10..e558c735 100644
--- a/redisvl/utils/vectorize/text/custom.py
+++ b/redisvl/utils/vectorize/text/custom.py
@@ -7,7 +7,7 @@
 
 
 class CustomTextVectorizer(BaseVectorizer):
-    """The CustomTextVectorizer class wraps a user-defined vectorizer to create
+    """The CustomTextVectorizer class wraps user-defined embedding methods to create
     embeddings for text data.
 
     This vectorizer is designed to accept a provided callable text vectorizer and
@@ -49,15 +49,16 @@ def __init__(
         """Initialize the Custom vectorizer.
 
                 Args:
-                    embed (Optional[Callable]) a Callable function that accepts a list of string object and return a list containing lists of floats. Defaults to None.
+                    embed (Callable): a Callable function that accepts a string object and returns a list of floats.
 
         :
-                    embed_many (Optional[Callable)]: a Callable function that accepts a list of string object and return a list containing lists of floats. Defaults to None.
-                    aembed: Optional[Callable] = None,
-                    aembed_many: Optional[Callable] = None,
+                    embed_many (Optional[Callable]): a Callable function that accepts a list of string objects and returns a list containing lists of floats. Defaults to None.
+                    aembed (Optional[Callable]): an asynchronous Callable function that accepts a string object and returns a list of floats. Defaults to None.
+                    aembed_many (Optional[Callable]): an asynchronous Callable function that accepts a list of string objects and returns a list containing lists of floats. Defaults to None.
 
                 Raises:
-                    ValueError if neither embed and embed_many are provided
+                    ValueError if any of the provided functions accept or return incorrect types.
+                    TypeError if any of the provided functions are not Callable objects.
         """
 
         self._validate_embed(embed)
@@ -76,7 +77,7 @@ def __init__(
         super().__init__(model=model, dims=self._set_model_dims())
 
     def _validate_embed(self, func: Callable):
-        # calling the func with dummy input and validating that it returns a vector
+        """calls the func with dummy input and validates that it returns a vector"""
         try:
             test_str = "this is a test sentence"
             candidate_vector = func(test_str)
@@ -88,7 +89,7 @@ def _validate_embed(self, func: Callable):
             raise TypeError(f"{func} is not a callable object")
 
     def _validate_embed_many(self, func: Callable):
-        # calling the func with dummy input and validating that it returns a list of vectors
+        """calls the func with dummy input and validates that it returns a list of vectors"""
         try:
             test_strs = ["first test sentence", "second test sentence"]
             candidate_vectors = func(test_strs)
@@ -104,7 +105,7 @@ def _validate_embed_many(self, func: Callable):
             raise TypeError(f"{func} is not a callable object")
 
     def _validate_aembed(self, func: Callable):
-        # calling the func with dummy input and validating that it returns a vector
+        """calls the func with dummy input and validates that it returns a vector"""
         import asyncio
 
         try:
@@ -119,7 +120,7 @@ def _validate_aembed(self, func: Callable):
             raise TypeError(f"{func} is not a callable object")
 
     def _validate_aembed_many(self, func: Callable):
-        # calling the func with dummy input and validating that it returns a list of floats
+        """calls the func with dummy input and validates that it returns a list of vectors"""
         import asyncio
 
         try:

From 5221ed9eb32730a8730e1c88e6b4de713531f2b9 Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Thu, 6 Jun 2024 09:49:44 -0700
Subject: [PATCH 5/8] refactors vectorizer tests

---
 tests/integration/test_vectorizers.py | 63 ++++++++++++---------------
 1 file changed, 27 insertions(+), 36 deletions(-)

diff --git a/tests/integration/test_vectorizers.py b/tests/integration/test_vectorizers.py
index 36a78e1b..0d6add1f 100644
--- a/tests/integration/test_vectorizers.py
+++ b/tests/integration/test_vectorizers.py
@@ -59,16 +59,28 @@ def embed_many(texts):
 @pytest.fixture
 def custom_embed_func():
     def embed(text: str):
-        return [0.1, 0.2, 0.3]
+        return [1.1, 2.2, 3.3, 4.4]
 
     return embed
 
 
 @pytest.fixture
-def custom_embedder_class():
+def custom_embed_class():
     class embedder:
         def embed(self, text: str):
-            return [0.1, 0.2, 0.3]
+            return [1.1, 2.2, 3.3, 4.4]
+
+        def embed_with_args(self, text: str, max_len=None):
+            return [1.1, 2.2, 3.3, 4.4][0:max_len]
+
+        def embed_many(self, text_list):
+            return [[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]]
+
+        def embed_many_with_args(self, texts, param=True):
+            if param:
+                return [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
+            else:
+                return [[6.0, 5.0, 4.0], [3.0, 2.0, 1.0]]
 
     return embedder
 
@@ -109,29 +121,19 @@ def test_vectorizer_bad_input(vectorizer):
         vectorizer.embed_many(42)
 
 
-def test_custom_vectorizer_embed(custom_embedder_class, custom_embed_func):
+def test_custom_vectorizer_embed(custom_embed_class, custom_embed_func):
     # test we can pass a stand alone function as embedder callable
-    def my_embedder(text: str):
-        return [1.1, 2.2, 3.3, 4.4]
-
-    custom_wrapper = CustomTextVectorizer(embed=my_embedder)
+    custom_wrapper = CustomTextVectorizer(embed=custom_embed_func)
     embedding = custom_wrapper.embed("This is a test sentence.")
     assert embedding == [1.1, 2.2, 3.3, 4.4]
 
     # test we can pass an instance of a class method as embedder callable
-    class EmbedClass:
-        def embed_method(self, text: str):
-            return [5.0, 6.0, 7.0, 8.0]
-
-    custom_wrapper = CustomTextVectorizer(embed=EmbedClass().embed_method)
+    custom_wrapper = CustomTextVectorizer(embed=custom_embed_class().embed)
     embedding = custom_wrapper.embed("This is a test sentence.")
-    assert embedding == [5.0, 6.0, 7.0, 8.0]
+    assert embedding == [1.1, 2.2, 3.3, 4.4]
 
     # test we can pass additional parameters and kwargs to embedding methods
-    def embedder_with_args(text: str, max_len=None):
-        return [1.1, 2.2, 3.3, 4.4][0:max_len]
-
-    custom_wrapper = CustomTextVectorizer(embed=embedder_with_args)
+    custom_wrapper = CustomTextVectorizer(embed=custom_embed_class().embed_with_args)
     embedding = custom_wrapper.embed("This is a test sentence.", max_len=4)
     assert embedding == [1.1, 2.2, 3.3, 4.4]
     embedding = custom_wrapper.embed("This is a test sentence.", max_len=2)
@@ -161,35 +163,24 @@ def bad_return_type(text: str) -> str:
         bad_wrapper = CustomTextVectorizer(embed=bad_return_type)
 
 
-def test_custom_vectorizer_embed_many(custom_embedder_class, custom_embed_func):
+def test_custom_vectorizer_embed_many(custom_embed_class, custom_embed_func):
     # test we can pass a stand alone function as embed_many callable
-    def my_embed_many(text_list):
-        return [[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]]
-
-    custom_wrapper = CustomTextVectorizer(custom_embed_func, embed_many=my_embed_many)
+    custom_wrapper = CustomTextVectorizer(
+        custom_embed_func, embed_many=custom_embed_class().embed_many
+    )
     embeddings = custom_wrapper.embed_many(["test one.", "test two"])
     assert embeddings == [[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]]
 
     # test we can pass a class method as embedder callable
-    class EmbedClass:
-        def embed_many_method(self, text_list):
-            return [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
-
     custom_wrapper = CustomTextVectorizer(
-        custom_embed_func, embed_many=EmbedClass().embed_many_method
+        custom_embed_func, embed_many=custom_embed_class().embed_many
     )
     embeddings = custom_wrapper.embed_many(["test one.", "test two"])
-    assert embeddings == [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
+    assert embeddings == [[1.1, 2.2, 3.3], [4.4, 5.5, 6.6]]
 
     # test we can pass additional parameters and kwargs to embedding methods
-    def embed_many_with_args(texts, param=True):
-        if param:
-            return [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
-        else:
-            return [[6.0, 5.0, 4.0], [3.0, 2.0, 1.0]]
-
     custom_wrapper = CustomTextVectorizer(
-        custom_embed_func, embed_many=embed_many_with_args
+        custom_embed_func, embed_many=custom_embed_class().embed_many_with_args
     )
     embeddings = custom_wrapper.embed_many(["test one.", "test two"], param=True)
     assert embeddings == [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]

From 7e42b696727f1c007ebc863e5aa04e42a9713781 Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Thu, 6 Jun 2024 11:16:18 -0700
Subject: [PATCH 6/8] updates notebook to demonstrate CustomTextVectorizer

---
 docs/user_guide/vectorizers_04.ipynb   | 203 ++++++++++++++++---------
 redisvl/utils/vectorize/text/custom.py |  18 +--
 2 files changed, 137 insertions(+), 84 deletions(-)

diff --git a/docs/user_guide/vectorizers_04.ipynb b/docs/user_guide/vectorizers_04.ipynb
index 2dde2bad..79f6c3bb 100644
--- a/docs/user_guide/vectorizers_04.ipynb
+++ b/docs/user_guide/vectorizers_04.ipynb
@@ -12,6 +12,7 @@
     "2. HuggingFace\n",
     "3. Vertex AI\n",
     "4. Cohere\n",
+    "5. Bringing your own vectorizer\n",
     "\n",
     "Before running this notebook, be sure to\n",
     "1. Have installed ``redisvl`` and have that environment active for this notebook.\n",
@@ -64,7 +65,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -89,16 +90,16 @@
     {
      "data": {
       "text/plain": [
-       "[-0.001025049015879631,\n",
-       " -0.0030993607360869646,\n",
-       " 0.0024536605924367905,\n",
-       " -0.004484387580305338,\n",
-       " -0.010331203229725361,\n",
-       " 0.012700922787189484,\n",
-       " -0.005368996877223253,\n",
-       " -0.0029411641880869865,\n",
-       " -0.0070833307690918446,\n",
-       " -0.03386051580309868]"
+       "[-0.0010508307022973895,\n",
+       " -0.0031670420430600643,\n",
+       " 0.0023781107738614082,\n",
+       " -0.004539588466286659,\n",
+       " -0.010320774279534817,\n",
+       " 0.012868634425103664,\n",
+       " -0.0054513863287866116,\n",
+       " -0.002984359161928296,\n",
+       " -0.0072814482264220715,\n",
+       " -0.033704183995723724]"
       ]
      },
      "execution_count": 3,
@@ -128,16 +129,16 @@
     {
      "data": {
       "text/plain": [
-       "[-0.01747742109000683,\n",
-       " -5.228330701356754e-05,\n",
-       " 0.0013870716793462634,\n",
-       " -0.025637786835432053,\n",
-       " -0.01985435001552105,\n",
-       " 0.016117358580231667,\n",
-       " -0.0037306349258869886,\n",
-       " 0.0008945261361077428,\n",
-       " 0.006577865686267614,\n",
-       " -0.025091219693422318]"
+       "[-0.01749197021126747,\n",
+       " -5.238811718299985e-05,\n",
+       " 0.0013331907102838159,\n",
+       " -0.025576923042535782,\n",
+       " -0.019907286390662193,\n",
+       " 0.016106342896819115,\n",
+       " -0.003756451653316617,\n",
+       " 0.0009971122490242124,\n",
+       " 0.006661186460405588,\n",
+       " -0.024954024702310562]"
       ]
      },
      "execution_count": 4,
@@ -189,7 +190,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -202,34 +203,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Vector dimensions:  1536\n"
+     "ename": "ValueError",
+     "evalue": "AzureOpenAI API endpoint is required. Provide it in api_config or set the AZURE_OPENAI_ENDPOINT                    environment variable.",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[7], line 4\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mredisvl\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mvectorize\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AzureOpenAITextVectorizer\n\u001b[1;32m      3\u001b[0m \u001b[38;5;66;03m# create a vectorizer\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m az_oai \u001b[38;5;241m=\u001b[39m \u001b[43mAzureOpenAITextVectorizer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m      5\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdeployment_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Must be your CUSTOM deployment name\u001b[39;49;00m\n\u001b[1;32m      6\u001b[0m \u001b[43m    \u001b[49m\u001b[43mapi_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\n\u001b[1;32m      7\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mapi_key\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mapi_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      8\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mapi_version\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mapi_version\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      9\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mazure_endpoint\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mazure_endpoint\u001b[49m\n\u001b[1;32m     10\u001b[0m \u001b[43m    \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m     13\u001b[0m test \u001b[38;5;241m=\u001b[39m 
az_oai\u001b[38;5;241m.\u001b[39membed(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThis is a test sentence.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     14\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mVector dimensions: \u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mlen\u001b[39m(test))\n",
+      "File \u001b[0;32m~/Documents/redisvl/redisvl/utils/vectorize/text/azureopenai.py:70\u001b[0m, in \u001b[0;36mAzureOpenAITextVectorizer.__init__\u001b[0;34m(self, model, api_config)\u001b[0m\n\u001b[1;32m     54\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m     55\u001b[0m     \u001b[38;5;28mself\u001b[39m, model: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext-embedding-ada-002\u001b[39m\u001b[38;5;124m\"\u001b[39m, api_config: Optional[Dict] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m     56\u001b[0m ):\n\u001b[1;32m     57\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"Initialize the AzureOpenAI vectorizer.\u001b[39;00m\n\u001b[1;32m     58\u001b[0m \n\u001b[1;32m     59\u001b[0m \u001b[38;5;124;03m    Args:\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     68\u001b[0m \u001b[38;5;124;03m        ValueError: If the AzureOpenAI API key, version, or endpoint are not provided.\u001b[39;00m\n\u001b[1;32m     69\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[0;32m---> 70\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initialize_clients\u001b[49m\u001b[43m(\u001b[49m\u001b[43mapi_config\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     71\u001b[0m     \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(model\u001b[38;5;241m=\u001b[39mmodel, dims\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_set_model_dims(model))\n",
+      "File \u001b[0;32m~/Documents/redisvl/redisvl/utils/vectorize/text/azureopenai.py:95\u001b[0m, in \u001b[0;36mAzureOpenAITextVectorizer._initialize_clients\u001b[0;34m(self, api_config)\u001b[0m\n\u001b[1;32m     88\u001b[0m azure_endpoint \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m     89\u001b[0m     api_config\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mazure_endpoint\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     90\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m api_config\n\u001b[1;32m     91\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m os\u001b[38;5;241m.\u001b[39mgetenv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAZURE_OPENAI_ENDPOINT\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     92\u001b[0m )\n\u001b[1;32m     94\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m azure_endpoint:\n\u001b[0;32m---> 95\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m     96\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAzureOpenAI API endpoint is required. 
\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m     97\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mProvide it in api_config or set the AZURE_OPENAI_ENDPOINT\u001b[39m\u001b[38;5;130;01m\\\u001b[39;00m\n\u001b[1;32m     98\u001b[0m \u001b[38;5;124m            environment variable.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m     99\u001b[0m     )\n\u001b[1;32m    101\u001b[0m api_version \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m    102\u001b[0m     api_config\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mapi_version\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    103\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m api_config\n\u001b[1;32m    104\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m os\u001b[38;5;241m.\u001b[39mgetenv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOPENAI_API_VERSION\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    105\u001b[0m )\n\u001b[1;32m    107\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m api_version:\n",
+      "\u001b[0;31mValueError\u001b[0m: AzureOpenAI API endpoint is required. Provide it in api_config or set the AZURE_OPENAI_ENDPOINT                    environment variable."
      ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "[-0.0010088568087667227,\n",
-       " -0.003142790636047721,\n",
-       " 0.0024922797456383705,\n",
-       " -0.004522906616330147,\n",
-       " -0.010369433090090752,\n",
-       " 0.012739036232233047,\n",
-       " -0.005365503951907158,\n",
-       " -0.0029668458737432957,\n",
-       " -0.007141091860830784,\n",
-       " -0.03383301943540573]"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
     }
    ],
    "source": [
@@ -252,27 +240,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "[-0.017460526898503304,\n",
-       " -6.895032856846228e-05,\n",
-       " 0.0013909287517890334,\n",
-       " -0.025688467547297478,\n",
-       " -0.019813183695077896,\n",
-       " 0.016087085008621216,\n",
-       " -0.003729278687387705,\n",
-       " 0.0009211922879330814,\n",
-       " 0.006606514099985361,\n",
-       " -0.025128915905952454]"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
+     "ename": "NameError",
+     "evalue": "name 'az_oai' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[6], line 8\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;66;03m# Just like OpenAI, AzureOpenAI supports batching embeddings and asynchronous requests.\u001b[39;00m\n\u001b[1;32m      2\u001b[0m sentences \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m      3\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThat is a happy dog\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m      4\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThat is a happy person\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m      5\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mToday is a sunny day\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m      6\u001b[0m ]\n\u001b[0;32m----> 8\u001b[0m embeddings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[43maz_oai\u001b[49m\u001b[38;5;241m.\u001b[39maembed_many(sentences)\n\u001b[1;32m      9\u001b[0m embeddings[\u001b[38;5;241m0\u001b[39m][:\u001b[38;5;241m10\u001b[39m]\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'az_oai' is not defined"
+     ]
     }
    ],
    "source": [
@@ -302,7 +282,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -349,7 +329,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -384,7 +364,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -438,7 +418,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -459,7 +439,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -500,6 +480,81 @@
     "Learn more about using RedisVL and Cohere together through [this dedicated user guide](https://docs.cohere.com/docs/redis-and-cohere)."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Custom Vectorizers\n",
+    "\n",
+    "RedisVL supports the use of other vectorizers and provides a class to enable compatibility with any function that generates a vector or vectors from string data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from redisvl.utils.vectorize import CustomTextVectorizer\n",
+    "\n",
+    "def generate_embeddings(text_input):\n",
+    "    return [0.1] * 768\n",
+    "\n",
+    "    \n",
+    "custom_vectorizer = CustomTextVectorizer(generate_embeddings)\n",
+    "\n",
+    "custom_vectorizer.embed(\"This is a test sentence.\")[:10]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "10:40:28 redisvl.index.index INFO   Index already exists, not overwriting.\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[{'id': 'llmcache:78bd2446a37a0c6ab62652af9b7e53845145c4471ea83ff9fb4280a528d36bbb',\n",
+       "  'vector_distance': '6.13927841187e-06',\n",
+       "  'prompt': 'this is a test prompt',\n",
+       "  'response': 'this is a test response',\n",
+       "  'prompt_vector': '================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================'}]"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# this enables the use of custom vectorizers with other RedisVL components\n",
+    "from redisvl.extensions.llmcache import SemanticCache\n",
+    "\n",
+    "#cache = SemanticCache(vectorizer=custom_vectorizer)\n",
+    "cache = SemanticCache()\n",
+    "\n",
+    "cache.store(\"this is a test prompt\", \"this is a test response\")\n",
+    "cache.check(\"this is also a test prompt\")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -534,7 +589,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -552,7 +607,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -571,7 +626,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -600,7 +655,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -658,7 +713,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.5"
+   "version": "3.12.2"
   },
   "orig_nbformat": 4,
   "vscode": {
diff --git a/redisvl/utils/vectorize/text/custom.py b/redisvl/utils/vectorize/text/custom.py
index e558c735..7ccca839 100644
--- a/redisvl/utils/vectorize/text/custom.py
+++ b/redisvl/utils/vectorize/text/custom.py
@@ -48,17 +48,15 @@ def __init__(
     ):
         """Initialize the Custom vectorizer.
 
-                Args:
-                    embed (Callable) a Callable function that accepts a string object and return a list of floats.
-
-        :
-                    embed_many (Optional[Callable)]: a Callable function that accepts a list of string objects and returns a list containing lists of floats. Defaults to None.
-                    aembed: Optional[Callable] =  an asyncronous Callable function that accepts a string object and returns a lists of floats. Defaults to None.
-                    aembed_many: Optional[Callable] =  an asyncronous Callable function that accepts a list of string objects and returns a list containing lists of floats. Defaults to None.
+        Args:
+            embed (Callable): a Callable function that accepts a string object and returns a list of floats.
+            embed_many (Optional[Callable]): a Callable function that accepts a list of string objects and returns a list containing lists of floats. Defaults to None.
+            aembed (Optional[Callable]): an asynchronous Callable function that accepts a string object and returns a list of floats. Defaults to None.
+            aembed_many (Optional[Callable]): an asynchronous Callable function that accepts a list of string objects and returns a list containing lists of floats. Defaults to None.
 
-                Raises:
-                    ValueError if any of the provided functions accept or return incorrect types.
-                    TypeError if any of the provided functions are not Callable objects.
+        Raises:
+            ValueError: If any of the provided functions accept or return incorrect types.
+            TypeError: If any of the provided functions are not Callable objects.
         """
 
         self._validate_embed(embed)

From 73dfbd7d16adddff72b50c4302a05e7633b560a8 Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Tue, 18 Jun 2024 11:08:20 -0700
Subject: [PATCH 7/8] removes error messages in vectorizers notebook

---
 docs/user_guide/vectorizers_04.ipynb | 113 ++++++++++++++++-----------
 1 file changed, 69 insertions(+), 44 deletions(-)

diff --git a/docs/user_guide/vectorizers_04.ipynb b/docs/user_guide/vectorizers_04.ipynb
index 79f6c3bb..54d33055 100644
--- a/docs/user_guide/vectorizers_04.ipynb
+++ b/docs/user_guide/vectorizers_04.ipynb
@@ -65,7 +65,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -190,7 +190,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -203,21 +203,34 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
-     "ename": "ValueError",
-     "evalue": "AzureOpenAI API endpoint is required. Provide it in api_config or set the AZURE_OPENAI_ENDPOINT                    environment variable.",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[7], line 4\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mredisvl\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mvectorize\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AzureOpenAITextVectorizer\n\u001b[1;32m      3\u001b[0m \u001b[38;5;66;03m# create a vectorizer\u001b[39;00m\n\u001b[0;32m----> 4\u001b[0m az_oai \u001b[38;5;241m=\u001b[39m \u001b[43mAzureOpenAITextVectorizer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m      5\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdeployment_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Must be your CUSTOM deployment name\u001b[39;49;00m\n\u001b[1;32m      6\u001b[0m \u001b[43m    \u001b[49m\u001b[43mapi_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\n\u001b[1;32m      7\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mapi_key\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mapi_key\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      8\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mapi_version\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mapi_version\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      9\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mazure_endpoint\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mazure_endpoint\u001b[49m\n\u001b[1;32m     10\u001b[0m \u001b[43m    \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m     13\u001b[0m test \u001b[38;5;241m=\u001b[39m 
az_oai\u001b[38;5;241m.\u001b[39membed(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThis is a test sentence.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     14\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mVector dimensions: \u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mlen\u001b[39m(test))\n",
-      "File \u001b[0;32m~/Documents/redisvl/redisvl/utils/vectorize/text/azureopenai.py:70\u001b[0m, in \u001b[0;36mAzureOpenAITextVectorizer.__init__\u001b[0;34m(self, model, api_config)\u001b[0m\n\u001b[1;32m     54\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\n\u001b[1;32m     55\u001b[0m     \u001b[38;5;28mself\u001b[39m, model: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext-embedding-ada-002\u001b[39m\u001b[38;5;124m\"\u001b[39m, api_config: Optional[Dict] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m     56\u001b[0m ):\n\u001b[1;32m     57\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"Initialize the AzureOpenAI vectorizer.\u001b[39;00m\n\u001b[1;32m     58\u001b[0m \n\u001b[1;32m     59\u001b[0m \u001b[38;5;124;03m    Args:\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     68\u001b[0m \u001b[38;5;124;03m        ValueError: If the AzureOpenAI API key, version, or endpoint are not provided.\u001b[39;00m\n\u001b[1;32m     69\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[0;32m---> 70\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initialize_clients\u001b[49m\u001b[43m(\u001b[49m\u001b[43mapi_config\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     71\u001b[0m     \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(model\u001b[38;5;241m=\u001b[39mmodel, dims\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_set_model_dims(model))\n",
-      "File \u001b[0;32m~/Documents/redisvl/redisvl/utils/vectorize/text/azureopenai.py:95\u001b[0m, in \u001b[0;36mAzureOpenAITextVectorizer._initialize_clients\u001b[0;34m(self, api_config)\u001b[0m\n\u001b[1;32m     88\u001b[0m azure_endpoint \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m     89\u001b[0m     api_config\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mazure_endpoint\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     90\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m api_config\n\u001b[1;32m     91\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m os\u001b[38;5;241m.\u001b[39mgetenv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAZURE_OPENAI_ENDPOINT\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     92\u001b[0m )\n\u001b[1;32m     94\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m azure_endpoint:\n\u001b[0;32m---> 95\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m     96\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAzureOpenAI API endpoint is required. 
\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m     97\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mProvide it in api_config or set the AZURE_OPENAI_ENDPOINT\u001b[39m\u001b[38;5;130;01m\\\u001b[39;00m\n\u001b[1;32m     98\u001b[0m \u001b[38;5;124m            environment variable.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m     99\u001b[0m     )\n\u001b[1;32m    101\u001b[0m api_version \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m    102\u001b[0m     api_config\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mapi_version\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    103\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m api_config\n\u001b[1;32m    104\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m os\u001b[38;5;241m.\u001b[39mgetenv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOPENAI_API_VERSION\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    105\u001b[0m )\n\u001b[1;32m    107\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m api_version:\n",
-      "\u001b[0;31mValueError\u001b[0m: AzureOpenAI API endpoint is required. Provide it in api_config or set the AZURE_OPENAI_ENDPOINT                    environment variable."
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Vector dimensions:  1536\n"
      ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "[-0.0010088568087667227,\n",
+       " -0.003142790636047721,\n",
+       " 0.0024922797456383705,\n",
+       " -0.004522906616330147,\n",
+       " -0.010369433090090752,\n",
+       " 0.012739036232233047,\n",
+       " -0.005365503951907158,\n",
+       " -0.0029668458737432957,\n",
+       " -0.007141091860830784,\n",
+       " -0.03383301943540573]"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
@@ -240,19 +253,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
-     "ename": "NameError",
-     "evalue": "name 'az_oai' is not defined",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[6], line 8\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;66;03m# Just like OpenAI, AzureOpenAI supports batching embeddings and asynchronous requests.\u001b[39;00m\n\u001b[1;32m      2\u001b[0m sentences \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m      3\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThat is a happy dog\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m      4\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThat is a happy person\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m      5\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mToday is a sunny day\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m      6\u001b[0m ]\n\u001b[0;32m----> 8\u001b[0m embeddings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[43maz_oai\u001b[49m\u001b[38;5;241m.\u001b[39maembed_many(sentences)\n\u001b[1;32m      9\u001b[0m embeddings[\u001b[38;5;241m0\u001b[39m][:\u001b[38;5;241m10\u001b[39m]\n",
-      "\u001b[0;31mNameError\u001b[0m: name 'az_oai' is not defined"
-     ]
+     "data": {
+      "text/plain": [
+       "[-0.017460526898503304,\n",
+       " -6.895032856846228e-05,\n",
+       " 0.0013909287517890334,\n",
+       " -0.025688467547297478,\n",
+       " -0.019813183695077896,\n",
+       " 0.016087085008621216,\n",
+       " -0.003729278687387705,\n",
+       " 0.0009211922879330814,\n",
+       " 0.006606514099985361,\n",
+       " -0.025128915905952454]"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
@@ -282,7 +303,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
@@ -329,7 +350,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -364,7 +385,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -418,7 +439,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -439,7 +460,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -491,16 +512,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]"
+       "[0.101, 0.101, 0.101, 0.101, 0.101, 0.101, 0.101, 0.101, 0.101, 0.101]"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -509,24 +530,30 @@
     "from redisvl.utils.vectorize import CustomTextVectorizer\n",
     "\n",
     "def generate_embeddings(text_input):\n",
-    "    return [0.1] * 768\n",
+    "    return [0.101] * 768\n",
     "\n",
-    "    \n",
     "custom_vectorizer = CustomTextVectorizer(generate_embeddings)\n",
     "\n",
     "custom_vectorizer.embed(\"This is a test sentence.\")[:10]"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This enables the use of custom vectorizers with other RedisVL components."
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "10:40:28 redisvl.index.index INFO   Index already exists, not overwriting.\n"
+      "11:04:14 redisvl.index.index INFO   Index already exists, not overwriting.\n"
      ]
     },
     {
@@ -536,20 +563,18 @@
        "  'vector_distance': '6.13927841187e-06',\n",
        "  'prompt': 'this is a test prompt',\n",
        "  'response': 'this is a test response',\n",
-       "  'prompt_vector': '================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================'}]"
+       "  'prompt_vector': '\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x
17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17
=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17=\\x17='}]"
       ]
      },
-     "execution_count": 14,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "# this enables the use of custom vectorizers with other RedisVL components\n",
     "from redisvl.extensions.llmcache import SemanticCache\n",
     "\n",
-    "#cache = SemanticCache(vectorizer=custom_vectorizer)\n",
-    "cache = SemanticCache()\n",
+    "cache = SemanticCache(vectorizer=custom_vectorizer)\n",
     "\n",
     "cache.store(\"this is a test prompt\", \"this is a test response\")\n",
     "cache.check(\"this is also a test prompt\")"
@@ -589,7 +614,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -607,7 +632,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
@@ -626,7 +651,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
@@ -655,7 +680,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {

From 24fd8fc4e6fd9797eb9b38ecc9135bf0933fbac3 Mon Sep 17 00:00:00 2001
From: Justin Cechmanek <justin.cechmanek@redis.com>
Date: Mon, 24 Jun 2024 13:58:16 -0700
Subject: [PATCH 8/8] formatting

---
 tests/integration/test_vectorizers.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/integration/test_vectorizers.py b/tests/integration/test_vectorizers.py
index da39fd82..69d7ec62 100644
--- a/tests/integration/test_vectorizers.py
+++ b/tests/integration/test_vectorizers.py
@@ -217,7 +217,9 @@ def bad_return_type(text: str) -> str:
         )
 
 
-@pytest.fixture(params=[OpenAITextVectorizer, MistralAITextVectorizer, CustomTextVectorizer])
+@pytest.fixture(
+    params=[OpenAITextVectorizer, MistralAITextVectorizer, CustomTextVectorizer]
+)
 def avectorizer(request, skip_vectorizer):
     if skip_vectorizer:
         pytest.skip("Skipping vectorizer instantiation...")