diff --git a/.gitignore b/.gitignore index 5dced209..dca4f4e3 100644 --- a/.gitignore +++ b/.gitignore @@ -220,3 +220,4 @@ pip-selfcheck.json libs/redis/docs/.Trash* .python-version .idea/* +.vscode/settings.json diff --git a/README.md b/README.md index 3f4ee75f..1d138377 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ Choose from multiple Redis deployment options: ## 🗃️ Redis Index Management -1. [Design a schema for your use case](https://docs.redisvl.com/en/stable/user_guide/getting_started_01.html#define-an-indexschema) that models your dataset with built-in Redis and indexable fields (*e.g. text, tags, numerics, geo, and vectors*). [Load a schema](https://docs.redisvl.com/en/stable/user_guide/getting_started_01.html#example-schema-creation) from a YAML file: +1. [Design a schema for your use case](https://docs.redisvl.com/en/stable/user_guide/01_getting_started.html#define-an-indexschema) that models your dataset with built-in Redis and indexable fields (*e.g. text, tags, numerics, geo, and vectors*). [Load a schema](https://docs.redisvl.com/en/stable/user_guide/01_getting_started.html#example-schema-creation) from a YAML file: ```yaml index: name: user-idx @@ -121,7 +121,7 @@ Choose from multiple Redis deployment options: }) ``` -2. [Create a SearchIndex](https://docs.redisvl.com/en/stable/user_guide/getting_started_01.html#create-a-searchindex) class with an input schema and client connection in order to perform admin and search operations on your index in Redis: +2. [Create a SearchIndex](https://docs.redisvl.com/en/stable/user_guide/01_getting_started.html#create-a-searchindex) class with an input schema and client connection in order to perform admin and search operations on your index in Redis: ```python from redis import Redis from redisvl.index import SearchIndex @@ -135,8 +135,8 @@ Choose from multiple Redis deployment options: ``` > Async compliant search index class also available: [AsyncSearchIndex](https://docs.redisvl.com/en/stable/api/searchindex.html#redisvl.index.AsyncSearchIndex). -3. [Load](https://docs.redisvl.com/en/stable/user_guide/getting_started_01.html#load-data-to-searchindex) -and [fetch](https://docs.redisvl.com/en/stable/user_guide/getting_started_01.html#fetch-an-object-from-redis) data to/from your Redis instance: +3. [Load](https://docs.redisvl.com/en/stable/user_guide/01_getting_started.html#load-data-to-searchindex) +and [fetch](https://docs.redisvl.com/en/stable/user_guide/01_getting_started.html#fetch-an-object-from-redis) data to/from your Redis instance: ```python data = {"user": "john", "credit_score": "high", "embedding": [0.23, 0.49, -0.18, 0.95]} @@ -183,7 +183,7 @@ Define queries and perform advanced searches over your indices, including the combination of - [FilterQuery](https://docs.redisvl.com/en/stable/api/query.html#filterquery) - Standard search using filters and the full-text search - [CountQuery](https://docs.redisvl.com/en/stable/api/query.html#countquery) - Count the number of indexed records given attributes -> Read more about building [advanced Redis queries](https://docs.redisvl.com/en/stable/user_guide/hybrid_queries_02.html). +> Read more about building [advanced Redis queries](https://docs.redisvl.com/en/stable/user_guide/02_hybrid_queries.html). ## 🔧 Utilities @@ -216,11 +216,11 @@ embeddings = co.embed_many( ) ``` -> Learn more about using [vectorizers]((https://docs.redisvl.com/en/stable/user_guide/vectorizers_04.html)) in your embedding workflows.
+> Learn more about using [vectorizers](https://docs.redisvl.com/en/stable/user_guide/04_vectorizers.html) in your embedding workflows. ### Rerankers -[Integrate with popular reranking providers](https://docs.redisvl.com/en/stable/user_guide/rerankers_06.html) to improve the relevancy of the initial search results from Redis +[Integrate with popular reranking providers](https://docs.redisvl.com/en/stable/user_guide/06_rerankers.html) to improve the relevance of the initial search results from Redis. @@ -257,7 +257,7 @@ print(response[0]["response"]) >>> Paris ``` -> Learn more about [semantic caching]((https://docs.redisvl.com/en/stable/user_guide/llmcache_03.html)) for LLMs. +> Learn more about [semantic caching](https://docs.redisvl.com/en/stable/user_guide/03_llmcache.html) for LLMs. ### LLM Session Management @@ -293,7 +293,7 @@ session.get_relevant("weather", top_k=1) ```stdout >>> [{"role": "user", "content": "what is the weather going to be today?"}] ``` -> Learn more about [LLM session management]((https://docs.redisvl.com/en/stable/user_guide/session_manager_07.html)). +> Learn more about [LLM session management](https://docs.redisvl.com/en/stable/user_guide/07_session_manager.html). ### LLM Semantic Routing @@ -330,7 +330,7 @@ router("Hi, good morning") ```stdout >>> RouteMatch(name='greeting', distance=0.273891836405) ``` -> Learn more about [semantic routing](https://docs.redisvl.com/en/stable/user_guide/semantic_router_08.html). +> Learn more about [semantic routing](https://docs.redisvl.com/en/stable/user_guide/08_semantic_router.html). ## 🖥️ Command Line Interface Create, destroy, and manage Redis index configurations from a purpose-built CLI interface: `rvl`. @@ -360,7 +360,7 @@ The Redis Vector Library bridges the gap between the AI-native developer ecosystem ## 😁 Helpful Links For additional help, check out the following resources: - - [Getting Started Guide](https://docs.redisvl.com/en/stable/user_guide/getting_started_01.html) + - [Getting Started Guide](https://docs.redisvl.com/en/stable/user_guide/01_getting_started.html) - [API Reference](https://docs.redisvl.com/en/stable/api/index.html) - [Example Gallery](https://docs.redisvl.com/en/stable/examples/index.html) - [Redis AI Recipes](https://github.com/redis-developer/redis-ai-resources) diff --git a/docs/index.md b/docs/index.md index ef345fd8..a9bbd094 100644 --- a/docs/index.md +++ b/docs/index.md @@ -20,12 +20,12 @@ A powerful, AI-native Python client library for [Redis](https://redis.io). Leverage content: "Perform powerful vector search queries with complex filtering support." - header: "{fas}`circle-half-stroke;pst-color-primary` Embedding Creation" content: "Use OpenAI or any of the other supported vectorizers to create embeddings." - link: "user_guide/vectorizers_04" + link: "user_guide/04_vectorizers" - header: "{fas}`palette;pst-color-primary` CLI" content: "Interact with RedisVL using a Command Line Interface (CLI) for ease of use." - header: "{fab}`python;pst-color-primary` Semantic Caching" content: "Extend RedisVL to cache LLM results, increasing QPS and decreasing system cost." - link: "user_guide/llmcache_03" + link: "user_guide/03_llmcache" - header: "{fas}`lightbulb;pst-color-primary` Example Gallery" content: "Explore the gallery of examples to get started."
link: "examples/index" diff --git a/docs/user_guide/getting_started_01.ipynb b/docs/user_guide/01_getting_started.ipynb similarity index 99% rename from docs/user_guide/getting_started_01.ipynb rename to docs/user_guide/01_getting_started.ipynb index e4fa6544..dfa2b581 100644 --- a/docs/user_guide/getting_started_01.ipynb +++ b/docs/user_guide/01_getting_started.ipynb @@ -697,7 +697,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.8.13 ('redisvl2')", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -711,14 +711,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.10" + "version": "3.11.9" }, - "orig_nbformat": 4, - "vscode": { - "interpreter": { - "hash": "9b1e6e9c2967143209c2f955cb869d1d3234f92dc4787f49f155f3abbdfb1316" - } - } + "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 diff --git a/docs/user_guide/hybrid_queries_02.ipynb b/docs/user_guide/02_hybrid_queries.ipynb similarity index 100% rename from docs/user_guide/hybrid_queries_02.ipynb rename to docs/user_guide/02_hybrid_queries.ipynb diff --git a/docs/user_guide/llmcache_03.ipynb b/docs/user_guide/03_llmcache.ipynb similarity index 100% rename from docs/user_guide/llmcache_03.ipynb rename to docs/user_guide/03_llmcache.ipynb diff --git a/docs/user_guide/04_vectorizers.ipynb b/docs/user_guide/04_vectorizers.ipynb new file mode 100644 index 00000000..d5870b88 --- /dev/null +++ b/docs/user_guide/04_vectorizers.ipynb @@ -0,0 +1,749 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Vectorizers\n", + "\n", + "In this notebook, we will show how to use RedisVL to create embeddings using the built-in text embedding vectorizers. Today RedisVL supports:\n", + "1. OpenAI\n", + "2. HuggingFace\n", + "3. Vertex AI\n", + "4. Cohere\n", + "5. Mistral AI\n", + "6. Amazon Bedrock\n", + "7. Bringing your own vectorizer\n", + "8. VoyageAI\n", + "\n", + "Before running this notebook, be sure to\n", + "1. Have installed ``redisvl`` and have that environment active for this notebook.\n", + "2. Have a running Redis Stack instance with RediSearch > 2.4 active.\n", + "\n", + "For example, you can run Redis Stack locally with Docker:\n", + "\n", + "```bash\n", + "docker run -d -p 6379:6379 -p 8001:8001 redis/redis-stack:latest\n", + "```\n", + "\n", + "This will run Redis on port 6379 and RedisInsight at http://localhost:8001." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# import necessary modules\n", + "import os" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating Text Embeddings\n", + "\n", + "This example will show how to create an embedding from 3 simple sentences with a number of different text vectorizers in RedisVL.\n", + "\n", + "- \"That is a happy dog\"\n", + "- \"That is a happy person\"\n", + "- \"Today is a nice day\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### OpenAI\n", + "\n", + "The ``OpenAITextVectorizer`` makes it simple to use RedisVL with the embeddings models at OpenAI. For this you will need to install ``openai``. 
\n", + "\n", + "```bash\n", + "pip install openai\n", + "```\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "\n", + "# setup the API Key\n", + "api_key = os.environ.get(\"OPENAI_API_KEY\") or getpass.getpass(\"Enter your OpenAI API key: \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.utils.vectorize import OpenAITextVectorizer\n", + "\n", + "# create a vectorizer\n", + "oai = OpenAITextVectorizer(\n", + " model=\"text-embedding-ada-002\",\n", + " api_config={\"api_key\": api_key},\n", + ")\n", + "\n", + "test = oai.embed(\"This is a test sentence.\")\n", + "print(\"Vector dimensions: \", len(test))\n", + "test[:10]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Create many embeddings at once\n", + "sentences = [\n", + " \"That is a happy dog\",\n", + " \"That is a happy person\",\n", + " \"Today is a sunny day\"\n", + "]\n", + "\n", + "embeddings = oai.embed_many(sentences)\n", + "embeddings[0][:10]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# openai also supports asyncronous requests, which we can use to speed up the vectorization process.\n", + "embeddings = await oai.aembed_many(sentences)\n", + "print(\"Number of Embeddings:\", len(embeddings))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Azure OpenAI\n", + "\n", + "The ``AzureOpenAITextVectorizer`` is a variation of the OpenAI vectorizer that calls OpenAI models within Azure. If you've already installed ``openai``, then you're ready to use Azure OpenAI.\n", + "\n", + "The only practical difference between OpenAI and Azure OpenAI is the variables required to call the API." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# in addition to the API key, set up the API endpoint and version\n", + "api_key = os.environ.get(\"AZURE_OPENAI_API_KEY\") or getpass.getpass(\"Enter your AzureOpenAI API key: \")\n", + "api_version = os.environ.get(\"OPENAI_API_VERSION\") or getpass.getpass(\"Enter your AzureOpenAI API version: \")\n", + "azure_endpoint = os.environ.get(\"AZURE_OPENAI_ENDPOINT\") or getpass.getpass(\"Enter your AzureOpenAI API endpoint: \")\n", + "deployment_name = os.environ.get(\"AZURE_OPENAI_DEPLOYMENT_NAME\", \"text-embedding-ada-002\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.utils.vectorize import AzureOpenAITextVectorizer\n", + "\n", + "# create a vectorizer\n", + "az_oai = AzureOpenAITextVectorizer(\n", + " model=deployment_name, # Must be your CUSTOM deployment name\n", + " api_config={\n", + " \"api_key\": api_key,\n", + " \"api_version\": api_version,\n", + " \"azure_endpoint\": azure_endpoint\n", + " },\n", + ")\n", + "\n", + "test = az_oai.embed(\"This is a test sentence.\")\n", + "print(\"Vector dimensions: \", len(test))\n", + "test[:10]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Just like OpenAI, AzureOpenAI supports batching embeddings and asynchronous requests.\n", + "sentences = [\n", + " \"That is a happy dog\",\n", + " \"That is a happy person\",\n", + " \"Today is a sunny day\"\n", + "]\n", + "\n", + "embeddings = await az_oai.aembed_many(sentences)\n", + "embeddings[0][:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Huggingface\n", + "\n", + "[Huggingface](https://huggingface.co/models) is a popular NLP platform that has a number of pre-trained models you can use off the shelf. RedisVL supports using Huggingface \"Sentence Transformers\" to create embeddings from text. To use Huggingface, you will need to install the ``sentence-transformers`` library.\n", + "\n", + "```bash\n", + "pip install sentence-transformers\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n", + "from redisvl.utils.vectorize import HFTextVectorizer\n", + "\n", + "\n", + "# create a vectorizer\n", + "# choose your model from the huggingface website\n", + "hf = HFTextVectorizer(model=\"sentence-transformers/all-mpnet-base-v2\")\n", + "\n", + "# embed a sentence\n", + "test = hf.embed(\"This is a test sentence.\")\n", + "test[:10]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# You can also create many embeddings at once\n", + "embeddings = hf.embed_many(sentences, as_buffer=True, dtype=\"float32\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### VertexAI\n", + "\n", + "[VertexAI](https://cloud.google.com/vertex-ai/docs/generative-ai/embeddings/get-text-embeddings) is GCP's fully-featured AI platform including a number of pretrained LLMs. RedisVL supports using VertexAI to create embeddings from these models. To use VertexAI, you will first need to install the ``google-cloud-aiplatform`` library.\n", + "\n", + "```bash\n", + "pip install google-cloud-aiplatform>=1.26\n", + "```\n", + "\n", + "1. 
Then you need to gain access to a [Google Cloud Project](https://cloud.google.com/gcp?hl=en) and provide [access to credentials](https://cloud.google.com/docs/authentication/application-default-credentials). This is accomplished by setting the `GOOGLE_APPLICATION_CREDENTIALS` environment variable pointing to the path of a JSON key file downloaded from your service account on GCP.\n", + "2. Lastly, you need to find your [project ID](https://support.google.com/googleapi/answer/7014113?hl=en) and [geographic region for VertexAI](https://cloud.google.com/vertex-ai/docs/general/locations).\n", + "\n", + "\n", + "**Make sure the following env vars are set:**\n", + "\n", + "```\n", + "GOOGLE_APPLICATION_CREDENTIALS=\n", + "GCP_PROJECT_ID=\n", + "GCP_LOCATION=\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.utils.vectorize import VertexAITextVectorizer\n", + "\n", + "\n", + "# create a vectorizer\n", + "vtx = VertexAITextVectorizer(api_config={\n", + " \"project_id\": os.environ.get(\"GCP_PROJECT_ID\") or getpass.getpass(\"Enter your GCP Project ID: \"),\n", + " \"location\": os.environ.get(\"GCP_LOCATION\") or getpass.getpass(\"Enter your GCP Location: \"),\n", + " \"google_application_credentials\": os.environ.get(\"GOOGLE_APPLICATION_CREDENTIALS\") or getpass.getpass(\"Enter your Google App Credentials path: \")\n", + "})\n", + "\n", + "# embed a sentence\n", + "test = vtx.embed(\"This is a test sentence.\")\n", + "test[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Cohere\n", + "\n", + "[Cohere](https://dashboard.cohere.ai/) allows you to implement language AI into your product. The `CohereTextVectorizer` makes it simple to use RedisVL with the embedding models at Cohere. For this you will need to install `cohere`.\n", + "\n", + "```bash\n", + "pip install cohere\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "# set up the API key\n", + "api_key = os.environ.get(\"COHERE_API_KEY\") or getpass.getpass(\"Enter your Cohere API key: \")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "Special attention needs to be paid to the `input_type` parameter for each `embed` call. For example, for embedding \n", + "queries, you should set `input_type='search_query'`; for embedding documents, set `input_type='search_document'`. See\n", + "more information [here](https://docs.cohere.com/reference/embed)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.utils.vectorize import CohereTextVectorizer\n", + "\n", + "# create a vectorizer\n", + "co = CohereTextVectorizer(\n", + " model=\"embed-english-v3.0\",\n", + " api_config={\"api_key\": api_key},\n", + ")\n", + "\n", + "# embed a search query\n", + "test = co.embed(\"This is a test sentence.\", input_type='search_query')\n", + "print(\"Vector dimensions: \", len(test))\n", + "print(test[:10])\n", + "\n", + "# embed a document\n", + "test = co.embed(\"This is a test sentence.\", input_type='search_document')\n", + "print(\"Vector dimensions: \", len(test))\n", + "print(test[:10])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Learn more about using RedisVL and Cohere together through [this dedicated user guide](https://docs.cohere.com/docs/redis-and-cohere).\n",
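+    "\n",
+    "Batch embedding works the same way. A minimal sketch, assuming `embed_many` accepts the same `input_type` keyword as `embed`:\n",
+    "\n",
+    "```python\n",
+    "# sketch: embed documents in bulk for indexing\n",
+    "doc_embeddings = co.embed_many(\n",
+    "    [\"That is a happy dog\", \"That is a happy person\"],\n",
+    "    input_type='search_document'\n",
+    ")\n",
+    "\n",
+    "# sketch: embed the user query for search\n",
+    "query_embedding = co.embed(\"happy animals\", input_type='search_query')\n",
+    "```"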
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### VoyageAI\n", + "\n", + "[VoyageAI](https://dash.voyageai.com/) allows you to implement language AI into your product. The `VoyageAITextVectorizer` makes it simple to use RedisVL with the embedding models at VoyageAI. For this you will need to install `voyageai`.\n", + "\n", + "```bash\n", + "pip install voyageai\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "# set up the API key\n", + "api_key = os.environ.get(\"VOYAGE_API_KEY\") or getpass.getpass(\"Enter your VoyageAI API key: \")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "Special attention needs to be paid to the `input_type` parameter for each `embed` call. For example, for embedding \n", + "queries, you should set `input_type='query'`; for embedding documents, set `input_type='document'`. See\n", + "more information [here](https://docs.voyageai.com/docs/embeddings)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.utils.vectorize import VoyageAITextVectorizer\n", + "\n", + "# create a vectorizer\n", + "vo = VoyageAITextVectorizer(\n", + " model=\"voyage-law-2\", # Please check the available models at https://docs.voyageai.com/docs/embeddings\n", + " api_config={\"api_key\": api_key},\n", + ")\n", + "\n", + "# embed a search query\n", + "test = vo.embed(\"This is a test sentence.\", input_type='query')\n", + "print(\"Vector dimensions: \", len(test))\n", + "print(test[:10])\n", + "\n", + "# embed a document\n", + "test = vo.embed(\"This is a test sentence.\", input_type='document')\n", + "print(\"Vector dimensions: \", len(test))\n", + "print(test[:10])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Mistral AI\n", + "\n", + "[Mistral](https://console.mistral.ai/) offers LLM and embedding APIs for you to implement into your product. The `MistralAITextVectorizer` makes it simple to use RedisVL with their embeddings model.\n", + "You will need to install `mistralai`.\n", + "\n", + "```bash\n", + "pip install mistralai\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.utils.vectorize import MistralAITextVectorizer\n", + "\n", + "mistral = MistralAITextVectorizer()\n", + "\n", + "# embed a sentence using their asynchronous method\n", + "test = await mistral.aembed(\"This is a test sentence.\")\n", + "print(\"Vector dimensions: \", len(test))\n", + "print(test[:10])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Amazon Bedrock\n", + "\n", + "Amazon Bedrock provides fully managed foundation models for text embeddings. 
Install the required dependencies:\n", + "\n", + "```bash\n", + "pip install 'redisvl[bedrock]' # Installs boto3\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Configure AWS credentials:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import getpass\n", + "\n", + "if \"AWS_ACCESS_KEY_ID\" not in os.environ:\n", + " os.environ[\"AWS_ACCESS_KEY_ID\"] = getpass.getpass(\"Enter AWS Access Key ID: \")\n", + "if \"AWS_SECRET_ACCESS_KEY\" not in os.environ:\n", + " os.environ[\"AWS_SECRET_ACCESS_KEY\"] = getpass.getpass(\"Enter AWS Secret Key: \")\n", + "\n", + "os.environ[\"AWS_REGION\"] = \"us-east-1\" # Change as needed" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Create embeddings:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.utils.vectorize import BedrockTextVectorizer\n", + "\n", + "bedrock = BedrockTextVectorizer(\n", + " model=\"amazon.titan-embed-text-v2:0\"\n", + ")\n", + "\n", + "# Single embedding\n", + "text = \"This is a test sentence.\"\n", + "embedding = bedrock.embed(text)\n", + "print(f\"Vector dimensions: {len(embedding)}\")\n", + "\n", + "# Multiple embeddings\n", + "sentences = [\n", + " \"That is a happy dog\",\n", + " \"That is a happy person\",\n", + " \"Today is a sunny day\"\n", + "]\n", + "embeddings = bedrock.embed_many(sentences)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Custom Vectorizers\n", + "\n", + "RedisVL supports the use of other vectorizers and provides a class to enable compatibility with any function that generates a vector or vectors from string data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.utils.vectorize import CustomTextVectorizer\n", + "\n", + "# a stub embedding function that always returns a constant 768-dim vector\n", + "def generate_embeddings(text_input, **kwargs):\n", + " return [0.101] * 768\n", + "\n", + "custom_vectorizer = CustomTextVectorizer(generate_embeddings)\n", + "\n", + "custom_vectorizer.embed(\"This is a test sentence.\")[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This enables the use of custom vectorizers with other RedisVL components." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.extensions.llmcache import SemanticCache\n", + "\n", + "cache = SemanticCache(name=\"custom_cache\", vectorizer=custom_vectorizer)\n", + "\n", + "cache.store(\"this is a test prompt\", \"this is a test response\")\n", + "cache.check(\"this is also a test prompt\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Search with Provider Embeddings\n", + "\n", + "Now that we've created our embeddings, we can use them to search for similar sentences. 
We will use the same 3 sentences from above as our documents.\n", + "\n", + "First, we need to create the schema for our index.\n", + "\n", + "Here's what the schema for the example looks like in yaml for the HuggingFace vectorizer:\n", + "\n", + "```yaml\n", + "version: '0.1.0'\n", + "\n", + "index:\n", + " name: vectorizers\n", + " prefix: doc\n", + " storage_type: hash\n", + "\n", + "fields:\n", + " - name: text\n", + " type: text\n", + " - name: embedding\n", + " type: vector\n", + " attrs:\n", + " dims: 768\n", + " algorithm: flat\n", + " distance_metric: cosine\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.index import SearchIndex\n", + "\n", + "# construct a search index from the schema\n", + "index = SearchIndex.from_yaml(\"./schema.yaml\")\n", + "\n", + "# connect to local redis instance\n", + "index.connect(\"redis://localhost:6379\")\n", + "\n", + "# create the index (no data yet)\n", + "index.create(overwrite=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# use the CLI to see the created index\n", + "!rvl index listall" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Loading data to RedisVL is easy. It expects a list of dictionaries. The vector is stored as bytes." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.redis.utils import array_to_buffer\n", + "\n", + "embeddings = hf.embed_many(sentences)\n", + "\n", + "data = [{\"text\": t,\n", + " \"embedding\": array_to_buffer(v, dtype=\"float32\")}\n", + " for t, v in zip(sentences, embeddings)]\n", + "\n", + "index.load(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from redisvl.query import VectorQuery\n", + "\n", + "# use the HuggingFace vectorizer again to create a query embedding\n", + "query_embedding = hf.embed(\"That is a happy cat\")\n", + "\n", + "query = VectorQuery(\n", + " vector=query_embedding,\n", + " vector_field_name=\"embedding\",\n", + " return_fields=[\"text\"],\n", + " num_results=3\n", + ")\n", + "\n", + "results = index.query(query)\n", + "for doc in results:\n", + " print(doc[\"text\"], doc[\"vector_distance\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Selecting your float data type\n", + "When embedding text as byte arrays, RedisVL supports 4 different floating point data types: `float16`, `float32`, `float64`, and `bfloat16`.\n", + "The dtype set on your vectorizer must match the dtype defined in your search index. If one is not explicitly set, the default is `float32`.\n",
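+    "\n",
+    "For example, the index side can pin the same dtype on the vector field. A minimal sketch, assuming the `IndexSchema.from_dict` constructor and a `datatype` attribute on vector fields:\n",
+    "\n",
+    "```python\n",
+    "from redisvl.schema import IndexSchema\n",
+    "\n",
+    "# sketch: keep the index-side dtype in agreement with the vectorizer dtype\n",
+    "schema = IndexSchema.from_dict({\n",
+    "    \"index\": {\"name\": \"dtype-demo\", \"prefix\": \"doc\"},\n",
+    "    \"fields\": [\n",
+    "        {\"name\": \"text\", \"type\": \"text\"},\n",
+    "        {\"name\": \"embedding\", \"type\": \"vector\", \"attrs\": {\n",
+    "            \"dims\": 768, \"algorithm\": \"flat\",\n",
+    "            \"distance_metric\": \"cosine\", \"datatype\": \"float16\"}},\n",
+    "    ],\n",
+    "})\n",
+    "```"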
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "vectorizer = HFTextVectorizer(dtype=\"float16\")\n", + "\n", + "# subsequent calls to embed('', as_buffer=True) and embed_many('', as_buffer=True) will now encode as float16\n", + "float16_bytes = vectorizer.embed('test sentence', as_buffer=True)\n", + "\n", + "# you can override this setting on each individual method call\n", + "float64_bytes = vectorizer.embed('test sentence', as_buffer=True, dtype=\"float64\")\n", + "\n", + "float16_bytes != float64_bytes" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "# cleanup\n", + "index.delete()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "dist = max(i for i in range(10))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dist" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/docs/user_guide/hash_vs_json_05.ipynb b/docs/user_guide/05_hash_vs_json.ipynb similarity index 100% rename from docs/user_guide/hash_vs_json_05.ipynb rename to docs/user_guide/05_hash_vs_json.ipynb diff --git a/docs/user_guide/rerankers_06.ipynb b/docs/user_guide/06_rerankers.ipynb similarity index 100% rename from docs/user_guide/rerankers_06.ipynb rename to docs/user_guide/06_rerankers.ipynb diff --git a/docs/user_guide/session_manager_07.ipynb b/docs/user_guide/07_session_manager.ipynb similarity index 100% rename from docs/user_guide/session_manager_07.ipynb rename to docs/user_guide/07_session_manager.ipynb diff --git a/docs/user_guide/semantic_router_08.ipynb b/docs/user_guide/08_semantic_router.ipynb similarity index 85% rename from docs/user_guide/semantic_router_08.ipynb rename to docs/user_guide/08_semantic_router.ipynb index bfe8b193..ab3bf2cc 100644 --- a/docs/user_guide/semantic_router_08.ipynb +++ b/docs/user_guide/08_semantic_router.ipynb @@ -25,7 +25,9 @@ "\n", "Each route has a set of references that cover the \"semantic surface area\" of the\n", "route. The incoming query from a user needs to be semantically similar to one or\n", - "more of the references in order to \"match\" on the route." + "more of the references in order to \"match\" on the route.\n", + "\n", + "Additionally, each route has a `distance_threshold` which determines the maximum distance between the query and the reference for the query to be routed to the route. This value is unique to each route." 
] }, { @@ -45,7 +47,8 @@ " \"tell me about the newest gadgets\",\n", " \"what's trending in tech?\"\n", " ],\n", - " metadata={\"category\": \"tech\", \"priority\": 1}\n", + " metadata={\"category\": \"tech\", \"priority\": 1},\n", + " distance_threshold=1.0\n", ")\n", "\n", "sports = Route(\n", @@ -57,7 +60,8 @@ " \"sports\",\n", " \"basketball and football\"\n", " ],\n", - " metadata={\"category\": \"sports\", \"priority\": 2}\n", + " metadata={\"category\": \"sports\", \"priority\": 2},\n", + " distance_threshold=0.5\n", ")\n", "\n", "entertainment = Route(\n", @@ -67,7 +71,8 @@ " \"who won the best actor award?\",\n", " \"what's new in the entertainment industry?\"\n", " ],\n", - " metadata={\"category\": \"entertainment\", \"priority\": 3}\n", + " metadata={\"category\": \"entertainment\", \"priority\": 3},\n", + " distance_threshold=0.7\n", ")\n" ] }, @@ -85,7 +90,25 @@ "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages/huggingface_hub/file_download.py:1142: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", + " warnings.warn(\n", + "/Users/robert.shelton/.pyenv/versions/3.11.9/lib/python3.11/site-packages/huggingface_hub/file_download.py:1142: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "14:07:31 redisvl.index.index INFO Index already exists, overwriting.\n" + ] + } + ], "source": [ "import os\n", "from redisvl.extensions.router import SemanticRouter\n", @@ -171,7 +194,7 @@ { "data": { "text/plain": [ - "RouteMatch(name='technology', distance=0.119614243507)" + "RouteMatch(name='technology', distance=0.119614303112)" ] }, "execution_count": 5, @@ -215,7 +238,7 @@ { "data": { "text/plain": [ - "RouteMatch(name='sports', distance=0.554210186005)" + "RouteMatch(name=None, distance=None)" ] }, "execution_count": 7, @@ -225,7 +248,7 @@ ], "source": [ "# Toggle the runtime distance threshold\n", - "route_match = router(\"Which basketball team will win the NBA finals?\", distance_threshold=0.7)\n", + "route_match = router(\"Which basketball team will win the NBA finals?\")\n", "route_match" ] }, @@ -244,9 +267,7 @@ { "data": { "text/plain": [ - "[RouteMatch(name='sports', distance=0.758580708504),\n", - " RouteMatch(name='entertainment', distance=0.812423825264),\n", - " RouteMatch(name='technology', distance=0.88423516353)]" + "[]" ] }, "execution_count": 8, @@ -256,7 +277,7 @@ ], "source": [ "# Perform multi-class classification with route_many() -- toggle the max_k and the distance_threshold\n", - "route_matches = router.route_many(\"Lebron James\", distance_threshold=1.0, max_k=3)\n", + "route_matches = router.route_many(\"Lebron James\", max_k=3)\n", "route_matches" ] }, @@ -268,9 +289,7 @@ { "data": { "text/plain": [ - "[RouteMatch(name='sports', distance=0.663253903389),\n", - " RouteMatch(name='entertainment', distance=0.712985396385),\n", - " RouteMatch(name='technology', distance=0.832674384117)]" + "[]" ] }, "execution_count": 9, @@ -282,7 +301,7 @@ "# Toggle the aggregation method -- note the different distances in the 
result\n", "from redisvl.extensions.router.schema import DistanceAggregationMethod\n", "\n", - "route_matches = router.route_many(\"Lebron James\", aggregation_method=DistanceAggregationMethod.min, distance_threshold=1.0, max_k=3)\n", + "route_matches = router.route_many(\"Lebron James\", aggregation_method=DistanceAggregationMethod.min, max_k=3)\n", "route_matches" ] }, @@ -309,7 +328,7 @@ "from redisvl.extensions.router import RoutingConfig\n", "\n", "router.update_routing_config(\n", - " RoutingConfig(distance_threshold=1.0, aggregation_method=DistanceAggregationMethod.min, max_k=3)\n", + " RoutingConfig(aggregation_method=DistanceAggregationMethod.min, max_k=3)\n", ")" ] }, @@ -321,9 +340,7 @@ { "data": { "text/plain": [ - "[RouteMatch(name='sports', distance=0.663253903389),\n", - " RouteMatch(name='entertainment', distance=0.712985396385),\n", - " RouteMatch(name='technology', distance=0.832674384117)]" + "[]" ] }, "execution_count": 11, @@ -356,22 +373,25 @@ " 'references': ['what are the latest advancements in AI?',\n", " 'tell me about the newest gadgets',\n", " \"what's trending in tech?\"],\n", - " 'metadata': {'category': 'tech', 'priority': '1'}},\n", + " 'metadata': {'category': 'tech', 'priority': '1'},\n", + " 'distance_threshold': 1.0},\n", " {'name': 'sports',\n", " 'references': ['who won the game last night?',\n", " 'tell me about the upcoming sports events',\n", " \"what's the latest in the world of sports?\",\n", " 'sports',\n", " 'basketball and football'],\n", - " 'metadata': {'category': 'sports', 'priority': '2'}},\n", + " 'metadata': {'category': 'sports', 'priority': '2'},\n", + " 'distance_threshold': 0.5},\n", " {'name': 'entertainment',\n", " 'references': ['what are the top movies right now?',\n", " 'who won the best actor award?',\n", " \"what's new in the entertainment industry?\"],\n", - " 'metadata': {'category': 'entertainment', 'priority': '3'}}],\n", + " 'metadata': {'category': 'entertainment', 'priority': '3'},\n", + " 'distance_threshold': 0.7}],\n", " 'vectorizer': {'type': 'hf',\n", " 'model': 'sentence-transformers/all-mpnet-base-v2'},\n", - " 'routing_config': {'distance_threshold': 1.0,\n", + " 'routing_config': {'distance_threshold': 0.5,\n", " 'max_k': 3,\n", " 'aggregation_method': 'min'}}" ] @@ -394,7 +414,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "15:16:28 redisvl.index.index INFO Index already exists, not overwriting.\n" + "14:07:34 redisvl.index.index INFO Index already exists, not overwriting.\n" ] } ], @@ -415,14 +435,14 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "15:17:42 redisvl.index.index INFO Index already exists, not overwriting.\n" + "14:07:34 redisvl.index.index INFO Index already exists, not overwriting.\n" ] } ], @@ -441,7 +461,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -451,7 +471,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -462,7 +482,7 @@ ], "metadata": { "kernelspec": { - "display_name": "rvl", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -476,7 +496,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.14" + "version": "3.11.9" }, "orig_nbformat": 4 }, diff --git a/docs/user_guide/index.md b/docs/user_guide/index.md index 
e5633ba6..b6592e3d 100644 --- a/docs/user_guide/index.md +++ b/docs/user_guide/index.md @@ -12,12 +12,12 @@ User guides provide helpful resources for using RedisVL and its different compon :caption: User Guides :maxdepth: 2 -getting_started_01 -hybrid_queries_02 -llmcache_03 -vectorizers_04 -hash_vs_json_05 -rerankers_06 -session_manager_07 -semantic_router_08 +01_getting_started +02_hybrid_queries +03_llmcache +04_vectorizers +05_hash_vs_json +06_rerankers +07_session_manager +08_semantic_router ``` diff --git a/docs/user_guide/router.yaml b/docs/user_guide/router.yaml index ec0453c4..38aecfdd 100644 --- a/docs/user_guide/router.yaml +++ b/docs/user_guide/router.yaml @@ -8,6 +8,7 @@ routes: metadata: category: tech priority: '1' + distance_threshold: 1.0 - name: sports references: - who won the game last night? @@ -18,6 +19,7 @@ routes: metadata: category: sports priority: '2' + distance_threshold: 0.5 - name: entertainment references: - what are the top movies right now? @@ -26,10 +28,11 @@ routes: metadata: category: entertainment priority: '3' + distance_threshold: 0.7 vectorizer: type: hf model: sentence-transformers/all-mpnet-base-v2 routing_config: - distance_threshold: 1.0 + distance_threshold: 0.5 max_k: 3 aggregation_method: min diff --git a/docs/user_guide/vectorizers_04.ipynb b/docs/user_guide/vectorizers_04.ipynb deleted file mode 100644 index 5f6997a1..00000000 --- a/docs/user_guide/vectorizers_04.ipynb +++ /dev/null @@ -1,974 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Vectorizers\n", - "\n", - "In this notebook, we will show how to use RedisVL to create embeddings using the built-in text embedding vectorizers. Today RedisVL supports:\n", - "1. OpenAI\n", - "2. HuggingFace\n", - "3. Vertex AI\n", - "4. Cohere\n", - "5. Mistral AI\n", - "6. Amazon Bedrock\n", - "7. Bringing your own vectorizer\n", - "8. VoyageAI\n", - "\n", - "Before running this notebook, be sure to\n", - "1. Have installed ``redisvl`` and have that environment active for this notebook.\n", - "2. Have a running Redis Stack instance with RediSearch > 2.4 active.\n", - "\n", - "For example, you can run Redis Stack locally with Docker:\n", - "\n", - "```bash\n", - "docker run -d -p 6379:6379 -p 8001:8001 redis/redis-stack:latest\n", - "```\n", - "\n", - "This will run Redis on port 6379 and RedisInsight at http://localhost:8001." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# import necessary modules\n", - "import os" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Creating Text Embeddings\n", - "\n", - "This example will show how to create an embedding from 3 simple sentences with a number of different text vectorizers in RedisVL.\n", - "\n", - "- \"That is a happy dog\"\n", - "- \"That is a happy person\"\n", - "- \"Today is a nice day\"\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### OpenAI\n", - "\n", - "The ``OpenAITextVectorizer`` makes it simple to use RedisVL with the embeddings models at OpenAI. For this you will need to install ``openai``. 
\n", - "\n", - "```bash\n", - "pip install openai\n", - "```\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "import getpass\n", - "\n", - "# setup the API Key\n", - "api_key = os.environ.get(\"OPENAI_API_KEY\") or getpass.getpass(\"Enter your OpenAI API key: \")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Vector dimensions: 1536\n" - ] - }, - { - "data": { - "text/plain": [ - "[-0.0010508307022973895,\n", - " -0.0031670420430600643,\n", - " 0.0023781107738614082,\n", - " -0.004539588466286659,\n", - " -0.010320774279534817,\n", - " 0.012868634425103664,\n", - " -0.0054513863287866116,\n", - " -0.002984359161928296,\n", - " -0.0072814482264220715,\n", - " -0.033704183995723724]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from redisvl.utils.vectorize import OpenAITextVectorizer\n", - "\n", - "# create a vectorizer\n", - "oai = OpenAITextVectorizer(\n", - " model=\"text-embedding-ada-002\",\n", - " api_config={\"api_key\": api_key},\n", - ")\n", - "\n", - "test = oai.embed(\"This is a test sentence.\")\n", - "print(\"Vector dimensions: \", len(test))\n", - "test[:10]" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[-0.01749197021126747,\n", - " -5.238811718299985e-05,\n", - " 0.0013331907102838159,\n", - " -0.025576923042535782,\n", - " -0.019907286390662193,\n", - " 0.016106342896819115,\n", - " -0.003756451653316617,\n", - " 0.0009971122490242124,\n", - " 0.006661186460405588,\n", - " -0.024954024702310562]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Create many embeddings at once\n", - "sentences = [\n", - " \"That is a happy dog\",\n", - " \"That is a happy person\",\n", - " \"Today is a sunny day\"\n", - "]\n", - "\n", - "embeddings = oai.embed_many(sentences)\n", - "embeddings[0][:10]" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of Embeddings: 3\n" - ] - } - ], - "source": [ - "# openai also supports asyncronous requests, which we can use to speed up the vectorization process.\n", - "embeddings = await oai.aembed_many(sentences)\n", - "print(\"Number of Embeddings:\", len(embeddings))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Azure OpenAI\n", - "\n", - "The ``AzureOpenAITextVectorizer`` is a variation of the OpenAI vectorizer that calls OpenAI models within Azure. If you've already installed ``openai``, then you're ready to use Azure OpenAI.\n", - "\n", - "The only practical difference between OpenAI and Azure OpenAI is the variables required to call the API." 
- ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# additionally to the API Key, setup the API endpoint and version\n", - "api_key = os.environ.get(\"AZURE_OPENAI_API_KEY\") or getpass.getpass(\"Enter your AzureOpenAI API key: \")\n", - "api_version = os.environ.get(\"OPENAI_API_VERSION\") or getpass.getpass(\"Enter your AzureOpenAI API version: \")\n", - "azure_endpoint = os.environ.get(\"AZURE_OPENAI_ENDPOINT\") or getpass.getpass(\"Enter your AzureOpenAI API endpoint: \")\n", - "deployment_name = os.environ.get(\"AZURE_OPENAI_DEPLOYMENT_NAME\", \"text-embedding-ada-002\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Vector dimensions: 1536\n" - ] - }, - { - "data": { - "text/plain": [ - "[-0.0010088568087667227,\n", - " -0.003142790636047721,\n", - " 0.0024922797456383705,\n", - " -0.004522906616330147,\n", - " -0.010369433090090752,\n", - " 0.012739036232233047,\n", - " -0.005365503951907158,\n", - " -0.0029668458737432957,\n", - " -0.007141091860830784,\n", - " -0.03383301943540573]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from redisvl.utils.vectorize import AzureOpenAITextVectorizer\n", - "\n", - "# create a vectorizer\n", - "az_oai = AzureOpenAITextVectorizer(\n", - " model=deployment_name, # Must be your CUSTOM deployment name\n", - " api_config={\n", - " \"api_key\": api_key,\n", - " \"api_version\": api_version,\n", - " \"azure_endpoint\": azure_endpoint\n", - " },\n", - ")\n", - "\n", - "test = az_oai.embed(\"This is a test sentence.\")\n", - "print(\"Vector dimensions: \", len(test))\n", - "test[:10]" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[-0.017460526898503304,\n", - " -6.895032856846228e-05,\n", - " 0.0013909287517890334,\n", - " -0.025688467547297478,\n", - " -0.019813183695077896,\n", - " 0.016087085008621216,\n", - " -0.003729278687387705,\n", - " 0.0009211922879330814,\n", - " 0.006606514099985361,\n", - " -0.025128915905952454]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Just like OpenAI, AzureOpenAI supports batching embeddings and asynchronous requests.\n", - "sentences = [\n", - " \"That is a happy dog\",\n", - " \"That is a happy person\",\n", - " \"Today is a sunny day\"\n", - "]\n", - "\n", - "embeddings = await az_oai.aembed_many(sentences)\n", - "embeddings[0][:10]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Huggingface\n", - "\n", - "[Huggingface](https://huggingface.co/models) is a popular NLP platform that has a number of pre-trained models you can use off the shelf. RedisVL supports using Huggingface \"Sentence Transformers\" to create embeddings from text. 
To use Huggingface, you will need to install the ``sentence-transformers`` library.\n", - "\n", - "```bash\n", - "pip install sentence-transformers\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[0.0003780885017476976,\n", - " -0.05080340430140495,\n", - " -0.035147231072187424,\n", - " -0.02325103059411049,\n", - " -0.04415831342339516,\n", - " 0.02048780582845211,\n", - " 0.0014618589775636792,\n", - " 0.03126184269785881,\n", - " 0.05605152249336243,\n", - " 0.018815429881215096]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n", - "from redisvl.utils.vectorize import HFTextVectorizer\n", - "\n", - "\n", - "# create a vectorizer\n", - "# choose your model from the huggingface website\n", - "hf = HFTextVectorizer(model=\"sentence-transformers/all-mpnet-base-v2\")\n", - "\n", - "# embed a sentence\n", - "test = hf.embed(\"This is a test sentence.\")\n", - "test[:10]" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "# You can also create many embeddings at once\n", - "embeddings = hf.embed_many(sentences, as_buffer=True, dtype=\"float32\")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### VertexAI\n", - "\n", - "[VertexAI](https://cloud.google.com/vertex-ai/docs/generative-ai/embeddings/get-text-embeddings) is GCP's fully-featured AI platform including a number of pretrained LLMs. RedisVL supports using VertexAI to create embeddings from these models. To use VertexAI, you will first need to install the ``google-cloud-aiplatform`` library.\n", - "\n", - "```bash\n", - "pip install google-cloud-aiplatform>=1.26\n", - "```\n", - "\n", - "1. Then you need to gain access to a [Google Cloud Project](https://cloud.google.com/gcp?hl=en) and provide [access to credentials](https://cloud.google.com/docs/authentication/application-default-credentials). This is accomplished by setting the `GOOGLE_APPLICATION_CREDENTIALS` environment variable pointing to the path of a JSON key file downloaded from your service account on GCP.\n", - "2. 
Lastly, you need to find your [project ID](https://support.google.com/googleapi/answer/7014113?hl=en) and [geographic region for VertexAI](https://cloud.google.com/vertex-ai/docs/general/locations).\n", - "\n", - "\n", - "**Make sure the following env vars are set:**\n", - "\n", - "```\n", - "GOOGLE_APPLICATION_CREDENTIALS=\n", - "GCP_PROJECT_ID=\n", - "GCP_LOCATION=\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[0.04373306408524513,\n", - " -0.05040992051362991,\n", - " -0.011946038343012333,\n", - " -0.043528858572244644,\n", - " 0.021510830149054527,\n", - " 0.028604144230484962,\n", - " 0.014770914800465107,\n", - " -0.01610461436212063,\n", - " -0.0036560404114425182,\n", - " 0.013746795244514942]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from redisvl.utils.vectorize import VertexAITextVectorizer\n", - "\n", - "\n", - "# create a vectorizer\n", - "vtx = VertexAITextVectorizer(api_config={\n", - " \"project_id\": os.environ.get(\"GCP_PROJECT_ID\") or getpass.getpass(\"Enter your GCP Project ID: \"),\n", - " \"location\": os.environ.get(\"GCP_LOCATION\") or getpass.getpass(\"Enter your GCP Location: \"),\n", - " \"google_application_credentials\": os.environ.get(\"GOOGLE_APPLICATION_CREDENTIALS\") or getpass.getpass(\"Enter your Google App Credentials path: \")\n", - "})\n", - "\n", - "# embed a sentence\n", - "test = vtx.embed(\"This is a test sentence.\")\n", - "test[:10]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Cohere\n", - "\n", - "[Cohere](https://dashboard.cohere.ai/) allows you to implement language AI into your product. The `CohereTextVectorizer` makes it simple to use RedisVL with the embeddings models at Cohere. For this you will need to install `cohere`.\n", - "\n", - "```bash\n", - "pip install cohere\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "import getpass\n", - "# setup the API Key\n", - "api_key = os.environ.get(\"COHERE_API_KEY\") or getpass.getpass(\"Enter your Cohere API key: \")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "Special attention needs to be paid to the `input_type` parameter for each `embed` call. For example, for embedding \n", - "queries, you should set `input_type='search_query'`; for embedding documents, set `input_type='search_document'`. 
See\n", - "more information [here](https://docs.cohere.com/reference/embed)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Vector dimensions: 1024\n", - "[-0.010856628, -0.019683838, -0.0062179565, 0.003545761, -0.047943115, 0.0009365082, -0.005924225, 0.016174316, -0.03289795, 0.049194336]\n", - "Vector dimensions: 1024\n", - "[-0.009712219, -0.016036987, 2.8073788e-05, -0.022491455, -0.041259766, 0.002281189, -0.033294678, -0.00057029724, -0.026260376, 0.0579834]\n" - ] - } - ], - "source": [ - "from redisvl.utils.vectorize import CohereTextVectorizer\n", - "\n", - "# create a vectorizer\n", - "co = CohereTextVectorizer(\n", - " model=\"embed-english-v3.0\",\n", - " api_config={\"api_key\": api_key},\n", - ")\n", - "\n", - "# embed a search query\n", - "test = co.embed(\"This is a test sentence.\", input_type='search_query')\n", - "print(\"Vector dimensions: \", len(test))\n", - "print(test[:10])\n", - "\n", - "# embed a document\n", - "test = co.embed(\"This is a test sentence.\", input_type='search_document')\n", - "print(\"Vector dimensions: \", len(test))\n", - "print(test[:10])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Learn more about using RedisVL and Cohere together through [this dedicated user guide](https://docs.cohere.com/docs/redis-and-cohere)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### VoyageAI\n", - "\n", - "[VoyageAI](https://dash.voyageai.com/) allows you to implement language AI into your product. The `VoyageAITextVectorizer` makes it simple to use RedisVL with the embeddings models at VoyageAI. For this you will need to install `voyageai`.\n", - "\n", - "```bash\n", - "pip install voyageai\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "import getpass\n", - "# setup the API Key\n", - "api_key = os.environ.get(\"VOYAGE_API_KEY\") or getpass.getpass(\"Enter your VoyageAI API key: \")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "Special attention needs to be paid to the `input_type` parameter for each `embed` call. For example, for embedding \n", - "queries, you should set `input_type='query'`; for embedding documents, set `input_type='document'`. 
See\n", - "more information [here](https://docs.voyageai.com/docs/embeddings)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Vector dimensions: 1024\n", - "[0.015814896672964096, 0.046988241374492645, -0.00518248463049531, -0.05383478105068207, -0.015586535446345806, -0.0837097093462944, 0.03744547441601753, -0.007797810714691877, 0.00717928446829319, 0.06857716292142868]\n", - "Vector dimensions: 1024\n", - "[0.006725038401782513, 0.01441393606364727, -0.030212024226784706, -0.06782275438308716, -0.021446991711854935, -0.07667966187000275, 0.01804908737540245, -0.015767497941851616, -0.02152789570391178, 0.049741245806217194]\n" - ] - } - ], - "source": [ - "from redisvl.utils.vectorize import VoyageAITextVectorizer\n", - "\n", - "# create a vectorizer\n", - "vo = VoyageAITextVectorizer(\n", - " model=\"voyage-law-2\", # Please check the available models at https://docs.voyageai.com/docs/embeddings\n", - " api_config={\"api_key\": api_key},\n", - ")\n", - "\n", - "# embed a search query\n", - "test = vo.embed(\"This is a test sentence.\", input_type='query')\n", - "print(\"Vector dimensions: \", len(test))\n", - "print(test[:10])\n", - "\n", - "# embed a document\n", - "test = vo.embed(\"This is a test sentence.\", input_type='document')\n", - "print(\"Vector dimensions: \", len(test))\n", - "print(test[:10])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Mistral AI\n", - "\n", - "[Mistral](https://console.mistral.ai/) offers LLM and embedding APIs for you to implement into your product. The `MistralAITextVectorizer` makes it simple to use RedisVL with their embeddings model.\n", - "You will need to install `mistralai`.\n", - "\n", - "```bash\n", - "pip install mistralai\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Vector dimensions: 1024\n", - "[-0.02801513671875, 0.02532958984375, 0.04278564453125, 0.0185699462890625, 0.041015625, 0.006053924560546875, 0.03607177734375, -0.0030155181884765625, 0.0033893585205078125, -0.01390838623046875]\n" - ] - } - ], - "source": [ - "from redisvl.utils.vectorize import MistralAITextVectorizer\n", - "\n", - "mistral = MistralAITextVectorizer()\n", - "\n", - "# embed a sentence using their asyncronous method\n", - "test = await mistral.aembed(\"This is a test sentence.\")\n", - "print(\"Vector dimensions: \", len(test))\n", - "print(test[:10])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Amazon Bedrock\n", - "\n", - "Amazon Bedrock provides fully managed foundation models for text embeddings. 
Install the required dependencies:\n", - "\n", - "```bash\n", - "pip install 'redisvl[bedrock]' # Installs boto3\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Configure AWS credentials:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import getpass\n", - "\n", - "if \"AWS_ACCESS_KEY_ID\" not in os.environ:\n", - " os.environ[\"AWS_ACCESS_KEY_ID\"] = getpass.getpass(\"Enter AWS Access Key ID: \")\n", - "if \"AWS_SECRET_ACCESS_KEY\" not in os.environ:\n", - " os.environ[\"AWS_SECRET_ACCESS_KEY\"] = getpass.getpass(\"Enter AWS Secret Key: \")\n", - "\n", - "os.environ[\"AWS_REGION\"] = \"us-east-1\" # Change as needed" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Create embeddings:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Vector dimensions: 1024\n" - ] - } - ], - "source": [ - "from redisvl.utils.vectorize import BedrockTextVectorizer\n", - "\n", - "bedrock = BedrockTextVectorizer(\n", - " model=\"amazon.titan-embed-text-v2:0\"\n", - ")\n", - "\n", - "# Single embedding\n", - "text = \"This is a test sentence.\"\n", - "embedding = bedrock.embed(text)\n", - "print(f\"Vector dimensions: {len(embedding)}\")\n", - "\n", - "# Multiple embeddings\n", - "sentences = [\n", - " \"That is a happy dog\",\n", - " \"That is a happy person\",\n", - " \"Today is a sunny day\"\n", - "]\n", - "embeddings = bedrock.embed_many(sentences)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Custom Vectorizers\n", - "\n", - "RedisVL supports the use of other vectorizers and provides the `CustomTextVectorizer` class for compatibility with any function that generates a vector or vectors from string data." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[0.101, 0.101, 0.101, 0.101, 0.101, 0.101, 0.101, 0.101, 0.101, 0.101]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from redisvl.utils.vectorize import CustomTextVectorizer\n", - "\n", - "def generate_embeddings(text_input, **kwargs):\n", - " return [0.101] * 768\n", - "\n", - "custom_vectorizer = CustomTextVectorizer(generate_embeddings)\n", - "\n", - "custom_vectorizer.embed(\"This is a test sentence.\")[:10]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This enables the use of custom vectorizers with other RedisVL components, such as the `SemanticCache` shown below." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "11:04:14 redisvl.index.index INFO Index already exists, not overwriting.\n" - ] - }, - { - "data": { - "text/plain": [ - "[{'id': 'llmcache:78bd2446a37a0c6ab62652af9b7e53845145c4471ea83ff9fb4280a528d36bbb',\n", - " 'vector_distance': '6.13927841187e-06',\n", - " 'prompt': 'this is a test prompt',\n", - " 'response': 'this is a test response',\n", - " 'prompt_vector': 
'\\x17=\\x17=\\x17=\\x17= ... [long run of repeated vector bytes truncated for readability]'}]" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from redisvl.extensions.llmcache import SemanticCache\n", - "\n", - "cache = SemanticCache(name=\"custom_cache\", vectorizer=custom_vectorizer)\n", - "\n", - "cache.store(\"this is a test prompt\", \"this is a test response\")\n", - "cache.check(\"this is also a test prompt\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Search with Provider Embeddings\n", - "\n", - "Now that we've created our embeddings, we can use them to search for similar sentences. We will reuse the same three sentences from above.\n", - "\n", - "First, we need to create the schema for our index.\n", - "\n", - "Here's what the example schema looks like in YAML for the HuggingFace vectorizer:\n", - "\n", - "```yaml\n", - "version: '0.1.0'\n", - "\n", - "index:\n", - " name: vectorizers\n", - " prefix: doc\n", - " storage_type: hash\n", - "\n", - "fields:\n", - " - name: text\n", - " type: text\n", - " - name: embedding\n", - " type: vector\n", - " attrs:\n", - " dims: 768\n", - " algorithm: flat\n", - " distance_metric: cosine\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "from redisvl.index import SearchIndex\n", - "\n", - "# construct a search index from the schema\n", - "index = SearchIndex.from_yaml(\"./schema.yaml\")\n", - "\n", - "# connect to local redis instance\n", - "index.connect(\"redis://localhost:6379\")\n", - "\n", - "# create the index (no data yet)\n", - "index.create(overwrite=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[32m20:22:42\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m Indices:\n", - "\u001b[32m20:22:42\u001b[0m \u001b[34m[RedisVL]\u001b[0m \u001b[1;30mINFO\u001b[0m 1. vectorizers\n" - ] - } - ], - "source": [ - "# use the CLI to see the created index\n", - "!rvl index listall" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Loading data into RedisVL is easy. It expects a list of dictionaries, with each vector stored as bytes." 
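As a rough sketch of what happens under the hood (our illustration, not necessarily the library's exact implementation), a helper like `array_to_buffer` packs a list of floats into a contiguous byte string with NumPy so it can live in a Redis hash field:

```python
import numpy as np

# Hypothetical stand-in for redisvl.redis.utils.array_to_buffer:
# serialize a list of floats into packed bytes for a Redis hash field.
def to_float_buffer(vec, dtype="float32"):
    return np.array(vec, dtype=dtype).tobytes()

buf = to_float_buffer([0.1, 0.2, 0.3])
print(len(buf))  # 12 -> three float32 values at 4 bytes each
```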
- ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['doc:529c6d58da9e4be4a29dd0481f59c286',\n", - " 'doc:81aa1ef8a9494b299e8593548d0af34a',\n", - " 'doc:6ab03d6da8f041ffa3fdb83996d3b297']" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from redisvl.redis.utils import array_to_buffer\n", - "\n", - "embeddings = hf.embed_many(sentences)\n", - "\n", - "data = [{\"text\": t,\n", - " \"embedding\": array_to_buffer(v, dtype=\"float32\")}\n", - " for t, v in zip(sentences, embeddings)]\n", - "\n", - "index.load(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "That is a happy dog 0.160862207413\n", - "That is a happy dog 0.160862207413\n", - "That is a happy person 0.273598313332\n" - ] - } - ], - "source": [ - "from redisvl.query import VectorQuery\n", - "\n", - "# use the HuggingFace vectorizer again to create a query embedding\n", - "query_embedding = hf.embed(\"That is a happy cat\")\n", - "\n", - "query = VectorQuery(\n", - " vector=query_embedding,\n", - " vector_field_name=\"embedding\",\n", - " return_fields=[\"text\"],\n", - " num_results=3\n", - ")\n", - "\n", - "results = index.query(query)\n", - "for doc in results:\n", - " print(doc[\"text\"], doc[\"vector_distance\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Selecting your float data type\n", - "When embedding text as byte arrays RedisVL supports 4 different floating point data types, `float16`, `float32`, `float64` and `bfloat16`.\n", - "Your dtype set for your vectorizer must match what is defined in your search index. If one is not explicitly set the default is `float32`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "vectorizer = HFTextVectorizer(dtype=\"float16\")\n", - "\n", - "# subsequent calls to embed('', as_buffer=True) and embed_many('', as_buffer=True) will now encode as float16\n", - "float16_bytes = vectorizer.embed('test sentence', as_buffer=True)\n", - "\n", - "# you can override this setting on each individual method call\n", - "float64_bytes = vectorizer.embed('test sentence', as_buffer=True, dtype=\"float64\")\n", - "\n", - "float16_bytes != float64_bytes" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "# cleanup\n", - "index.delete()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "redisvl-dev", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.2" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/poetry.lock b/poetry.lock index b3f9f669..75c221b8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -6253,4 +6253,4 @@ voyageai = ["voyageai"] [metadata] lock-version = "2.1" python-versions = ">=3.9,<3.14" -content-hash = "e4a76982923cc6e4d9da48a4f924a1f6dad4df913db1195f78a22674eaf04eaa" +content-hash = "e4a76982923cc6e4d9da48a4f924a1f6dad4df913db1195f78a22674eaf04eaa" \ No newline at end of file diff --git a/redisvl/extensions/router/schema.py b/redisvl/extensions/router/schema.py index 6de61fb0..9c0d24c6 100644 --- a/redisvl/extensions/router/schema.py +++ b/redisvl/extensions/router/schema.py @@ -16,7 +16,7 @@ class Route(BaseModel): """List of reference phrases for the route.""" metadata: Dict[str, str] = Field(default={}) """Metadata associated with the route.""" - distance_threshold: Optional[float] = Field(default=None) + distance_threshold: float = Field(default=0.5) """Distance threshold for matching the route.""" @validator("name") @@ -63,14 +63,21 @@ class DistanceAggregationMethod(Enum): class RoutingConfig(BaseModel): """Configuration for routing behavior.""" - distance_threshold: float = Field(default=0.5) + # distance_threshold: float = Field(default=0.5) """The threshold for semantic distance.""" max_k: int = Field(default=1) - """The maximum number of top matches to return.""" + + """Aggregation method to use to classify queries.""" aggregation_method: DistanceAggregationMethod = Field( default=DistanceAggregationMethod.avg ) - """Aggregation method to use to classify queries.""" + + """The maximum number of top matches to return.""" + distance_threshold: float = Field( + default=0.5, + deprecated=True, + description="Global distance threshold is deprecated all distance_thresholds now apply at route level.", + ) @validator("max_k") def max_k_must_be_positive(cls, v): @@ -78,12 +85,6 @@ def max_k_must_be_positive(cls, v): raise ValueError("max_k must be a positive integer") return v - @validator("distance_threshold") - def distance_threshold_must_be_valid(cls, v): - if v <= 0 or v > 1: - raise ValueError("distance_threshold must be between 0 and 1") - return v - class SemanticRouterIndexSchema(IndexSchema): """Customized index schema for SemanticRouter.""" diff --git 
a/redisvl/extensions/router/semantic.py b/redisvl/extensions/router/semantic.py index c9a85547..8a349f46 100644 --- a/redisvl/extensions/router/semantic.py +++ b/redisvl/extensions/router/semantic.py @@ -215,6 +215,17 @@ def _process_route(self, result: Dict[str, Any]) -> RouteMatch: name=route_dict["route_name"], distance=float(route_dict["distance"]) ) + def _distance_threshold_filter(self) -> str: + """Build a filter that applies distance thresholds on a route-by-route basis.""" + filter_expression = "" + for i, route in enumerate(self.routes): + filter_str = f"(@route_name == '{route.name}' && @distance < {route.distance_threshold})" + if i > 0: + filter_expression += " || " + filter_expression += filter_str + + return filter_expression + def _build_aggregate_request( self, vector_range_query: RangeQuery, @@ -241,15 +252,26 @@ def _build_aggregate_request( .dialect(2) ) + filter_expression = self._distance_threshold_filter() + + aggregate_request.filter(filter_expression) + return aggregate_request - def _classify_route( + def _get_route_matches( self, vector: List[float], - distance_threshold: float, aggregation_method: DistanceAggregationMethod, - ) -> RouteMatch: - """Classify to a single route using a vector.""" + max_k: int = 1, + ) -> List[RouteMatch]: + """Get the route matches for a given vector and aggregation method.""" + + # bound the range query by the widest route threshold; per-route thresholds are applied in the aggregation filter + thresholds = [route.distance_threshold for route in self.routes] + if thresholds: + distance_threshold = max(thresholds) + else: + raise ValueError("No distance thresholds provided for the semantic router") + vector_range_query = RangeQuery( vector=vector, vector_field_name=ROUTE_VECTOR_FIELD_NAME, @@ -258,7 +280,7 @@ ) aggregate_request = self._build_aggregate_request( - vector_range_query, aggregation_method, max_k=1 + vector_range_query, aggregation_method, max_k=max_k ) try: @@ -273,103 +295,63 @@ raise e # process aggregation results into route matches - route_matches = [ + return [ self._process_route(route_match) for route_match in aggregation_result.rows ] + def _classify_route( + self, + vector: List[float], + aggregation_method: DistanceAggregationMethod, + ) -> RouteMatch: + """Classify to a single route using a vector.""" + + # matches are already filtered by each route's own distance threshold + route_matches = self._get_route_matches(vector, aggregation_method) + + if not route_matches: + return RouteMatch() + # process route matches - if route_matches: - top_route_match = route_matches[0] - if top_route_match.name is not None: - if route := self.get(top_route_match.name): - # use the matched route's distance threshold - _distance_threshold = route.distance_threshold or distance_threshold - if self._pass_threshold(top_route_match, _distance_threshold): - return top_route_match - else: - raise ValueError( - f"{top_route_match.name} not a supported route for the {self.name} semantic router." - ) + top_route_match = route_matches[0] - # fallback to empty route match if no hits - return RouteMatch() + if top_route_match.name is not None: + return top_route_match + else: + raise ValueError( + f"{top_route_match.name} not a supported route for the {self.name} semantic router." 
+ ) def _classify_multi_route( self, vector: List[float], max_k: int, - distance_threshold: float, aggregation_method: DistanceAggregationMethod, ) -> List[RouteMatch]: """Classify to multiple routes, up to max_k (int), using a vector.""" - vector_range_query = RangeQuery( - vector=vector, - vector_field_name=ROUTE_VECTOR_FIELD_NAME, - distance_threshold=distance_threshold, - return_fields=["route_name"], - ) - aggregate_request = self._build_aggregate_request( - vector_range_query, aggregation_method, max_k - ) - - try: - aggregation_result: AggregateResult = self._index.aggregate( - aggregate_request, vector_range_query.params - ) - except ResponseError as e: - if "VSS is not yet supported on FT.AGGREGATE" in str(e): - raise RuntimeError( - "Semantic routing is only available on Redis version 7.x.x or greater" - ) - raise e - # process aggregation results into route matches - route_matches = [ - self._process_route(route_match) for route_match in aggregation_result.rows - ] + route_matches = self._get_route_matches(vector, aggregation_method, max_k) # process route matches top_route_matches: List[RouteMatch] = [] if route_matches: for route_match in route_matches: if route_match.name is not None: - if route := self.get(route_match.name): - # use the matched route's distance threshold - _distance_threshold = ( - route.distance_threshold or distance_threshold - ) - if self._pass_threshold(route_match, _distance_threshold): - top_route_matches.append(route_match) - else: - raise ValueError( - f"{route_match.name} not a supported route for the {self.name} semantic router." - ) + top_route_matches.append(route_match) + else: + raise ValueError( + f"{route_match.name} not a supported route for the {self.name} semantic router." + ) return top_route_matches - def _pass_threshold( - self, route_match: Optional[RouteMatch], distance_threshold: float - ) -> bool: - """Check if a route match passes the distance threshold. - - Args: - route_match (Optional[RouteMatch]): The route match to check. - distance_threshold (float): The fallback distance threshold to use if not assigned to a route. - - Returns: - bool: True if the route match passes the threshold, False otherwise. - """ - if route_match and distance_threshold: - if route_match.distance is not None: - return route_match.distance <= distance_threshold - return False - + @deprecated_argument("distance_threshold") def __call__( self, statement: Optional[str] = None, vector: Optional[List[float]] = None, - distance_threshold: Optional[float] = None, aggregation_method: Optional[DistanceAggregationMethod] = None, + distance_threshold: Optional[float] = None, ) -> RouteMatch: """Query the semantic router with a given statement or vector. 
@@ -387,20 +369,15 @@ def __call__( raise ValueError("Must provide a vector or statement to the router") vector = self.vectorizer.embed(statement) - # override routing config - distance_threshold = ( - distance_threshold or self.routing_config.distance_threshold - ) aggregation_method = ( aggregation_method or self.routing_config.aggregation_method ) # perform route classification - top_route_match = self._classify_route( - vector, distance_threshold, aggregation_method - ) + top_route_match = self._classify_route(vector, aggregation_method) return top_route_match + @deprecated_argument("distance_threshold") def route_many( self, statement: Optional[str] = None, @@ -426,10 +403,6 @@ raise ValueError("Must provide a vector or statement to the router") vector = self.vectorizer.embed(statement) - # override routing config defaults - distance_threshold = ( - distance_threshold or self.routing_config.distance_threshold - ) max_k = max_k or self.routing_config.max_k aggregation_method = ( aggregation_method or self.routing_config.aggregation_method @@ -437,8 +410,9 @@ # classify routes top_route_matches = self._classify_multi_route( - vector, max_k, distance_threshold, aggregation_method + vector, max_k, aggregation_method ) + return top_route_matches def remove_route(self, route_name: str) -> None: diff --git a/schemas/semantic_router.yaml b/schemas/semantic_router.yaml index 7b504154..7175efb7 100644 --- a/schemas/semantic_router.yaml +++ b/schemas/semantic_router.yaml @@ -18,6 +18,5 @@ vectorizer: type: hf model: sentence-transformers/all-mpnet-base-v2 routing_config: - distance_threshold: 0.3 max_k: 2 - aggregation_method: avg + aggregation_method: avg \ No newline at end of file diff --git a/tests/integration/test_semantic_router.py b/tests/integration/test_semantic_router.py index 04489aec..d88fcc6f 100644 --- a/tests/integration/test_semantic_router.py +++ b/tests/integration/test_semantic_router.py @@ -358,3 +358,64 @@ def test_deprecated_dtype_argument(routes, redis_url): redis_url=redis_url, overwrite=True, ) + + +def test_deprecated_distance_threshold_argument(semantic_router, routes, redis_url): + redis_version = semantic_router._index.client.info()["redis_version"] + if not compare_versions(redis_version, "7.0.0"): + pytest.skip("Not using a late enough version of Redis") + + router = SemanticRouter( + name="test_deprecated_distance_threshold", + routes=routes, + redis_url=redis_url, + overwrite=True, + ) + with pytest.warns(DeprecationWarning): + router("hello", distance_threshold=0.3) + + +def test_routes_different_distance_thresholds_get_two( + semantic_router, routes, redis_url ): + redis_version = semantic_router._index.client.info()["redis_version"] + if not compare_versions(redis_version, "7.0.0"): + pytest.skip("Not using a late enough version of Redis") + routes[0].distance_threshold = 0.5 + routes[1].distance_threshold = 0.7 + + router = SemanticRouter( + name="test_routes_different_distance_thresholds", + routes=routes, + redis_url=redis_url, + overwrite=True, + ) + + matches = router.route_many("hello", max_k=2) + assert len(matches) == 2 + assert matches[0].name == "greeting" + assert matches[1].name == "farewell" + + +def test_routes_different_distance_thresholds_get_one( + semantic_router, routes, redis_url ): + redis_version = semantic_router._index.client.info()["redis_version"] + if not compare_versions(redis_version, "7.0.0"): + pytest.skip("Not using a late enough version of Redis") + + routes[0].distance_threshold = 0.5 + + # don't match 
on second + routes[1].distance_threshold = 0.3 + + router = SemanticRouter( + name="test_routes_different_distance_thresholds", + routes=routes, + redis_url=redis_url, + overwrite=True, + ) + + matches = router.route_many("hello", max_k=2) + assert len(matches) == 1 + assert matches[0].name == "greeting" diff --git a/tests/unit/test_route_schema.py b/tests/unit/test_route_schema.py index f1ad5cb5..746c1182 100644 --- a/tests/unit/test_route_schema.py +++ b/tests/unit/test_route_schema.py @@ -64,7 +64,6 @@ def test_route_valid_no_threshold(): assert route.name == "Test Route" assert route.references == ["reference1", "reference2"] assert route.metadata == {"key": "value"} - assert route.distance_threshold is None def test_route_invalid_threshold_zero(): @@ -108,18 +107,12 @@ def test_distance_aggregation_method(): def test_routing_config_valid(): - config = RoutingConfig(distance_threshold=0.6, max_k=5) - assert config.distance_threshold == 0.6 + config = RoutingConfig(aggregation_method=DistanceAggregationMethod.min, max_k=5) + assert config.aggregation_method == DistanceAggregationMethod("min") assert config.max_k == 5 def test_routing_config_invalid_max_k(): with pytest.raises(ValidationError) as excinfo: - RoutingConfig(distance_threshold=0.6, max_k=0) + RoutingConfig(max_k=0) assert "max_k must be a positive integer" in str(excinfo.value) - - -def test_routing_config_invalid_distance_threshold(): - with pytest.raises(ValidationError) as excinfo: - RoutingConfig(distance_threshold=1.5, max_k=5) - assert "distance_threshold must be between 0 and 1" in str(excinfo.value)
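To round out the diff above, here is a usage sketch (ours, not part of the patch; the router name is hypothetical) of how callers configure matching after this change: thresholds move onto each `Route`, while the old global `RoutingConfig(distance_threshold=...)` and the `distance_threshold` call argument only raise deprecation warnings.

```python
from redisvl.extensions.router import Route, SemanticRouter

# Per-route thresholds replace the deprecated global routing threshold.
routes = [
    Route(name="greeting", references=["hello", "hi"], distance_threshold=0.3),
    Route(name="farewell", references=["bye", "goodbye"], distance_threshold=0.7),
]

router = SemanticRouter(
    name="topic-router",  # hypothetical router/index name
    routes=routes,
    redis_url="redis://localhost:6379",
    overwrite=True,
)

# Each candidate must fall under its own route's threshold to be returned.
matches = router.route_many("hello", max_k=2)
```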