From 0b2b033802f372e5ed48133763c96038d31f9106 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Wed, 20 Aug 2025 16:25:20 -0500 Subject: [PATCH] Support Azure OpenAI in MongoDB integration tests --- .../pyproject.toml | 1 + .../tests/conftest.py | 45 +++++--- .../tests/test_integration.py | 9 -- .../tests/test_vectorstore.py | 18 +-- .../llama-index-vector-stores-mongodb/uv.lock | 103 +++++++++++++++++- 5 files changed, 140 insertions(+), 36 deletions(-) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/pyproject.toml index bfe9ce1713..81167b0fdc 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/pyproject.toml +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/pyproject.toml @@ -7,6 +7,7 @@ dev = [ "ipython==8.10.0", "jupyter>=1.0.0,<2", "llama-index-embeddings-openai>=0.5.0,<0.6", + "llama-index-embeddings-azure_openai>=0.4.0,<0.5", "llama-index-llms-openai>=0.5.0,<0.6", "llama-index-readers-file>=0.5.0,<0.6", "mypy==0.991", diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/tests/conftest.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/tests/conftest.py index 30050c97de..f1bb7d0222 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/tests/conftest.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/tests/conftest.py @@ -6,16 +6,34 @@ from llama_index.core.node_parser import SentenceSplitter from llama_index.core.schema import Document, TextNode from llama_index.embeddings.openai import OpenAIEmbedding +from llama_index.core import Settings +from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch +from llama_index.llms.azure_openai import AzureOpenAI from pymongo 
import MongoClient -OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") - import threading lock = threading.Lock() +@pytest.fixture(scope="session") +def embed_model() -> OpenAIEmbedding: + if "OPENAI_API_KEY" in os.environ: + return OpenAIEmbedding() + if "AZURE_OPENAI_API_KEY" in os.environ: + deployment_name = os.environ.get( + "AZURE_TEXT_DEPLOYMENT", "text-embedding-3-small" + ) + api_key = os.environ["AZURE_OPENAI_API_KEY"] + embedding = AzureOpenAIEmbedding( + api_key=api_key, deployment_name=deployment_name + ) + Settings.embed_model = embedding + return embedding + pytest.skip("Requires OPENAI_API_KEY or AZURE_OPENAI_API_KEY in os.environ") + + @pytest.fixture(scope="session") def documents() -> List[Document]: """ @@ -29,17 +47,13 @@ def documents() -> List[Document]: @pytest.fixture(scope="session") -def nodes(documents) -> List[TextNode]: - if OPENAI_API_KEY is None: - return None - +def nodes(documents, embed_model) -> List[TextNode]: pipeline = IngestionPipeline( transformations=[ SentenceSplitter(chunk_size=1024, chunk_overlap=200), - OpenAIEmbedding(), + embed_model, ], ) - return pipeline.run(documents=documents) @@ -52,7 +66,7 @@ def nodes(documents) -> List[TextNode]: @pytest.fixture(scope="session") def atlas_client() -> MongoClient: if MONGODB_URI is None: - return None + raise pytest.skip("Requires MONGODB_URI in os.environ") client = MongoClient(MONGODB_URI) assert DB_NAME in client.list_database_names() @@ -60,10 +74,15 @@ def atlas_client() -> MongoClient: @pytest.fixture() -def vector_store(atlas_client: MongoClient) -> MongoDBAtlasVectorSearch: - if MONGODB_URI is None: - return None - +def vector_store( + atlas_client: MongoClient, embed_model: OpenAIEmbedding +) -> MongoDBAtlasVectorSearch: + # Set up the default llm to be used in tests. 
+ if isinstance(embed_model, AzureOpenAIEmbedding): + deployment_name = os.environ.get("AZURE_LLM_DEPLOYMENT", "gpt-4o-mini") + Settings.llm = AzureOpenAI( + engine=deployment_name, api_key=os.environ["AZURE_OPENAI_API_KEY"] + ) return MongoDBAtlasVectorSearch( mongodb_client=atlas_client, db_name=DB_NAME, diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/tests/test_integration.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/tests/test_integration.py index 5952ea0da3..3792065eb7 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/tests/test_integration.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/tests/test_integration.py @@ -7,11 +7,9 @@ provide a valid OPENAI_API_KEY. """ -import os from time import sleep from typing import List -import pytest from llama_index.core import StorageContext, VectorStoreIndex from llama_index.core.schema import Document from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch @@ -20,18 +18,11 @@ from .conftest import lock -@pytest.mark.skipif( - os.environ.get("MONGODB_URI") is None, reason="Requires MONGODB_URI in os.environ" -) def test_mongodb_connection(atlas_client: MongoClient) -> None: """Confirm that the connection to the datastore works.""" assert atlas_client.admin.command("ping")["ok"] -@pytest.mark.skipif( - os.environ.get("MONGODB_URI") is None or os.environ.get("OPENAI_API_KEY") is None, - reason="Requires MONGODB_URI and OPENAI_API_KEY in os.environ", -) def test_index( documents: List[Document], vector_store: MongoDBAtlasVectorSearch ) -> None: diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/tests/test_vectorstore.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/tests/test_vectorstore.py index a803fb6688..11041a942a 100644 --- 
a/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/tests/test_vectorstore.py +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/tests/test_vectorstore.py @@ -1,8 +1,6 @@ -import os from time import sleep from typing import List -import pytest from llama_index.core.schema import Document, TextNode from llama_index.core.vector_stores.types import ( FilterCondition, @@ -24,22 +22,16 @@ def test_documents(documents: List[Document]) -> None: assert isinstance(documents[0], Document) -@pytest.mark.skipif( - os.environ.get("OPENAI_API_KEY") is None, - reason="Requires OPENAI_API_KEY in os.environ", -) def test_nodes(nodes: List[TextNode]) -> None: """Test Ingestion Pipeline transforming documents into nodes with embeddings.""" assert isinstance(nodes, list) assert isinstance(nodes[0], TextNode) -@pytest.mark.skipif( - os.environ.get("MONGODB_URI") is None or os.environ.get("OPENAI_API_KEY") is None, - reason="Requires MONGODB_URI and OPENAI_API_KEY in os.environ", -) def test_vectorstore( - nodes: List[TextNode], vector_store: MongoDBAtlasVectorSearch + nodes: List[TextNode], + vector_store: MongoDBAtlasVectorSearch, + embed_model: OpenAIEmbedding, ) -> None: """Test add, query, delete API of MongoDBAtlasVectorSearch.""" with lock: @@ -54,7 +46,7 @@ def test_vectorstore( # 2a. test query(): default (vector search) query_str = "What are LLMs useful for?" 
n_similar = 2 - query_embedding = OpenAIEmbedding().get_text_embedding(query_str) + query_embedding = embed_model.get_text_embedding(query_str) query = VectorStoreQuery( query_embedding=query_embedding, similarity_top_k=n_similar, @@ -70,7 +62,7 @@ def test_vectorstore( sleep(2) retries -= 1 - assert all(score > 0.89 for score in query_responses.similarities) + assert all(score > 0.75 for score in query_responses.similarities) assert any("LLM" in node.text for node in query_responses.nodes) assert all(id_res in ids for id_res in query_responses.ids) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/uv.lock b/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/uv.lock index aa1d47afa7..72b87feb8c 100644 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/uv.lock +++ b/llama-index-integrations/vector_stores/llama-index-vector-stores-mongodb/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.9, <4.0" resolution-markers = [ "python_full_version >= '3.12'", @@ -272,6 +272,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815, upload-time = "2025-03-13T11:10:21.14Z" }, ] +[[package]] +name = "azure-core" +version = "1.35.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, + { name = "six" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ce/89/f53968635b1b2e53e4aad2dd641488929fef4ca9dfb0b97927fa7697ddf3/azure_core-1.35.0.tar.gz", hash = "sha256:c0be528489485e9ede59b6971eb63c1eaacf83ef53001bfe3904e475e972be5c", size = 339689, upload-time = "2025-07-03T00:55:23.496Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/d4/78/bf94897361fdd650850f0f2e405b2293e2f12808239046232bdedf554301/azure_core-1.35.0-py3-none-any.whl", hash = "sha256:8db78c72868a58f3de8991eb4d22c4d368fae226dac1002998d6c50437e7dad1", size = 210708, upload-time = "2025-07-03T00:55:25.238Z" }, +] + +[[package]] +name = "azure-identity" +version = "1.24.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "azure-core" }, + { name = "cryptography" }, + { name = "msal" }, + { name = "msal-extensions" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b5/44/f3ee20bacb220b6b4a2b0a6cf7e742eecb383a5ccf604dd79ec27c286b7e/azure_identity-1.24.0.tar.gz", hash = "sha256:6c3a40b2a70af831e920b89e6421e8dcd4af78a0cb38b9642d86c67643d4930c", size = 271630, upload-time = "2025-08-07T22:27:36.258Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/74/17428cb429e8d52f6d0d69ed685f4760a545cb0156594963a9337b53b6c9/azure_identity-1.24.0-py3-none-any.whl", hash = "sha256:9e04997cde0ab02ed66422c74748548e620b7b29361c72ce622acab0267ff7c4", size = 187890, upload-time = "2025-08-07T22:27:38.033Z" }, +] + [[package]] name = "babel" version = "2.17.0" @@ -1714,6 +1744,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f3/27/36a3ad19c8e2e7967ca1c9e469e5205901d2b6d299420e9aba4eafd90d8d/llama_index_core-0.13.0-py3-none-any.whl", hash = "sha256:46c14fc2a26b8f7618c2dd2daf6e430e3f94b1908474baee539f705c9c638348", size = 7573714, upload-time = "2025-07-30T17:23:52.355Z" }, ] +[[package]] +name = "llama-index-embeddings-azure-openai" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "llama-index-core" }, + { name = "llama-index-embeddings-openai" }, + { name = "llama-index-llms-azure-openai" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/36/c8/9b0eb78531ec2b42ca06750e5b256b122c0449d9f4e4ce3be5f4b1601a3e/llama_index_embeddings_azure_openai-0.4.0.tar.gz", hash = "sha256:092e48e79e47d9c552792dc17fd527ec2ebdc657781ccadeb43cfcbc0b5d354a", size = 4785, upload-time = "2025-07-31T00:31:02.584Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3a/e3/f030182f1c9268b1d59ae7d2e73e2782ab8a152ec1dd04ed1946532825b1/llama_index_embeddings_azure_openai-0.4.0-py3-none-any.whl", hash = "sha256:4a570fb4478493baf6eeb07f584880d7369728eaf6beff6e250ce46244e37cac", size = 4419, upload-time = "2025-07-31T00:31:01.661Z" }, +] + [[package]] name = "llama-index-embeddings-openai" version = "0.5.0" @@ -1740,6 +1784,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/49/e6/5eeaf98146e26af28cae36b1eaba7c405246697dfe208f249a4b9f113f0a/llama_index_instrumentation-0.3.1-py3-none-any.whl", hash = "sha256:68527a926ac1db84fe7c4426048c4a7c25fdb71626d465b963d80f05f526124a", size = 14947, upload-time = "2025-07-30T14:11:14.269Z" }, ] +[[package]] +name = "llama-index-llms-azure-openai" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "azure-identity" }, + { name = "httpx" }, + { name = "llama-index-core" }, + { name = "llama-index-llms-openai" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/25/3e/70c189502d1ee84dd73db66f3c4978dc5ce975e233954dc2724c9374d659/llama_index_llms_azure_openai-0.4.0.tar.gz", hash = "sha256:bba297fd7d0e85e9cf17ac03f7617ff9812719b6312e0f56ee4242ae11fa5d9b", size = 7054, upload-time = "2025-07-30T21:36:39.408Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/1a/3992ac83c237eba455411dbd5ab2ec65dbefa8670aecd8a3f809b30cbcbc/llama_index_llms_azure_openai-0.4.0-py3-none-any.whl", hash = "sha256:f7f69cad12d7e6da75a58f6ec49f719dee3f03d30bbafc7ec29b2bf9087b0d51", size = 7257, upload-time = "2025-07-30T21:36:38.398Z" }, +] + [[package]] name = "llama-index-llms-openai" 
version = "0.5.0" @@ -1787,6 +1846,7 @@ dev = [ { name = "diff-cover", version = "9.2.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9.17'" }, { name = "ipython" }, { name = "jupyter" }, + { name = "llama-index-embeddings-azure-openai" }, { name = "llama-index-embeddings-openai" }, { name = "llama-index-llms-openai" }, { name = "llama-index-readers-file" }, @@ -1818,6 +1878,7 @@ dev = [ { name = "diff-cover", specifier = ">=9.2.0" }, { name = "ipython", specifier = "==8.10.0" }, { name = "jupyter", specifier = ">=1.0.0,<2" }, + { name = "llama-index-embeddings-azure-openai", specifier = ">=0.4.0,<0.5" }, { name = "llama-index-embeddings-openai", specifier = ">=0.5.0,<0.6" }, { name = "llama-index-llms-openai", specifier = ">=0.5.0,<0.6" }, { name = "llama-index-readers-file", specifier = ">=0.5.0,<0.6" }, @@ -1963,6 +2024,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/01/4d/23c4e4f09da849e127e9f123241946c23c1e30f45a88366879e064211815/mistune-3.1.3-py3-none-any.whl", hash = "sha256:1a32314113cff28aa6432e99e522677c8587fd83e3d51c29b82a52409c842bd9", size = 53410, upload-time = "2025-03-19T14:27:23.451Z" }, ] +[[package]] +name = "msal" +version = "1.33.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "pyjwt", extra = ["crypto"] }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d5/da/81acbe0c1fd7e9e4ec35f55dadeba9833a847b9a6ba2e2d1e4432da901dd/msal-1.33.0.tar.gz", hash = "sha256:836ad80faa3e25a7d71015c990ce61f704a87328b1e73bcbb0623a18cbf17510", size = 153801, upload-time = "2025-07-22T19:36:33.693Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/86/5b/fbc73e91f7727ae1e79b21ed833308e99dc11cc1cd3d4717f579775de5e9/msal-1.33.0-py3-none-any.whl", hash = "sha256:c0cd41cecf8eaed733ee7e3be9e040291eba53b0f262d3ae9c58f38b04244273", size = 116853, upload-time = "2025-07-22T19:36:32.403Z" }, +] + 
+[[package]] +name = "msal-extensions" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "msal" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/01/99/5d239b6156eddf761a636bded1118414d161bd6b7b37a9335549ed159396/msal_extensions-1.3.1.tar.gz", hash = "sha256:c5b0fd10f65ef62b5f1d62f4251d51cbcaf003fcedae8c91b040a488614be1a4", size = 23315, upload-time = "2025-03-14T23:51:03.902Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5e/75/bd9b7bb966668920f06b200e84454c8f3566b102183bc55c5473d96cb2b9/msal_extensions-1.3.1-py3-none-any.whl", hash = "sha256:96d3de4d034504e969ac5e85bae8106c8373b5c6568e4c8fa7af2eca9dbe6bca", size = 20583, upload-time = "2025-03-14T23:51:03.016Z" }, +] + [[package]] name = "multidict" version = "6.4.3" @@ -2949,6 +3036,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293, upload-time = "2025-01-06T17:26:25.553Z" }, ] +[[package]] +name = "pyjwt" +version = "2.10.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785, upload-time = "2024-11-28T03:43:29.933Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997, upload-time = "2024-11-28T03:43:27.893Z" }, +] + +[package.optional-dependencies] +crypto = [ + { name = "cryptography" }, +] + [[package]] name = "pylint" version = "2.15.10"