From 94f27f64546fe0fa2ee6dbbf8e5aeb1566ed06dd Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Tue, 11 Feb 2025 10:32:52 -0500 Subject: [PATCH 01/10] update scripts and makefile --- Makefile | 2 +- scripts.py | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 1451a2f5..688f09fd 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: install format lint test clean redis-start redis-stop check-types integration-test docs-build docs-serve check +.PHONY: install format lint test test-all clean redis-start redis-stop check-types docs-build docs-serve check install: poetry install --all-extras diff --git a/scripts.py b/scripts.py index 0bfbac01..4eef395e 100644 --- a/scripts.py +++ b/scripts.py @@ -1,4 +1,5 @@ import subprocess +import sys def format(): @@ -29,13 +30,23 @@ def check_mypy(): def test(): - subprocess.run(["python", "-m", "pytest", "-n", "auto", "--log-level=CRITICAL"], check=True) + # Get any extra arguments passed to the script + extra_args = sys.argv[1:] + if not extra_args: + test_cmd = ["python", "-m", "pytest", "-n", "auto", "--log-level=CRITICAL"] + else: + test_cmd = ["python", "-m", "pytest", "-n", "auto", "--log-level=CRITICAL"] + extra_args + subprocess.run(test_cmd, check=True) def test_verbose(): - subprocess.run( - ["python", "-m", "pytest", "-n", "auto", "-vv", "-s", "--log-level=CRITICAL"], check=True - ) + # Get any extra arguments passed to the script + extra_args = sys.argv[1:] + if not extra_args: + test_cmd = ["python", "-m", "pytest", "-n", "auto", "-vv", "-s", "--log-level=CRITICAL"] + else: + test_cmd = ["python", "-m", "pytest", "-n", "auto", "-vv", "-s", "--log-level=CRITICAL"] + extra_args + subprocess.run(test_cmd, check=True) def test_notebooks(): From 44f4ba88fc01015ff6e416db979d94e1e69bba82 Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Wed, 12 Feb 2025 09:57:14 -0500 Subject: [PATCH 02/10] tweak pytest config --- scripts.py | 14 ++- tests/conftest.py | 106 +++++++++++++--------- tests/integration/test_session_manager.py | 5 + 3 files changed, 73 insertions(+), 52 deletions(-) diff --git a/scripts.py b/scripts.py index 4eef395e..76adc986 100644 --- a/scripts.py +++ b/scripts.py @@ -30,22 +30,20 @@ def check_mypy(): def test(): + test_cmd = ["python", "-m", "pytest", "-n", "auto", "--log-level=CRITICAL"] # Get any extra arguments passed to the script extra_args = sys.argv[1:] - if not extra_args: - test_cmd = ["python", "-m", "pytest", "-n", "auto", "--log-level=CRITICAL"] - else: - test_cmd = ["python", "-m", "pytest", "-n", "auto", "--log-level=CRITICAL"] + extra_args + if extra_args: + test_cmd.extend(extra_args) subprocess.run(test_cmd, check=True) def test_verbose(): + test_cmd = ["python", "-m", "pytest", "-n", "auto", "-vv", "-s", "--log-level=CRITICAL"] # Get any extra arguments passed to the script extra_args = sys.argv[1:] - if not extra_args: - test_cmd = ["python", "-m", "pytest", "-n", "auto", "-vv", "-s", "--log-level=CRITICAL"] - else: - test_cmd = ["python", "-m", "pytest", "-n", "auto", "-vv", "-s", "--log-level=CRITICAL"] + extra_args + if extra_args: + test_cmd.extend(extra_args) subprocess.run(test_cmd, check=True) diff --git a/tests/conftest.py b/tests/conftest.py index 28ea7735..b5802750 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -54,13 +54,15 @@ async def async_client(redis_url): """ An async Redis client that uses the dynamic `redis_url`. """ - client = await RedisConnectionFactory.get_async_redis_connection(redis_url) - yield client - try: - await client.aclose() - except RuntimeError as e: - if "Event loop is closed" not in str(e): - raise + async with await RedisConnectionFactory.get_async_redis_connection( + redis_url + ) as client: + yield client + # try: + # await client.aclose() + # except RuntimeError as e: + # if "Event loop is closed" not in str(e): + # raise @pytest.fixture @@ -70,51 +72,51 @@ def client(redis_url): """ conn = RedisConnectionFactory.get_redis_connection(redis_url) yield conn - conn.close() + # conn.close() -@pytest.fixture -def openai_key(): - return os.getenv("OPENAI_API_KEY") +# @pytest.fixture +# def openai_key(): +# return os.getenv("OPENAI_API_KEY") -@pytest.fixture -def openai_version(): - return os.getenv("OPENAI_API_VERSION") +# @pytest.fixture +# def openai_version(): +# return os.getenv("OPENAI_API_VERSION") -@pytest.fixture -def azure_endpoint(): - return os.getenv("AZURE_OPENAI_ENDPOINT") +# @pytest.fixture +# def azure_endpoint(): +# return os.getenv("AZURE_OPENAI_ENDPOINT") -@pytest.fixture -def cohere_key(): - return os.getenv("COHERE_API_KEY") +# @pytest.fixture +# def cohere_key(): +# return os.getenv("COHERE_API_KEY") -@pytest.fixture -def mistral_key(): - return os.getenv("MISTRAL_API_KEY") +# @pytest.fixture +# def mistral_key(): +# return os.getenv("MISTRAL_API_KEY") -@pytest.fixture -def gcp_location(): - return os.getenv("GCP_LOCATION") +# @pytest.fixture +# def gcp_location(): +# return os.getenv("GCP_LOCATION") -@pytest.fixture -def gcp_project_id(): - return os.getenv("GCP_PROJECT_ID") +# @pytest.fixture +# def gcp_project_id(): +# return os.getenv("GCP_PROJECT_ID") -@pytest.fixture -def aws_credentials(): - return { - "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"), - "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"), - "aws_region": os.getenv("AWS_REGION", "us-east-1"), - } +# @pytest.fixture +# def aws_credentials(): +# return { +# "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"), +# "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"), +# "aws_region": os.getenv("AWS_REGION", "us-east-1"), +# } @pytest.fixture @@ -179,13 +181,29 @@ def sample_data(): ] -@pytest.fixture -def clear_db(redis): - redis.flushall() - yield - redis.flushall() +def pytest_addoption(parser: pytest.Parser) -> None: + parser.addoption( + "--run-api-tests", + action="store_true", + default=False, + help="Run tests that require API keys", + ) -@pytest.fixture -def app_name(): - return "test_app" +def pytest_configure(config: pytest.Config) -> None: + config.addinivalue_line( + "markers", "requires_api_keys: mark test as requiring API keys" + ) + + +def pytest_collection_modifyitems( + config: pytest.Config, items: list[pytest.Item] +) -> None: + if config.getoption("--run-api-tests"): + return + skip_api = pytest.mark.skip( + reason="Skipping test because API keys are not provided. Use --run-api-tests to run these tests." + ) + for item in items: + if item.get_closest_marker("requires_api_keys"): + item.add_marker(skip_api) diff --git a/tests/integration/test_session_manager.py b/tests/integration/test_session_manager.py index 05188db5..59d64b97 100644 --- a/tests/integration/test_session_manager.py +++ b/tests/integration/test_session_manager.py @@ -12,6 +12,11 @@ from redisvl.utils.vectorize.text.huggingface import HFTextVectorizer +@pytest.fixture +def app_name(): + return "test_app" + + @pytest.fixture def standard_session(app_name, client): session = StandardSessionManager(app_name, redis_client=client) From 50e13a73a467ba34a019756a4de86fdc6b9831d2 Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Wed, 12 Feb 2025 12:09:36 -0500 Subject: [PATCH 03/10] clean up makefile and scripts --- .github/workflows/test.yml | 6 +- Makefile | 9 +- scripts.py | 2 +- tests/conftest.py | 52 +---------- tests/integration/test_rerankers.py | 18 +--- tests/integration/test_vectorizers.py | 123 +++++++++++--------------- 6 files changed, 66 insertions(+), 144 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b72da87b..69068809 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -83,12 +83,12 @@ jobs: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} run: | - poetry run test-verbose + make test-all - name: Run tests if: matrix.connection != 'plain' || matrix.redis-stack-version != 'latest' run: | - SKIP_VECTORIZERS=True SKIP_RERANKERS=True poetry run test-verbose + make test - name: Run notebooks if: matrix.connection == 'plain' && matrix.redis-stack-version == 'latest' @@ -106,7 +106,7 @@ jobs: AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} run: | - cd docs/ && poetry run pytest --nbval-lax ./user_guide -vv + make test-notebooks docs: runs-on: ubuntu-latest diff --git a/Makefile b/Makefile index 688f09fd..a71f8d22 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: install format lint test test-all clean redis-start redis-stop check-types docs-build docs-serve check +.PHONY: install format lint test test-all test-notebooks clean redis-start redis-stop check-types docs-build docs-serve check install: poetry install --all-extras @@ -19,10 +19,13 @@ check-types: lint: format check-types test: - SKIP_RERANKERS=true SKIP_VECTORIZERS=true poetry run test-verbose + poetry run test-verbose test-all: - poetry run test-verbose + poetry run test-verbose --run-api-tests + +test-notebooks: + poetry run test-notebooks check: lint test diff --git a/scripts.py b/scripts.py index 76adc986..c2034590 100644 --- a/scripts.py +++ b/scripts.py @@ -48,7 +48,7 @@ def test_verbose(): def test_notebooks(): - subprocess.run(["cd", "docs/", "&&", "poetry run treon", "-v"], check=True) + subprocess.run(["cd", "docs/", "&&", "poetry run pytest --nbval-lax ./user_guide", "-vv"], check=True) def build_docs(): diff --git a/tests/conftest.py b/tests/conftest.py index b5802750..de41b4b2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -58,11 +58,6 @@ async def async_client(redis_url): redis_url ) as client: yield client - # try: - # await client.aclose() - # except RuntimeError as e: - # if "Event loop is closed" not in str(e): - # raise @pytest.fixture @@ -72,51 +67,6 @@ def client(redis_url): """ conn = RedisConnectionFactory.get_redis_connection(redis_url) yield conn - # conn.close() - - -# @pytest.fixture -# def openai_key(): -# return os.getenv("OPENAI_API_KEY") - - -# @pytest.fixture -# def openai_version(): -# return os.getenv("OPENAI_API_VERSION") - - -# @pytest.fixture -# def azure_endpoint(): -# return os.getenv("AZURE_OPENAI_ENDPOINT") - - -# @pytest.fixture -# def cohere_key(): -# return os.getenv("COHERE_API_KEY") - - -# @pytest.fixture -# def mistral_key(): -# return os.getenv("MISTRAL_API_KEY") - - -# @pytest.fixture -# def gcp_location(): -# return os.getenv("GCP_LOCATION") - - -# @pytest.fixture -# def gcp_project_id(): -# return os.getenv("GCP_PROJECT_ID") - - -# @pytest.fixture -# def aws_credentials(): -# return { -# "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID"), -# "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"), -# "aws_region": os.getenv("AWS_REGION", "us-east-1"), -# } @pytest.fixture @@ -201,6 +151,8 @@ def pytest_collection_modifyitems( ) -> None: if config.getoption("--run-api-tests"): return + + # Otherwise skip all tests requiring an API key skip_api = pytest.mark.skip( reason="Skipping test because API keys are not provided. Use --run-api-tests to run these tests." ) diff --git a/tests/integration/test_rerankers.py b/tests/integration/test_rerankers.py index 65aad333..caee5a47 100644 --- a/tests/integration/test_rerankers.py +++ b/tests/integration/test_rerankers.py @@ -9,13 +9,6 @@ ) -@pytest.fixture -def skip_reranker() -> bool: - # os.getenv returns a string - v = os.getenv("SKIP_RERANKERS", "False").lower() == "true" - return v - - # Fixture for the reranker instance @pytest.fixture( params=[ @@ -23,10 +16,7 @@ def skip_reranker() -> bool: VoyageAIReranker, ] ) -def reranker(request, skip_reranker): - if skip_reranker: - pytest.skip("Skipping reranker instantiation...") - +def reranker(request): if request.param == CohereReranker: return request.param() elif request.param == VoyageAIReranker: @@ -43,7 +33,7 @@ def hfCrossEncoderRerankerWithCustomModel(): return HFCrossEncoderReranker("cross-encoder/stsb-distilroberta-base") -# Test for basic ranking functionality +@pytest.mark.requires_api_keys def test_rank_documents(reranker): docs = ["document one", "document two", "document three"] query = "search query" @@ -55,7 +45,7 @@ def test_rank_documents(reranker): assert all(isinstance(score, float) for score in scores) # Scores should be floats -# Test for asynchronous ranking functionality +@pytest.mark.requires_api_keys @pytest.mark.asyncio async def test_async_rank_documents(reranker): docs = ["document one", "document two", "document three"] @@ -68,7 +58,7 @@ async def test_async_rank_documents(reranker): assert all(isinstance(score, float) for score in scores) # Scores should be floats -# Test handling of bad input +@pytest.mark.requires_api_keys def test_bad_input(reranker): with pytest.raises(Exception): reranker.rank("", []) # Empty query or documents diff --git a/tests/integration/test_vectorizers.py b/tests/integration/test_vectorizers.py index 52a32eca..95cce195 100644 --- a/tests/integration/test_vectorizers.py +++ b/tests/integration/test_vectorizers.py @@ -15,12 +15,6 @@ ) -@pytest.fixture -def skip_vectorizer() -> bool: - v = os.getenv("SKIP_VECTORIZERS", "False").lower() == "true" - return v - - @pytest.fixture( params=[ HFTextVectorizer, @@ -34,10 +28,7 @@ def skip_vectorizer() -> bool: VoyageAITextVectorizer, ] ) -def vectorizer(request, skip_vectorizer): - if skip_vectorizer: - pytest.skip("Skipping vectorizer instantiation...") - +def vectorizer(request): if request.param == HFTextVectorizer: return request.param() elif request.param == OpenAITextVectorizer: @@ -70,10 +61,7 @@ def embed_many(texts): @pytest.fixture -def bedrock_vectorizer(skip_vectorizer): - if skip_vectorizer: - pytest.skip("Skipping Bedrock vectorizer tests...") - +def bedrock_vectorizer(): return BedrockTextVectorizer( model=os.getenv("BEDROCK_MODEL_ID", "amazon.titan-embed-text-v2:0") ) @@ -108,6 +96,7 @@ def embed_many_with_args(self, texts, param=True): return MyEmbedder +@pytest.mark.requires_api_keys def test_vectorizer_embed(vectorizer): text = "This is a test sentence." if isinstance(vectorizer, CohereTextVectorizer): @@ -121,6 +110,7 @@ def test_vectorizer_embed(vectorizer): assert len(embedding) == vectorizer.dims +@pytest.mark.requires_api_keys def test_vectorizer_embed_many(vectorizer): texts = ["This is the first test sentence.", "This is the second test sentence."] if isinstance(vectorizer, CohereTextVectorizer): @@ -137,6 +127,7 @@ def test_vectorizer_embed_many(vectorizer): ) +@pytest.mark.requires_api_keys def test_vectorizer_bad_input(vectorizer): with pytest.raises(TypeError): vectorizer.embed(1) @@ -148,6 +139,7 @@ def test_vectorizer_bad_input(vectorizer): vectorizer.embed_many(42) +@pytest.mark.requires_api_keys def test_bedrock_bad_credentials(): with pytest.raises(ValueError): BedrockTextVectorizer( @@ -158,6 +150,7 @@ def test_bedrock_bad_credentials(): ) +@pytest.mark.requires_api_keys def test_bedrock_invalid_model(bedrock_vectorizer): with pytest.raises(ValueError): bedrock = BedrockTextVectorizer(model="invalid-model") @@ -250,64 +243,48 @@ def bad_return_type(text: str) -> str: ) -@pytest.mark.parametrize( - "vector_class", - [ - AzureOpenAITextVectorizer, - BedrockTextVectorizer, - CohereTextVectorizer, - CustomTextVectorizer, - HFTextVectorizer, - MistralAITextVectorizer, - OpenAITextVectorizer, - VertexAITextVectorizer, - VoyageAITextVectorizer, - ], -) -def test_dtypes(vector_class, skip_vectorizer): - if skip_vectorizer: - pytest.skip("Skipping vectorizer instantiation...") - - # test dtype defaults to float32 - if issubclass(vector_class, CustomTextVectorizer): - vectorizer = vector_class(embed=lambda x, input_type=None: [1.0, 2.0, 3.0]) - elif issubclass(vector_class, AzureOpenAITextVectorizer): - vectorizer = vector_class( - model=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME", "text-embedding-ada-002") - ) - else: - vectorizer = vector_class() - - assert vectorizer.dtype == "float32" - - # test initializing dtype in constructor - for dtype in ["float16", "float32", "float64", "bfloat16"]: - if issubclass(vector_class, CustomTextVectorizer): - vectorizer = vector_class(embed=lambda x: [1.0, 2.0, 3.0], dtype=dtype) - elif issubclass(vector_class, AzureOpenAITextVectorizer): - vectorizer = vector_class( - model=os.getenv( - "AZURE_OPENAI_DEPLOYMENT_NAME", "text-embedding-ada-002" - ), - dtype=dtype, - ) - else: - vectorizer = vector_class(dtype=dtype) - - assert vectorizer.dtype == dtype - - # test validation of dtype on init - if issubclass(vector_class, CustomTextVectorizer): - pytest.skip("skipping custom text vectorizer") +# @pytest.mark.requires_api_keys +# def test_dtypes(vectorizer): +# # # test dtype defaults to float32 +# # if issubclass(vectorizer, CustomTextVectorizer): +# # vectorizer = vectorizer(embed=lambda x, input_type=None: [1.0, 2.0, 3.0]) +# # elif issubclass(vectorizer, AzureOpenAITextVectorizer): +# # vectorizer = vectorizer( +# # model=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME", "text-embedding-ada-002") +# # ) +# # else: +# # vectorizer = vector_class() - with pytest.raises(ValueError): - vectorizer = vector_class(dtype="float25") +# assert vectorizer.dtype == "float32" - with pytest.raises(ValueError): - vectorizer = vector_class(dtype=7) +# # test initializing dtype in constructor +# for dtype in ["float16", "float32", "float64", "bfloat16"]: +# if issubclass(vectorizer, CustomTextVectorizer): +# vectorizer = vectorizer(embed=lambda x: [1.0, 2.0, 3.0], dtype=dtype) +# elif issubclass(vectorizer, AzureOpenAITextVectorizer): +# vectorizer = vectorizer( +# model=os.getenv( +# "AZURE_OPENAI_DEPLOYMENT_NAME", "text-embedding-ada-002" +# ), +# dtype=dtype, +# ) +# else: +# vectorizer = vectorizer(dtype=dtype) - with pytest.raises(ValueError): - vectorizer = vector_class(dtype=None) +# assert vectorizer.dtype == dtype + +# # test validation of dtype on init +# if issubclass(vectorizer, CustomTextVectorizer): +# pytest.skip("skipping custom text vectorizer") + +# with pytest.raises(ValueError): +# vectorizer = vectorizer(dtype="float25") + +# with pytest.raises(ValueError): +# vectorizer = vectorizer(dtype=7) + +# with pytest.raises(ValueError): +# vectorizer = vectorizer(dtype=None) @pytest.fixture( @@ -319,10 +296,7 @@ def test_dtypes(vector_class, skip_vectorizer): VoyageAITextVectorizer, ] ) -def avectorizer(request, skip_vectorizer): - if skip_vectorizer: - pytest.skip("Skipping vectorizer instantiation...") - +def avectorizer(request): if request.param == CustomTextVectorizer: def embed_func(text): @@ -341,6 +315,7 @@ async def aembed_many_func(texts): return request.param() +@pytest.mark.requires_api_keys @pytest.mark.asyncio async def test_vectorizer_aembed(avectorizer): text = "This is a test sentence." @@ -350,6 +325,7 @@ async def test_vectorizer_aembed(avectorizer): assert len(embedding) == avectorizer.dims +@pytest.mark.requires_api_keys @pytest.mark.asyncio async def test_vectorizer_aembed_many(avectorizer): texts = ["This is the first test sentence.", "This is the second test sentence."] @@ -362,6 +338,7 @@ async def test_vectorizer_aembed_many(avectorizer): ) +@pytest.mark.requires_api_keys @pytest.mark.asyncio async def test_avectorizer_bad_input(avectorizer): with pytest.raises(TypeError): From 832bac63ce727e808bc2a3c2a30b20de84337c13 Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Wed, 12 Feb 2025 12:12:13 -0500 Subject: [PATCH 04/10] update contributing guide --- CONTRIBUTING.md | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7b35b96b..6bb7d37b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -50,8 +50,9 @@ If you use `make`, we've created shortcuts for running the commands in this docu | make format | Runs code formatting and import sorting | | make check-types | Runs mypy type checking | | make lint | Runs formatting, import sorting, and type checking | -| make test | Runs tests, excluding those that require API keys and/or remote network calls)| -| make test-all | Runs all tests, including those that require API keys and/or remote network calls)| +| make test | Runs tests, excluding those that require API keys and/or remote network calls| +| make test-all | Runs all tests, including those that require API keys and/or remote network calls| +| make test-notebooks | Runs all notebook tests| | make check | Runs all linting targets and a subset of tests | | make docs-build | Builds the documentation | | make docs-serve | Serves the documentation locally | @@ -76,19 +77,14 @@ To run Testcontainers-based tests you need a local Docker installation such as: #### Running the Tests -Tests w/ vectorizers: +Tests w/ external APIs: ```bash -poetry run test-verbose -``` - -Tests w/out vectorizers: -```bash -SKIP_VECTORIZERS=true poetry run test-verbose +poetry run test-verbose --run-api-tests ``` -Tests w/out rerankers: +Tests w/out external APIs: ```bash -SKIP_RERANKERS=true poetry run test-verbose +poetry run test-verbose ``` ### Documentation @@ -112,6 +108,17 @@ In order for your applications to use RedisVL, you must have [Redis](https://red docker run -d --name redis-stack -p 6379:6379 -p 8001:8001 redis/redis-stack:latest ``` +Or from your makefile simply run: + +```bash +make redis-start +``` + +And then: +```bash +make redis-stop +``` + This will also spin up the [FREE RedisInsight GUI](https://redis.io/insight/) at `http://localhost:8001`. ## How to Report a Bug From b2f8b4c3af1f2b30653abc621ca7661f2632f9c2 Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Wed, 12 Feb 2025 12:13:14 -0500 Subject: [PATCH 05/10] Add another example to contributor guide --- CONTRIBUTING.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6bb7d37b..52db3032 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -87,6 +87,11 @@ Tests w/out external APIs: poetry run test-verbose ``` +Run a test on a specific file: +```bash +poetry run test-verbose tests/unit/test_fields.py +``` + ### Documentation Docs are served from the `docs/` directory. From bf12a611abeb88bff0f742a8675d1a04dd2ee3a5 Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Wed, 12 Feb 2025 12:20:59 -0500 Subject: [PATCH 06/10] fix notebook test entry point --- scripts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts.py b/scripts.py index c2034590..77020aad 100644 --- a/scripts.py +++ b/scripts.py @@ -48,7 +48,7 @@ def test_verbose(): def test_notebooks(): - subprocess.run(["cd", "docs/", "&&", "poetry run pytest --nbval-lax ./user_guide", "-vv"], check=True) + subprocess.run("cd docs/ && python -m pytest --nbval-lax ./user_guide -vv", shell=True, check=True) def build_docs(): From 44df26344b6fd03491ad34e909544a4d15386c58 Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Wed, 12 Feb 2025 13:54:17 -0500 Subject: [PATCH 07/10] uncomment test --- tests/integration/test_vectorizers.py | 94 +++++++++++++++------------ 1 file changed, 52 insertions(+), 42 deletions(-) diff --git a/tests/integration/test_vectorizers.py b/tests/integration/test_vectorizers.py index 95cce195..26c1f21f 100644 --- a/tests/integration/test_vectorizers.py +++ b/tests/integration/test_vectorizers.py @@ -243,48 +243,58 @@ def bad_return_type(text: str) -> str: ) -# @pytest.mark.requires_api_keys -# def test_dtypes(vectorizer): -# # # test dtype defaults to float32 -# # if issubclass(vectorizer, CustomTextVectorizer): -# # vectorizer = vectorizer(embed=lambda x, input_type=None: [1.0, 2.0, 3.0]) -# # elif issubclass(vectorizer, AzureOpenAITextVectorizer): -# # vectorizer = vectorizer( -# # model=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME", "text-embedding-ada-002") -# # ) -# # else: -# # vectorizer = vector_class() - -# assert vectorizer.dtype == "float32" - -# # test initializing dtype in constructor -# for dtype in ["float16", "float32", "float64", "bfloat16"]: -# if issubclass(vectorizer, CustomTextVectorizer): -# vectorizer = vectorizer(embed=lambda x: [1.0, 2.0, 3.0], dtype=dtype) -# elif issubclass(vectorizer, AzureOpenAITextVectorizer): -# vectorizer = vectorizer( -# model=os.getenv( -# "AZURE_OPENAI_DEPLOYMENT_NAME", "text-embedding-ada-002" -# ), -# dtype=dtype, -# ) -# else: -# vectorizer = vectorizer(dtype=dtype) - -# assert vectorizer.dtype == dtype - -# # test validation of dtype on init -# if issubclass(vectorizer, CustomTextVectorizer): -# pytest.skip("skipping custom text vectorizer") - -# with pytest.raises(ValueError): -# vectorizer = vectorizer(dtype="float25") - -# with pytest.raises(ValueError): -# vectorizer = vectorizer(dtype=7) - -# with pytest.raises(ValueError): -# vectorizer = vectorizer(dtype=None) +@pytest.mark.requires_api_keys +@pytest.mark.parametrize( + "vectorizer_", + [ + AzureOpenAITextVectorizer, + BedrockTextVectorizer, + CohereTextVectorizer, + CustomTextVectorizer, + HFTextVectorizer, + MistralAITextVectorizer, + OpenAITextVectorizer, + VertexAITextVectorizer, + VoyageAITextVectorizer, + ], +) +def test_dtypes(vectorizer_): + # test dtype defaults to float32 + if issubclass(vectorizer_, CustomTextVectorizer): + vectorizer = vectorizer_(embed=lambda x, input_type=None: [1.0, 2.0, 3.0]) + elif issubclass(vectorizer, AzureOpenAITextVectorizer): + vectorizer = vectorizer_( + model=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME", "text-embedding-ada-002") + ) + else: + vectorizer = vectorizer_() + + assert vectorizer.dtype == "float32" + + # test initializing dtype in constructor + for dtype in ["float16", "float32", "float64", "bfloat16"]: + if issubclass(vectorizer_, CustomTextVectorizer): + vectorizer = vectorizer_(embed=lambda x: [1.0, 2.0, 3.0], dtype=dtype) + elif issubclass(vectorizer_, AzureOpenAITextVectorizer): + vectorizer = vectorizer_( + model=os.getenv( + "AZURE_OPENAI_DEPLOYMENT_NAME", "text-embedding-ada-002" + ), + dtype=dtype, + ) + else: + vectorizer = vectorizer_(dtype=dtype) + + assert vectorizer.dtype == dtype + + with pytest.raises(ValueError): + vectorizer = vectorizer(dtype="float25") + + with pytest.raises(ValueError): + vectorizer = vectorizer(dtype=7) + + with pytest.raises(ValueError): + vectorizer = vectorizer(dtype=None) @pytest.fixture( From 0ef54991c620901eee61538816dae461dbbd5e08 Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Wed, 12 Feb 2025 14:16:50 -0500 Subject: [PATCH 08/10] fix test --- tests/integration/test_vectorizers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_vectorizers.py b/tests/integration/test_vectorizers.py index 26c1f21f..6ac67e9c 100644 --- a/tests/integration/test_vectorizers.py +++ b/tests/integration/test_vectorizers.py @@ -288,13 +288,13 @@ def test_dtypes(vectorizer_): assert vectorizer.dtype == dtype with pytest.raises(ValueError): - vectorizer = vectorizer(dtype="float25") + vectorizer = vectorizer_(dtype="float25") with pytest.raises(ValueError): - vectorizer = vectorizer(dtype=7) + vectorizer = vectorizer_(dtype=7) with pytest.raises(ValueError): - vectorizer = vectorizer(dtype=None) + vectorizer = vectorizer_(dtype=None) @pytest.fixture( From 1436e2400380475ac6d770042dbf366003600309 Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Wed, 12 Feb 2025 14:51:32 -0500 Subject: [PATCH 09/10] fix test --- tests/integration/test_vectorizers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_vectorizers.py b/tests/integration/test_vectorizers.py index 6ac67e9c..9e6e06f6 100644 --- a/tests/integration/test_vectorizers.py +++ b/tests/integration/test_vectorizers.py @@ -262,7 +262,7 @@ def test_dtypes(vectorizer_): # test dtype defaults to float32 if issubclass(vectorizer_, CustomTextVectorizer): vectorizer = vectorizer_(embed=lambda x, input_type=None: [1.0, 2.0, 3.0]) - elif issubclass(vectorizer, AzureOpenAITextVectorizer): + elif issubclass(vectorizer_, AzureOpenAITextVectorizer): vectorizer = vectorizer_( model=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME", "text-embedding-ada-002") ) From 01b544ffd3686d698d05bec52a31231bad493c64 Mon Sep 17 00:00:00 2001 From: Tyler Hutcherson Date: Wed, 12 Feb 2025 15:13:18 -0500 Subject: [PATCH 10/10] break out dtype tests --- tests/integration/test_vectorizers.py | 41 ++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_vectorizers.py b/tests/integration/test_vectorizers.py index 9e6e06f6..65a07f3b 100644 --- a/tests/integration/test_vectorizers.py +++ b/tests/integration/test_vectorizers.py @@ -258,7 +258,7 @@ def bad_return_type(text: str) -> str: VoyageAITextVectorizer, ], ) -def test_dtypes(vectorizer_): +def test_default_dtype(vectorizer_): # test dtype defaults to float32 if issubclass(vectorizer_, CustomTextVectorizer): vectorizer = vectorizer_(embed=lambda x, input_type=None: [1.0, 2.0, 3.0]) @@ -271,6 +271,23 @@ def test_dtypes(vectorizer_): assert vectorizer.dtype == "float32" + +@pytest.mark.requires_api_keys +@pytest.mark.parametrize( + "vectorizer_", + [ + AzureOpenAITextVectorizer, + BedrockTextVectorizer, + CohereTextVectorizer, + CustomTextVectorizer, + HFTextVectorizer, + MistralAITextVectorizer, + OpenAITextVectorizer, + VertexAITextVectorizer, + VoyageAITextVectorizer, + ], +) +def test_other_dtypes(vectorizer_): # test initializing dtype in constructor for dtype in ["float16", "float32", "float64", "bfloat16"]: if issubclass(vectorizer_, CustomTextVectorizer): @@ -287,14 +304,30 @@ def test_dtypes(vectorizer_): assert vectorizer.dtype == dtype + +@pytest.mark.requires_api_keys +@pytest.mark.parametrize( + "vectorizer_", + [ + AzureOpenAITextVectorizer, + BedrockTextVectorizer, + CohereTextVectorizer, + HFTextVectorizer, + MistralAITextVectorizer, + OpenAITextVectorizer, + VertexAITextVectorizer, + VoyageAITextVectorizer, + ], +) +def test_bad_dtypes(vectorizer_): with pytest.raises(ValueError): - vectorizer = vectorizer_(dtype="float25") + vectorizer_(dtype="float25") with pytest.raises(ValueError): - vectorizer = vectorizer_(dtype=7) + vectorizer_(dtype=7) with pytest.raises(ValueError): - vectorizer = vectorizer_(dtype=None) + vectorizer_(dtype=None) @pytest.fixture(