diff --git a/Makefile b/Makefile
index 4e191a95c2..3a46e6e1d2 100644
--- a/Makefile
+++ b/Makefile
@@ -43,6 +43,7 @@ typecheck-pyright:
 .PHONY: typecheck-mypy
 typecheck-mypy:
 	uv run mypy
+	uv run mypy stubs/ --strict
 
 .PHONY: typecheck
 typecheck: typecheck-pyright ## Run static type checking
diff --git a/pydantic_ai_slim/pydantic_ai/models/outlines.py b/pydantic_ai_slim/pydantic_ai/models/outlines.py
index 5b439952c1..e2f16ffa0d 100644
--- a/pydantic_ai_slim/pydantic_ai/models/outlines.py
+++ b/pydantic_ai_slim/pydantic_ai/models/outlines.py
@@ -60,7 +60,7 @@
 )
 from outlines.models.vllm_offline import (
     VLLMOffline,
-    from_vllm_offline,  # pyright: ignore[reportUnknownVariableType]
+    from_vllm_offline,
 )
 from outlines.types.dsl import JsonSchema
 from PIL import Image as PILImage
@@ -393,7 +393,7 @@ def _format_vllm_offline_inference_kwargs(  # pragma: no cover
         self, model_settings: dict[str, Any]
     ) -> dict[str, Any]:
         """Select the model settings supported by the vLLMOffline model."""
-        from vllm.sampling_params import SamplingParams  # pyright: ignore
+        from vllm.sampling_params import SamplingParams
 
         supported_args = [
             'max_tokens',
diff --git a/pydantic_ai_slim/pyproject.toml b/pydantic_ai_slim/pyproject.toml
index 1b5909140d..262c54d7f2 100644
--- a/pydantic_ai_slim/pyproject.toml
+++ b/pydantic_ai_slim/pyproject.toml
@@ -78,7 +78,7 @@ bedrock = ["boto3>=1.40.14"]
 huggingface = ["huggingface-hub[inference]>=0.33.5"]
 outlines-transformers = ["outlines[transformers]>=1.0.0, <1.3.0; (sys_platform != 'darwin' or platform_machine != 'x86_64')", "transformers>=4.0.0", "pillow", "torch; (sys_platform != 'darwin' or platform_machine != 'x86_64')"]
 outlines-llamacpp = ["outlines[llamacpp]>=1.0.0, <1.3.0"]
-outlines-mlxlm = ["outlines[mlxlm]>=1.0.0, <1.3.0; (sys_platform != 'darwin' or platform_machine != 'x86_64')"]
+outlines-mlxlm = ["outlines[mlxlm]>=1.0.0, <1.3.0; platform_system == 'Darwin' and platform_machine == 'arm64'"]
 outlines-sglang = ["outlines[sglang]>=1.0.0, <1.3.0", "pillow"]
 outlines-vllm-offline = ["vllm; python_version < '3.12' and (sys_platform != 'darwin' or platform_machine != 'x86_64')", "torch; (sys_platform != 'darwin' or platform_machine != 'x86_64')", "outlines>=1.0.0, <1.3.0"]
 # Tools
diff --git a/pyproject.toml b/pyproject.toml
index 3c13afdece..695fec9e1c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,7 +56,7 @@ dbos = ["pydantic-ai-slim[dbos]=={{ version }}"]
 prefect = ["pydantic-ai-slim[prefect]=={{ version }}"]
 outlines-transformers = ["pydantic-ai-slim[outlines-transformers]=={{ version }}"]
 outlines-llamacpp = ["pydantic-ai-slim[outlines-llamacpp]=={{ version }}"]
-outlines-mlxlm = ["pydantic-ai-slim[outlines-mlxlm]=={{ version }}"]
+outlines-mlxlm = ["pydantic-ai-slim[outlines-mlxlm]=={{ version }}; platform_system == 'Darwin' and platform_machine == 'arm64'"]
 outlines-sglang = ["pydantic-ai-slim[outlines-sglang]=={{ version }}"]
 outlines-vllm-offline = ["pydantic-ai-slim[outlines-vllm-offline]=={{ version }}"]
 
@@ -142,6 +142,7 @@ include = [
     "clai/**/*.py",
     "tests/**/*.py",
     "docs/**/*.py",
+    "stubs/**/*.pyi",
 ]
 
 [tool.ruff.lint]
@@ -186,8 +187,10 @@ quote-style = "single"
 "examples/**/*.py" = ["D101", "D103"]
 "tests/**/*.py" = ["D"]
 "docs/**/*.py" = ["D"]
+"stubs/**/*.pyi" = ["F401", "PYI044", "PYI035", "ANN401"]
 
 [tool.pyright]
+stubPath = "stubs"
 pythonVersion = "3.12"
 typeCheckingMode = "strict"
 reportMissingTypeStubs = false
@@ -217,6 +220,7 @@ exclude = [
 [tool.mypy]
 files = "tests/typed_agent.py,tests/typed_graph.py"
 strict = true
+mypy_path = "stubs"
 
 [tool.pytest.ini_options]
 testpaths = ["tests", "docs/.hooks"]
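The extras changes above gate `outlines-mlxlm` behind a PEP 508 environment marker, so the MLX backend now resolves only on Apple Silicon macOS rather than merely being excluded on Intel Macs. A quick way to sanity-check such a marker is the `packaging` library (a minimal sketch; `packaging` is the marker implementation that pip vendors, not part of this PR):

```python
from packaging.markers import Marker

# The marker used for the outlines-mlxlm extra above.
marker = Marker("platform_system == 'Darwin' and platform_machine == 'arm64'")

# Evaluate against the current interpreter's environment...
print(marker.evaluate())
# ...or against explicit environments, here an Intel Mac and an ARM Linux box:
print(marker.evaluate({'platform_system': 'Darwin', 'platform_machine': 'x86_64'}))  # False
print(marker.evaluate({'platform_system': 'Linux', 'platform_machine': 'arm64'}))  # False
```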
diff --git a/stubs/README.md b/stubs/README.md
new file mode 100644
index 0000000000..7b90a80b3e
--- /dev/null
+++ b/stubs/README.md
@@ -0,0 +1,26 @@
+Stub files (`*.pyi`) contain type hints used only by type checkers, not at
+runtime. They were introduced in
+[PEP 484](https://peps.python.org/pep-0484/#stub-files). For example, the
+[`typeshed`](https://github.com/python/typeshed) repository maintains a
+collection of such stubs for the Python standard library and some third-party
+libraries.
+
+The `./stubs` folder contains type information only for the parts of third-party
+dependencies used in the `pydantic-ai` codebase. These stubs must be manually
+maintained: when a dependency's API changes, both the codebase and the stubs
+need to be updated. There are two ways to update the stubs:
+
+(1) **Manual update:** Check the dependency's source code and copy the type
+information to `./stubs`. Take, for example, the `from_pretrained()` method of
+the `Llama` class in `llama-cpp-python`. Its
+[source code](https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/llama.py#L2240)
+contains the type information that is copied into `./stubs/llama_cpp.pyi`. This
+eliminates the need for `# type: ignore` comments in the codebase.
+
+(2) **Update with AI coding assistants:** Many dependencies maintain `llms.txt`
+and `llms-full.txt` files with their documentation, and this information is
+compiled by [Context7](https://context7.com). For example, the
+`llama-cpp-python` library is documented [here](https://github.com/abetlen/llama-cpp-python).
+MCP servers such as [this one by Upstash](https://github.com/upstash/context7)
+give AI coding assistants access to Context7, so assistants such as VS Code
+Copilot or Cursor can generate and update the stubs from that documentation.
diff --git a/stubs/llama_cpp.pyi b/stubs/llama_cpp.pyi
new file mode 100644
index 0000000000..f324f07254
--- /dev/null
+++ b/stubs/llama_cpp.pyi
@@ -0,0 +1,19 @@
+from collections.abc import Sequence
+from os import PathLike
+from typing import Any, Literal
+
+from typing_extensions import Self
+
+class Llama:
+    def __init__(self, *args: Any, **kwargs: Any) -> None: ...
+    @classmethod
+    def from_pretrained(
+        cls,
+        repo_id: str,
+        filename: str | None = None,
+        additional_files: Sequence[str] | None = None,
+        local_dir: str | PathLike[str] | None = None,
+        local_dir_use_symlinks: bool | Literal['auto'] = 'auto',
+        cache_dir: str | PathLike[str] | None = None,
+        **kwargs: Any,
+    ) -> Self: ...
diff --git a/stubs/mlx/__init__.pyi b/stubs/mlx/__init__.pyi
new file mode 100644
index 0000000000..9299184781
--- /dev/null
+++ b/stubs/mlx/__init__.pyi
@@ -0,0 +1,6 @@
+from typing import Any
+
+from . import nn
+
+# mlx is imported as a package, primarily for mlx.nn
+__all__: list[str] = []
diff --git a/stubs/mlx/nn.pyi b/stubs/mlx/nn.pyi
new file mode 100644
index 0000000000..db66a81e5f
--- /dev/null
+++ b/stubs/mlx/nn.pyi
@@ -0,0 +1,3 @@
+from typing import Any
+
+class Module: ...
diff --git a/stubs/mlx_lm.pyi b/stubs/mlx_lm.pyi
new file mode 100644
index 0000000000..ea23cb4207
--- /dev/null
+++ b/stubs/mlx_lm.pyi
@@ -0,0 +1,7 @@
+from typing import Any
+
+from mlx.nn import Module
+from transformers.tokenization_utils import PreTrainedTokenizer
+
+def load(model_path: str | None = None, *args: Any, **kwargs: Any) -> tuple[Module, PreTrainedTokenizer]: ...
+def generate_step(*args: Any, **kwargs: Any) -> Any: ...
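With these stubs on the configured `stubPath`/`mypy_path`, call sites that previously carried suppressions type-check cleanly. A minimal sketch of the `Llama.from_pretrained()` call described in the README (it assumes `llama-cpp-python` is installed at runtime; the repo and filename are the ones the test fixtures below use):

```python
from llama_cpp import Llama

# Pyright now resolves this classmethod via stubs/llama_cpp.pyi and infers
# `llama: Llama`; previously the call needed a `# type: ignore` comment.
llama = Llama.from_pretrained(
    repo_id='M4-ai/TinyMistral-248M-v2-Instruct-GGUF',
    filename='TinyMistral-248M-v2-Instruct.Q4_K_M.gguf',
)
```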
diff --git a/stubs/outlines/__init__.pyi b/stubs/outlines/__init__.pyi
new file mode 100644
index 0000000000..aa54175662
--- /dev/null
+++ b/stubs/outlines/__init__.pyi
@@ -0,0 +1,3 @@
+from . import models
+
+__all__: list[str] = []
diff --git a/stubs/outlines/inputs.pyi b/stubs/outlines/inputs.pyi
new file mode 100644
index 0000000000..a62efbc8fa
--- /dev/null
+++ b/stubs/outlines/inputs.pyi
@@ -0,0 +1,18 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from typing import Any
+
+from PIL.Image import Image as PILImage
+
+class Chat:
+    def __init__(self, messages: list[dict[str, Any]] | None = None) -> None: ...
+    def add_system_message(self, content: str) -> None: ...
+    def add_user_message(self, content: str | Sequence[str | Image]) -> None: ...
+    def add_assistant_message(self, content: str | list[str | Image]) -> None: ...
+    def extend(self, messages: list[dict[str, Any]]) -> None: ...
+    def append(self, message: dict[str, Any]) -> None: ...
+    def pop(self) -> dict[str, Any] | None: ...
+
+class Image:
+    def __init__(self, image: PILImage) -> None: ...
diff --git a/stubs/outlines/models/__init__.pyi b/stubs/outlines/models/__init__.pyi
new file mode 100644
index 0000000000..b4931a7631
--- /dev/null
+++ b/stubs/outlines/models/__init__.pyi
@@ -0,0 +1,3 @@
+from . import base, llamacpp, mlxlm, sglang, transformers, vllm_offline
+
+__all__: list[str] = []
diff --git a/stubs/outlines/models/base.pyi b/stubs/outlines/models/base.pyi
new file mode 100644
index 0000000000..4bb8329e8d
--- /dev/null
+++ b/stubs/outlines/models/base.pyi
@@ -0,0 +1,10 @@
+from collections.abc import AsyncIterable, Iterable
+from typing import Any
+
+class Model:
+    def __call__(self, *args: Any, **kwargs: Any) -> Any: ...
+    def stream(self, *args: Any, **kwargs: Any) -> Iterable[Any]: ...
+
+class AsyncModel:
+    async def __call__(self, *args: Any, **kwargs: Any) -> Any: ...
+    def stream(self, *args: Any, **kwargs: Any) -> AsyncIterable[Any]: ...
diff --git a/stubs/outlines/models/llamacpp.pyi b/stubs/outlines/models/llamacpp.pyi
new file mode 100644
index 0000000000..650576fb26
--- /dev/null
+++ b/stubs/outlines/models/llamacpp.pyi
@@ -0,0 +1,10 @@
+from typing import TYPE_CHECKING
+
+from outlines.models.base import Model
+
+if TYPE_CHECKING:
+    from llama_cpp import Llama
+
+class LlamaCpp(Model): ...
+
+def from_llamacpp(model: Llama) -> LlamaCpp: ...
diff --git a/stubs/outlines/models/mlxlm.pyi b/stubs/outlines/models/mlxlm.pyi
new file mode 100644
index 0000000000..77a1a5686a
--- /dev/null
+++ b/stubs/outlines/models/mlxlm.pyi
@@ -0,0 +1,10 @@
+from typing import Any
+
+from mlx.nn import Module
+from outlines.models.base import Model
+from transformers.tokenization_utils import PreTrainedTokenizer
+
+class MLXLM(Model):
+    def __init__(self, *args: Any, **kwargs: Any) -> None: ...
+
+def from_mlxlm(model: Module, tokenizer: PreTrainedTokenizer) -> MLXLM: ...
diff --git a/stubs/outlines/models/sglang.pyi b/stubs/outlines/models/sglang.pyi
new file mode 100644
index 0000000000..c72d500eac
--- /dev/null
+++ b/stubs/outlines/models/sglang.pyi
@@ -0,0 +1,11 @@
+from typing import TYPE_CHECKING, Any, Union
+
+from outlines.models.base import AsyncModel, Model
+
+if TYPE_CHECKING:
+    from openai import AsyncOpenAI, OpenAI
+
+class SGLang(Model): ...
+class AsyncSGLang(AsyncModel): ...
+
+def from_sglang(client: OpenAI | AsyncOpenAI, *args: Any, **kwargs: Any) -> SGLang | AsyncSGLang: ...
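The `outlines.inputs` stub above is enough for a type checker to validate typical message-building code. A small sketch based only on the stubbed signatures (the runtime behavior comes from the `outlines` package itself):

```python
from PIL import Image as PILImage

from outlines.inputs import Chat, Image

chat = Chat()
chat.add_system_message('You are a concise assistant.')
chat.add_user_message('Hello!')

# Multimodal input: Image wraps a PIL image, and add_user_message accepts a
# mixed sequence of strings and images per the stubbed signature.
pil_image = PILImage.new('RGB', (8, 8))
chat.add_user_message(['Describe this image.', Image(pil_image)])
```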
diff --git a/stubs/outlines/models/transformers.pyi b/stubs/outlines/models/transformers.pyi
new file mode 100644
index 0000000000..7094d2c9ce
--- /dev/null
+++ b/stubs/outlines/models/transformers.pyi
@@ -0,0 +1,17 @@
+from typing import Any
+
+from outlines.models.base import Model
+from transformers import AutoModelForCausalLM, AutoProcessor, AutoTokenizer, LlavaForConditionalGeneration
+from transformers.modeling_utils import PreTrainedModel
+from transformers.processing_utils import ProcessorMixin
+from transformers.tokenization_utils import PreTrainedTokenizer
+
+class Transformers(Model): ...
+class TransformersMultiModal(Model): ...
+
+def from_transformers(
+    model: PreTrainedModel | AutoModelForCausalLM | LlavaForConditionalGeneration,
+    tokenizer_or_processor: PreTrainedTokenizer | ProcessorMixin | AutoTokenizer | AutoProcessor,
+    *,
+    device_dtype: Any = None,
+) -> Transformers | TransformersMultiModal: ...
diff --git a/stubs/outlines/models/vllm_offline.pyi b/stubs/outlines/models/vllm_offline.pyi
new file mode 100644
index 0000000000..e5cb85cffe
--- /dev/null
+++ b/stubs/outlines/models/vllm_offline.pyi
@@ -0,0 +1,10 @@
+from typing import TYPE_CHECKING
+
+from outlines.models.base import Model
+
+if TYPE_CHECKING:
+    from vllm import LLM
+
+class VLLMOffline(Model): ...
+
+def from_vllm_offline(model: LLM) -> VLLMOffline: ...
diff --git a/stubs/outlines/types/__init__.pyi b/stubs/outlines/types/__init__.pyi
new file mode 100644
index 0000000000..041e97f748
--- /dev/null
+++ b/stubs/outlines/types/__init__.pyi
@@ -0,0 +1,3 @@
+from typing import Any
+
+__all__: list[str] = []
diff --git a/stubs/outlines/types/dsl.pyi b/stubs/outlines/types/dsl.pyi
new file mode 100644
index 0000000000..b3e108548f
--- /dev/null
+++ b/stubs/outlines/types/dsl.pyi
@@ -0,0 +1,4 @@
+from typing import Any
+
+class JsonSchema:
+    def __init__(self, schema: dict[str, Any]) -> None: ...
diff --git a/stubs/transformers/__init__.pyi b/stubs/transformers/__init__.pyi
new file mode 100644
index 0000000000..1d04068382
--- /dev/null
+++ b/stubs/transformers/__init__.pyi
@@ -0,0 +1,26 @@
+from typing import Any
+
+from typing_extensions import Self
+
+from . import modeling_utils, processing_utils, tokenization_utils
+from .modeling_utils import PreTrainedModel
+from .processing_utils import ProcessorMixin
+from .tokenization_utils import PreTrainedTokenizer
+
+class AutoModelForCausalLM(PreTrainedModel):
+    @classmethod
+    def from_pretrained(cls, *args: Any, **kwargs: Any) -> Self: ...
+
+class AutoTokenizer(PreTrainedTokenizer):
+    @classmethod
+    def from_pretrained(cls, *args: Any, **kwargs: Any) -> Self: ...
+
+class AutoProcessor(ProcessorMixin):
+    @classmethod
+    def from_pretrained(cls, *args: Any, **kwargs: Any) -> Self: ...
+
+class LlavaForConditionalGeneration(PreTrainedModel):
+    @classmethod
+    def from_pretrained(cls, *args: Any, **kwargs: Any) -> Self: ...
+
+def from_pretrained(*args: Any, **kwargs: Any) -> Any: ...
diff --git a/stubs/transformers/modeling_utils.pyi b/stubs/transformers/modeling_utils.pyi
new file mode 100644
index 0000000000..bd1aafe2ae
--- /dev/null
+++ b/stubs/transformers/modeling_utils.pyi
@@ -0,0 +1,3 @@
+from typing import Any
+
+class PreTrainedModel: ...
diff --git a/stubs/transformers/processing_utils.pyi b/stubs/transformers/processing_utils.pyi
new file mode 100644
index 0000000000..6c0397bcf8
--- /dev/null
+++ b/stubs/transformers/processing_utils.pyi
@@ -0,0 +1,3 @@
+from typing import Any
+
+class ProcessorMixin: ...
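Together with the `outlines.models.transformers` stub, the `transformers` stubs above type the model-loading path end to end; the test fixtures below exercise exactly this shape. A sketch (it assumes the `outlines-transformers` extra is installed; the model name is the tiny one used by the tests):

```python
import outlines
import transformers

# Both from_pretrained() calls now return precise types via the stubs, and
# from_transformers() accepts them without `# type: ignore` comments.
hf_model = transformers.AutoModelForCausalLM.from_pretrained('erwanf/gpt2-mini', device_map='cpu')
hf_tokenizer = transformers.AutoTokenizer.from_pretrained('erwanf/gpt2-mini')
outlines_model = outlines.models.transformers.from_transformers(hf_model, hf_tokenizer)
```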
diff --git a/stubs/transformers/tokenization_utils.pyi b/stubs/transformers/tokenization_utils.pyi
new file mode 100644
index 0000000000..5afbd67b79
--- /dev/null
+++ b/stubs/transformers/tokenization_utils.pyi
@@ -0,0 +1,6 @@
+from typing import Any
+
+class PreTrainedTokenizer:
+    chat_template: str | None
+
+    def __init__(self, *args: Any, **kwargs: Any) -> None: ...
diff --git a/stubs/vllm/__init__.pyi b/stubs/vllm/__init__.pyi
new file mode 100644
index 0000000000..eb7907acfd
--- /dev/null
+++ b/stubs/vllm/__init__.pyi
@@ -0,0 +1,4 @@
+from typing import Any
+
+class LLM:
+    def __init__(self, model: str, *args: Any, **kwargs: Any) -> None: ...
diff --git a/stubs/vllm/sampling_params.pyi b/stubs/vllm/sampling_params.pyi
new file mode 100644
index 0000000000..651c824162
--- /dev/null
+++ b/stubs/vllm/sampling_params.pyi
@@ -0,0 +1,25 @@
+from typing import Any
+
+class SamplingParams:
+    max_tokens: int | None
+    temperature: float | None
+    top_p: float | None
+    seed: int | None
+    presence_penalty: float | None
+    frequency_penalty: float | None
+    logit_bias: dict[int, float] | None
+    extra_args: dict[str, Any] | None
+
+    def __init__(
+        self,
+        max_tokens: int | None = None,
+        temperature: float | None = None,
+        top_p: float | None = None,
+        seed: int | None = None,
+        presence_penalty: float | None = None,
+        frequency_penalty: float | None = None,
+        logit_bias: dict[int, float] | None = None,
+        extra_args: dict[str, Any] | None = None,
+        *args: Any,
+        **kwargs: Any,
+    ) -> None: ...
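The `SamplingParams` stub's attributes line up with the model settings that `_format_vllm_offline_inference_kwargs()` maps onto vLLM sampling parameters. A sketch of a fully typed construction (it assumes `vllm` is installed at runtime):

```python
from vllm.sampling_params import SamplingParams

# Each keyword below corresponds to a stubbed attribute, so pyright checks
# the call under `typeCheckingMode = "strict"` without suppressions.
params = SamplingParams(max_tokens=256, temperature=0.7, top_p=0.9, seed=42)
```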
""" - async def __call__(self, model_input, output_type, backend, **inference_kwargs): # type: ignore[reportMissingParameterType] + async def __call__(self, model_input: Any, output_type: Any, backend: Any, **inference_kwargs: Any) -> str: return 'test' - async def stream(self, model_input, output_type, backend, **inference_kwargs): # type: ignore[reportMissingParameterType] + async def stream(self, model_input: Any, output_type: Any, backend: Any, **inference_kwargs: Any): for _ in range(2): yield 'test' - async def generate(self, model_input, output_type, **inference_kwargs): # type: ignore[reportMissingParameterType] - ... # pragma: no cover + async def generate(self, model_input: Any, output_type: Any, **inference_kwargs: Any): ... # pragma: no cover - async def generate_batch(self, model_input, output_type, **inference_kwargs): # type: ignore[reportMissingParameterType] - ... # pragma: no cover + async def generate_batch( + self, model_input: Any, output_type: Any, **inference_kwargs: Any + ): ... # pragma: no cover - async def generate_stream(self, model_input, output_type, **inference_kwargs): # type: ignore[reportMissingParameterType] - ... # pragma: no cover + async def generate_stream( + self, model_input: Any, output_type: Any, **inference_kwargs: Any + ): ... # pragma: no cover return OutlinesModel(MockOutlinesAsyncModel(), provider=OutlinesProvider()) @pytest.fixture def transformers_model() -> OutlinesModel: - hf_model = transformers.AutoModelForCausalLM.from_pretrained( # type: ignore + hf_model = transformers.AutoModelForCausalLM.from_pretrained( 'erwanf/gpt2-mini', device_map='cpu', ) - hf_tokenizer = transformers.AutoTokenizer.from_pretrained('erwanf/gpt2-mini') # type: ignore + hf_tokenizer = transformers.AutoTokenizer.from_pretrained('erwanf/gpt2-mini') chat_template = '{% for message in messages %}{{ message.role }}: {{ message.content }}{% endfor %}' hf_tokenizer.chat_template = chat_template outlines_model = outlines.models.transformers.from_transformers( - hf_model, # type: ignore[reportUnknownArgumentType] - hf_tokenizer, # type: ignore + hf_model, + hf_tokenizer, ) return OutlinesModel(outlines_model, provider=OutlinesProvider()) @pytest.fixture def transformers_multimodal_model() -> OutlinesModel: - hf_model = transformers.LlavaForConditionalGeneration.from_pretrained( # type: ignore + hf_model = transformers.LlavaForConditionalGeneration.from_pretrained( 'trl-internal-testing/tiny-LlavaForConditionalGeneration', device_map='cpu', ) - hf_processor = transformers.AutoProcessor.from_pretrained( # type: ignore - 'trl-internal-testing/tiny-LlavaForConditionalGeneration' - ) + hf_processor = transformers.AutoProcessor.from_pretrained('trl-internal-testing/tiny-LlavaForConditionalGeneration') outlines_model = outlines.models.transformers.from_transformers( hf_model, - hf_processor, # type: ignore + hf_processor, ) return OutlinesModel(outlines_model, provider=OutlinesProvider()) @@ -154,7 +153,7 @@ def transformers_multimodal_model() -> OutlinesModel: @pytest.fixture def llamacpp_model() -> OutlinesModel: outlines_model_llamacpp = outlines.models.llamacpp.from_llamacpp( - llama_cpp.Llama.from_pretrained( # type: ignore + llama_cpp.Llama.from_pretrained( repo_id='M4-ai/TinyMistral-248M-v2-Instruct-GGUF', filename='TinyMistral-248M-v2-Instruct.Q4_K_M.gguf', ) @@ -164,9 +163,7 @@ def llamacpp_model() -> OutlinesModel: @pytest.fixture def mlxlm_model() -> OutlinesModel: # pragma: no cover - outlines_model = outlines.models.mlxlm.from_mlxlm( - 
*mlx_lm.load('mlx-community/SmolLM-135M-Instruct-4bit') # type: ignore - ) + outlines_model = outlines.models.mlxlm.from_mlxlm(*mlx_lm.load('mlx-community/SmolLM-135M-Instruct-4bit')) return OutlinesModel(outlines_model, provider=OutlinesProvider()) @@ -180,9 +177,7 @@ def sglang_model() -> OutlinesModel: @pytest.fixture def vllm_model_offline() -> OutlinesModel: # pragma: no cover - outlines_model = outlines.models.vllm_offline.from_vllm_offline( # type: ignore - vllm.LLM('microsoft/Phi-3-mini-4k-instruct') # type: ignore - ) + outlines_model = outlines.models.vllm_offline.from_vllm_offline(vllm.LLM('microsoft/Phi-3-mini-4k-instruct')) return OutlinesModel(outlines_model, provider=OutlinesProvider()) @@ -197,18 +192,18 @@ def binary_image() -> BinaryImage: pytest.param( 'from_transformers', lambda: ( - transformers.AutoModelForCausalLM.from_pretrained( # type: ignore + transformers.AutoModelForCausalLM.from_pretrained( 'erwanf/gpt2-mini', device_map='cpu', ), - transformers.AutoTokenizer.from_pretrained('erwanf/gpt2-mini'), # type: ignore + transformers.AutoTokenizer.from_pretrained('erwanf/gpt2-mini'), ), marks=skip_if_transformers_imports_unsuccessful, ), pytest.param( 'from_llamacpp', lambda: ( - llama_cpp.Llama.from_pretrained( # type: ignore + llama_cpp.Llama.from_pretrained( repo_id='M4-ai/TinyMistral-248M-v2-Instruct-GGUF', filename='TinyMistral-248M-v2-Instruct.Q4_K_M.gguf', ), @@ -217,7 +212,7 @@ def binary_image() -> BinaryImage: ), pytest.param( 'from_mlxlm', - lambda: mlx_lm.load('mlx-community/SmolLM-135M-Instruct-4bit'), # type: ignore + lambda: mlx_lm.load('mlx-community/SmolLM-135M-Instruct-4bit'), marks=skip_if_mlxlm_imports_unsuccessful, ), pytest.param( @@ -227,7 +222,7 @@ def binary_image() -> BinaryImage: ), pytest.param( 'from_vllm_offline', - lambda: (vllm.LLM('microsoft/Phi-3-mini-4k-instruct'),), # type: ignore + lambda: (vllm.LLM('microsoft/Phi-3-mini-4k-instruct'),), marks=skip_if_vllm_imports_unsuccessful, ), ] @@ -256,18 +251,18 @@ def test_init(model_loading_function_name: str, args: Callable[[], tuple[Any]]) pytest.param( 'from_transformers', lambda: ( - transformers.AutoModelForCausalLM.from_pretrained( # type: ignore + transformers.AutoModelForCausalLM.from_pretrained( 'erwanf/gpt2-mini', device_map='cpu', ), - transformers.AutoTokenizer.from_pretrained('erwanf/gpt2-mini'), # type: ignore + transformers.AutoTokenizer.from_pretrained('erwanf/gpt2-mini'), ), marks=skip_if_transformers_imports_unsuccessful, ), pytest.param( 'from_llamacpp', lambda: ( - llama_cpp.Llama.from_pretrained( # type: ignore + llama_cpp.Llama.from_pretrained( repo_id='M4-ai/TinyMistral-248M-v2-Instruct-GGUF', filename='TinyMistral-248M-v2-Instruct.Q4_K_M.gguf', ), @@ -276,7 +271,7 @@ def test_init(model_loading_function_name: str, args: Callable[[], tuple[Any]]) ), pytest.param( 'from_mlxlm', - lambda: mlx_lm.load('mlx-community/SmolLM-135M-Instruct-4bit'), # type: ignore + lambda: mlx_lm.load('mlx-community/SmolLM-135M-Instruct-4bit'), marks=skip_if_mlxlm_imports_unsuccessful, ), pytest.param( @@ -286,7 +281,7 @@ def test_init(model_loading_function_name: str, args: Callable[[], tuple[Any]]) ), pytest.param( 'from_vllm_offline', - lambda: (vllm.LLM('microsoft/Phi-3-mini-4k-instruct'),), # type: ignore + lambda: (vllm.LLM('microsoft/Phi-3-mini-4k-instruct'),), marks=skip_if_vllm_imports_unsuccessful, ), ] diff --git a/uv.lock b/uv.lock index 62d3f22778..705d66096b 100644 --- a/uv.lock +++ b/uv.lock @@ -3345,19 +3345,15 @@ wheels = [ { url = 
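The fixture rewrite above replaces untyped mock methods with explicit `Any` annotations and subclasses the newly exported `OutlinesAsyncBaseModel`. The same pattern in isolation (a sketch; per the fixture's docstring, the `generate*` methods exist only because they are abstract on the base class):

```python
from collections.abc import AsyncIterator
from typing import Any

from pydantic_ai.models.outlines import OutlinesAsyncBaseModel


class EchoAsyncModel(OutlinesAsyncBaseModel):
    """Fully annotated mock: no per-line `# type: ignore` comments needed."""

    async def __call__(self, model_input: Any, output_type: Any, backend: Any, **inference_kwargs: Any) -> str:
        return 'test'

    async def stream(self, model_input: Any, output_type: Any, backend: Any, **inference_kwargs: Any) -> AsyncIterator[str]:
        yield 'test'

    async def generate(self, model_input: Any, output_type: Any, **inference_kwargs: Any) -> Any: ...

    async def generate_batch(self, model_input: Any, output_type: Any, **inference_kwargs: Any) -> Any: ...

    async def generate_stream(self, model_input: Any, output_type: Any, **inference_kwargs: Any) -> Any: ...
```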
"https://files.pythonhosted.org/packages/a4/8a/743ff24a07f8cfd6fb14b3fe05f122f1d8e04e8a912b2f6d0e14369c8caf/mlx-0.29.3-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:340d46443fe0b1e5d84c1e36aa633310de70365ce79aefcaa6f618e62bd4b045", size = 548930, upload-time = "2025-10-17T19:16:49.872Z" }, { url = "https://files.pythonhosted.org/packages/b3/2a/af1b8391b6f543e59ca595f63aaddc33e320d3cc57a4c86ded6932d9dc3c/mlx-0.29.3-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:8449c0e7c221e38368a734a471e0c4b1a7fea072947c75e893a1ee214f208d34", size = 548928, upload-time = "2025-10-17T19:16:58.275Z" }, { url = "https://files.pythonhosted.org/packages/49/85/0c58bdc5733ba92f78f067fc25e131e34db46562719d7909cebfad9313c5/mlx-0.29.3-cp310-cp310-macosx_15_0_arm64.whl", hash = "sha256:1bba5203ed3f785167f5b8891c2e91ede23401586b0a723bfaf815a3ed450e3d", size = 548931, upload-time = "2025-10-17T19:16:52.198Z" }, - { url = "https://files.pythonhosted.org/packages/1b/53/a9648dff9544a201e9ed483f1e0b18cdd92d9be453d58f1fedfd99cde6e1/mlx-0.29.3-cp310-cp310-manylinux_2_35_x86_64.whl", hash = "sha256:ffb3a167da52baeb05756895298a2d582b5e9b9b9364b4a454b78b824a9fa482", size = 652400, upload-time = "2025-10-17T19:20:48.146Z" }, { url = "https://files.pythonhosted.org/packages/94/13/3e91a37fa55dc0e9114620729ab61b27f45ed59053fc77846cad2df54f21/mlx-0.29.3-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:0ffdf1f171c903adeaa688210ba39063059b102f3dcc52a64c2200d95d237f15", size = 549089, upload-time = "2025-10-17T19:17:00.446Z" }, { url = "https://files.pythonhosted.org/packages/13/01/ce008d14fbd2e22b694f568ab4014e14c979a2262c5f8c10e06d4806709f/mlx-0.29.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:e7d1d815be0d4a41e598bdb2992822dafd9ab0d59d4b88af760ee0b6584506b7", size = 549091, upload-time = "2025-10-17T19:16:54.428Z" }, { url = "https://files.pythonhosted.org/packages/72/1c/45642746d36e91e26f3401e9b7931f92d8cc1eb6015cc40218628f320747/mlx-0.29.3-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:d33bff69887fadfd85ce67b8e11318c2319984f3ad4157f871aa9d3beb9de972", size = 549092, upload-time = "2025-10-17T19:17:10.963Z" }, - { url = "https://files.pythonhosted.org/packages/cb/17/9c85fc6ebe6b8ad30c3e75c0cb869939df82146aa8728de1261adacc731d/mlx-0.29.3-cp311-cp311-manylinux_2_35_x86_64.whl", hash = "sha256:8ead74126ffcc1ae49f3b1e0e988620ffbb059c38184f4e9390e294808e2c614", size = 652523, upload-time = "2025-10-17T19:24:01.159Z" }, { url = "https://files.pythonhosted.org/packages/07/f5/14e12e219a2715296150d35f930dc3a6ff319cd60126408e563f03100113/mlx-0.29.3-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:86c62791ce930028d75c41b88b4e3ceb58f5f2e263ff9bfacda998b0c03d9544", size = 549516, upload-time = "2025-10-17T19:18:13.831Z" }, { url = "https://files.pythonhosted.org/packages/c6/e2/5177c80e8c33a8be89fa45fa0a839d5b6a5578687d0ec973bf03638a4e73/mlx-0.29.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:cddf6bcdc561094af6b3f0706f8768ecc5216a97eb6973e838c3ac2e2fca2cc8", size = 549509, upload-time = "2025-10-17T19:17:21.517Z" }, { url = "https://files.pythonhosted.org/packages/11/89/aa424217a7a0291b84f8969d504ac63f5af0ef60f248fe5562c3d6e44048/mlx-0.29.3-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:b2e1a249437d017a7425358420d28e641b7bc9c2650f3e013c1b1f4f239d8533", size = 549511, upload-time = "2025-10-17T19:16:54.227Z" }, - { url = "https://files.pythonhosted.org/packages/0e/c7/af484ab5a4864384dc8a5f6f8ade9a29bd6e7a652e535f2ca39cf473ce26/mlx-0.29.3-cp312-cp312-manylinux_2_35_x86_64.whl", hash = 
"sha256:f09f71ee958f04824b7c7b275a1c1deb052740f5e69eccbff6672e43d9d7f890", size = 649834, upload-time = "2025-10-17T19:23:59.474Z" }, { url = "https://files.pythonhosted.org/packages/fe/a2/078152b45aa8a23949a1b09601d0044f8bb4ab85e909e4475a440c21aaea/mlx-0.29.3-cp313-cp313-macosx_13_0_arm64.whl", hash = "sha256:d59eccf6a1e1e131becc5a3910504507862da3a4e9b7bd9e73a625515d767844", size = 549585, upload-time = "2025-10-17T19:17:01.872Z" }, { url = "https://files.pythonhosted.org/packages/ae/bb/869eaac4efaae033c13db5fddd6a8907b5d667d135a35a2e482b1af402ee/mlx-0.29.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:6642aa0a6dc2242c024fb8274d00631a7e7ffbdcef26148afd299b877c1e6a4a", size = 549586, upload-time = "2025-10-17T19:16:57.844Z" }, { url = "https://files.pythonhosted.org/packages/ad/76/196c248c2b2a471f795356564ad1d7dc40284160c8b66370ffadfd991fa1/mlx-0.29.3-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:ec0aef311fab10cb5f2c274afa6edf6c482636096a5f7886aba43676454aa462", size = 549586, upload-time = "2025-10-17T19:16:39.912Z" }, - { url = "https://files.pythonhosted.org/packages/f2/90/d481dd70b351e28718cfc9a0deb229a75e140abda3ed59284cf635f93f12/mlx-0.29.3-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:e217a99ece66832a2e631131df32e9feb047276b68ac59ca0ad63735842f6dd0", size = 649781, upload-time = "2025-10-17T19:21:26.075Z" }, ] [[package]] @@ -5385,7 +5381,7 @@ outlines-llamacpp = [ { name = "pydantic-ai-slim", extra = ["outlines-llamacpp"] }, ] outlines-mlxlm = [ - { name = "pydantic-ai-slim", extra = ["outlines-mlxlm"] }, + { name = "pydantic-ai-slim", extra = ["outlines-mlxlm"], marker = "platform_machine == 'arm64' and sys_platform == 'darwin'" }, ] outlines-sglang = [ { name = "pydantic-ai-slim", extra = ["outlines-sglang"] }, @@ -5450,7 +5446,7 @@ requires-dist = [ { name = "pydantic-ai-slim", extras = ["ag-ui", "anthropic", "bedrock", "cli", "cohere", "evals", "fastmcp", "google", "groq", "huggingface", "logfire", "mcp", "mistral", "openai", "retries", "temporal", "ui", "vertexai"], editable = "pydantic_ai_slim" }, { name = "pydantic-ai-slim", extras = ["dbos"], marker = "extra == 'dbos'", editable = "pydantic_ai_slim" }, { name = "pydantic-ai-slim", extras = ["outlines-llamacpp"], marker = "extra == 'outlines-llamacpp'", editable = "pydantic_ai_slim" }, - { name = "pydantic-ai-slim", extras = ["outlines-mlxlm"], marker = "extra == 'outlines-mlxlm'", editable = "pydantic_ai_slim" }, + { name = "pydantic-ai-slim", extras = ["outlines-mlxlm"], marker = "platform_machine == 'arm64' and sys_platform == 'darwin' and extra == 'outlines-mlxlm'", editable = "pydantic_ai_slim" }, { name = "pydantic-ai-slim", extras = ["outlines-sglang"], marker = "extra == 'outlines-sglang'", editable = "pydantic_ai_slim" }, { name = "pydantic-ai-slim", extras = ["outlines-transformers"], marker = "extra == 'outlines-transformers'", editable = "pydantic_ai_slim" }, { name = "pydantic-ai-slim", extras = ["outlines-vllm-offline"], marker = "extra == 'outlines-vllm-offline'", editable = "pydantic_ai_slim" }, @@ -5616,7 +5612,7 @@ outlines-llamacpp = [ { name = "outlines", extra = ["llamacpp"] }, ] outlines-mlxlm = [ - { name = "outlines", extra = ["mlxlm"], marker = "platform_machine != 'x86_64' or sys_platform != 'darwin'" }, + { name = "outlines", extra = ["mlxlm"], marker = "platform_machine == 'arm64' and sys_platform == 'darwin'" }, ] outlines-sglang = [ { name = "outlines", extra = ["sglang"] }, @@ -5682,7 +5678,7 @@ requires-dist = [ { name = "opentelemetry-api", specifier = ">=1.28.0" }, { name 
= "outlines", marker = "extra == 'outlines-vllm-offline'", specifier = ">=1.0.0,<1.3.0" }, { name = "outlines", extras = ["llamacpp"], marker = "extra == 'outlines-llamacpp'", specifier = ">=1.0.0,<1.3.0" }, - { name = "outlines", extras = ["mlxlm"], marker = "(platform_machine != 'x86_64' and extra == 'outlines-mlxlm') or (sys_platform != 'darwin' and extra == 'outlines-mlxlm')", specifier = ">=1.0.0,<1.3.0" }, + { name = "outlines", extras = ["mlxlm"], marker = "platform_machine == 'arm64' and sys_platform == 'darwin' and extra == 'outlines-mlxlm'", specifier = ">=1.0.0,<1.3.0" }, { name = "outlines", extras = ["sglang"], marker = "extra == 'outlines-sglang'", specifier = ">=1.0.0,<1.3.0" }, { name = "outlines", extras = ["transformers"], marker = "(platform_machine != 'x86_64' and extra == 'outlines-transformers') or (sys_platform != 'darwin' and extra == 'outlines-transformers')", specifier = ">=1.0.0,<1.3.0" }, { name = "pillow", marker = "extra == 'outlines-sglang'" },