# type stubs for some third-party libraries #3443
**Changes to `pyproject.toml`**
```diff
@@ -56,7 +56,7 @@ dbos = ["pydantic-ai-slim[dbos]=={{ version }}"]
 prefect = ["pydantic-ai-slim[prefect]=={{ version }}"]
 outlines-transformers = ["pydantic-ai-slim[outlines-transformers]=={{ version }}"]
 outlines-llamacpp = ["pydantic-ai-slim[outlines-llamacpp]=={{ version }}"]
-outlines-mlxlm = ["pydantic-ai-slim[outlines-mlxlm]=={{ version }}"]
+outlines-mlxlm = ["pydantic-ai-slim[outlines-mlxlm]=={{ version }}; platform_system == 'Darwin' and platform_machine == 'arm64'"]
 outlines-sglang = ["pydantic-ai-slim[outlines-sglang]=={{ version }}"]
 outlines-vllm-offline = ["pydantic-ai-slim[outlines-vllm-offline]=={{ version }}"]
```
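The environment marker added above restricts the `outlines-mlxlm` extra to Apple Silicon macOS, where MLX runs. As a minimal sketch of how such a PEP 508 marker evaluates (using the `packaging` library, which is an assumed dev dependency; the marker string is copied from the diff):

```python
# Sketch: evaluate the PEP 508 marker from the diff against the current
# interpreter's environment. Requires the `packaging` package.
from packaging.markers import Marker

marker = Marker("platform_system == 'Darwin' and platform_machine == 'arm64'")
print(marker.evaluate())  # True only on Apple Silicon macOS
```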
```diff
@@ -142,6 +142,7 @@ include = [
     "clai/**/*.py",
     "tests/**/*.py",
     "docs/**/*.py",
+    "stubs/**/*.pyi",
 ]

 [tool.ruff.lint]
```
```diff
@@ -186,8 +187,10 @@ quote-style = "single"
 "examples/**/*.py" = ["D101", "D103"]
 "tests/**/*.py" = ["D"]
 "docs/**/*.py" = ["D"]
+"stubs/**/*.pyi" = ["F401", "PYI044", "PYI035", "ANN401"]

 [tool.pyright]
+stubPath = "stubs"
 pythonVersion = "3.12"
 typeCheckingMode = "strict"
 reportMissingTypeStubs = false
```

**Collaborator:** Can we call it […]?

**Contributor (author):** There is no PEP rule demanding a […]. The name […]
```diff
@@ -217,6 +220,7 @@ exclude = [
 [tool.mypy]
 files = "tests/typed_agent.py,tests/typed_graph.py"
 strict = true
+mypy_path = "stubs"

 [tool.pytest.ini_options]
 testpaths = ["tests", "docs/.hooks"]
```
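Together, `stubPath` (pyright) and `mypy_path` (mypy) make both checkers resolve imports such as `llama_cpp` from `./stubs` before falling back to the installed packages. As a sketch, mypy can be driven programmatically on the configured files (assumes it runs from the repo root so `pyproject.toml` and its `[tool.mypy]` section are discovered):

```python
# Sketch: run mypy on the files listed under [tool.mypy]; when invoked from
# the repo root, mypy discovers pyproject.toml and thus mypy_path = "stubs".
from mypy import api

stdout, stderr, exit_code = api.run(['tests/typed_agent.py', 'tests/typed_graph.py'])
print(stdout or stderr, f'exit code: {exit_code}')
```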
**New file: `stubs/README.md`** (path inferred from the review thread below)
Stub files (`*.pyi`) contain type hints used only by type checkers, not at runtime. They were introduced in [PEP 484](https://peps.python.org/pep-0484/#stub-files). For example, the [`typeshed`](https://github.com/python/typeshed) repository maintains a collection of such stubs for the Python standard library and some third-party libraries.

The `./stubs` folder contains type information only for the parts of third-party dependencies used in the `pydantic-ai` codebase. These stubs must be manually maintained: when a dependency's API changes, both the codebase and the stubs need to be updated. There are two ways to update the stubs:

(1) **Manual update:** Check the dependency's source code and copy the type information to `./stubs`. Take, for example, the `from_pretrained()` method of the `Llama` class in `llama-cpp-python`. Its [source code](https://github.com/abetlen/llama-cpp-python/blob/main/llama_cpp/llama.py#L2240) contains the type information that is copied to `./stubs/llama_cpp.pyi`. This eliminates the need for `# type: ignore` comments in the codebase.

(2) **Update with AI coding assistants:** Most dependencies maintain `llms.txt` and `llms-full.txt` files with their documentation. This information is compiled by [Context7](https://context7.com); the `llama-cpp-python` library, for example, is documented [here](https://github.com/abetlen/llama-cpp-python). MCP servers such as [the one by Upstash](https://github.com/upstash/context7) give AI coding assistants access to Context7, and assistants such as VS Code Copilot or Cursor can reliably generate and update the stubs.
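To make the README's `from_pretrained()` example concrete, here is a sketch of the call the `llama_cpp` stub covers; with the stub in place, pyright infers `Llama` for the result without a `# type: ignore` (the repo id and filename below are illustrative, not taken from the codebase):

```python
# Sketch: this call is typed by stubs/llama_cpp.pyi; the model coordinates
# are illustrative placeholders.
from llama_cpp import Llama

llama = Llama.from_pretrained(
    repo_id='Qwen/Qwen2-0.5B-Instruct-GGUF',
    filename='*q8_0.gguf',
)
```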
**New file: `stubs/llama_cpp.pyi`** (named in the README above)
```python
from collections.abc import Sequence
from os import PathLike
from typing import Any, Literal

from typing_extensions import Self

class Llama:
    def __init__(self, *args: Any, **kwargs: Any) -> None: ...
    @classmethod
    def from_pretrained(
        cls,
        repo_id: str,
        filename: str | None = None,
        additional_files: Sequence[str] | None = None,
        local_dir: str | PathLike[str] | None = None,
        local_dir_use_symlinks: bool | Literal['auto'] = 'auto',
        cache_dir: str | PathLike[str] | None = None,
        **kwargs: Any,
    ) -> Self: ...
```

**Collaborator:** These will inevitably get outdated -- could you add some docs (can just be a readme in this dir) on how you generated them, how we can validate they're still valid, and how to regenerate/update them?

**Contributor (author):** @DouweM I have added a README. Either update any API changes manually from the source code, or give Copilot access to a Context7 MCP server and let the coding agent do the updates.

**Contributor (author):** FYI, the rebase introduced new […]
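On the validation question: mypy ships a `stubtest` tool that compares stubs against the runtime package. Since these stubs are deliberately partial, `--ignore-missing-stub` is needed so omitted runtime symbols are not reported. A sketch (assumes `llama-cpp-python` and `mypy` are installed and the command runs from the repo root; exact stub discovery may additionally need `MYPYPATH=stubs`):

```python
# Sketch: drive mypy's stubtest from Python. --mypy-config-file points it at
# the repo config; --ignore-missing-stub tolerates partial stubs.
import subprocess
import sys

subprocess.run(
    [
        sys.executable, '-m', 'mypy.stubtest', 'llama_cpp',
        '--mypy-config-file', 'pyproject.toml',
        '--ignore-missing-stub',
    ],
    check=True,
)
```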
**New file: `stubs/mlx/__init__.pyi`** (path inferred)

```python
from typing import Any

from . import nn

# mlx is imported as a package, primarily for mlx.nn
__all__: list[str] = []
```
**New file: `stubs/mlx/nn.pyi`** (path inferred)

```python
from typing import Any

class Module: ...
```
**New file: `stubs/mlx_lm.pyi`** (path inferred)

```python
from typing import Any

from mlx.nn import Module
from transformers.tokenization_utils import PreTrainedTokenizer

def load(model_path: str | None = None, *args: Any, **kwargs: Any) -> tuple[Module, PreTrainedTokenizer]: ...
def generate_step(*args: Any, **kwargs: Any) -> Any: ...
```
**New file: `stubs/outlines/__init__.pyi`** (path inferred)

```python
from . import models

__all__: list[str] = []
```
**New file: `stubs/outlines/inputs.pyi`** (path inferred)

```python
from __future__ import annotations

from collections.abc import Sequence
from typing import Any

from PIL.Image import Image as PILImage

class Chat:
    def __init__(self, messages: list[dict[str, Any]] | None = None) -> None: ...
    def add_system_message(self, content: str) -> None: ...
    def add_user_message(self, content: str | Sequence[str | Image]) -> None: ...
    def add_assistant_message(self, content: str | list[str | Image]) -> None: ...
    def extend(self, messages: list[dict[str, Any]]) -> None: ...
    def append(self, message: dict[str, Any]) -> None: ...
    def pop(self) -> dict[str, Any] | None: ...

class Image:
    def __init__(self, image: PILImage) -> None: ...
```
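A sketch of how these input types compose under the stubbed signatures (the `outlines.inputs` import path is assumed from the stub layout; the message content and image are placeholders):

```python
# Sketch: exercising the Chat/Image stubs; import path assumed from the
# stub layout, content illustrative.
from PIL import Image as pil_image
from outlines.inputs import Chat, Image

chat = Chat()
chat.add_system_message('You are terse.')
chat.add_user_message(['Describe this image:', Image(pil_image.new('RGB', (8, 8)))])
print(chat.pop())
```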
**New file: `stubs/outlines/models/__init__.pyi`** (path inferred)

```python
from . import base, llamacpp, mlxlm, sglang, transformers, vllm_offline

__all__: list[str] = []
```
**New file: `stubs/outlines/models/base.pyi`**

```python
from collections.abc import AsyncIterable, Iterable
from typing import Any

class Model:
    def __call__(self, *args: Any, **kwargs: Any) -> Any: ...
    def stream(self, *args: Any, **kwargs: Any) -> Iterable[Any]: ...

class AsyncModel:
    async def __call__(self, *args: Any, **kwargs: Any) -> Any: ...
    def stream(self, *args: Any, **kwargs: Any) -> AsyncIterable[Any]: ...
```
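The sync/async split mirrors how the concrete models below are consumed. A sketch of consumers typed against just these base stubs:

```python
# Sketch: functions typed against the base stubs only; Model.stream()
# yields synchronously, AsyncModel.stream() asynchronously.
from outlines.models.base import AsyncModel, Model

def drain(model: Model, prompt: str) -> list[str]:
    return [str(chunk) for chunk in model.stream(prompt)]

async def drain_async(model: AsyncModel, prompt: str) -> list[str]:
    return [str(chunk) async for chunk in model.stream(prompt)]
```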
**New file: `stubs/outlines/models/llamacpp.pyi`**

```python
from typing import TYPE_CHECKING

from outlines.models.base import Model

if TYPE_CHECKING:
    from llama_cpp import Llama

class LlamaCpp(Model): ...

def from_llamacpp(model: Llama) -> LlamaCpp: ...
```
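A sketch tying this to the `llama_cpp` stub above (the model path is illustrative):

```python
# Sketch: per the stubs, from_llamacpp() takes a llama_cpp.Llama and
# returns an outlines LlamaCpp model (model path illustrative).
from llama_cpp import Llama
from outlines.models.llamacpp import from_llamacpp

model = from_llamacpp(Llama(model_path='model.gguf'))
```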
**New file: `stubs/outlines/models/mlxlm.pyi`**

```python
from typing import Any

from mlx.nn import Module
from outlines.models.base import Model
from transformers.tokenization_utils import PreTrainedTokenizer

class MLXLM(Model):
    def __init__(self, *args: Any, **kwargs: Any) -> None: ...

def from_mlxlm(model: Module, tokenizer: PreTrainedTokenizer) -> MLXLM: ...
```
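A sketch showing why `mlx_lm.load()` was stubbed to return `tuple[Module, PreTrainedTokenizer]`: that pair is exactly what `from_mlxlm()` accepts (model id illustrative; Apple Silicon only):

```python
# Sketch: the stubbed signatures line up, so no ignores are needed here
# (model id illustrative; runs only on Apple Silicon macOS).
from mlx_lm import load
from outlines.models.mlxlm import from_mlxlm

model, tokenizer = load('mlx-community/SmolLM-135M-Instruct-4bit')
outlines_model = from_mlxlm(model, tokenizer)
```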
**New file: `stubs/outlines/models/sglang.pyi`**

```python
from typing import TYPE_CHECKING, Any, Union

from outlines.models.base import AsyncModel, Model

if TYPE_CHECKING:
    from openai import AsyncOpenAI, OpenAI

class SGLang(Model): ...
class AsyncSGLang(AsyncModel): ...

def from_sglang(client: OpenAI | AsyncOpenAI, *args: Any, **kwargs: Any) -> SGLang | AsyncSGLang: ...
```
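A sketch of the stubbed constructor; per the stub, the result type is the union `SGLang | AsyncSGLang` regardless of which client type is passed (server URL and API key are illustrative):

```python
# Sketch: an async OpenAI-compatible client pointed at an SGLang server
# (URL and api_key illustrative).
from openai import AsyncOpenAI
from outlines.models.sglang import from_sglang

client = AsyncOpenAI(base_url='http://localhost:30000/v1', api_key='unused')
model = from_sglang(client)
```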
**New file: `stubs/outlines/models/transformers.pyi`**

```python
from typing import Any

from outlines.models.base import Model
from transformers import AutoModelForCausalLM, AutoProcessor, AutoTokenizer, LlavaForConditionalGeneration
from transformers.modeling_utils import PreTrainedModel
from transformers.processing_utils import ProcessorMixin
from transformers.tokenization_utils import PreTrainedTokenizer

class Transformers(Model): ...
class TransformersMultiModal(Model): ...

def from_transformers(
    model: PreTrainedModel | AutoModelForCausalLM | LlavaForConditionalGeneration,
    tokenizer_or_processor: PreTrainedTokenizer | ProcessorMixin | AutoTokenizer | AutoProcessor,
    *,
    device_dtype: Any = None,
) -> Transformers | TransformersMultiModal: ...
```
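A sketch of the stubbed call: in the `transformers` stub below, the `Auto*` classes subclass the base types, so their `from_pretrained()` results satisfy `from_transformers()`'s parameter unions (model id illustrative):

```python
# Sketch: both arguments type-check against the stub's unions
# (model id illustrative).
from outlines.models.transformers import from_transformers
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained('gpt2')
tokenizer = AutoTokenizer.from_pretrained('gpt2')
outlines_model = from_transformers(model, tokenizer)
```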
**New file: `stubs/outlines/models/vllm_offline.pyi`**

```python
from typing import TYPE_CHECKING

from outlines.models.base import Model

if TYPE_CHECKING:
    from vllm import LLM

class VLLMOffline(Model): ...

def from_vllm_offline(model: LLM) -> VLLMOffline: ...
```
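A sketch of the stubbed offline path (model id illustrative):

```python
# Sketch: per the stubs, LLM(model) feeds straight into from_vllm_offline()
# (model id illustrative).
from outlines.models.vllm_offline import from_vllm_offline
from vllm import LLM

model = from_vllm_offline(LLM('microsoft/Phi-3-mini-4k-instruct'))
```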
**New file: `stubs/outlines/types/__init__.pyi`** (path inferred)

```python
from typing import Any

__all__: list[str] = []
```
**New file: `stubs/outlines/types/dsl.pyi`** (path inferred)

```python
from typing import Any

class JsonSchema:
    def __init__(self, schema: dict[str, Any]) -> None: ...
```
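A sketch of the stubbed constructor (the import path follows the inferred stub location; the schema is illustrative):

```python
# Sketch: JsonSchema wraps a plain JSON Schema dict per the stub
# (import path inferred, schema illustrative).
from outlines.types.dsl import JsonSchema

schema = JsonSchema({'type': 'object', 'properties': {'name': {'type': 'string'}}})
```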
**New file: `stubs/transformers/__init__.pyi`**

```python
from typing import Any

from typing_extensions import Self

from . import modeling_utils, processing_utils, tokenization_utils
from .modeling_utils import PreTrainedModel
from .processing_utils import ProcessorMixin
from .tokenization_utils import PreTrainedTokenizer

class AutoModelForCausalLM(PreTrainedModel):
    @classmethod
    def from_pretrained(cls, *args: Any, **kwargs: Any) -> Self: ...

class AutoTokenizer(PreTrainedTokenizer):
    @classmethod
    def from_pretrained(cls, *args: Any, **kwargs: Any) -> Self: ...

class AutoProcessor(ProcessorMixin):
    @classmethod
    def from_pretrained(cls, *args: Any, **kwargs: Any) -> Self: ...

class LlavaForConditionalGeneration(PreTrainedModel):
    @classmethod
    def from_pretrained(cls, *args: Any, **kwargs: Any) -> Self: ...

def from_pretrained(*args: Any, **kwargs: Any) -> Any: ...
```
**New file: `stubs/transformers/modeling_utils.pyi`**

```python
from typing import Any

class PreTrainedModel: ...
```
**New file: `stubs/transformers/processing_utils.pyi`**

```python
from typing import Any

class ProcessorMixin: ...
```
**New file: `stubs/transformers/tokenization_utils.pyi`**

```python
from typing import Any

class PreTrainedTokenizer:
    chat_template: str | None

    def __init__(self, *args: Any, **kwargs: Any) -> None: ...
```
**New file: `stubs/vllm/__init__.pyi`** (path inferred)

```python
from typing import Any

class LLM:
    def __init__(self, model: str, *args: Any, **kwargs: Any) -> None: ...
```
**New file: `stubs/vllm/sampling_params.pyi`** (path inferred)

```python
from typing import Any

class SamplingParams:
    max_tokens: int | None
    temperature: float | None
    top_p: float | None
    seed: int | None
    presence_penalty: float | None
    frequency_penalty: float | None
    logit_bias: dict[int, float] | None
    extra_args: dict[str, Any] | None

    def __init__(
        self,
        max_tokens: int | None = None,
        temperature: float | None = None,
        top_p: float | None = None,
        seed: int | None = None,
        presence_penalty: float | None = None,
        frequency_penalty: float | None = None,
        logit_bias: dict[int, float] | None = None,
        extra_args: dict[str, Any] | None = None,
        *args: Any,
        **kwargs: Any,
    ) -> None: ...
```
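A sketch constructing `SamplingParams` with a few of the stubbed fields (the module path is assumed from the stub layout):

```python
# Sketch: only stub-declared fields are used; the module path is assumed
# from the stub layout.
from vllm.sampling_params import SamplingParams

params = SamplingParams(max_tokens=128, temperature=0.2, seed=0)
```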
**Contributor (author):** @DouweM I have added validation of the stubs here.