From 89bfdc8841aa93f805e68757d511cfee725cb20f Mon Sep 17 00:00:00 2001
From: Miguel Grinberg <miguel.grinberg@gmail.com>
Date: Thu, 11 Apr 2024 15:53:43 +0100
Subject: [PATCH 01/12] basic type annotation support

---
 elasticsearch_dsl/document_base.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/elasticsearch_dsl/document_base.py b/elasticsearch_dsl/document_base.py
index 8abbc796..836bdf41 100644
--- a/elasticsearch_dsl/document_base.py
+++ b/elasticsearch_dsl/document_base.py
@@ -15,10 +15,11 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
+from datetime import date, datetime
 from fnmatch import fnmatch
 
 from .exceptions import ValidationException
-from .field import Field
+from .field import Field, Integer, Float, Boolean, Text, Binary, Date
 from .mapping import Mapping
 from .utils import DOC_META_FIELDS, ObjectBase
 
@@ -36,12 +37,30 @@ def __new__(cls, name, bases, attrs):
 
 
 class DocumentOptions:
+    type_annotation_map = {
+        int: (Integer, {}),
+        float: (Float, {}),
+        bool: (Boolean, {}),
+        str: (Text, {}),
+        bytes: (Binary, {}),
+        datetime: (Date, {}),
+        date: (Date, {"format": "yyyy-MM-dd"}),
+    }
+
     def __init__(self, name, bases, attrs):
         meta = attrs.pop("Meta", None)
 
         # create the mapping instance
         self.mapping = getattr(meta, "mapping", Mapping())
 
+        for name, type_ in attrs.get('__annotations__', {}).items():
+            if name not in attrs:
+                if type_ in self.type_annotation_map:
+                    field, field_args = self.type_annotation_map[type_]
+                    self.mapping.field(name, field(**field_args))
+                elif issubclass(type_, Field):
+                    self.mapping.field(name, type_())
+
         # register all declared fields into the mapping
         for name, value in list(attrs.items()):
             if isinstance(value, Field):

From 8d772e14a973085ccf2ee482783f12467ea63e2d Mon Sep 17 00:00:00 2001
From: Miguel Grinberg <miguel.grinberg@gmail.com>
Date: Thu, 13 Jun 2024 18:49:19 +0100
Subject: [PATCH 02/12] support for optional, list, and other type hints

---
 elasticsearch_dsl/document_base.py | 61 ++++++++++++++++++++++++------
 elasticsearch_dsl/field.py         | 21 ++++++++++
 elasticsearch_dsl/utils.py         |  6 +++
 3 files changed, 76 insertions(+), 12 deletions(-)

diff --git a/elasticsearch_dsl/document_base.py b/elasticsearch_dsl/document_base.py
index 836bdf41..90ca6f7d 100644
--- a/elasticsearch_dsl/document_base.py
+++ b/elasticsearch_dsl/document_base.py
@@ -17,9 +17,21 @@
 
 from datetime import date, datetime
 from fnmatch import fnmatch
+from typing import List, Optional
 
 from .exceptions import ValidationException
-from .field import Field, Integer, Float, Boolean, Text, Binary, Date
+from .field import (
+    Binary,
+    Boolean,
+    Date,
+    Field,
+    Float,
+    InstrumentedField,
+    Integer,
+    Nested,
+    Object,
+    Text,
+)
 from .mapping import Mapping
 from .utils import DOC_META_FIELDS, ObjectBase
 
@@ -35,6 +47,11 @@ def __new__(cls, name, bases, attrs):
         attrs["_doc_type"] = DocumentOptions(name, bases, attrs)
         return super().__new__(cls, name, bases, attrs)
 
+    def __getattr__(cls, attr):
+        if attr in cls._doc_type.mapping:
+            return InstrumentedField(attr, cls._doc_type.mapping[attr])
+        return super().__getattribute__(attr)
+
 
 class DocumentOptions:
     type_annotation_map = {
@@ -53,18 +70,38 @@ def __init__(self, name, bases, attrs):
         # create the mapping instance
         self.mapping = getattr(meta, "mapping", Mapping())
 
-        for name, type_ in attrs.get('__annotations__', {}).items():
-            if name not in attrs:
-                if type_ in self.type_annotation_map:
+        annotations = attrs.get("__annotations__", {})
+        fields = set([n for n in attrs if isinstance(attrs[n], Field)])
+        fields.update(annotations.keys())
+        for name in fields:
+            if name in attrs:
+                value = attrs[name]
+            else:
+                type_ = annotations[name]
+                required = True
+                multi = False
+                while hasattr(type_, "__origin__"):
+                    if type_.__origin__ == Optional:
+                        required = False
+                        type_ = type_.__args__[0]
+                    elif issubclass(type_.__origin__, List):
+                        multi = True
+                        type_ = type_.__args__[0]
+                if issubclass(type_, InnerDoc):
+                    field = Nested if multi else Object
+                    field_args = {}
+                elif type_ in self.type_annotation_map:
                     field, field_args = self.type_annotation_map[type_]
-                    self.mapping.field(name, field(**field_args))
-                elif issubclass(type_, Field):
-                    self.mapping.field(name, type_())
-
-        # register all declared fields into the mapping
-        for name, value in list(attrs.items()):
-            if isinstance(value, Field):
-                self.mapping.field(name, value)
+                elif not issubclass(type_, Field):
+                    raise TypeError(f"Cannot map type {type_}")
+                else:
+                    field = type_
+                    field_args = {}
+                field_args = {"multi": multi, "required": required, **field_args}
+                value = field(**field_args)
+            value._name = name
+            self.mapping.field(name, value)
+            if name in attrs:
                 del attrs[name]
 
         # add all the mappings for meta fields
diff --git a/elasticsearch_dsl/field.py b/elasticsearch_dsl/field.py
index 3f704480..3326b475 100644
--- a/elasticsearch_dsl/field.py
+++ b/elasticsearch_dsl/field.py
@@ -77,6 +77,8 @@ def __init__(self, multi=False, required=False, *args, **kwargs):
         """
         self._multi = multi
         self._required = required
+        self._name = None
+        self._parent = None
         super().__init__(*args, **kwargs)
 
     def __getitem__(self, subfield):
@@ -123,6 +125,25 @@ def to_dict(self):
         return value
 
 
+class InstrumentedField:
+    def __init__(self, name, field):
+        self._name = name
+        self._field = field
+
+    def __getattr__(self, attr):
+        f = None
+        try:
+            f = self._field[attr]
+        except KeyError:
+            pass
+        if isinstance(f, Field):
+            return InstrumentedField(f"{self._name}.{attr}", f)
+        return getattr(self._field, attr)
+
+    def __repr__(self):
+        return self._name
+
+
 class CustomField(Field):
     name = "custom"
     _coerce = True
diff --git a/elasticsearch_dsl/utils.py b/elasticsearch_dsl/utils.py
index 6e311316..cff53480 100644
--- a/elasticsearch_dsl/utils.py
+++ b/elasticsearch_dsl/utils.py
@@ -499,6 +499,12 @@ def __getattr__(self, name):
                 return value
             raise
 
+    def __setattr__(self, name, value):
+        if name in self.__class__._doc_type.mapping:
+            self._d_[name] = value
+        else:
+            super().__setattr__(name, value)
+
     def to_dict(self, skip_empty=True):
         out = {}
         for k, v in self._d_.items():

From 95b2a59a3a4b426685ccd8c5ebb29d03943cf320 Mon Sep 17 00:00:00 2001
From: Miguel Grinberg <miguel.grinberg@gmail.com>
Date: Thu, 13 Jun 2024 20:24:55 +0100
Subject: [PATCH 03/12] additional typing support

---
 elasticsearch_dsl/__init__.py      |  4 +-
 elasticsearch_dsl/document_base.py | 89 ++++++++++++++++++++++++++----
 elasticsearch_dsl/field.py         |  2 -
 examples/async/vectors.py          | 26 ++++-----
 examples/vectors.py                | 33 +++++------
 5 files changed, 107 insertions(+), 47 deletions(-)

diff --git a/elasticsearch_dsl/__init__.py b/elasticsearch_dsl/__init__.py
index e7de5319..fd4433c2 100644
--- a/elasticsearch_dsl/__init__.py
+++ b/elasticsearch_dsl/__init__.py
@@ -19,7 +19,7 @@
 from .aggs import A
 from .analysis import analyzer, char_filter, normalizer, token_filter, tokenizer
 from .document import AsyncDocument, Document
-from .document_base import InnerDoc, MetaField
+from .document_base import InnerDoc, M, MetaField, mapped_field
 from .exceptions import (
     ElasticsearchDslException,
     IllegalOperation,
@@ -148,6 +148,7 @@
     "Keyword",
     "Long",
     "LongRange",
+    "M",
     "Mapping",
     "MetaField",
     "MultiSearch",
@@ -178,6 +179,7 @@
     "char_filter",
     "connections",
     "construct_field",
+    "mapped_field",
     "normalizer",
     "token_filter",
     "tokenizer",
diff --git a/elasticsearch_dsl/document_base.py b/elasticsearch_dsl/document_base.py
index 90ca6f7d..7514ced6 100644
--- a/elasticsearch_dsl/document_base.py
+++ b/elasticsearch_dsl/document_base.py
@@ -17,7 +17,7 @@
 
 from datetime import date, datetime
 from fnmatch import fnmatch
-from typing import List, Optional
+from typing import TYPE_CHECKING, Any, Generic, List, Optional, TypeVar, Union, overload
 
 from .exceptions import ValidationException
 from .field import (
@@ -70,6 +70,34 @@ def __init__(self, name, bases, attrs):
         # create the mapping instance
         self.mapping = getattr(meta, "mapping", Mapping())
 
+        # register the document's fields, which can be given in a few formats:
+        #
+        # class MyDocument(Document):
+        #     # required field using native typing
+        #     # (str, int, float, bool, datetime, date)
+        #     field1: str
+        #
+        #     # optional field using native typing
+        #     field2: Optional[datetime]
+        #
+        #     # array field using native typing
+        #     field3: list[int]
+        #
+        #     # sub-object, same as Object(MyInnerDoc)
+        #     field4: MyInnerDoc
+        #
+        #     # nested sub-objects, same as Nested(MyInnerDoc)
+        #     field5: list[MyInnerDoc]
+        #
+        #     # use typing, but override with any stock or custom field
+        #     field6: bool = MyCustomField()
+        #
+        #     # best mypy and pyright typing support
+        #     field7: M[date]
+        #     field8: M[str] = mapped_field(MyCustomText())
+        #
+        #     # legacy format without Python typing
+        #     field8 = Text()
         annotations = attrs.get("__annotations__", {})
         fields = set([n for n in attrs if isinstance(attrs[n], Field)])
         fields.update(annotations.keys())
@@ -81,25 +109,34 @@ def __init__(self, name, bases, attrs):
                 required = True
                 multi = False
                 while hasattr(type_, "__origin__"):
-                    if type_.__origin__ == Optional:
-                        required = False
+                    if type_.__origin__ == Mapped:
                         type_ = type_.__args__[0]
-                    elif issubclass(type_.__origin__, List):
+                    elif type_.__origin__ == Union:
+                        if len(type_.__args__) == 2 and type_.__args__[1] is type(None):
+                            required = False
+                            type_ = type_.__args__[0]
+                        else:
+                            raise TypeError("Unsupported union")
+                    elif type_.__origin__ in [list, List]:
                         multi = True
                         type_ = type_.__args__[0]
-                if issubclass(type_, InnerDoc):
+                    else:
+                        break
+                field_args = []
+                field_kwargs = {}
+                if not isinstance(type_, type):
+                    raise TypeError(f"Cannot map type {type_}")
+                elif issubclass(type_, InnerDoc):
                     field = Nested if multi else Object
-                    field_args = {}
+                    field_args = [type_]
                 elif type_ in self.type_annotation_map:
-                    field, field_args = self.type_annotation_map[type_]
+                    field, field_kwargs = self.type_annotation_map[type_]
                 elif not issubclass(type_, Field):
                     raise TypeError(f"Cannot map type {type_}")
                 else:
                     field = type_
-                    field_args = {}
-                field_args = {"multi": multi, "required": required, **field_args}
-                value = field(**field_args)
-            value._name = name
+                field_kwargs = {"multi": multi, "required": required, **field_kwargs}
+                value = field(*field_args, **field_kwargs)
             self.mapping.field(name, value)
             if name in attrs:
                 del attrs[name]
@@ -120,6 +157,36 @@ def name(self):
         return self.mapping.properties.name
 
 
+_FieldType = TypeVar("_FieldType")
+
+
+class Mapped(Generic[_FieldType]):
+    __slots__ = {}
+
+    if TYPE_CHECKING:
+
+        @overload
+        def __get__(self, instance: None, owner: Any) -> InstrumentedField: ...
+
+        @overload
+        def __get__(self, instance: object, owner: Any) -> _FieldType: ...
+
+        def __get__(
+            self, instance: Optional[object], owner: Any
+        ) -> Union[InstrumentedField, _FieldType]: ...
+
+        def __set__(self, instance: Optional[object], value: _FieldType) -> None: ...
+
+        def __delete__(self, instance: Any) -> None: ...
+
+
+M = Mapped
+
+
+def mapped_field(field) -> Any:
+    return field
+
+
 class InnerDoc(ObjectBase, metaclass=DocumentMeta):
     """
     Common class for inner documents like Object or Nested
diff --git a/elasticsearch_dsl/field.py b/elasticsearch_dsl/field.py
index 3326b475..cd526ebd 100644
--- a/elasticsearch_dsl/field.py
+++ b/elasticsearch_dsl/field.py
@@ -77,8 +77,6 @@ def __init__(self, multi=False, required=False, *args, **kwargs):
         """
         self._multi = multi
         self._required = required
-        self._name = None
-        self._parent = None
         super().__init__(*args, **kwargs)
 
     def __getitem__(self, subfield):
diff --git a/examples/async/vectors.py b/examples/async/vectors.py
index 620ea45f..5672b485 100644
--- a/examples/async/vectors.py
+++ b/examples/async/vectors.py
@@ -47,6 +47,8 @@
 import asyncio
 import json
 import os
+from datetime import datetime
+from typing import List, Optional
 from urllib.request import urlopen
 
 import nltk
@@ -55,12 +57,10 @@
 
 from elasticsearch_dsl import (
     AsyncDocument,
-    Date,
     DenseVector,
     InnerDoc,
     Keyword,
-    Nested,
-    Text,
+    M,
     async_connections,
 )
 
@@ -72,22 +72,22 @@
 
 
 class Passage(InnerDoc):
-    content = Text()
-    embedding = DenseVector()
+    content: M[str]
+    embedding: M[DenseVector]
 
 
 class WorkplaceDoc(AsyncDocument):
     class Index:
         name = "workplace_documents"
 
-    name = Text()
-    summary = Text()
-    content = Text()
-    created = Date()
-    updated = Date()
-    url = Keyword()
-    category = Keyword()
-    passages = Nested(Passage)
+    name: M[str]
+    summary: M[str]
+    content: M[str]
+    created: M[datetime]
+    updated: M[Optional[datetime]]
+    url: M[Keyword]
+    category: M[Keyword]
+    passages: M[Optional[List[Passage]]]
 
     _model = None
 
diff --git a/examples/vectors.py b/examples/vectors.py
index c204cb61..4f7aa1e9 100644
--- a/examples/vectors.py
+++ b/examples/vectors.py
@@ -46,22 +46,15 @@
 import argparse
 import json
 import os
+from datetime import datetime
+from typing import List, Optional
 from urllib.request import urlopen
 
 import nltk
 from sentence_transformers import SentenceTransformer
 from tqdm import tqdm
 
-from elasticsearch_dsl import (
-    Date,
-    DenseVector,
-    Document,
-    InnerDoc,
-    Keyword,
-    Nested,
-    Text,
-    connections,
-)
+from elasticsearch_dsl import DenseVector, Document, InnerDoc, Keyword, M, connections
 
 DATASET_URL = "https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/datasets/workplace-documents.json"
 MODEL_NAME = "all-MiniLM-L6-v2"
@@ -71,22 +64,22 @@
 
 
 class Passage(InnerDoc):
-    content = Text()
-    embedding = DenseVector()
+    content: M[str]
+    embedding: M[DenseVector]
 
 
 class WorkplaceDoc(Document):
     class Index:
         name = "workplace_documents"
 
-    name = Text()
-    summary = Text()
-    content = Text()
-    created = Date()
-    updated = Date()
-    url = Keyword()
-    category = Keyword()
-    passages = Nested(Passage)
+    name: M[str]
+    summary: M[str]
+    content: M[str]
+    created: M[datetime]
+    updated: M[Optional[datetime]]
+    url: M[Keyword]
+    category: M[Keyword]
+    passages: M[Optional[List[Passage]]]
 
     _model = None
 

From c3db5deeb359016bdfb529d48aa35f5397e2e050 Mon Sep 17 00:00:00 2001
From: Miguel Grinberg <miguel.grinberg@gmail.com>
Date: Mon, 17 Jun 2024 16:23:09 +0100
Subject: [PATCH 04/12] dataclass-like behavior for Document and InnerDoc

---
 elasticsearch_dsl/_async/document.py |  4 +-
 elasticsearch_dsl/_sync/document.py  |  4 +-
 elasticsearch_dsl/document_base.py   | 88 ++++++++++++++++++++++++++--
 elasticsearch_dsl/utils.py           |  9 +++
 examples/async/vectors.py            | 22 ++++---
 examples/vectors.py                  | 31 ++++++----
 mypy.ini                             |  5 +-
 noxfile.py                           |  2 +-
 8 files changed, 131 insertions(+), 34 deletions(-)

diff --git a/elasticsearch_dsl/_async/document.py b/elasticsearch_dsl/_async/document.py
index 89ed06f4..1dfb5b9d 100644
--- a/elasticsearch_dsl/_async/document.py
+++ b/elasticsearch_dsl/_async/document.py
@@ -18,10 +18,11 @@
 import collections.abc
 
 from elasticsearch.exceptions import NotFoundError, RequestError
+from typing_extensions import dataclass_transform
 
 from .._async.index import AsyncIndex
 from ..async_connections import get_connection
-from ..document_base import DocumentBase, DocumentMeta
+from ..document_base import DocumentBase, DocumentMeta, mapped_field
 from ..exceptions import IllegalOperation
 from ..utils import DOC_META_FIELDS, META_FIELDS, merge
 from .search import AsyncSearch
@@ -62,6 +63,7 @@ def construct_index(cls, opts, bases):
         return i
 
 
+@dataclass_transform(field_specifiers=(mapped_field,))
 class AsyncDocument(DocumentBase, metaclass=AsyncIndexMeta):
     """
     Model-like class for persisting documents in elasticsearch.
diff --git a/elasticsearch_dsl/_sync/document.py b/elasticsearch_dsl/_sync/document.py
index c851c8e8..7e7acd51 100644
--- a/elasticsearch_dsl/_sync/document.py
+++ b/elasticsearch_dsl/_sync/document.py
@@ -18,10 +18,11 @@
 import collections.abc
 
 from elasticsearch.exceptions import NotFoundError, RequestError
+from typing_extensions import dataclass_transform
 
 from .._sync.index import Index
 from ..connections import get_connection
-from ..document_base import DocumentBase, DocumentMeta
+from ..document_base import DocumentBase, DocumentMeta, mapped_field
 from ..exceptions import IllegalOperation
 from ..utils import DOC_META_FIELDS, META_FIELDS, merge
 from .search import Search
@@ -60,6 +61,7 @@ def construct_index(cls, opts, bases):
         return i
 
 
+@dataclass_transform(field_specifiers=(mapped_field,))
 class Document(DocumentBase, metaclass=IndexMeta):
     """
     Model-like class for persisting documents in elasticsearch.
diff --git a/elasticsearch_dsl/document_base.py b/elasticsearch_dsl/document_base.py
index 7514ced6..22a2435c 100644
--- a/elasticsearch_dsl/document_base.py
+++ b/elasticsearch_dsl/document_base.py
@@ -17,7 +17,19 @@
 
 from datetime import date, datetime
 from fnmatch import fnmatch
-from typing import TYPE_CHECKING, Any, Generic, List, Optional, TypeVar, Union, overload
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Generic,
+    List,
+    Optional,
+    TypeVar,
+    Union,
+    overload,
+)
+
+from typing_extensions import dataclass_transform
 
 from .exceptions import ValidationException
 from .field import (
@@ -101,10 +113,22 @@ def __init__(self, name, bases, attrs):
         annotations = attrs.get("__annotations__", {})
         fields = set([n for n in attrs if isinstance(attrs[n], Field)])
         fields.update(annotations.keys())
+        field_defaults = {}
         for name in fields:
+            value = None
             if name in attrs:
                 value = attrs[name]
-            else:
+                if isinstance(value, dict):
+                    # the mapped_field() wrapper function was used so we need
+                    # to look for the field instance and also record any
+                    # defaults
+                    value = attrs[name].get("_field")
+                    default_value = attrs[name].get("default") or attrs[name].get(
+                        "default_factory"
+                    )
+                    if default_value:
+                        field_defaults[name] = default_value
+            if value is None:
                 type_ = annotations[name]
                 required = True
                 multi = False
@@ -129,6 +153,7 @@ def __init__(self, name, bases, attrs):
                 elif issubclass(type_, InnerDoc):
                     field = Nested if multi else Object
                     field_args = [type_]
+                    required = False
                 elif type_ in self.type_annotation_map:
                     field, field_kwargs = self.type_annotation_map[type_]
                 elif not issubclass(type_, Field):
@@ -141,6 +166,9 @@ def __init__(self, name, bases, attrs):
             if name in attrs:
                 del attrs[name]
 
+        # store dataclass-style defaults for ObjectBase.__init__ to assign
+        attrs["_defaults"] = field_defaults
+
         # add all the mappings for meta fields
         for name in dir(meta):
             if isinstance(getattr(meta, name, None), MetaField):
@@ -161,6 +189,26 @@ def name(self):
 
 
 class Mapped(Generic[_FieldType]):
+    """Class that represents the type of a mapped field.
+
+    This class can be used as an optional wrapper on a field type to help type
+    checkers assign the correct type when the field is used as a class
+    attribute.
+
+    Consider the following definitions::
+
+        class MyDocument(Document):
+            first: str
+            second: M[str]
+
+        mydoc = MyDocument(first="1", second="2")
+
+    Type checkers have no trouble inferring the type of both ``mydoc.first``
+    and ``mydoc.second`` as ``str``, but while ``MyDocument.first`` will be
+    incorrectly typed as ``str``, ``MyDocument.second`` should be assigned the
+    correct ``InstrumentedField`` type.
+    """
+
     __slots__ = {}
 
     if TYPE_CHECKING:
@@ -172,7 +220,7 @@ def __get__(self, instance: None, owner: Any) -> InstrumentedField: ...
         def __get__(self, instance: object, owner: Any) -> _FieldType: ...
 
         def __get__(
-            self, instance: Optional[object], owner: Any
+            self, instance: object | None, owner: Any
         ) -> Union[InstrumentedField, _FieldType]: ...
 
         def __set__(self, instance: Optional[object], value: _FieldType) -> None: ...
@@ -183,10 +231,40 @@ def __delete__(self, instance: Any) -> None: ...
 M = Mapped
 
 
-def mapped_field(field) -> Any:
-    return field
+def mapped_field(
+    field: Optional[Field] = None,
+    *,
+    init: bool = True,
+    default: Any = None,
+    default_factory: Callable = None,
+    **kwargs,
+) -> Any:
+    """Construct a field using dataclass behaviors
+
+    This function can be used in the right side of a document field definition
+    as a wrapper for the field instance or as a way to provide dataclass-compatible
+    options.
+
+    :param field: The instance of ``Field`` to use for this field. If not provided,
+    an instance that is appropriate for the type given to the field is used.
+    :param init: a value of ``True`` adds this field to the constructor, and a
+    value of ``False`` omits it from it. The default is ``True``.
+    :param default: a default value to use for this field when one is not provided
+    explicitly.
+    :param default_factory: a callable that returns a default value for the field,
+    when one isn't provided explicitly. Only one of ``factory`` and
+    ``default_factory`` can be used.
+    """
+    return {
+        "_field": field,
+        "init": init,
+        "default": default,
+        "default_factory": default_factory,
+        **kwargs,
+    }
 
 
+@dataclass_transform(field_specifiers=(mapped_field,))
 class InnerDoc(ObjectBase, metaclass=DocumentMeta):
     """
     Common class for inner documents like Object or Nested
diff --git a/elasticsearch_dsl/utils.py b/elasticsearch_dsl/utils.py
index cff53480..0c5a230e 100644
--- a/elasticsearch_dsl/utils.py
+++ b/elasticsearch_dsl/utils.py
@@ -427,6 +427,15 @@ def __init__(self, meta=None, **kwargs):
 
         super(AttrDict, self).__setattr__("meta", HitMeta(meta))
 
+        # process field defaults
+        if hasattr(self, "_defaults"):
+            for name in self._defaults:
+                if name not in kwargs:
+                    value = self._defaults[name]
+                    if callable(value):
+                        value = value()
+                    kwargs[name] = value
+
         super().__init__(kwargs)
 
     @classmethod
diff --git a/examples/async/vectors.py b/examples/async/vectors.py
index 5672b485..b7fcbb7b 100644
--- a/examples/async/vectors.py
+++ b/examples/async/vectors.py
@@ -48,7 +48,7 @@
 import json
 import os
 from datetime import datetime
-from typing import List, Optional
+from typing import List, Optional, cast
 from urllib.request import urlopen
 
 import nltk
@@ -62,6 +62,7 @@
     Keyword,
     M,
     async_connections,
+    mapped_field,
 )
 
 DATASET_URL = "https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/datasets/workplace-documents.json"
@@ -73,7 +74,7 @@
 
 class Passage(InnerDoc):
     content: M[str]
-    embedding: M[DenseVector]
+    embedding: M[List[float]] = mapped_field(DenseVector())
 
 
 class WorkplaceDoc(AsyncDocument):
@@ -85,32 +86,30 @@ class Index:
     content: M[str]
     created: M[datetime]
     updated: M[Optional[datetime]]
-    url: M[Keyword]
-    category: M[Keyword]
-    passages: M[Optional[List[Passage]]]
+    url: M[str] = mapped_field(Keyword())
+    category: M[str] = mapped_field(Keyword())
+    passages: M[List[Passage]] = mapped_field(default=[])
 
     _model = None
 
     @classmethod
-    def get_embedding_model(cls):
+    def get_embedding(cls, input: str) -> List[float]:
         if cls._model is None:
             cls._model = SentenceTransformer(MODEL_NAME)
-        return cls._model
+        return cast(List[float], cls._model.encode(input))
 
     def clean(self):
         # split the content into sentences
         passages = nltk.sent_tokenize(self.content)
 
         # generate an embedding for each passage and save it as a nested document
-        model = self.get_embedding_model()
         for passage in passages:
             self.passages.append(
-                Passage(content=passage, embedding=list(model.encode(passage)))
+                Passage(content=passage, embedding=self.get_embedding(passage))
             )
 
 
 async def create():
-
     # create the index
     await WorkplaceDoc._index.delete(ignore_unavailable=True)
     await WorkplaceDoc.init()
@@ -133,12 +132,11 @@ async def create():
 
 
 async def search(query):
-    model = WorkplaceDoc.get_embedding_model()
     return WorkplaceDoc.search().knn(
         field="passages.embedding",
         k=5,
         num_candidates=50,
-        query_vector=list(model.encode(query)),
+        query_vector=list(WorkplaceDoc.get_embedding(query)),
         inner_hits={"size": 2},
     )
 
diff --git a/examples/vectors.py b/examples/vectors.py
index 4f7aa1e9..9e138d6a 100644
--- a/examples/vectors.py
+++ b/examples/vectors.py
@@ -47,14 +47,22 @@
 import json
 import os
 from datetime import datetime
-from typing import List, Optional
+from typing import List, Optional, cast
 from urllib.request import urlopen
 
 import nltk
 from sentence_transformers import SentenceTransformer
 from tqdm import tqdm
 
-from elasticsearch_dsl import DenseVector, Document, InnerDoc, Keyword, M, connections
+from elasticsearch_dsl import (
+    DenseVector,
+    Document,
+    InnerDoc,
+    Keyword,
+    M,
+    connections,
+    mapped_field,
+)
 
 DATASET_URL = "https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/datasets/workplace-documents.json"
 MODEL_NAME = "all-MiniLM-L6-v2"
@@ -65,7 +73,7 @@
 
 class Passage(InnerDoc):
     content: M[str]
-    embedding: M[DenseVector]
+    embedding: M[List[float]] = mapped_field(DenseVector())
 
 
 class WorkplaceDoc(Document):
@@ -77,32 +85,30 @@ class Index:
     content: M[str]
     created: M[datetime]
     updated: M[Optional[datetime]]
-    url: M[Keyword]
-    category: M[Keyword]
-    passages: M[Optional[List[Passage]]]
+    url: M[str] = mapped_field(Keyword())
+    category: M[str] = mapped_field(Keyword())
+    passages: M[List[Passage]] = mapped_field(default=[])
 
     _model = None
 
     @classmethod
-    def get_embedding_model(cls):
+    def get_embedding(cls, input: str) -> List[float]:
         if cls._model is None:
             cls._model = SentenceTransformer(MODEL_NAME)
-        return cls._model
+        return cast(List[float], cls._model.encode(input))
 
     def clean(self):
         # split the content into sentences
         passages = nltk.sent_tokenize(self.content)
 
         # generate an embedding for each passage and save it as a nested document
-        model = self.get_embedding_model()
         for passage in passages:
             self.passages.append(
-                Passage(content=passage, embedding=list(model.encode(passage)))
+                Passage(content=passage, embedding=self.get_embedding(passage))
             )
 
 
 def create():
-
     # create the index
     WorkplaceDoc._index.delete(ignore_unavailable=True)
     WorkplaceDoc.init()
@@ -125,12 +131,11 @@ def create():
 
 
 def search(query):
-    model = WorkplaceDoc.get_embedding_model()
     return WorkplaceDoc.search().knn(
         field="passages.embedding",
         k=5,
         num_candidates=50,
-        query_vector=list(model.encode(query)),
+        query_vector=list(WorkplaceDoc.get_embedding(query)),
         inner_hits={"size": 2},
     )
 
diff --git a/mypy.ini b/mypy.ini
index 0c795321..e71761ce 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -1,3 +1,6 @@
+[mypy]
+explicit_package_bases = True
+
 [mypy-elasticsearch_dsl.query]
 # Allow reexport of SF for tests
-implicit_reexport = True
\ No newline at end of file
+implicit_reexport = True
diff --git a/noxfile.py b/noxfile.py
index f90f22f0..aee6854c 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -89,7 +89,7 @@ def type_check(session):
     session.install("mypy", ".[develop]")
     errors = []
     popen = subprocess.Popen(
-        "mypy --strict elasticsearch_dsl tests",
+        "mypy --strict elasticsearch_dsl tests examples",
         env=session.env,
         shell=True,
         stdout=subprocess.PIPE,

From 31d919d8886ff1cbf1e5f6bcc843e5deddb824aa Mon Sep 17 00:00:00 2001
From: Miguel Grinberg <miguel.grinberg@gmail.com>
Date: Tue, 18 Jun 2024 12:07:40 +0100
Subject: [PATCH 05/12] unit tests

---
 elasticsearch_dsl/document_base.py |  20 ++++--
 examples/async/vectors.py          |   2 +-
 examples/vectors.py                |   2 +-
 tests/_async/test_document.py      | 112 +++++++++++++++++++++++++++++
 tests/_sync/test_document.py       | 112 +++++++++++++++++++++++++++++
 5 files changed, 240 insertions(+), 8 deletions(-)

diff --git a/elasticsearch_dsl/document_base.py b/elasticsearch_dsl/document_base.py
index 22a2435c..754faba0 100644
--- a/elasticsearch_dsl/document_base.py
+++ b/elasticsearch_dsl/document_base.py
@@ -104,9 +104,9 @@ def __init__(self, name, bases, attrs):
         #     # use typing, but override with any stock or custom field
         #     field6: bool = MyCustomField()
         #
-        #     # best mypy and pyright typing support
+        #     # best mypy and pyright support and dataclass-like behavior
         #     field7: M[date]
-        #     field8: M[str] = mapped_field(MyCustomText())
+        #     field8: M[str] = mapped_field(MyCustomText(), default="foo")
         #
         #     # legacy format without Python typing
         #     field8 = Text()
@@ -117,11 +117,13 @@ def __init__(self, name, bases, attrs):
         for name in fields:
             value = None
             if name in attrs:
+                # this field has a right-side value, which can be field
+                # instance on its own or wrapped with mapped_field()
                 value = attrs[name]
                 if isinstance(value, dict):
                     # the mapped_field() wrapper function was used so we need
                     # to look for the field instance and also record any
-                    # defaults
+                    # dataclass-style defaults
                     value = attrs[name].get("_field")
                     default_value = attrs[name].get("default") or attrs[name].get(
                         "default_factory"
@@ -129,19 +131,25 @@ def __init__(self, name, bases, attrs):
                     if default_value:
                         field_defaults[name] = default_value
             if value is None:
+                # the field does not have an explicit field instance given in
+                # a right-side assignment, so we need to figure out what field
+                # type to use from the annotation
                 type_ = annotations[name]
                 required = True
                 multi = False
                 while hasattr(type_, "__origin__"):
                     if type_.__origin__ == Mapped:
+                        # M[type] -> extract the wrapped type
                         type_ = type_.__args__[0]
                     elif type_.__origin__ == Union:
                         if len(type_.__args__) == 2 and type_.__args__[1] is type(None):
+                            # Optional[type] -> mark instance as optional
                             required = False
                             type_ = type_.__args__[0]
                         else:
                             raise TypeError("Unsupported union")
                     elif type_.__origin__ in [list, List]:
+                        # List[type] -> mark instance as multi
                         multi = True
                         type_ = type_.__args__[0]
                     else:
@@ -151,15 +159,15 @@ def __init__(self, name, bases, attrs):
                 if not isinstance(type_, type):
                     raise TypeError(f"Cannot map type {type_}")
                 elif issubclass(type_, InnerDoc):
+                    # object or nested field
                     field = Nested if multi else Object
                     field_args = [type_]
                     required = False
                 elif type_ in self.type_annotation_map:
+                    # use best field type for the type hint provided
                     field, field_kwargs = self.type_annotation_map[type_]
-                elif not issubclass(type_, Field):
-                    raise TypeError(f"Cannot map type {type_}")
                 else:
-                    field = type_
+                    raise TypeError(f"Cannot map type {type_}")
                 field_kwargs = {"multi": multi, "required": required, **field_kwargs}
                 value = field(*field_args, **field_kwargs)
             self.mapping.field(name, value)
diff --git a/examples/async/vectors.py b/examples/async/vectors.py
index b7fcbb7b..8dfcf7c5 100644
--- a/examples/async/vectors.py
+++ b/examples/async/vectors.py
@@ -96,7 +96,7 @@ class Index:
     def get_embedding(cls, input: str) -> List[float]:
         if cls._model is None:
             cls._model = SentenceTransformer(MODEL_NAME)
-        return cast(List[float], cls._model.encode(input))
+        return cast(List[float], list(cls._model.encode(input)))
 
     def clean(self):
         # split the content into sentences
diff --git a/examples/vectors.py b/examples/vectors.py
index 9e138d6a..aff710e0 100644
--- a/examples/vectors.py
+++ b/examples/vectors.py
@@ -95,7 +95,7 @@ class Index:
     def get_embedding(cls, input: str) -> List[float]:
         if cls._model is None:
             cls._model = SentenceTransformer(MODEL_NAME)
-        return cast(List[float], cls._model.encode(input))
+        return cast(List[float], list(cls._model.encode(input)))
 
     def clean(self):
         # split the content into sentences
diff --git a/tests/_async/test_document.py b/tests/_async/test_document.py
index 238e80fb..fa651c8b 100644
--- a/tests/_async/test_document.py
+++ b/tests/_async/test_document.py
@@ -20,6 +20,7 @@
 import pickle
 from datetime import datetime
 from hashlib import md5
+from typing import List, Optional
 
 import pytest
 from pytest import raises
@@ -28,11 +29,13 @@
     AsyncDocument,
     Index,
     InnerDoc,
+    M,
     Mapping,
     MetaField,
     Range,
     analyzer,
     field,
+    mapped_field,
     utils,
 )
 from elasticsearch_dsl.exceptions import IllegalOperation, ValidationException
@@ -127,6 +130,29 @@ class Index:
         name = "test-host"
 
 
+class TypedInnerDoc(InnerDoc):
+    st: M[str]
+    dt: M[Optional[datetime]]
+    li: M[List[int]]
+
+
+class TypedDoc(AsyncDocument):
+    st: str
+    dt: Optional[datetime]
+    li: List[int]
+    ob: TypedInnerDoc
+    ns: List[TypedInnerDoc]
+    ip: Optional[str] = field.Ip()
+    k1: str = field.Keyword(required=True)
+    k2: M[str] = field.Keyword()
+    k3: str = mapped_field(field.Keyword(), default="foo")
+    k4: M[Optional[str]] = mapped_field(field.Keyword())
+    s1: Secret = SecretField()
+    s2: M[Secret] = SecretField()
+    s3: Secret = mapped_field(SecretField())
+    s4: M[Optional[Secret]] = mapped_field(SecretField(), default_factory=lambda: "foo")
+
+
 def test_range_serializes_properly():
     class D(AsyncDocument):
         lr = field.LongRange()
@@ -640,3 +666,89 @@ class MySubDocWithNested(MyDoc):
         },
         "title": {"type": "keyword"},
     }
+
+
+def test_doc_with_type_hints():
+    props = TypedDoc._doc_type.mapping.to_dict()["properties"]
+    assert props == {
+        "st": {"type": "text"},
+        "dt": {"type": "date"},
+        "li": {"type": "integer"},
+        "ob": {
+            "type": "object",
+            "properties": {
+                "st": {"type": "text"},
+                "dt": {"type": "date"},
+                "li": {"type": "integer"},
+            },
+        },
+        "ns": {
+            "type": "nested",
+            "properties": {
+                "st": {"type": "text"},
+                "dt": {"type": "date"},
+                "li": {"type": "integer"},
+            },
+        },
+        "ip": {"type": "ip"},
+        "k1": {"type": "keyword"},
+        "k2": {"type": "keyword"},
+        "k3": {"type": "keyword"},
+        "k4": {"type": "keyword"},
+        "s1": {"type": "text"},
+        "s2": {"type": "text"},
+        "s3": {"type": "text"},
+        "s4": {"type": "text"},
+    }
+
+    doc = TypedDoc()
+    assert doc.k3 == "foo"
+    assert doc.s4 == "foo"
+    with raises(ValidationException) as exc_info:
+        doc.full_clean()
+    assert set(exc_info.value.args[0].keys()) == {"st", "li", "k1"}
+
+    doc.st = "s"
+    doc.li = [1, 2, 3]
+    doc.k1 = "k"
+    doc.full_clean()
+
+    doc.ob = TypedInnerDoc()
+    with raises(ValidationException) as exc_info:
+        doc.full_clean()
+    assert set(exc_info.value.args[0].keys()) == {"ob"}
+    assert set(exc_info.value.args[0]["ob"][0].args[0].keys()) == {"st", "li"}
+
+    doc.ob.st = "s"
+    doc.ob.li = [1]
+    doc.full_clean()
+
+    doc.ns.append(TypedInnerDoc(st="s"))
+    with raises(ValidationException) as exc_info:
+        doc.full_clean()
+
+    doc.ns[0].li = [1, 2]
+    doc.full_clean()
+
+    doc.ip = "1.2.3.4"
+    n = datetime.now()
+    doc.dt = n
+    assert doc.to_dict() == {
+        "st": "s",
+        "li": [1, 2, 3],
+        "dt": n,
+        "ob": {
+            "st": "s",
+            "li": [1],
+        },
+        "ns": [
+            {
+                "st": "s",
+                "li": [1, 2],
+            }
+        ],
+        "ip": "1.2.3.4",
+        "k1": "k",
+        "k3": "foo",
+        "s4": "foo",
+    }
diff --git a/tests/_sync/test_document.py b/tests/_sync/test_document.py
index 5cfa183c..e2296280 100644
--- a/tests/_sync/test_document.py
+++ b/tests/_sync/test_document.py
@@ -20,6 +20,7 @@
 import pickle
 from datetime import datetime
 from hashlib import md5
+from typing import List, Optional
 
 import pytest
 from pytest import raises
@@ -28,11 +29,13 @@
     Document,
     Index,
     InnerDoc,
+    M,
     Mapping,
     MetaField,
     Range,
     analyzer,
     field,
+    mapped_field,
     utils,
 )
 from elasticsearch_dsl.exceptions import IllegalOperation, ValidationException
@@ -127,6 +130,29 @@ class Index:
         name = "test-host"
 
 
+class TypedInnerDoc(InnerDoc):
+    st: M[str]
+    dt: M[Optional[datetime]]
+    li: M[List[int]]
+
+
+class TypedDoc(Document):
+    st: str
+    dt: Optional[datetime]
+    li: List[int]
+    ob: TypedInnerDoc
+    ns: List[TypedInnerDoc]
+    ip: Optional[str] = field.Ip()
+    k1: str = field.Keyword(required=True)
+    k2: M[str] = field.Keyword()
+    k3: str = mapped_field(field.Keyword(), default="foo")
+    k4: M[Optional[str]] = mapped_field(field.Keyword())
+    s1: Secret = SecretField()
+    s2: M[Secret] = SecretField()
+    s3: Secret = mapped_field(SecretField())
+    s4: M[Optional[Secret]] = mapped_field(SecretField(), default_factory=lambda: "foo")
+
+
 def test_range_serializes_properly():
     class D(Document):
         lr = field.LongRange()
@@ -640,3 +666,89 @@ class MySubDocWithNested(MyDoc):
         },
         "title": {"type": "keyword"},
     }
+
+
+def test_doc_with_type_hints():
+    props = TypedDoc._doc_type.mapping.to_dict()["properties"]
+    assert props == {
+        "st": {"type": "text"},
+        "dt": {"type": "date"},
+        "li": {"type": "integer"},
+        "ob": {
+            "type": "object",
+            "properties": {
+                "st": {"type": "text"},
+                "dt": {"type": "date"},
+                "li": {"type": "integer"},
+            },
+        },
+        "ns": {
+            "type": "nested",
+            "properties": {
+                "st": {"type": "text"},
+                "dt": {"type": "date"},
+                "li": {"type": "integer"},
+            },
+        },
+        "ip": {"type": "ip"},
+        "k1": {"type": "keyword"},
+        "k2": {"type": "keyword"},
+        "k3": {"type": "keyword"},
+        "k4": {"type": "keyword"},
+        "s1": {"type": "text"},
+        "s2": {"type": "text"},
+        "s3": {"type": "text"},
+        "s4": {"type": "text"},
+    }
+
+    doc = TypedDoc()
+    assert doc.k3 == "foo"
+    assert doc.s4 == "foo"
+    with raises(ValidationException) as exc_info:
+        doc.full_clean()
+    assert set(exc_info.value.args[0].keys()) == {"st", "li", "k1"}
+
+    doc.st = "s"
+    doc.li = [1, 2, 3]
+    doc.k1 = "k"
+    doc.full_clean()
+
+    doc.ob = TypedInnerDoc()
+    with raises(ValidationException) as exc_info:
+        doc.full_clean()
+    assert set(exc_info.value.args[0].keys()) == {"ob"}
+    assert set(exc_info.value.args[0]["ob"][0].args[0].keys()) == {"st", "li"}
+
+    doc.ob.st = "s"
+    doc.ob.li = [1]
+    doc.full_clean()
+
+    doc.ns.append(TypedInnerDoc(st="s"))
+    with raises(ValidationException) as exc_info:
+        doc.full_clean()
+
+    doc.ns[0].li = [1, 2]
+    doc.full_clean()
+
+    doc.ip = "1.2.3.4"
+    n = datetime.now()
+    doc.dt = n
+    assert doc.to_dict() == {
+        "st": "s",
+        "li": [1, 2, 3],
+        "dt": n,
+        "ob": {
+            "st": "s",
+            "li": [1],
+        },
+        "ns": [
+            {
+                "st": "s",
+                "li": [1, 2],
+            }
+        ],
+        "ip": "1.2.3.4",
+        "k1": "k",
+        "k3": "foo",
+        "s4": "foo",
+    }

From 15e213f542dbb19c45223120cfc8fd7037057f41 Mon Sep 17 00:00:00 2001
From: Miguel Grinberg <miguel.grinberg@gmail.com>
Date: Tue, 18 Jun 2024 17:11:56 +0100
Subject: [PATCH 06/12] support InstrumentedField in Search class

---
 elasticsearch_dsl/document_base.py | 57 +++++++++++++++++++++++-------
 elasticsearch_dsl/field.py         | 19 ----------
 elasticsearch_dsl/search_base.py   | 41 +++++++++++++--------
 tests/_async/test_document.py      | 49 +++++++++++++------------
 tests/_sync/test_document.py       | 49 +++++++++++++------------
 5 files changed, 124 insertions(+), 91 deletions(-)

diff --git a/elasticsearch_dsl/document_base.py b/elasticsearch_dsl/document_base.py
index 754faba0..27545106 100644
--- a/elasticsearch_dsl/document_base.py
+++ b/elasticsearch_dsl/document_base.py
@@ -32,18 +32,7 @@
 from typing_extensions import dataclass_transform
 
 from .exceptions import ValidationException
-from .field import (
-    Binary,
-    Boolean,
-    Date,
-    Field,
-    Float,
-    InstrumentedField,
-    Integer,
-    Nested,
-    Object,
-    Text,
-)
+from .field import Binary, Boolean, Date, Field, Float, Integer, Nested, Object, Text
 from .mapping import Mapping
 from .utils import DOC_META_FIELDS, ObjectBase
 
@@ -53,6 +42,50 @@ def __init__(self, *args, **kwargs):
         self.args, self.kwargs = args, kwargs
 
 
+class InstrumentedField:
+    """Proxy object for a mapped document field.
+
+    An object of this instance is returned when a field is access as a class
+    attribute of a ``Document`` or ``InnerDoc`` subclass. These objects can
+    be used in any situation in which a reference to a field is required, such
+    as when specifying sort options in a search::
+
+        class MyDocument(Document):
+            name: str
+
+        s = MyDocument.search()
+        s = s.sort(-MyDocument.name)  # sort by name in descending order
+    """
+
+    def __init__(self, name, field):
+        self._name = name
+        self._field = field
+
+    def __getattr__(self, attr):
+        f = None
+        try:
+            f = self._field[attr]
+        except KeyError:
+            pass
+        if isinstance(f, Field):
+            return InstrumentedField(f"{self._name}.{attr}", f)
+        return getattr(self._field, attr)
+
+    def __pos__(self):
+        """Return the field name representation for ascending sort order"""
+        return f"{self._name}"
+
+    def __neg__(self):
+        """Return the field name representation for descending sort order"""
+        return f"-{self._name}"
+
+    def __str__(self):
+        return self._name
+
+    def __repr__(self):
+        return f"InstrumentedField[{self._name}]"
+
+
 class DocumentMeta(type):
     def __new__(cls, name, bases, attrs):
         # DocumentMeta filters attrs in place
diff --git a/elasticsearch_dsl/field.py b/elasticsearch_dsl/field.py
index cd526ebd..3f704480 100644
--- a/elasticsearch_dsl/field.py
+++ b/elasticsearch_dsl/field.py
@@ -123,25 +123,6 @@ def to_dict(self):
         return value
 
 
-class InstrumentedField:
-    def __init__(self, name, field):
-        self._name = name
-        self._field = field
-
-    def __getattr__(self, attr):
-        f = None
-        try:
-            f = self._field[attr]
-        except KeyError:
-            pass
-        if isinstance(f, Field):
-            return InstrumentedField(f"{self._name}.{attr}", f)
-        return getattr(self._field, attr)
-
-    def __repr__(self):
-        return self._name
-
-
 class CustomField(Field):
     name = "custom"
     _coerce = True
diff --git a/elasticsearch_dsl/search_base.py b/elasticsearch_dsl/search_base.py
index 7a940d4a..893464c6 100644
--- a/elasticsearch_dsl/search_base.py
+++ b/elasticsearch_dsl/search_base.py
@@ -523,7 +523,7 @@ def knn(
         """
         Add a k-nearest neighbor (kNN) search.
 
-        :arg field: the name of the vector field to search against
+        :arg field: the vector field to search against as a string or document class attribute
         :arg k: number of nearest neighbors to return as top hits
         :arg num_candidates: number of nearest neighbor candidates to consider per shard
         :arg query_vector: the vector to search for
@@ -542,7 +542,7 @@ def knn(
         s = self._clone()
         s._knn.append(
             {
-                "field": field,
+                "field": str(field),  # str() is for InstrumentedField instances
                 "k": k,
                 "num_candidates": num_candidates,
             }
@@ -596,11 +596,15 @@ def source(self, fields=None, **kwargs):
         """
         Selectively control how the _source field is returned.
 
-        :arg fields: wildcard string, array of wildcards, or dictionary of includes and excludes
+        :arg fields: field name, wildcard string, list of field names or wildcards,
+                     or dictionary of includes and excludes
+        :arg kwargs: ``includes`` or ``excludes`` arguments, when ``fields`` is ``None``.
 
-        If ``fields`` is None, the entire document will be returned for
-        each hit.  If fields is a dictionary with keys of 'includes' and/or
-        'excludes' the fields will be either included or excluded appropriately.
+        When no arguments are given, the entire document will be returned for
+        each hit.  If ``fields`` is a string or list of strings, the field names or field
+        wildcards given will be included. If ``fields`` is a dictionary with keys of
+        'includes' and/or 'excludes' the fields will be either included or excluded
+        appropriately.
 
         Calling this multiple times with the same named parameter will override the
         previous values with the new ones.
@@ -619,8 +623,16 @@ def source(self, fields=None, **kwargs):
         if fields and kwargs:
             raise ValueError("You cannot specify fields and kwargs at the same time.")
 
+        def ensure_strings(fields):
+            if isinstance(fields, list):
+                return [str(f) for f in fields]
+            elif isinstance(fields, dict):
+                return {k: ensure_strings(v) for k, v in fields.items()}
+            else:
+                return str(fields)
+
         if fields is not None:
-            s._source = fields
+            s._source = fields if isinstance(fields, bool) else ensure_strings(fields)
             return s
 
         if kwargs and not isinstance(s._source, dict):
@@ -633,7 +645,7 @@ def source(self, fields=None, **kwargs):
                 except KeyError:
                     pass
             else:
-                s._source[key] = value
+                s._source[key] = ensure_strings(value)
 
         return s
 
@@ -663,11 +675,12 @@ def sort(self, *keys):
         s = self._clone()
         s._sort = []
         for k in keys:
-            if isinstance(k, str) and k.startswith("-"):
-                if k[1:] == "_score":
+            sort_field = str(k)
+            if sort_field.startswith("-"):
+                if sort_field[1:] == "_score":
                     raise IllegalOperation("Sorting by `-_score` is not allowed.")
-                k = {k[1:]: {"order": "desc"}}
-            s._sort.append(k)
+                sort_field = {sort_field[1:]: {"order": "desc"}}
+            s._sort.append(sort_field)
         return s
 
     def collapse(self, field=None, inner_hits=None, max_concurrent_group_searches=None):
@@ -684,7 +697,7 @@ def collapse(self, field=None, inner_hits=None, max_concurrent_group_searches=No
         if field is None:
             return s
 
-        s._collapse["field"] = field
+        s._collapse["field"] = str(field)
         if inner_hits:
             s._collapse["inner_hits"] = inner_hits
         if max_concurrent_group_searches:
@@ -740,7 +753,7 @@ def highlight(self, *fields, **kwargs):
         """
         s = self._clone()
         for f in fields:
-            s._highlight[f] = kwargs
+            s._highlight[str(f)] = kwargs
         return s
 
     def suggest(self, name, text=None, regex=None, **kwargs):
diff --git a/tests/_async/test_document.py b/tests/_async/test_document.py
index fa651c8b..46f39e57 100644
--- a/tests/_async/test_document.py
+++ b/tests/_async/test_document.py
@@ -130,29 +130,6 @@ class Index:
         name = "test-host"
 
 
-class TypedInnerDoc(InnerDoc):
-    st: M[str]
-    dt: M[Optional[datetime]]
-    li: M[List[int]]
-
-
-class TypedDoc(AsyncDocument):
-    st: str
-    dt: Optional[datetime]
-    li: List[int]
-    ob: TypedInnerDoc
-    ns: List[TypedInnerDoc]
-    ip: Optional[str] = field.Ip()
-    k1: str = field.Keyword(required=True)
-    k2: M[str] = field.Keyword()
-    k3: str = mapped_field(field.Keyword(), default="foo")
-    k4: M[Optional[str]] = mapped_field(field.Keyword())
-    s1: Secret = SecretField()
-    s2: M[Secret] = SecretField()
-    s3: Secret = mapped_field(SecretField())
-    s4: M[Optional[Secret]] = mapped_field(SecretField(), default_factory=lambda: "foo")
-
-
 def test_range_serializes_properly():
     class D(AsyncDocument):
         lr = field.LongRange()
@@ -669,6 +646,29 @@ class MySubDocWithNested(MyDoc):
 
 
 def test_doc_with_type_hints():
+    class TypedInnerDoc(InnerDoc):
+        st: M[str]
+        dt: M[Optional[datetime]]
+        li: M[List[int]]
+
+    class TypedDoc(AsyncDocument):
+        st: str
+        dt: Optional[datetime]
+        li: List[int]
+        ob: TypedInnerDoc
+        ns: List[TypedInnerDoc]
+        ip: Optional[str] = field.Ip()
+        k1: str = field.Keyword(required=True)
+        k2: M[str] = field.Keyword()
+        k3: str = mapped_field(field.Keyword(), default="foo")
+        k4: M[Optional[str]] = mapped_field(field.Keyword())
+        s1: Secret = SecretField()
+        s2: M[Secret] = SecretField()
+        s3: Secret = mapped_field(SecretField())
+        s4: M[Optional[Secret]] = mapped_field(
+            SecretField(), default_factory=lambda: "foo"
+        )
+
     props = TypedDoc._doc_type.mapping.to_dict()["properties"]
     assert props == {
         "st": {"type": "text"},
@@ -752,3 +752,6 @@ def test_doc_with_type_hints():
         "k3": "foo",
         "s4": "foo",
     }
+
+    s = TypedDoc.search().sort(TypedDoc.st, -TypedDoc.dt, +TypedDoc.ob.st)
+    assert s.to_dict() == {"sort": ["st", {"dt": {"order": "desc"}}, "ob.st"]}
diff --git a/tests/_sync/test_document.py b/tests/_sync/test_document.py
index e2296280..52f7c763 100644
--- a/tests/_sync/test_document.py
+++ b/tests/_sync/test_document.py
@@ -130,29 +130,6 @@ class Index:
         name = "test-host"
 
 
-class TypedInnerDoc(InnerDoc):
-    st: M[str]
-    dt: M[Optional[datetime]]
-    li: M[List[int]]
-
-
-class TypedDoc(Document):
-    st: str
-    dt: Optional[datetime]
-    li: List[int]
-    ob: TypedInnerDoc
-    ns: List[TypedInnerDoc]
-    ip: Optional[str] = field.Ip()
-    k1: str = field.Keyword(required=True)
-    k2: M[str] = field.Keyword()
-    k3: str = mapped_field(field.Keyword(), default="foo")
-    k4: M[Optional[str]] = mapped_field(field.Keyword())
-    s1: Secret = SecretField()
-    s2: M[Secret] = SecretField()
-    s3: Secret = mapped_field(SecretField())
-    s4: M[Optional[Secret]] = mapped_field(SecretField(), default_factory=lambda: "foo")
-
-
 def test_range_serializes_properly():
     class D(Document):
         lr = field.LongRange()
@@ -669,6 +646,29 @@ class MySubDocWithNested(MyDoc):
 
 
 def test_doc_with_type_hints():
+    class TypedInnerDoc(InnerDoc):
+        st: M[str]
+        dt: M[Optional[datetime]]
+        li: M[List[int]]
+
+    class TypedDoc(Document):
+        st: str
+        dt: Optional[datetime]
+        li: List[int]
+        ob: TypedInnerDoc
+        ns: List[TypedInnerDoc]
+        ip: Optional[str] = field.Ip()
+        k1: str = field.Keyword(required=True)
+        k2: M[str] = field.Keyword()
+        k3: str = mapped_field(field.Keyword(), default="foo")
+        k4: M[Optional[str]] = mapped_field(field.Keyword())
+        s1: Secret = SecretField()
+        s2: M[Secret] = SecretField()
+        s3: Secret = mapped_field(SecretField())
+        s4: M[Optional[Secret]] = mapped_field(
+            SecretField(), default_factory=lambda: "foo"
+        )
+
     props = TypedDoc._doc_type.mapping.to_dict()["properties"]
     assert props == {
         "st": {"type": "text"},
@@ -752,3 +752,6 @@ def test_doc_with_type_hints():
         "k3": "foo",
         "s4": "foo",
     }
+
+    s = TypedDoc.search().sort(TypedDoc.st, -TypedDoc.dt, +TypedDoc.ob.st)
+    assert s.to_dict() == {"sort": ["st", {"dt": {"order": "desc"}}, "ob.st"]}

From 7a405271958ce34bcbf36a60b89c2d6c541a65d2 Mon Sep 17 00:00:00 2001
From: Miguel Grinberg <miguel.grinberg@gmail.com>
Date: Tue, 18 Jun 2024 17:12:03 +0100
Subject: [PATCH 07/12] documentation

---
 docs/persistence.rst      | 154 ++++++++++++++++++++++++++++++++++++--
 examples/async/vectors.py |   2 +-
 examples/vectors.py       |   2 +-
 3 files changed, 150 insertions(+), 8 deletions(-)

diff --git a/docs/persistence.rst b/docs/persistence.rst
index cb1dc226..12e4a834 100644
--- a/docs/persistence.rst
+++ b/docs/persistence.rst
@@ -9,7 +9,7 @@ layer for your application.
 For more comprehensive examples have a look at the examples_ directory in the
 repository.
 
-.. _examples: https://github.com/elastic/elasticsearch-dsl-py/tree/master/examples
+.. _examples: https://github.com/elastic/elasticsearch-dsl-py/tree/main/examples
 
 .. _doc_type:
 
@@ -66,14 +66,14 @@ settings in elasticsearch (see :ref:`life-cycle` for details).
 Data types
 ~~~~~~~~~~
 
-The ``Document`` instances should be using native python types like
+The ``Document`` instances use native python types like ``str`` and
 ``datetime``. In case of ``Object`` or ``Nested`` fields an instance of the
-``InnerDoc`` subclass should be used just like in the ``add_comment`` method in
-the above example where we are creating an instance of the ``Comment`` class.
+``InnerDoc`` subclass is used, as in the ``add_comment`` method in the above
+example where we are creating an instance of the ``Comment`` class.
 
 There are some specific types that were created as part of this library to make
-working with specific field types easier, for example the ``Range`` object used
-in any of the `range fields
+working with some field types easier, for example the ``Range`` object used in
+any of the `range fields
 <https://www.elastic.co/guide/en/elasticsearch/reference/current/range.html>`_:
 
 .. code:: python
@@ -103,6 +103,148 @@ in any of the `range fields
     # empty range is unbounded
     Range().lower # None, False
 
+Python Type Hints
+~~~~~~~~~~~~~~~~~
+
+Document fields can be defined using standard Python type hints if desired.
+Here are some simple examples:
+
+.. code:: python
+
+    from typing import Optional
+
+    class Post(Document):
+        title: str                      # same as Text(required=True)
+        created_at: Optional[datetime]  # same as Date(required=False)
+        published: bool                 # same as Boolean(required=True)    
+
+Python types are mapped to their corresponding field type according to the
+following table:
+
+.. list-table:: Python type to DSL field mappings
+   :header-rows: 1
+
+   * - Python type
+     - DSL field
+   * - ``str``
+     - ``Text(required=True)``
+   * - ``bool``
+     - ``Boolean(required=True)``
+   * - ``int``
+     - ``Integer(required=True)``
+   * - ``float``
+     - ``Float(required=True)``
+   * - ``bytes``
+     - ``Binary(required=True)``
+   * - ``datetime``
+     - ``Date(required=True)``
+   * - ``date``
+     - ``Date(format="yyyy-MM-dd", required=True)``
+
+In addition to the above native types, a field can also be given a type hint
+of an ``InnerDoc`` subclass, in which case it becomes an ``Object`` field of
+that class. When the ``InnerDoc`` subclass is wrapped with ``List``, a
+``Nested`` field is created instead.
+
+.. code:: python
+
+    from typing import List
+
+    class Address(InnerDoc):
+        ...
+
+    class Comment(InnerDoc):
+        ...
+    
+    class Post(Document):
+        address: Address         # same as Object(Address)
+        comments: List[Comment]  # same as Nested(Comment)
+
+Unfortunately it is impossible to have Python type hints that uniquely
+identify every possible Elasticsearch field type. To choose a field type that
+is different thant the ones in the table above, the field instance can be added
+explicitly as a right-side assignment in the field declaration. The next
+example creates a field that is typed as ``str``, but is mapped to ``Keyword``
+instead of ``Text``:
+
+.. code:: python
+
+    class MyDocument(Document):
+        category: str = Keyword()
+
+This form can also be used when additional options need to be given to
+initialize the field, such as when using custom analyzer settings:
+
+.. code:: python
+
+    class Comment(InnerDoc):
+        content: str = Text(analyzer='snowball')
+
+The standard ``Optional`` modifier from the Python ``typing`` package can be
+used to change a typed field from required to optional. The ``List`` modifier
+can be added to a field to convert it to an array, similar to using the
+``multi=True`` argument on the field object.
+
+When using type hints as above, subclasses of ``Document`` and ``InnerDoc``
+inherit some of the behaviors associated with Python dataclasses. If
+necessary, the ``mapped_field()`` wrapper can be used on the right side of a
+typed field declaration, enabling dataclass options such as ``default`` or
+``default_factory`` to be included:
+
+.. code:: python
+
+    class MyDocument(Document):
+        title: str = mapped_field(default="no title")
+        created_at: datetime = mapped_field(default_factory=datetime.now)
+        published: bool = mapped_field(default=False)
+        category: str = mapped_field(Keyword(), default="general")
+
+Static type checkers such as `mypy <https://mypy-lang.org/>`_ and
+`pyright <https://github.com/microsoft/pyright>`_ can use the type hints and
+the dataclass-specific options added to the ``mapped_field()`` function to
+improve type inference and provide better real-time suggestions in IDEs.
+
+One situation in which type checkers can't infer the correct type is when
+using fields as class attributes. Consider the following example:
+
+.. code:: python
+
+    class MyDocument(Document):
+        title: str = mapped_field(default="no title")
+
+    doc = MyDocument()
+    # doc.title is typed as "str" (correct)
+    # MyDocument.title is also typed as "str" (incorrect)
+
+To help type checkers correctly identify class attributes as such, the ``M``
+generic must be used as a wrapper to the type hint, as shown in the next
+example:
+
+.. code:: python
+
+    from elasticsearch_dsl import M
+
+    class MyDocument(Document):
+        title: M[str]
+        created_at: M[datetime] = mapped_field(default_factory=datetime.now)
+
+    doc = MyDocument()
+    # doc.title is typed as "str"
+    # MyDocument.title is typed as "InstrumentedField"
+
+Note that the ``M`` type hint does not provide any runtime behavior, it just
+provides additional typing declarations for type checkers.
+
+The ``InstrumentedField`` objects returned when fields are accessed as class
+attributes are proxies for the field instances that can be used anywhere a
+field needs to be referenced, such as when specifying sort options in a
+``Search`` object:
+
+.. code:: python
+
+    # sort by creation date descending, and title ascending
+    s = MyDocument.search().sort(-MyDocument.created_at, MyDocument.title)
+
 Note on dates
 ~~~~~~~~~~~~~
 
diff --git a/examples/async/vectors.py b/examples/async/vectors.py
index 8dfcf7c5..b845455b 100644
--- a/examples/async/vectors.py
+++ b/examples/async/vectors.py
@@ -133,7 +133,7 @@ async def create():
 
 async def search(query):
     return WorkplaceDoc.search().knn(
-        field="passages.embedding",
+        field=WorkplaceDoc.passages.embedding,
         k=5,
         num_candidates=50,
         query_vector=list(WorkplaceDoc.get_embedding(query)),
diff --git a/examples/vectors.py b/examples/vectors.py
index aff710e0..25943a1c 100644
--- a/examples/vectors.py
+++ b/examples/vectors.py
@@ -132,7 +132,7 @@ def create():
 
 def search(query):
     return WorkplaceDoc.search().knn(
-        field="passages.embedding",
+        field=WorkplaceDoc.passages.embedding,
         k=5,
         num_candidates=50,
         query_vector=list(WorkplaceDoc.get_embedding(query)),

From 2819701b138eb0eb5d49f8068b9aa9f6e164b7f0 Mon Sep 17 00:00:00 2001
From: Miguel Grinberg <miguel.grinberg@gmail.com>
Date: Thu, 20 Jun 2024 10:13:55 +0100
Subject: [PATCH 08/12] Update docs/persistence.rst

Co-authored-by: Quentin Pradet <quentin.pradet@gmail.com>
---
 docs/persistence.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/persistence.rst b/docs/persistence.rst
index 12e4a834..8aef073d 100644
--- a/docs/persistence.rst
+++ b/docs/persistence.rst
@@ -186,10 +186,10 @@ can be added to a field to convert it to an array, similar to using the
 ``multi=True`` argument on the field object.
 
 When using type hints as above, subclasses of ``Document`` and ``InnerDoc``
-inherit some of the behaviors associated with Python dataclasses. If
-necessary, the ``mapped_field()`` wrapper can be used on the right side of a
-typed field declaration, enabling dataclass options such as ``default`` or
-``default_factory`` to be included:
+inherit some of the behaviors associated with Python dataclasses. To add
+per-field dataclass options such as ``default`` or ``default_factory`` , the
+``mapped_field()`` wrapper can be used on the right side of a typed field
+declaration:
 
 .. code:: python
 

From 66542876917d60be1130f4928e5d8fa12d00169f Mon Sep 17 00:00:00 2001
From: Miguel Grinberg <miguel.grinberg@gmail.com>
Date: Thu, 20 Jun 2024 10:50:35 +0100
Subject: [PATCH 09/12] Update elasticsearch_dsl/document_base.py

Co-authored-by: Quentin Pradet <quentin.pradet@gmail.com>
---
 elasticsearch_dsl/document_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/elasticsearch_dsl/document_base.py b/elasticsearch_dsl/document_base.py
index 27545106..c27ac175 100644
--- a/elasticsearch_dsl/document_base.py
+++ b/elasticsearch_dsl/document_base.py
@@ -45,7 +45,7 @@ def __init__(self, *args, **kwargs):
 class InstrumentedField:
     """Proxy object for a mapped document field.
 
-    An object of this instance is returned when a field is access as a class
+    An object of this instance is returned when a field is accessed as a class
     attribute of a ``Document`` or ``InnerDoc`` subclass. These objects can
     be used in any situation in which a reference to a field is required, such
     as when specifying sort options in a search::

From 2997d3c0d45a3555b6003f069d3dc42f006874c2 Mon Sep 17 00:00:00 2001
From: Miguel Grinberg <miguel.grinberg@gmail.com>
Date: Thu, 20 Jun 2024 15:31:09 +0100
Subject: [PATCH 10/12] addressed review feedback

---
 docs/persistence.rst               | 61 +++++++++++++++++++----------
 elasticsearch_dsl/document_base.py | 16 ++++----
 tests/_async/test_document.py      | 62 ++++++++++++++++++++++++++++++
 tests/_sync/test_document.py       | 62 ++++++++++++++++++++++++++++++
 4 files changed, 174 insertions(+), 27 deletions(-)

diff --git a/docs/persistence.rst b/docs/persistence.rst
index 8aef073d..ed082713 100644
--- a/docs/persistence.rst
+++ b/docs/persistence.rst
@@ -114,9 +114,14 @@ Here are some simple examples:
     from typing import Optional
 
     class Post(Document):
-        title: str                      # same as Text(required=True)
-        created_at: Optional[datetime]  # same as Date(required=False)
-        published: bool                 # same as Boolean(required=True)    
+        title: str                      # same as title = Text(required=True)
+        created_at: Optional[datetime]  # same as created_at = Date(required=False)
+        published: bool                 # same as published = Boolean(required=True)    
+
+It is important to note that when using ``Field`` subclasses such as ``Text``,
+``Date`` and ``Boolean``, they must be given in the right-side of an assignment,
+as shown in examples above. Using these classes as type hints will result in
+errors.
 
 Python types are mapped to their corresponding field type according to the
 following table:
@@ -140,10 +145,14 @@ following table:
      - ``Date(required=True)``
    * - ``date``
      - ``Date(format="yyyy-MM-dd", required=True)``
-
-In addition to the above native types, a field can also be given a type hint
-of an ``InnerDoc`` subclass, in which case it becomes an ``Object`` field of
-that class. When the ``InnerDoc`` subclass is wrapped with ``List``, a
+   * - ``InnerDocSubclass``
+     - ``Object(InnerDocSubclass)``
+   * - ``List(InnerDocSubclass)``
+     - ``Nested(InnerDocSubclass)``
+
+As noted in the last two rows of the table, a field can also be given a type
+hint of an ``InnerDoc`` subclass, in which case it becomes an ``Object`` field
+of that class. When the ``InnerDoc`` subclass is wrapped with ``List``, a
 ``Nested`` field is created instead.
 
 .. code:: python
@@ -157,12 +166,12 @@ that class. When the ``InnerDoc`` subclass is wrapped with ``List``, a
         ...
     
     class Post(Document):
-        address: Address         # same as Object(Address)
-        comments: List[Comment]  # same as Nested(Comment)
+        address: Address         # same as address = Object(Address)
+        comments: List[Comment]  # same as comments = Nested(Comment)
 
 Unfortunately it is impossible to have Python type hints that uniquely
 identify every possible Elasticsearch field type. To choose a field type that
-is different thant the ones in the table above, the field instance can be added
+is different than the ones in the table above, the field instance can be added
 explicitly as a right-side assignment in the field declaration. The next
 example creates a field that is typed as ``str``, but is mapped to ``Keyword``
 instead of ``Text``:
@@ -170,7 +179,7 @@ instead of ``Text``:
 .. code:: python
 
     class MyDocument(Document):
-        category: str = Keyword()
+        category: str = Keyword(required=True)
 
 This form can also be used when additional options need to be given to
 initialize the field, such as when using custom analyzer settings:
@@ -178,7 +187,7 @@ initialize the field, such as when using custom analyzer settings:
 .. code:: python
 
     class Comment(InnerDoc):
-        content: str = Text(analyzer='snowball')
+        content: str = Text(analyzer='snowball', required=True)
 
 The standard ``Optional`` modifier from the Python ``typing`` package can be
 used to change a typed field from required to optional. The ``List`` modifier
@@ -186,9 +195,11 @@ can be added to a field to convert it to an array, similar to using the
 ``multi=True`` argument on the field object.
 
 When using type hints as above, subclasses of ``Document`` and ``InnerDoc``
-inherit some of the behaviors associated with Python dataclasses. To add
-per-field dataclass options such as ``default`` or ``default_factory`` , the
-``mapped_field()`` wrapper can be used on the right side of a typed field
+inherit some of the behaviors associated with Python dataclasses, as defined by
+`PEP 681 <https://peps.python.org/pep-0681/>`_ and the
+`dataclass_transform decorator <https://typing.readthedocs.io/en/latest/spec/dataclasses.html#dataclass-transform>`_.
+To add per-field dataclass options such as ``default`` or ``default_factory``,
+the ``mapped_field()`` wrapper can be used on the right side of a typed field
 declaration:
 
 .. code:: python
@@ -197,7 +208,11 @@ declaration:
         title: str = mapped_field(default="no title")
         created_at: datetime = mapped_field(default_factory=datetime.now)
         published: bool = mapped_field(default=False)
-        category: str = mapped_field(Keyword(), default="general")
+        category: str = mapped_field(Keyword(required=True), default="general")
+
+When using the ``mapped_field()`` wrapper function, an explicit field type
+instance can be passed as a first positional argument, as the ``category``
+field does in the example above.
 
 Static type checkers such as `mypy <https://mypy-lang.org/>`_ and
 `pyright <https://github.com/microsoft/pyright>`_ can use the type hints and
@@ -210,7 +225,7 @@ using fields as class attributes. Consider the following example:
 .. code:: python
 
     class MyDocument(Document):
-        title: str = mapped_field(default="no title")
+        title: str
 
     doc = MyDocument()
     # doc.title is typed as "str" (correct)
@@ -218,7 +233,7 @@ using fields as class attributes. Consider the following example:
 
 To help type checkers correctly identify class attributes as such, the ``M``
 generic must be used as a wrapper to the type hint, as shown in the next
-example:
+examples:
 
 .. code:: python
 
@@ -230,10 +245,13 @@ example:
 
     doc = MyDocument()
     # doc.title is typed as "str"
+    # doc.created_at is typed as "datetime"
     # MyDocument.title is typed as "InstrumentedField"
+    # MyDocument.created_at is typed as "InstrumentedField"
 
-Note that the ``M`` type hint does not provide any runtime behavior, it just
-provides additional typing declarations for type checkers.
+Note that the ``M`` type hint does not provide any runtime behavior and its use
+is not required, but it can be useful to eliminate spurious type errors in IDEs
+or type checking builds.
 
 The ``InstrumentedField`` objects returned when fields are accessed as class
 attributes are proxies for the field instances that can be used anywhere a
@@ -245,6 +263,9 @@ field needs to be referenced, such as when specifying sort options in a
     # sort by creation date descending, and title ascending
     s = MyDocument.search().sort(-MyDocument.created_at, MyDocument.title)
 
+When specifying sorting order, the ``+`` and ``-`` unary operators can be used
+on the class field attributes to indicate ascending and descending order.
+
 Note on dates
 ~~~~~~~~~~~~~
 
diff --git a/elasticsearch_dsl/document_base.py b/elasticsearch_dsl/document_base.py
index c27ac175..bb4aff0d 100644
--- a/elasticsearch_dsl/document_base.py
+++ b/elasticsearch_dsl/document_base.py
@@ -62,14 +62,16 @@ def __init__(self, name, field):
         self._field = field
 
     def __getattr__(self, attr):
-        f = None
         try:
-            f = self._field[attr]
-        except KeyError:
-            pass
-        if isinstance(f, Field):
-            return InstrumentedField(f"{self._name}.{attr}", f)
-        return getattr(self._field, attr)
+            # first let's see if this is an attribute of this object
+            return super().__getattribute__(attr)
+        except AttributeError:
+            try:
+                # next we see if we have a sub-field with this name
+                return InstrumentedField(f"{self._name}.{attr}", self._field[attr])
+            except KeyError:
+                # lastly we let the wrapped field resolve this attribute
+                return getattr(self._field, attr)
 
     def __pos__(self):
         """Return the field name representation for ascending sort order"""
diff --git a/tests/_async/test_document.py b/tests/_async/test_document.py
index 46f39e57..d5c4e0e8 100644
--- a/tests/_async/test_document.py
+++ b/tests/_async/test_document.py
@@ -38,6 +38,7 @@
     mapped_field,
     utils,
 )
+from elasticsearch_dsl.document_base import InstrumentedField
 from elasticsearch_dsl.exceptions import IllegalOperation, ValidationException
 
 
@@ -755,3 +756,64 @@ class TypedDoc(AsyncDocument):
 
     s = TypedDoc.search().sort(TypedDoc.st, -TypedDoc.dt, +TypedDoc.ob.st)
     assert s.to_dict() == {"sort": ["st", {"dt": {"order": "desc"}}, "ob.st"]}
+
+
+def test_instrumented_field():
+    class Child(InnerDoc):
+        st: M[str]
+
+    class Doc(AsyncDocument):
+        st: str
+        ob: Child
+        ns: List[Child]
+
+    doc = Doc(
+        st="foo",
+        ob=Child(st="bar"),
+        ns=[
+            Child(st="baz"),
+            Child(st="qux"),
+        ],
+    )
+
+    assert type(doc.st) is str
+    assert doc.st == "foo"
+
+    assert type(doc.ob) is Child
+    assert doc.ob.st == "bar"
+
+    assert type(doc.ns) is utils.AttrList
+    assert doc.ns[0].st == "baz"
+    assert doc.ns[1].st == "qux"
+    assert type(doc.ns[0]) is Child
+    assert type(doc.ns[1]) is Child
+
+    assert type(Doc.st) is InstrumentedField
+    assert str(Doc.st) == "st"
+    assert +Doc.st == "st"
+    assert -Doc.st == "-st"
+    assert Doc.st.to_dict() == {"type": "text"}
+    with raises(AttributeError):
+        Doc.st.something
+
+    assert type(Doc.ob) is InstrumentedField
+    assert str(Doc.ob) == "ob"
+    assert str(Doc.ob.st) == "ob.st"
+    assert +Doc.ob.st == "ob.st"
+    assert -Doc.ob.st == "-ob.st"
+    assert Doc.ob.st.to_dict() == {"type": "text"}
+    with raises(AttributeError):
+        Doc.ob.something
+    with raises(AttributeError):
+        Doc.ob.st.something
+
+    assert type(Doc.ns) is InstrumentedField
+    assert str(Doc.ns) == "ns"
+    assert str(Doc.ns.st) == "ns.st"
+    assert +Doc.ns.st == "ns.st"
+    assert -Doc.ns.st == "-ns.st"
+    assert Doc.ns.st.to_dict() == {"type": "text"}
+    with raises(AttributeError):
+        Doc.ns.something
+    with raises(AttributeError):
+        Doc.ns.st.something
diff --git a/tests/_sync/test_document.py b/tests/_sync/test_document.py
index 52f7c763..df617389 100644
--- a/tests/_sync/test_document.py
+++ b/tests/_sync/test_document.py
@@ -38,6 +38,7 @@
     mapped_field,
     utils,
 )
+from elasticsearch_dsl.document_base import InstrumentedField
 from elasticsearch_dsl.exceptions import IllegalOperation, ValidationException
 
 
@@ -755,3 +756,64 @@ class TypedDoc(Document):
 
     s = TypedDoc.search().sort(TypedDoc.st, -TypedDoc.dt, +TypedDoc.ob.st)
     assert s.to_dict() == {"sort": ["st", {"dt": {"order": "desc"}}, "ob.st"]}
+
+
+def test_instrumented_field():
+    class Child(InnerDoc):
+        st: M[str]
+
+    class Doc(Document):
+        st: str
+        ob: Child
+        ns: List[Child]
+
+    doc = Doc(
+        st="foo",
+        ob=Child(st="bar"),
+        ns=[
+            Child(st="baz"),
+            Child(st="qux"),
+        ],
+    )
+
+    assert type(doc.st) is str
+    assert doc.st == "foo"
+
+    assert type(doc.ob) is Child
+    assert doc.ob.st == "bar"
+
+    assert type(doc.ns) is utils.AttrList
+    assert doc.ns[0].st == "baz"
+    assert doc.ns[1].st == "qux"
+    assert type(doc.ns[0]) is Child
+    assert type(doc.ns[1]) is Child
+
+    assert type(Doc.st) is InstrumentedField
+    assert str(Doc.st) == "st"
+    assert +Doc.st == "st"
+    assert -Doc.st == "-st"
+    assert Doc.st.to_dict() == {"type": "text"}
+    with raises(AttributeError):
+        Doc.st.something
+
+    assert type(Doc.ob) is InstrumentedField
+    assert str(Doc.ob) == "ob"
+    assert str(Doc.ob.st) == "ob.st"
+    assert +Doc.ob.st == "ob.st"
+    assert -Doc.ob.st == "-ob.st"
+    assert Doc.ob.st.to_dict() == {"type": "text"}
+    with raises(AttributeError):
+        Doc.ob.something
+    with raises(AttributeError):
+        Doc.ob.st.something
+
+    assert type(Doc.ns) is InstrumentedField
+    assert str(Doc.ns) == "ns"
+    assert str(Doc.ns.st) == "ns.st"
+    assert +Doc.ns.st == "ns.st"
+    assert -Doc.ns.st == "-ns.st"
+    assert Doc.ns.st.to_dict() == {"type": "text"}
+    with raises(AttributeError):
+        Doc.ns.something
+    with raises(AttributeError):
+        Doc.ns.st.something

From 749dfb498e30fc099eb4dd6d5dfdaaa2f7445672 Mon Sep 17 00:00:00 2001
From: Miguel Grinberg <miguel.grinberg@gmail.com>
Date: Fri, 21 Jun 2024 15:09:33 +0100
Subject: [PATCH 11/12] better docs for Optional

---
 docs/persistence.rst               | 43 +++++++++++++++++-------------
 elasticsearch_dsl/document_base.py |  1 -
 examples/async/vectors.py          |  6 ++---
 examples/vectors.py                |  6 ++---
 4 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/docs/persistence.rst b/docs/persistence.rst
index ed082713..ab3ffbe4 100644
--- a/docs/persistence.rst
+++ b/docs/persistence.rst
@@ -145,15 +145,24 @@ following table:
      - ``Date(required=True)``
    * - ``date``
      - ``Date(format="yyyy-MM-dd", required=True)``
-   * - ``InnerDocSubclass``
-     - ``Object(InnerDocSubclass)``
-   * - ``List(InnerDocSubclass)``
-     - ``Nested(InnerDocSubclass)``
 
-As noted in the last two rows of the table, a field can also be given a type
-hint of an ``InnerDoc`` subclass, in which case it becomes an ``Object`` field
-of that class. When the ``InnerDoc`` subclass is wrapped with ``List``, a
-``Nested`` field is created instead.
+To type a field as optional, the standard ``Optional`` modifier from the Python
+``typing`` package can be used. The ``List`` modifier can be added to a field
+to convert it to an array, similar to using the ``multi=True`` argument on the
+field object.
+
+.. code:: python
+
+    from typing import Optional, List
+
+    class MyDoc(Document):
+        pub_date: Optional[datetime]  # same as pub_date = Date()
+        authors: List[str]            # same as authors = Text(multi=True, required=True)
+        comments: Optional[List[str]] # same as comments = Text(multi=True)
+
+A field can also be given a type hint of an ``InnerDoc`` subclass, in which
+case it becomes an ``Object`` field of that class. When the ``InnerDoc``
+subclass is wrapped with ``List``, a ``Nested`` field is created instead.
 
 .. code:: python
 
@@ -166,34 +175,30 @@ of that class. When the ``InnerDoc`` subclass is wrapped with ``List``, a
         ...
     
     class Post(Document):
-        address: Address         # same as address = Object(Address)
-        comments: List[Comment]  # same as comments = Nested(Comment)
+        address: Address         # same as address = Object(Address, required=True)
+        comments: List[Comment]  # same as comments = Nested(Comment, required=True)
 
 Unfortunately it is impossible to have Python type hints that uniquely
 identify every possible Elasticsearch field type. To choose a field type that
 is different than the ones in the table above, the field instance can be added
 explicitly as a right-side assignment in the field declaration. The next
-example creates a field that is typed as ``str``, but is mapped to ``Keyword``
-instead of ``Text``:
+example creates a field that is typed as ``Optional[str]``, but is mapped to
+``Keyword`` instead of ``Text``:
 
 .. code:: python
 
     class MyDocument(Document):
-        category: str = Keyword(required=True)
+        category: Optional[str] = Keyword()
 
 This form can also be used when additional options need to be given to
-initialize the field, such as when using custom analyzer settings:
+initialize the field, such as when using custom analyzer settings or changing
+the ``required`` default:
 
 .. code:: python
 
     class Comment(InnerDoc):
         content: str = Text(analyzer='snowball', required=True)
 
-The standard ``Optional`` modifier from the Python ``typing`` package can be
-used to change a typed field from required to optional. The ``List`` modifier
-can be added to a field to convert it to an array, similar to using the
-``multi=True`` argument on the field object.
-
 When using type hints as above, subclasses of ``Document`` and ``InnerDoc``
 inherit some of the behaviors associated with Python dataclasses, as defined by
 `PEP 681 <https://peps.python.org/pep-0681/>`_ and the
diff --git a/elasticsearch_dsl/document_base.py b/elasticsearch_dsl/document_base.py
index bb4aff0d..46694157 100644
--- a/elasticsearch_dsl/document_base.py
+++ b/elasticsearch_dsl/document_base.py
@@ -197,7 +197,6 @@ def __init__(self, name, bases, attrs):
                     # object or nested field
                     field = Nested if multi else Object
                     field_args = [type_]
-                    required = False
                 elif type_ in self.type_annotation_map:
                     # use best field type for the type hint provided
                     field, field_kwargs = self.type_annotation_map[type_]
diff --git a/examples/async/vectors.py b/examples/async/vectors.py
index b845455b..84bc001e 100644
--- a/examples/async/vectors.py
+++ b/examples/async/vectors.py
@@ -86,9 +86,9 @@ class Index:
     content: M[str]
     created: M[datetime]
     updated: M[Optional[datetime]]
-    url: M[str] = mapped_field(Keyword())
-    category: M[str] = mapped_field(Keyword())
-    passages: M[List[Passage]] = mapped_field(default=[])
+    url: M[str] = mapped_field(Keyword(required=True))
+    category: M[str] = mapped_field(Keyword(required=True))
+    passages: M[Optional[List[Passage]]] = mapped_field(default=[])
 
     _model = None
 
diff --git a/examples/vectors.py b/examples/vectors.py
index 25943a1c..ae34eaae 100644
--- a/examples/vectors.py
+++ b/examples/vectors.py
@@ -85,9 +85,9 @@ class Index:
     content: M[str]
     created: M[datetime]
     updated: M[Optional[datetime]]
-    url: M[str] = mapped_field(Keyword())
-    category: M[str] = mapped_field(Keyword())
-    passages: M[List[Passage]] = mapped_field(default=[])
+    url: M[str] = mapped_field(Keyword(required=True))
+    category: M[str] = mapped_field(Keyword(required=True))
+    passages: M[Optional[List[Passage]]] = mapped_field(default=[])
 
     _model = None
 

From 4c774189ea64fccf8af3b36e5a5579a9398c1e4f Mon Sep 17 00:00:00 2001
From: Miguel Grinberg <miguel.grinberg@gmail.com>
Date: Fri, 21 Jun 2024 15:51:50 +0100
Subject: [PATCH 12/12] fix optional in test

---
 tests/_async/test_document.py | 4 ++--
 tests/_sync/test_document.py  | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/_async/test_document.py b/tests/_async/test_document.py
index d5c4e0e8..3bfb80b2 100644
--- a/tests/_async/test_document.py
+++ b/tests/_async/test_document.py
@@ -656,8 +656,8 @@ class TypedDoc(AsyncDocument):
         st: str
         dt: Optional[datetime]
         li: List[int]
-        ob: TypedInnerDoc
-        ns: List[TypedInnerDoc]
+        ob: Optional[TypedInnerDoc]
+        ns: Optional[List[TypedInnerDoc]]
         ip: Optional[str] = field.Ip()
         k1: str = field.Keyword(required=True)
         k2: M[str] = field.Keyword()
diff --git a/tests/_sync/test_document.py b/tests/_sync/test_document.py
index df617389..27567ac9 100644
--- a/tests/_sync/test_document.py
+++ b/tests/_sync/test_document.py
@@ -656,8 +656,8 @@ class TypedDoc(Document):
         st: str
         dt: Optional[datetime]
         li: List[int]
-        ob: TypedInnerDoc
-        ns: List[TypedInnerDoc]
+        ob: Optional[TypedInnerDoc]
+        ns: Optional[List[TypedInnerDoc]]
         ip: Optional[str] = field.Ip()
         k1: str = field.Keyword(required=True)
         k2: M[str] = field.Keyword()