diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml index 8dad68ab20..2941f9c3ab 100644 --- a/.github/workflows/test-python.yml +++ b/.github/workflows/test-python.yml @@ -68,9 +68,6 @@ jobs: mypy --install-types --non-interactive --disable-error-code var-annotated --disable-error-code attr-defined --disable-error-code union-attr --disable-error-code assignment --disable-error-code no-redef --disable-error-code index --allow-redefinition --allow-untyped-globals --exclude "test/mypy_fails/*.*" test python -m pip install -U typing_extensions mypy --install-types --non-interactive test/test_typing.py test/test_typing_strict.py - - name: Run mypy strict - run: | - mypy --strict test/test_typing_strict.py - name: Run pyright run: | python -m pip install -U pip pyright==1.1.290 diff --git a/bson/__init__.py b/bson/__init__.py index 2fe4aa173e..700a5d4cf8 100644 --- a/bson/__init__.py +++ b/bson/__init__.py @@ -1115,7 +1115,7 @@ def decode_all( if not isinstance(opts, CodecOptions): raise _CODEC_OPTIONS_TYPE_ERROR - return _decode_all(data, opts) # type: ignore[arg-type] + return _decode_all(data, opts) # type:ignore[arg-type] def _decode_selective(rawdoc: Any, fields: Any, codec_options: Any) -> Mapping[Any, Any]: diff --git a/bson/codec_options.py b/bson/codec_options.py index c09de8a931..096be85264 100644 --- a/bson/codec_options.py +++ b/bson/codec_options.py @@ -19,15 +19,17 @@ import enum from collections.abc import MutableMapping as _MutableMapping from typing import ( + TYPE_CHECKING, Any, Callable, Dict, + Generic, Iterable, Mapping, NamedTuple, Optional, + Tuple, Type, - TypeVar, Union, cast, ) @@ -37,11 +39,7 @@ UUID_REPRESENTATION_NAMES, UuidRepresentation, ) - - -def _abstractproperty(func: Callable[..., Any]) -> property: - return property(abc.abstractmethod(func)) - +from bson.typings import _DocumentType _RAW_BSON_DOCUMENT_MARKER = 101 @@ -62,7 +60,7 @@ class TypeEncoder(abc.ABC): See :ref:`custom-type-type-codec` documentation for an example. """ - @_abstractproperty + @abc.abstractproperty def python_type(self) -> Any: """The Python type to be converted into something serializable.""" pass @@ -83,7 +81,7 @@ class TypeDecoder(abc.ABC): See :ref:`custom-type-type-codec` documentation for an example. """ - @_abstractproperty + @abc.abstractproperty def bson_type(self) -> Any: """The BSON type to be converted into our own type.""" pass @@ -112,7 +110,6 @@ class TypeCodec(TypeEncoder, TypeDecoder): _Codec = Union[TypeEncoder, TypeDecoder, TypeCodec] _Fallback = Callable[[Any], Any] -_DocumentType = TypeVar("_DocumentType", bound=Mapping[str, Any]) class TypeRegistry(object): @@ -244,208 +241,259 @@ class _BaseCodecOptions(NamedTuple): datetime_conversion: Optional[DatetimeConversion] -class CodecOptions(_BaseCodecOptions): - """Encapsulates options used encoding and / or decoding BSON.""" - - def __init__(self, *args, **kwargs): - """Encapsulates options used encoding and / or decoding BSON. - - The `document_class` option is used to define a custom type for use - decoding BSON documents. Access to the underlying raw BSON bytes for - a document is available using the :class:`~bson.raw_bson.RawBSONDocument` - type:: - - >>> from bson.raw_bson import RawBSONDocument - >>> from bson.codec_options import CodecOptions - >>> codec_options = CodecOptions(document_class=RawBSONDocument) - >>> coll = db.get_collection('test', codec_options=codec_options) - >>> doc = coll.find_one() - >>> doc.raw - '\\x16\\x00\\x00\\x00\\x07_id\\x00[0\\x165\\x91\\x10\\xea\\x14\\xe8\\xc5\\x8b\\x93\\x00' - - The document class can be any type that inherits from - :class:`~collections.abc.MutableMapping`:: - - >>> class AttributeDict(dict): - ... # A dict that supports attribute access. - ... def __getattr__(self, key): - ... return self[key] - ... def __setattr__(self, key, value): - ... self[key] = value - ... - >>> codec_options = CodecOptions(document_class=AttributeDict) - >>> coll = db.get_collection('test', codec_options=codec_options) - >>> doc = coll.find_one() - >>> doc._id - ObjectId('5b3016359110ea14e8c58b93') - - See :doc:`/examples/datetimes` for examples using the `tz_aware` and - `tzinfo` options. - - See :doc:`/examples/uuid` for examples using the `uuid_representation` - option. - - :Parameters: - - `document_class`: BSON documents returned in queries will be decoded - to an instance of this class. Must be a subclass of - :class:`~collections.abc.MutableMapping`. Defaults to :class:`dict`. - - `tz_aware`: If ``True``, BSON datetimes will be decoded to timezone - aware instances of :class:`~datetime.datetime`. Otherwise they will be - naive. Defaults to ``False``. - - `uuid_representation`: The BSON representation to use when encoding - and decoding instances of :class:`~uuid.UUID`. Defaults to - :data:`~bson.binary.UuidRepresentation.UNSPECIFIED`. New - applications should consider setting this to - :data:`~bson.binary.UuidRepresentation.STANDARD` for cross language - compatibility. See :ref:`handling-uuid-data-example` for details. - - `unicode_decode_error_handler`: The error handler to apply when - a Unicode-related error occurs during BSON decoding that would - otherwise raise :exc:`UnicodeDecodeError`. Valid options include - 'strict', 'replace', 'backslashreplace', 'surrogateescape', and - 'ignore'. Defaults to 'strict'. - - `tzinfo`: A :class:`~datetime.tzinfo` subclass that specifies the - timezone to/from which :class:`~datetime.datetime` objects should be - encoded/decoded. - - `type_registry`: Instance of :class:`TypeRegistry` used to customize - encoding and decoding behavior. - - `datetime_conversion`: Specifies how UTC datetimes should be decoded - within BSON. Valid options include 'datetime_ms' to return as a - DatetimeMS, 'datetime' to return as a datetime.datetime and - raising a ValueError for out-of-range values, 'datetime_auto' to - return DatetimeMS objects when the underlying datetime is - out-of-range and 'datetime_clamp' to clamp to the minimum and - maximum possible datetimes. Defaults to 'datetime'. - - .. versionchanged:: 4.0 - The default for `uuid_representation` was changed from - :const:`~bson.binary.UuidRepresentation.PYTHON_LEGACY` to - :const:`~bson.binary.UuidRepresentation.UNSPECIFIED`. - - .. versionadded:: 3.8 - `type_registry` attribute. - - .. warning:: Care must be taken when changing - `unicode_decode_error_handler` from its default value ('strict'). - The 'replace' and 'ignore' modes should not be used when documents - retrieved from the server will be modified in the client application - and stored back to the server. - """ - super().__init__() - - def __new__( - cls: Type["CodecOptions"], - document_class: Optional[Type[Mapping[str, Any]]] = None, - tz_aware: bool = False, - uuid_representation: Optional[int] = UuidRepresentation.UNSPECIFIED, - unicode_decode_error_handler: str = "strict", - tzinfo: Optional[datetime.tzinfo] = None, - type_registry: Optional[TypeRegistry] = None, - datetime_conversion: Optional[DatetimeConversion] = DatetimeConversion.DATETIME, - ) -> "CodecOptions": - doc_class = document_class or dict - # issubclass can raise TypeError for generic aliases like SON[str, Any]. - # In that case we can use the base class for the comparison. - is_mapping = False - try: - is_mapping = issubclass(doc_class, _MutableMapping) - except TypeError: - if hasattr(doc_class, "__origin__"): - is_mapping = issubclass(doc_class.__origin__, _MutableMapping) # type: ignore[union-attr] - if not (is_mapping or _raw_document_class(doc_class)): - raise TypeError( - "document_class must be dict, bson.son.SON, " - "bson.raw_bson.RawBSONDocument, or a " - "subclass of collections.abc.MutableMapping" - ) - if not isinstance(tz_aware, bool): - raise TypeError("tz_aware must be True or False") - if uuid_representation not in ALL_UUID_REPRESENTATIONS: - raise ValueError( - "uuid_representation must be a value from bson.binary.UuidRepresentation" +if TYPE_CHECKING: + + class CodecOptions(Tuple, Generic[_DocumentType]): + document_class: Type[_DocumentType] + tz_aware: bool + uuid_representation: int + unicode_decode_error_handler: Optional[str] + tzinfo: Optional[datetime.tzinfo] + type_registry: TypeRegistry + datetime_conversion: Optional[int] + + def __new__( + cls: Type["CodecOptions"], + document_class: Optional[Type[_DocumentType]] = ..., + tz_aware: bool = ..., + uuid_representation: Optional[int] = ..., + unicode_decode_error_handler: Optional[str] = ..., + tzinfo: Optional[datetime.tzinfo] = ..., + type_registry: Optional[TypeRegistry] = ..., + datetime_conversion: Optional[int] = ..., + ) -> "CodecOptions[_DocumentType]": + ... + + # CodecOptions API + def with_options(self, **kwargs: Any) -> "CodecOptions[_DocumentType]": + ... + + def _arguments_repr(self) -> str: + ... + + def _options_dict(self) -> Dict[Any, Any]: + ... + + # NamedTuple API + @classmethod + def _make(cls, obj: Iterable) -> "CodecOptions[_DocumentType]": + ... + + def _asdict(self) -> Dict[str, Any]: + ... + + def _replace(self, **kwargs: Any) -> "CodecOptions[_DocumentType]": + ... + + _source: str + _fields: Tuple[str] + +else: + + class CodecOptions(_BaseCodecOptions): + """Encapsulates options used encoding and / or decoding BSON.""" + + def __init__(self, *args, **kwargs): + """Encapsulates options used encoding and / or decoding BSON. + + The `document_class` option is used to define a custom type for use + decoding BSON documents. Access to the underlying raw BSON bytes for + a document is available using the :class:`~bson.raw_bson.RawBSONDocument` + type:: + + >>> from bson.raw_bson import RawBSONDocument + >>> from bson.codec_options import CodecOptions + >>> codec_options = CodecOptions(document_class=RawBSONDocument) + >>> coll = db.get_collection('test', codec_options=codec_options) + >>> doc = coll.find_one() + >>> doc.raw + '\\x16\\x00\\x00\\x00\\x07_id\\x00[0\\x165\\x91\\x10\\xea\\x14\\xe8\\xc5\\x8b\\x93\\x00' + + The document class can be any type that inherits from + :class:`~collections.abc.MutableMapping`:: + + >>> class AttributeDict(dict): + ... # A dict that supports attribute access. + ... def __getattr__(self, key): + ... return self[key] + ... def __setattr__(self, key, value): + ... self[key] = value + ... + >>> codec_options = CodecOptions(document_class=AttributeDict) + >>> coll = db.get_collection('test', codec_options=codec_options) + >>> doc = coll.find_one() + >>> doc._id + ObjectId('5b3016359110ea14e8c58b93') + + See :doc:`/examples/datetimes` for examples using the `tz_aware` and + `tzinfo` options. + + See :doc:`/examples/uuid` for examples using the `uuid_representation` + option. + + :Parameters: + - `document_class`: BSON documents returned in queries will be decoded + to an instance of this class. Must be a subclass of + :class:`~collections.abc.MutableMapping`. Defaults to :class:`dict`. + - `tz_aware`: If ``True``, BSON datetimes will be decoded to timezone + aware instances of :class:`~datetime.datetime`. Otherwise they will be + naive. Defaults to ``False``. + - `uuid_representation`: The BSON representation to use when encoding + and decoding instances of :class:`~uuid.UUID`. Defaults to + :data:`~bson.binary.UuidRepresentation.UNSPECIFIED`. New + applications should consider setting this to + :data:`~bson.binary.UuidRepresentation.STANDARD` for cross language + compatibility. See :ref:`handling-uuid-data-example` for details. + - `unicode_decode_error_handler`: The error handler to apply when + a Unicode-related error occurs during BSON decoding that would + otherwise raise :exc:`UnicodeDecodeError`. Valid options include + 'strict', 'replace', 'backslashreplace', 'surrogateescape', and + 'ignore'. Defaults to 'strict'. + - `tzinfo`: A :class:`~datetime.tzinfo` subclass that specifies the + timezone to/from which :class:`~datetime.datetime` objects should be + encoded/decoded. + - `type_registry`: Instance of :class:`TypeRegistry` used to customize + encoding and decoding behavior. + - `datetime_conversion`: Specifies how UTC datetimes should be decoded + within BSON. Valid options include 'datetime_ms' to return as a + DatetimeMS, 'datetime' to return as a datetime.datetime and + raising a ValueError for out-of-range values, 'datetime_auto' to + return DatetimeMS objects when the underlying datetime is + out-of-range and 'datetime_clamp' to clamp to the minimum and + maximum possible datetimes. Defaults to 'datetime'. + + .. versionchanged:: 4.0 + The default for `uuid_representation` was changed from + :const:`~bson.binary.UuidRepresentation.PYTHON_LEGACY` to + :const:`~bson.binary.UuidRepresentation.UNSPECIFIED`. + + .. versionadded:: 3.8 + `type_registry` attribute. + + .. warning:: Care must be taken when changing + `unicode_decode_error_handler` from its default value ('strict'). + The 'replace' and 'ignore' modes should not be used when documents + retrieved from the server will be modified in the client application + and stored back to the server. + """ + super().__init__() + + def __new__( + cls: Type["CodecOptions"], + document_class: Optional[Type[Mapping[str, Any]]] = None, + tz_aware: bool = False, + uuid_representation: Optional[int] = UuidRepresentation.UNSPECIFIED, + unicode_decode_error_handler: str = "strict", + tzinfo: Optional[datetime.tzinfo] = None, + type_registry: Optional[TypeRegistry] = None, + datetime_conversion: Optional[DatetimeConversion] = DatetimeConversion.DATETIME, + ) -> "CodecOptions": + doc_class = document_class or dict + # issubclass can raise TypeError for generic aliases like SON[str, Any]. + # In that case we can use the base class for the comparison. + is_mapping = False + try: + is_mapping = issubclass(doc_class, _MutableMapping) + except TypeError: + if hasattr(doc_class, "__origin__"): + is_mapping = issubclass(doc_class.__origin__, _MutableMapping) + if not (is_mapping or _raw_document_class(doc_class)): + raise TypeError( + "document_class must be dict, bson.son.SON, " + "bson.raw_bson.RawBSONDocument, or a " + "subclass of collections.abc.MutableMapping" + ) + if not isinstance(tz_aware, bool): + raise TypeError("tz_aware must be True or False") + if uuid_representation not in ALL_UUID_REPRESENTATIONS: + raise ValueError( + "uuid_representation must be a value from bson.binary.UuidRepresentation" + ) + if not isinstance(unicode_decode_error_handler, str): + raise ValueError("unicode_decode_error_handler must be a string") + if tzinfo is not None: + if not isinstance(tzinfo, datetime.tzinfo): + raise TypeError("tzinfo must be an instance of datetime.tzinfo") + if not tz_aware: + raise ValueError("cannot specify tzinfo without also setting tz_aware=True") + + type_registry = type_registry or TypeRegistry() + + if not isinstance(type_registry, TypeRegistry): + raise TypeError("type_registry must be an instance of TypeRegistry") + + return tuple.__new__( + cls, + ( + doc_class, + tz_aware, + uuid_representation, + unicode_decode_error_handler, + tzinfo, + type_registry, + datetime_conversion, + ), ) - if not isinstance(unicode_decode_error_handler, str): - raise ValueError("unicode_decode_error_handler must be a string") - if tzinfo is not None: - if not isinstance(tzinfo, datetime.tzinfo): - raise TypeError("tzinfo must be an instance of datetime.tzinfo") - if not tz_aware: - raise ValueError("cannot specify tzinfo without also setting tz_aware=True") - - type_registry = type_registry or TypeRegistry() - - if not isinstance(type_registry, TypeRegistry): - raise TypeError("type_registry must be an instance of TypeRegistry") - - return tuple.__new__( - cls, - ( - doc_class, - tz_aware, - uuid_representation, - unicode_decode_error_handler, - tzinfo, - type_registry, - datetime_conversion, - ), - ) - - def _arguments_repr(self) -> str: - """Representation of the arguments used to create this object.""" - document_class_repr = "dict" if self.document_class is dict else repr(self.document_class) - - uuid_rep_repr = UUID_REPRESENTATION_NAMES.get( - self.uuid_representation, self.uuid_representation - ) - return ( - "document_class=%s, tz_aware=%r, uuid_representation=%s, " - "unicode_decode_error_handler=%r, tzinfo=%r, " - "type_registry=%r, datetime_conversion=%s" - % ( - document_class_repr, - self.tz_aware, - uuid_rep_repr, - self.unicode_decode_error_handler, - self.tzinfo, - self.type_registry, - self.datetime_conversion, + def _arguments_repr(self) -> str: + """Representation of the arguments used to create this object.""" + document_class_repr = ( + "dict" if self.document_class is dict else repr(self.document_class) ) - ) - - def _options_dict(self) -> Dict[str, Any]: - """Dictionary of the arguments used to create this object.""" - # TODO: PYTHON-2442 use _asdict() instead - return { - "document_class": self.document_class, - "tz_aware": self.tz_aware, - "uuid_representation": self.uuid_representation, - "unicode_decode_error_handler": self.unicode_decode_error_handler, - "tzinfo": self.tzinfo, - "type_registry": self.type_registry, - "datetime_conversion": self.datetime_conversion, - } - - def __repr__(self): - return "%s(%s)" % (self.__class__.__name__, self._arguments_repr()) - def with_options(self, **kwargs: Any) -> "CodecOptions": - """Make a copy of this CodecOptions, overriding some options:: - - >>> from bson.codec_options import DEFAULT_CODEC_OPTIONS - >>> DEFAULT_CODEC_OPTIONS.tz_aware - False - >>> options = DEFAULT_CODEC_OPTIONS.with_options(tz_aware=True) - >>> options.tz_aware - True - - .. versionadded:: 3.5 - """ - opts = self._options_dict() - opts.update(kwargs) - return CodecOptions(**opts) + uuid_rep_repr = UUID_REPRESENTATION_NAMES.get( + self.uuid_representation, self.uuid_representation + ) + return ( + "document_class=%s, tz_aware=%r, uuid_representation=%s, " + "unicode_decode_error_handler=%r, tzinfo=%r, " + "type_registry=%r, datetime_conversion=%s" + % ( + document_class_repr, + self.tz_aware, + uuid_rep_repr, + self.unicode_decode_error_handler, + self.tzinfo, + self.type_registry, + self.datetime_conversion, + ) + ) -DEFAULT_CODEC_OPTIONS = CodecOptions() + def _options_dict(self) -> Dict[str, Any]: + """Dictionary of the arguments used to create this object.""" + # TODO: PYTHON-2442 use _asdict() instead + return { + "document_class": self.document_class, + "tz_aware": self.tz_aware, + "uuid_representation": self.uuid_representation, + "unicode_decode_error_handler": self.unicode_decode_error_handler, + "tzinfo": self.tzinfo, + "type_registry": self.type_registry, + "datetime_conversion": self.datetime_conversion, + } + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, self._arguments_repr()) + + def with_options(self, **kwargs: Any) -> "CodecOptions": + """Make a copy of this CodecOptions, overriding some options:: + + >>> from bson.codec_options import DEFAULT_CODEC_OPTIONS + >>> DEFAULT_CODEC_OPTIONS.tz_aware + False + >>> options = DEFAULT_CODEC_OPTIONS.with_options(tz_aware=True) + >>> options.tz_aware + True + + .. versionadded:: 3.5 + """ + opts = self._options_dict() + opts.update(kwargs) + return CodecOptions(**opts) + + +DEFAULT_CODEC_OPTIONS: "CodecOptions[Mapping[str, Any]]" = CodecOptions() def _parse_codec_options(options: Any) -> CodecOptions: diff --git a/bson/codec_options.pyi b/bson/codec_options.pyi deleted file mode 100644 index 8242bd4cb2..0000000000 --- a/bson/codec_options.pyi +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright 2022-present MongoDB, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Workaround for https://bugs.python.org/issue43923. -Ideally we would have done this with a single class, but -generic subclasses *must* take a parameter, and prior to Python 3.9 -or in Python 3.7 and 3.8 with `from __future__ import annotations`, -you get the error: "TypeError: 'type' object is not subscriptable". -""" - -import datetime -import abc -import enum -from typing import Tuple, Generic, Optional, Mapping, Any, Type, Dict, Iterable, Tuple, Callable, Union -from bson.typings import _DocumentType, _DocumentTypeArg - - -class TypeEncoder(abc.ABC, metaclass=abc.ABCMeta): - @property - @abc.abstractmethod - def python_type(self) -> Any: ... - @abc.abstractmethod - def transform_python(self, value: Any) -> Any: ... - -class TypeDecoder(abc.ABC, metaclass=abc.ABCMeta): - @property - @abc.abstractmethod - def bson_type(self) -> Any: ... - @abc.abstractmethod - def transform_bson(self, value: Any) -> Any: ... - -class TypeCodec(TypeEncoder, TypeDecoder, metaclass=abc.ABCMeta): ... - -Codec = Union[TypeEncoder, TypeDecoder, TypeCodec] -Fallback = Callable[[Any], Any] - -class TypeRegistry: - _decoder_map: Dict[Any, Any] - _encoder_map: Dict[Any, Any] - _fallback_encoder: Optional[Fallback] - - def __init__(self, type_codecs: Optional[Iterable[Codec]] = ..., fallback_encoder: Optional[Fallback] = ...) -> None: ... - def __eq__(self, other: Any) -> Any: ... - -class DatetimeConversion(int, enum.Enum): - DATETIME = ... - DATETIME_CLAMP = ... - DATETIME_MS = ... - DATETIME_AUTO = ... - -class CodecOptions(Tuple, Generic[_DocumentType]): - document_class: Type[_DocumentType] - tz_aware: bool - uuid_representation: int - unicode_decode_error_handler: Optional[str] - tzinfo: Optional[datetime.tzinfo] - type_registry: TypeRegistry - datetime_conversion: Optional[int] - - def __new__( - cls: Type[CodecOptions], - document_class: Optional[Type[_DocumentType]] = ..., - tz_aware: bool = ..., - uuid_representation: Optional[int] = ..., - unicode_decode_error_handler: Optional[str] = ..., - tzinfo: Optional[datetime.tzinfo] = ..., - type_registry: Optional[TypeRegistry] = ..., - datetime_conversion: Optional[int] = ..., - ) -> CodecOptions[_DocumentType]: ... - - # CodecOptions API - def with_options(self, **kwargs: Any) -> CodecOptions[_DocumentTypeArg]: ... - - def _arguments_repr(self) -> str: ... - - def _options_dict(self) -> Dict[Any, Any]: ... - - # NamedTuple API - @classmethod - def _make(cls, obj: Iterable) -> CodecOptions[_DocumentType]: ... - - def _asdict(self) -> Dict[str, Any]: ... - - def _replace(self, **kwargs: Any) -> CodecOptions[_DocumentType]: ... - - _source: str - _fields: Tuple[str] - - -DEFAULT_CODEC_OPTIONS: "CodecOptions[Mapping[str, Any]]" -_RAW_BSON_DOCUMENT_MARKER: int - -def _raw_document_class(document_class: Any) -> bool: ... - -def _parse_codec_options(options: Any) -> CodecOptions: ... diff --git a/test/test_custom_types.py b/test/test_custom_types.py index 868756c67d..676b3b6af0 100644 --- a/test/test_custom_types.py +++ b/test/test_custom_types.py @@ -541,7 +541,8 @@ def transform_bson(self, value): {MyIntEncoder.python_type: codec_instances[1].transform_python}, ) self.assertEqual( - type_registry._decoder_map, {MyIntDecoder.bson_type: codec_instances[0].transform_bson} + type_registry._decoder_map, + {MyIntDecoder.bson_type: codec_instances[0].transform_bson}, ) def test_initialize_fail(self):