diff --git a/src/django_nh3/forms.py b/src/django_nh3/forms.py index ca44fb5..cf69f24 100644 --- a/src/django_nh3/forms.py +++ b/src/django_nh3/forms.py @@ -7,6 +7,8 @@ from django import forms from django.utils.safestring import mark_safe +from .utils import get_nh3_options + class Nh3Field(forms.CharField): """nh3 form field""" @@ -15,27 +17,35 @@ class Nh3Field(forms.CharField): def __init__( self, - attributes: dict[str, set[str]] = {}, + *args: Any, + attributes: dict[str, set[str]] | None = None, attribute_filter: Callable[[str, str, str], str] | None = None, - clean_content_tags: set[str] = set(), - empty_value: Any | None = "", + clean_content_tags: set[str] | None = None, + empty_value: Any | None = None, + generic_attribute_prefixes: set[str] | None = None, link_rel: str = "", + set_tag_attribute_values: dict[str, dict[str, str]] | None = None, strip_comments: bool = False, - tags: set[str] = set(), - *args: Any, + tags: set[str] | None = None, + tag_attribute_values: dict[str, dict[str, set[str]]] | None = None, + url_schemes: set[str] | None = None, **kwargs: dict[Any, Any], ): super().__init__(*args, **kwargs) self.empty_value = empty_value - self.nh3_options = { - "attributes": attributes, - "attribute_filter": attribute_filter, - "clean_content_tags": clean_content_tags, - "link_rel": link_rel, - "strip_comments": strip_comments, - "tags": tags, - } + self.nh3_options = get_nh3_options( + attributes=attributes, + attribute_filter=attribute_filter, + clean_content_tags=clean_content_tags, + generic_attribute_prefixes=generic_attribute_prefixes, + link_rel=link_rel, + set_tag_attribute_values=set_tag_attribute_values, + strip_comments=strip_comments, + tags=tags, + tag_attribute_values=tag_attribute_values, + url_schemes=url_schemes, + ) def to_python(self, value: Any) -> Any: """ diff --git a/src/django_nh3/models.py b/src/django_nh3/models.py index e5b5b2d..5f2c90f 100644 --- a/src/django_nh3/models.py +++ b/src/django_nh3/models.py @@ -13,30 +13,39 
@@ from typing_extensions import deprecated from . import forms +from .utils import get_nh3_options class Nh3FieldMixin: def __init__( self, - attributes: dict[str, set[str]] = {}, + *args: Any, + attributes: dict[str, set[str]] | None = None, attribute_filter: Callable[[str, str, str], str] | None = None, - clean_content_tags: set[str] = set(), + clean_content_tags: set[str] | None = None, + generic_attribute_prefixes: set[str] | None = None, link_rel: str = "", + set_tag_attribute_values: dict[str, dict[str, str]] | None = None, strip_comments: bool = False, - tags: set[str] = set(), - *args: Any, + tags: set[str] | None = None, + tag_attribute_values: dict[str, dict[str, set[str]]] | None = None, + url_schemes: set[str] | None = None, **kwargs: Any, ) -> None: super().__init__(*args, **kwargs) - self.nh3_options = { - "attributes": attributes, - "attribute_filter": attribute_filter, - "clean_content_tags": clean_content_tags, - "link_rel": link_rel, - "strip_comments": strip_comments, - "tags": tags, - } + self.nh3_options = get_nh3_options( + attributes=attributes, + attribute_filter=attribute_filter, + clean_content_tags=clean_content_tags, + generic_attribute_prefixes=generic_attribute_prefixes, + link_rel=link_rel, + set_tag_attribute_values=set_tag_attribute_values, + strip_comments=strip_comments, + tags=tags, + tag_attribute_values=tag_attribute_values, + url_schemes=url_schemes, + ) def formfield( self, form_class: FormField = forms.Nh3Field, **kwargs: Any @@ -47,14 +56,24 @@ def formfield( if not self.choices: # type: ignore[attr-defined] kwargs.update( { - "max_length": self.max_length, # type: ignore[attr-defined] "attributes": self.nh3_options.get("attributes"), "attribute_filter": self.nh3_options.get("attribute_filter"), "clean_content_tags": self.nh3_options.get("clean_content_tags"), + "generic_attribute_prefixes": self.nh3_options.get( + "generic_attribute_prefixes" + ), "link_rel": self.nh3_options.get("link_rel"), + "max_length": 
self.max_length, # type: ignore[attr-defined] + "required": not self.blank, # type: ignore[attr-defined] + "set_tag_attribute_values": self.nh3_options.get( + "set_tag_attribute_values" + ), "strip_comments": self.nh3_options.get("strip_comments"), + "tag_attribute_values": self.nh3_options.get( + "tag_attribute_values" + ), "tags": self.nh3_options.get("tags"), - "required": not self.blank, # type: ignore[attr-defined] + "url_schemes": self.nh3_options.get("url_schemes"), } ) diff --git a/src/django_nh3/utils.py b/src/django_nh3/utils.py index b667fd0..9e6f978 100644 --- a/src/django_nh3/utils.py +++ b/src/django_nh3/utils.py @@ -1,12 +1,14 @@ import logging +from collections.abc import Callable from typing import Any from django.conf import settings +from django.utils.module_loading import import_string logger = logging.getLogger(__name__) -def get_nh3_default_options() -> dict[str, Any]: +def get_nh3_configured_default_options() -> dict[str, Any]: """ Pull the django-nh3 settings similarly to how django-bleach handled them. 
@@ -16,35 +18,177 @@ def get_nh3_default_options() -> dict[str, Any]: BLEACH_ALLOWED_TAGS -> NH3_ALLOWED_TAGS BLEACH_ALLOWED_ATTRIBUTES -> NH3_ALLOWED_ATTRIBUTES BLEACH_STRIP_COMMENTS -> NH3_STRIP_COMMENTS + BLEACH_ALLOWED_PROTOCOLS -> NH3_ALLOWED_URL_SCHEMES While other settings have no current support in nh3: BLEACH_ALLOWED_STYLES -> There is no support for styling - BLEACH_ALLOWED_PROTOCOLS -> There is no support for protocols BLEACH_STRIP_TAGS -> This is the default behavior of nh3 """ - nh3_args: dict[str, Any] = {} nh3_settings = { + # Sets the tags that are allowed (i.e. allowlist) + # Ensure that no tags in this are also in + # NH3_CLEAN_CONTENT_TAGS "NH3_ALLOWED_TAGS": "tags", + # Sets the HTML attributes that are allowed on specific tags, * key + # means the attributes are allowed on any tag (i.e. allowlist) + # Ensure that no tags in this are also in NH3_CLEAN_CONTENT_TAGS "NH3_ALLOWED_ATTRIBUTES": "attributes", + # Sets the tags whose contents will be completely removed from the + # output (i.e. blocklist) + # Ensure that no tags in this are also in NH3_ALLOWED_TAGS or + # NH3_ALLOWED_ATTRIBUTES + # Default: script, style + "NH3_CLEAN_CONTENT_TAGS": "clean_content_tags", + # Dotted path to a callback that allows rewriting of all attributes. + # The callback takes name of the element, attribute and its value. + # Returns None to remove the attribute, or a value to use + "NH3_ALLOWED_ATTRIBUTES_FILTER": "attribute_filter", + # Configures the handling of HTML comments, defaults to True "NH3_STRIP_COMMENTS": "strip_comments", + # Configures a rel attribute that will be added on links, defaults to + # noopener noreferrer. To turn on rel-insertion, pass a space-separated + # list. 
If rel is in the generic or tag attributes, this must be set to + # None + # Common rel values to include: + # noopener + # noreferrer + # nofollow + "NH3_LINK_REL": "link_rel", + # Sets the prefix of attributes that are allowed on any tag + "NH3_ALLOWED_GENERIC_ATTRIBUTE_PREFIXES": "generic_attribute_prefixes", + # Sets the values of HTML attributes that are allowed on specific tags. + # The value is structured as a map from tag names to a map from + # attribute names to a set of attribute values. If a tag is not itself + # whitelisted, adding entries to this map will do nothing. + "NH3_ALLOWED_TAG_ATTRIBUTE_VALUES": "tag_attribute_values", + # Sets the values of HTML attributes that are to be set on specific + # tags. The value is structured as a map from tag names to a map from + # attribute names to an attribute value. If a tag is not itself + # whitelisted, adding entries to this map will do nothing. + "NH3_SET_TAG_ATTRIBUTE_VALUES": "set_tag_attribute_values", + # Sets the URL schemes permitted on href and src attributes + "NH3_ALLOWED_URL_SCHEMES": "url_schemes", } - for setting, kwarg in nh3_settings.items(): - if hasattr(settings, setting): - attr = getattr(settings, setting) + return { + kwarg: getattr(settings, setting_name) + for setting_name, kwarg in nh3_settings.items() + if hasattr(settings, setting_name) + } + + +def normalize_nh3_options( # noqa: C901, PLR0912 + options: dict[str, Any], +) -> dict[str, Any]: + nh3_args: dict[str, Any] = {} + for kwarg_name, kwarg_value in options.items(): + value = kwarg_value + + # Convert from general iterables to sets + if kwarg_name in [ + "tags", + "clean_content_tags", + "generic_attribute_prefixes", + "url_schemes", + ]: + value = set(value) + + elif kwarg_name == "attributes": + copy_dict = value.copy() + for tag, attributes in value.items(): + copy_dict[tag] = set(attributes) + value = copy_dict + + elif kwarg_name == "attribute_filter": + if callable(value): + pass + elif isinstance(value, str): + value 
= import_string(value) + + elif kwarg_name == "strip_comments": + value = bool(value) - # Convert from general iterables to sets - if setting == "NH3_ALLOWED_TAGS": - attr = set(attr) - elif setting == "NH3_ALLOWED_ATTRIBUTES": - copy_dict = attr.copy() - for tag, attributes in attr.items(): - copy_dict[tag] = set(attributes) - attr = copy_dict + elif kwarg_name == "link_rel": + value = str(value) - nh3_args[kwarg] = attr + elif kwarg_name == "tag_attribute_values": + # The value is structured as a map from tag names to a map from + # attribute names to a set of attribute values. + allowed_tag_attr_dict: dict[str, dict[str, set[str]]] = {} + for tag_name, attribute_dict in value.items(): + allowed_tag_attr_dict[tag_name] = {} + for attr_name, attr_value in attribute_dict.items(): + allowed_tag_attr_dict[tag_name][attr_name] = set(attr_value) + value = allowed_tag_attr_dict + + elif kwarg_name == "set_tag_attribute_values": + # The value is structured as a map from tag names to a map from + # attribute names to an attribute value. 
+ set_tag_attr_dict: dict[str, dict[str, str]] = {} + for tag_name, attribute_dict in value.items(): + set_tag_attr_dict[tag_name] = {} + for attr_name, attr_value in attribute_dict.items(): + set_tag_attr_dict[tag_name][attr_name] = str(attr_value) + value = set_tag_attr_dict + + nh3_args[kwarg_name] = value return nh3_args + + +def get_nh3_default_options() -> dict[str, Any]: + return normalize_nh3_options(get_nh3_configured_default_options()) + + +def get_nh3_options( + tags: set[str] | None = None, + clean_content_tags: set[str] | None = None, + attributes: dict[str, set[str]] | None = None, + attribute_filter: Callable[[str, str, str], str] | None = None, + strip_comments: bool = False, + link_rel: str = "", + generic_attribute_prefixes: set[str] | None = None, + tag_attribute_values: dict[str, dict[str, set[str]]] | None = None, + set_tag_attribute_values: dict[str, dict[str, str]] | None = None, + url_schemes: set[str] | None = None, +) -> dict[str, Any]: + defaults = get_nh3_configured_default_options() + + tags = tags or defaults.get("tags", None) or set() + attributes = attributes or defaults.get("attributes", {}) + clean_content_tags = ( + clean_content_tags or defaults.get("clean_content_tags", None) or set() + ) + attribute_filter = attribute_filter or defaults.get("attribute_filter", None) + strip_comments = strip_comments or defaults.get("strip_comments", False) + link_rel = link_rel or defaults.get("link_rel", "") + generic_attribute_prefixes = ( + generic_attribute_prefixes + or defaults.get("generic_attribute_prefixes", None) + or set() + ) + tag_attribute_values = ( + tag_attribute_values or defaults.get("tag_attribute_values", None) or {} + ) + set_tag_attribute_values = ( + set_tag_attribute_values or defaults.get("set_tag_attribute_values", None) or {} + ) + url_schemes = url_schemes or defaults.get("url_schemes", None) or set() + + return normalize_nh3_options( + { + "tags": tags, + "clean_content_tags": clean_content_tags, + "attributes": 
attributes, + "attribute_filter": attribute_filter, + "strip_comments": strip_comments, + "link_rel": link_rel, + "generic_attribute_prefixes": generic_attribute_prefixes, + "tag_attribute_values": tag_attribute_values, + "set_tag_attribute_values": set_tag_attribute_values, + "url_schemes": url_schemes, + } + ) diff --git a/tests/test_settings.py b/tests/test_settings.py index d08161b..b918a3c 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -2,7 +2,7 @@ from django.test import TestCase -from django_nh3.utils import get_nh3_default_options +from django_nh3.utils import get_nh3_default_options, normalize_nh3_options from .constants import ALLOWED_ATTRIBUTES, ALLOWED_TAGS, STRIP_COMMENTS @@ -31,3 +31,193 @@ def test_custom_tags(self, settings): def test_strip_comments(self, settings): nh3_args = get_nh3_default_options() self.assertEqual(nh3_args["strip_comments"], STRIP_COMMENTS) + + +def set_test_flag_true(): + return "set_test_flag_true" + + +class TestNormalizeNh3Options(TestCase): + def test_unrecognized_keys_are_passed_through(self): + self.assertEqual(normalize_nh3_options({"unknown": None}), {"unknown": None}) + self.assertEqual(normalize_nh3_options({"unknown": []}), {"unknown": []}) + self.assertEqual(normalize_nh3_options({"unknown": ()}), {"unknown": ()}) + self.assertEqual(normalize_nh3_options({"unknown": set()}), {"unknown": set()}) + + def test_tags_clean_content_tags_generic_attribute_prefixes_and_url_schemes(self): + for kwargs, expected in [ + ( + {"tags": ["one", "two", "three"]}, + {"tags": {"one", "two", "three"}}, + ), + ( + {"clean_content_tags": ["two", "three", "four"]}, + {"clean_content_tags": {"two", "three", "four"}}, + ), + ( + {"generic_attribute_prefixes": ["three", "four", "five"]}, + {"generic_attribute_prefixes": {"three", "four", "five"}}, + ), + ( + {"url_schemes": ["four", "five", "six"]}, + {"url_schemes": {"four", "five", "six"}}, + ), + ]: + with self.subTest(kwargs=kwargs, expected=expected): + 
self.assertDictEqual( + normalize_nh3_options(kwargs), + expected, + ) + + def test_attribute_filter(self): + self.assertDictEqual( + normalize_nh3_options({"attribute_filter": set_test_flag_true}), + {"attribute_filter": set_test_flag_true}, + ) + + # Make sure that the function is exactly what we expect + result = normalize_nh3_options({"attribute_filter": set_test_flag_true}) + self.assertEqual(result["attribute_filter"](), "set_test_flag_true") + + self.assertDictEqual( + normalize_nh3_options( + {"attribute_filter": "tests.test_settings.set_test_flag_true"} + ), + {"attribute_filter": set_test_flag_true}, + ) + + # Make sure that the function is exactly what we expect + result = normalize_nh3_options({"attribute_filter": set_test_flag_true}) + self.assertEqual(result["attribute_filter"](), "set_test_flag_true") + + def test_strip_comments(self): + for kwargs, expected in [ + ( + {"strip_comments": None}, + {"strip_comments": False}, + ), + ( + {"strip_comments": []}, + {"strip_comments": False}, + ), + ( + {"strip_comments": ""}, + {"strip_comments": False}, + ), + ( + {"strip_comments": True}, + {"strip_comments": True}, + ), + ( + {"strip_comments": "happy"}, + {"strip_comments": True}, + ), + ]: + with self.subTest(kwargs=kwargs, expected=expected): + self.assertDictEqual( + normalize_nh3_options(kwargs), # type: ignore[arg-type] + expected, + ) + + def test_link_rel(self): + for kwargs, expected in [ + ( + {"link_rel": ""}, + {"link_rel": ""}, + ), + ( + {"link_rel": "my string"}, + {"link_rel": "my string"}, + ), + ( + {"link_rel": 0}, + {"link_rel": "0"}, + ), + ( + {"link_rel": None}, + {"link_rel": "None"}, + ), + ( + {"link_rel": True}, + {"link_rel": "True"}, + ), + ( + {"link_rel": False}, + {"link_rel": "False"}, + ), + ]: + with self.subTest(kwargs=kwargs, expected=expected): + self.assertDictEqual( + normalize_nh3_options(kwargs), # type: ignore[arg-type] + expected, + ) + + def assertDataStructureEqual(self, left, right): + if isinstance(left, 
set): + self.assertIsInstance(right, set) + self.assertSetEqual(left, right) + elif isinstance(left, dict): + self.assertIsInstance(right, dict) + self.assertEqual(left.keys(), right.keys()) + for key in left.keys(): + self.assertIn(key, right.keys()) + self.assertDataStructureEqual(left[key], right[key]) + else: + self.assertIsInstance(right, left.__class__) + self.assertEqual(left, right) + + def test_tag_attribute_values(self): + self.assertDataStructureEqual( + normalize_nh3_options( + { + "tag_attribute_values": { + "tag1": { + "attrA": ["A1", "A2", "A3"], + "attrB": ("B1", "B2", "B3"), + }, + "tag2": { + "attrC": ["C1", "C2", "C3"], + }, + }, + } + ), + { + "tag_attribute_values": { + "tag1": { + "attrA": {"A1", "A2", "A3"}, + "attrB": {"B1", "B2", "B3"}, + }, + "tag2": { + "attrC": {"C1", "C2", "C3"}, + }, + }, + }, + ) + + def test_set_tag_attribute_values(self): + self.assertDataStructureEqual( + normalize_nh3_options( + { + "set_tag_attribute_values": { + "tag1": { + "attrA": "Avalue", + "attrB": "Bvalue", + }, + "tag2": { + "attrC": "Cvalue", + }, + }, + } + ), + { + "set_tag_attribute_values": { + "tag1": { + "attrA": "Avalue", + "attrB": "Bvalue", + }, + "tag2": { + "attrC": "Cvalue", + }, + }, + }, + )