From 16d29e60186665f200fcfe59bc772006c2370dd6 Mon Sep 17 00:00:00 2001 From: Shreya Kamble Date: Wed, 22 Jan 2025 16:52:39 +0530 Subject: [PATCH] feat: handle invalid attr key-values --- README.md | 29 +++++++++++++++++++++++++++++ package.json | 2 +- src/toRedactor.tsx | 5 ++++- src/utils/index.ts | 2 ++ test/expectedJson.ts | 22 +++++++++++++++++++++- test/toRedactor.test.ts | 6 ++++++ 6 files changed, 63 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 1d9519a..726a853 100644 --- a/README.md +++ b/README.md @@ -365,6 +365,35 @@ To help the JSON RTE Serializer recognize and process additional tags that are c ### Convert JSON to HTML +#### HTML Attribute Name and Value Sanitization + + +This project ensures that HTML attributes are properly validated and sanitized according to the W3C HTML specification. It validates attribute names based on the HTML standards and sanitizes attribute values to ensure correct rendering and security, particularly against cross-site scripting (XSS) vulnerabilities. + +#### Attribute Name Guidelines + +All HTML attribute names must conform to the [W3C HTML specification](https://www.w3.org/TR/2012/WD-html-markup-20120329/syntax.html#attribute-name). These guidelines specify the following rules: + +- **Printable ASCII Characters:** Attribute names must consist only of printable ASCII characters. +- **Case-Insensitive:** Attribute names are case-insensitive, but lowercase is preferred for consistency. +- **No Special Characters:** Attribute names cannot contain spaces or special characters such as `=`, `>`, `<`, `"`, etc. +- **Allowed Attributes:** Attributes such as `xmlns`, `aria-*`, `data-*`, and others defined by HTML5 standards are allowed and must follow specific rules. + +##### Important Note: +If an attribute name does not conform to these rules, the attribute will be **dropped** from the element. 
+ +#### Attribute Value Guidelines + +The values of HTML attributes are sanitized to ensure proper rendering and to mitigate security risks, such as Cross-Site Scripting (XSS). This sanitization process involves replacing special characters (like `<`, `>`, `&`, etc.) with their corresponding HTML entities and removing any invalid or unsafe characters. + +Here are some common characters and their HTML entity replacements: + +- `<` → `&lt;` +- `>` → `&gt;` +- `&` → `&amp;` + + +
You can pass the `allowNonStandardTags: true` parameter within the `jsonToHtml` method to allow the JSON RTE Serializer tool to recognize standard HTML tags or element types and convert them into JSON format. You can use the following customized JSON RTE Serializer code to convert your JSON RTE field data into HTML format. diff --git a/package.json b/package.json index 626ec39..c981325 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@contentstack/json-rte-serializer", - "version": "2.0.13", + "version": "2.1.0", "description": "This Package converts Html Document to Json and vice-versa.", "main": "lib/index.js", "module": "lib/index.mjs", diff --git a/src/toRedactor.tsx b/src/toRedactor.tsx index 2dc1406..ca5abde 100644 --- a/src/toRedactor.tsx +++ b/src/toRedactor.tsx @@ -2,7 +2,7 @@ import kebbab from 'lodash.kebabcase' import isEmpty from 'lodash.isempty' import {IJsonToHtmlElementTags, IJsonToHtmlOptions, IJsonToHtmlTextTags} from './types' import isPlainObject from 'lodash.isplainobject' -import {replaceHtmlEntities } from './utils' +import {replaceHtmlEntities, forbiddenAttrChars } from './utils' const ELEMENT_TYPES: IJsonToHtmlElementTags = { 'blockquote': (attrs: string, child: string) => { @@ -507,6 +507,9 @@ export const toRedactor = (jsonValue: any,options?:IJsonToHtmlOptions) : string } delete attrsJson['redactor-attributes'] Object.entries(attrsJson).forEach((key) => { + if (forbiddenAttrChars.some(char => key[0].includes(char))) { + return; + } return key[1] ? (key[1] !== '' ? (attrs += `${key[0]}="${replaceHtmlEntities(key[1])}" `) : '') : '' }) attrs = (attrs.trim() ? 
' ' : '') + attrs.trim() diff --git a/src/utils/index.ts b/src/utils/index.ts index b4b9c08..df48a1c 100644 --- a/src/utils/index.ts +++ b/src/utils/index.ts @@ -5,3 +5,5 @@ export function replaceHtmlEntities(str: string): string { .replace(/>/g, '>') .replace(/"/g, '"'); } + +export const forbiddenAttrChars = ['"', "'", '>','<', '/', '=']; \ No newline at end of file diff --git a/test/expectedJson.ts b/test/expectedJson.ts index 1433de4..a77a1d5 100644 --- a/test/expectedJson.ts +++ b/test/expectedJson.ts @@ -2006,7 +2006,8 @@ export default { ``, '', ``, - `` + ``, + `` ], "json": [ @@ -2181,6 +2182,25 @@ export default { }, children: [{ text: "" }], }, + { + "uid": "45a850acbeb949db86afe415625ad1ce", + "type": "social-embeds", + "attrs": { + "123": "456", + "src": "https://www.youtube.com/embed/Gw7EqoOYC9A?si=bWdnezma6qFAePQU", + "width": 560, + "height": 320, + "

ding": 234, + "status": "Active", + "emptyKey<": "12", + "country/": "USA" + }, + "children": [ + { + "text": "" + } + ] + }, ] diff --git a/test/toRedactor.test.ts b/test/toRedactor.test.ts index 6517f88..adbb8aa 100644 --- a/test/toRedactor.test.ts +++ b/test/toRedactor.test.ts @@ -279,6 +279,12 @@ describe("Testing json to html conversion", () => { const html = toRedactor(json); expect(html).toBe(expectedValue["RT-360"].html[4]); }) + + it("should drop invalid attribute names",()=>{ + const json = expectedValue["RT-360"].json[5] + const html = toRedactor(json); + expect(html).toBe(expectedValue["RT-360"].html[5]); + }) }) test('should convert numeric width to string', () => {