diff --git a/LICENSE b/LICENSE index 46147ae..60c49c4 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2023-2024 Contentstack +Copyright (c) 2024-2025 Contentstack Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 5921e5d..726a853 100644 --- a/README.md +++ b/README.md @@ -161,6 +161,7 @@ On the other hand, the `customTextWrapper` parser function provides the followin - `child`: The HTML string that specifies the child element - `value`: The value passed against the child element + You can use the following customized JSON RTE Serializer code to convert your JSON RTE field data into HTML format. ```javascript @@ -356,12 +357,43 @@ The resulting JSON-formatted data will look as follows: ## Automatic Conversion +> **_Note_**: `src` URLs provided for social-embeds and embed items are URI-encoded by default. + By default, the JSON Rich Text Editor field supports limited HTML tags within the editor. Due to this, the JSON RTE Serializer tool is not able to recognize each and every standard HTML tag. To help the JSON RTE Serializer recognize and process additional tags that are commonly used across HTML, you can use the automatic conversion option. When using this option, you need to pass the `allowNonStandardTags: true` parameter within the `jsonToHtml` or `htmlToJson` method to manipulate the working of the JSON RTE Serializer package as per your requirements. When you pass this parameter, it customizes your JSON RTE Serializer code to allow the support for all standard HTML-recognized tags or element types in the JSON Rich Text Editor field. ### Convert JSON to HTML +#### HTML Attribute Name and Value Sanitization + + +This project ensures that HTML attributes are properly validated and sanitized according to the W3C HTML specification. 
It validates attribute names based on the HTML standards and sanitizes attribute values to ensure correct rendering and security, particularly against cross-site scripting (XSS) vulnerabilities. + +#### Attribute Name Guidelines + +All HTML attribute names must conform to the [W3C HTML specification](https://www.w3.org/TR/2012/WD-html-markup-20120329/syntax.html#attribute-name). These guidelines specify the following rules: + +- **Printable ASCII Characters:** Attribute names must consist only of printable ASCII characters. +- **Case-Insensitive:** Attribute names are case-insensitive, but lowercase is preferred for consistency. +- **No Special Characters:** Attribute names cannot contain spaces or special characters such as `=`, `>`, `<`, `"`, etc. +- **Allowed Attributes:** Attributes such as `xmlns`, `aria-*`, `data-*`, and others defined by HTML5 standards are allowed and must follow specific rules. + +##### Important Note: +If an attribute name does not conform to these rules, the attribute will be **dropped** from the element. + +#### Attribute Value Guidelines + +The values of HTML attributes are sanitized to ensure proper rendering and to mitigate security risks, such as Cross-Site Scripting (XSS). This sanitization process involves replacing special characters (like `<`, `>`, `&`, and `"`) with their corresponding HTML entities and removing any invalid or unsafe characters. + +Here are the special characters that are escaped and their HTML entity replacements: + +- `<` → `&lt;` +- `>` → `&gt;` +- `&` → `&amp;` +- `"` → `&quot;` + + +
You can pass the `allowNonStandardTags: true` parameter within the `jsonToHtml` method to allow the JSON RTE Serializer tool to recognize standard HTML tags or element types and convert them into JSON format. You can use the following customized JSON RTE Serializer code to convert your JSON RTE field data into HTML format. diff --git a/package-lock.json b/package-lock.json index f88724b..7deee9c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@contentstack/json-rte-serializer", - "version": "2.0.7", + "version": "2.0.13", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "@contentstack/json-rte-serializer", - "version": "2.0.7", + "version": "2.0.13", "license": "MIT", "dependencies": { "array-flat-polyfill": "^1.0.1", diff --git a/package.json b/package.json index 48f0f16..c981325 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@contentstack/json-rte-serializer", - "version": "2.0.12", + "version": "2.1.0", "description": "This Package converts Html Document to Json and vice-versa.", "main": "lib/index.js", "module": "lib/index.mjs", diff --git a/src/toRedactor.tsx b/src/toRedactor.tsx index 36508e1..ca5abde 100644 --- a/src/toRedactor.tsx +++ b/src/toRedactor.tsx @@ -1,8 +1,8 @@ import kebbab from 'lodash.kebabcase' import isEmpty from 'lodash.isempty' - import {IJsonToHtmlElementTags, IJsonToHtmlOptions, IJsonToHtmlTextTags} from './types' import isPlainObject from 'lodash.isplainobject' +import {replaceHtmlEntities, forbiddenAttrChars } from './utils' const ELEMENT_TYPES: IJsonToHtmlElementTags = { 'blockquote': (attrs: string, child: string) => { @@ -379,6 +379,9 @@ export const toRedactor = (jsonValue: any,options?:IJsonToHtmlOptions) : string } if (attrsJson['width']) { let width = attrsJson['width'] + if(typeof width === 'number'){ + width = width.toString() + } if (width.slice(width.length - 1) === '%') { style = `width: ${allattrs['width'] + '%'}; height: ${attrsJson['height'] ? 
attrsJson['height'] : 'auto'};` } else { @@ -494,12 +497,20 @@ export const toRedactor = (jsonValue: any,options?:IJsonToHtmlOptions) : string } figureStyles.fieldsEdited.push(figureStyles.caption) } + + if (jsonValue['type'] === 'social-embeds' || jsonValue['type'] === 'embed') { + attrsJson['src'] = encodeURI(allattrs['src']); + } + if(!(options?.customElementTypes && !isEmpty(options.customElementTypes) && options.customElementTypes[jsonValue['type']])) { delete attrsJson['url'] } delete attrsJson['redactor-attributes'] Object.entries(attrsJson).forEach((key) => { - return key[1] ? (key[1] !== '' ? (attrs += `${key[0]}="${key[1]}" `) : '') : '' + if (forbiddenAttrChars.some(char => key[0].includes(char))) { + return; + } + return key[1] ? (key[1] !== '' ? (attrs += `${key[0]}="${replaceHtmlEntities(key[1])}" `) : '') : '' }) attrs = (attrs.trim() ? ' ' : '') + attrs.trim() } @@ -556,7 +567,7 @@ export const toRedactor = (jsonValue: any,options?:IJsonToHtmlOptions) : string if(['td','th'].includes(jsonValue['type'])){ if(jsonValue?.['attrs']?.['void']) return '' } - + attrs = (attrs.trim() ? ' ' : '') + attrs.trim() return ELEMENT_TYPES[orgType || jsonValue['type']](attrs, children,jsonValue, figureStyles) diff --git a/src/utils/index.ts b/src/utils/index.ts new file mode 100644 index 0000000..df48a1c --- /dev/null +++ b/src/utils/index.ts @@ -0,0 +1,9 @@ +export function replaceHtmlEntities(str: string): string { + return String(str) + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"'); +} + +export const forbiddenAttrChars = ['"', "'", '>','<', '/', '=']; \ No newline at end of file diff --git a/test/expectedJson.ts b/test/expectedJson.ts index 777a02d..a77a1d5 100644 --- a/test/expectedJson.ts +++ b/test/expectedJson.ts @@ -1999,6 +1999,211 @@ export default { } ], "htmlUpdated": "

" + }, + "RT-360":{ + "html": [ + ``, + ``, + '', + ``, + ``, + `` + ], + "json": + [ + { + "type": "doc", + "attrs": {}, + "uid": "18396bf67f1f4b0a9da57643ac0542ca", + "children": [ + { + "uid": "45a850acbeb949db86afe415625ad1ce", + "type": "social-embeds", + "attrs": { + "src": "https://www.youtube.com/watch?v=Gw7EqoOYC9A\">`); }) + + describe("RT-360", () =>{ + it("should encode and not render invalid src urls", () => { + const json = expectedValue["RT-360"].json[0] + const html = toRedactor(json); + expect(html).toBe(expectedValue["RT-360"].html[0]); + }) + + it("should handle undefined or null cases",()=>{ + const json = expectedValue["RT-360"].json[1] + const html = toRedactor(json); + expect(html).toBe(expectedValue["RT-360"].html[1]); + }) + + it("should handle src urls without protocol",()=>{ + const json = expectedValue["RT-360"].json[2] + const html = toRedactor(json); + expect(html).toBe(expectedValue["RT-360"].html[2]); + }) + + it("should work only for valid embed urls",()=>{ + const json = expectedValue["RT-360"].json[3] + const html = toRedactor(json); + expect(html).toBe(expectedValue["RT-360"].html[3]); + }) + + it("should escape html entities in attribute values",()=>{ + const json = expectedValue["RT-360"].json[4] + const html = toRedactor(json); + expect(html).toBe(expectedValue["RT-360"].html[4]); + }) + + it("should drop invalid attribute names",()=>{ + const json = expectedValue["RT-360"].json[5] + const html = toRedactor(json); + expect(html).toBe(expectedValue["RT-360"].html[5]); + }) + }) + + test('should convert numeric width to string', () => { + const json = {"type":"doc","uid":"0ebe9a3b835d413595885c44d9527b72","attrs":{},"children":[{"type":"img","attrs":{"style":{"text-align":"center"},"redactor-attributes":{"alt":"Infographic showing 3 results from Forrester study of Contentstack CMS: $3M increase in profit, $507.3K productivity savings and $2.0M savings due to reduced time to 
publish.","src":"https://images.contentstack.io/v3/assets/blt7359e2a55efae483/bltea2a11144a2c68b5/63c08b7f438f80612c397994/CS_Infographics_ForresterReport_Data_3_1200x628_(1).png","position":"center","width":641},"url":"https://images.contentstack.io/v3/assets/blt7359e2a55efae483/bltea2a11144a2c68b5/63c08b7f438f80612c397994/CS_Infographics_ForresterReport_Data_3_1200x628_(1).png","width":641},"uid":"15516d511e7a4e28b418e49bdba0464d","children":[{"text":""}]}] } + const html = toRedactor(json); + expect(html).toBe(`Infographic showing 3 results from Forrester study of Contentstack CMS: $3M increase in profit, $507.3K productivity savings and $2.0M savings due to reduced time to publish.`) + }) })