From 0746d8f3da12007192c05a186fc8744e59bfb8b3 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 17 Jul 2024 23:15:22 +0200 Subject: [PATCH] Resolve TODOs in ext/dom around nullable content It's indeed possible this is NULL. When you create a new text-like node in libxml and pass NULL as content, you do get NULL in the content field instead of the empty string. You can hit this by creating DOMText or DOMComment directly and not passing any argument. This could also be created internally. We refactor the code such that this detail is hidden and we add a test to check that it correctly throws an exception. --- ext/dom/characterdata.c | 31 ++++------------- ext/dom/php_dom.h | 5 +++ .../tests/null_text_content_manipulation.phpt | 33 +++++++++++++++++++ ext/dom/text.c | 7 +--- 4 files changed, 46 insertions(+), 30 deletions(-) create mode 100644 ext/dom/tests/null_text_content_manipulation.phpt diff --git a/ext/dom/characterdata.c b/ext/dom/characterdata.c index b17679655bee1..6e47da166a364 100644 --- a/ext/dom/characterdata.c +++ b/ext/dom/characterdata.c @@ -104,7 +104,6 @@ Modern spec URL: https://dom.spec.whatwg.org/#dom-characterdata-substringdata PHP_METHOD(DOMCharacterData, substringData) { zval *id; - xmlChar *cur; xmlChar *substring; xmlNodePtr node; zend_long offset_input, count_input; @@ -119,11 +118,7 @@ PHP_METHOD(DOMCharacterData, substringData) DOM_GET_OBJ(node, id, xmlNodePtr, intern); - cur = node->content; - if (cur == NULL) { - /* TODO: is this even possible? */ - cur = BAD_CAST ""; - } + const xmlChar *cur = php_dom_get_content_or_empty(node); length = xmlUTF8Strlen(cur); if (ZEND_LONG_INT_OVFL(offset_input) || ZEND_LONG_INT_OVFL(count_input)) { @@ -197,7 +192,7 @@ Modern spec URL: https://dom.spec.whatwg.org/#dom-characterdata-insertdata static void dom_character_data_insert_data(INTERNAL_FUNCTION_PARAMETERS, bool return_true) { zval *id; - xmlChar *cur, *first, *second; + xmlChar *first, *second; xmlNodePtr node; char *arg; zend_long offset_input; @@ -213,11 +208,7 @@ static void dom_character_data_insert_data(INTERNAL_FUNCTION_PARAMETERS, bool re DOM_GET_OBJ(node, id, xmlNodePtr, intern); - cur = node->content; - if (cur == NULL) { - /* TODO: is this even possible? */ - cur = BAD_CAST ""; - } + const xmlChar *cur = php_dom_get_content_or_empty(node); length = xmlUTF8Strlen(cur); @@ -268,7 +259,7 @@ Modern spec URL: https://dom.spec.whatwg.org/#dom-characterdata-deletedata static void dom_character_data_delete_data(INTERNAL_FUNCTION_PARAMETERS, bool return_true) { zval *id; - xmlChar *cur, *substring, *second; + xmlChar *substring, *second; xmlNodePtr node; zend_long offset, count_input; unsigned int count; @@ -282,11 +273,7 @@ static void dom_character_data_delete_data(INTERNAL_FUNCTION_PARAMETERS, bool re DOM_GET_OBJ(node, id, xmlNodePtr, intern); - cur = node->content; - if (cur == NULL) { - /* TODO: is this even possible? */ - cur = BAD_CAST ""; - } + const xmlChar *cur = php_dom_get_content_or_empty(node); length = xmlUTF8Strlen(cur); @@ -340,7 +327,7 @@ Modern spec URL: https://dom.spec.whatwg.org/#dom-characterdata-replacedata static void dom_character_data_replace_data(INTERNAL_FUNCTION_PARAMETERS, bool return_true) { zval *id; - xmlChar *cur, *substring, *second = NULL; + xmlChar *substring, *second = NULL; xmlNodePtr node; char *arg; zend_long offset, count_input; @@ -356,11 +343,7 @@ static void dom_character_data_replace_data(INTERNAL_FUNCTION_PARAMETERS, bool r DOM_GET_OBJ(node, id, xmlNodePtr, intern); - cur = node->content; - if (cur == NULL) { - /* TODO: is this even possible? */ - cur = BAD_CAST ""; - } + const xmlChar *cur = php_dom_get_content_or_empty(node); length = xmlUTF8Strlen(cur); diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index 624e3cbaa5760..6d7c109c6eeb4 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -294,6 +294,11 @@ static zend_always_inline xmlNodePtr php_dom_first_child_of_container_node(xmlNo } } +static zend_always_inline const xmlChar *php_dom_get_content_or_empty(const xmlNode *node) +{ + return node->content ? node->content : BAD_CAST ""; +} + PHP_MINIT_FUNCTION(dom); PHP_MSHUTDOWN_FUNCTION(dom); PHP_MINFO_FUNCTION(dom); diff --git a/ext/dom/tests/null_text_content_manipulation.phpt b/ext/dom/tests/null_text_content_manipulation.phpt new file mode 100644 index 0000000000000..9b1e35d60f766 --- /dev/null +++ b/ext/dom/tests/null_text_content_manipulation.phpt @@ -0,0 +1,33 @@ +--TEST-- +NULL text content manipulation +--EXTENSIONS-- +dom +--FILE-- +substringData(1, 0)); +} catch (DOMException $e) { + echo $e->getMessage(), "\n"; +} +try { + var_dump($text->insertData(1, "")); +} catch (DOMException $e) { + echo $e->getMessage(), "\n"; +} +try { + var_dump($text->deleteData(1, 1)); +} catch (DOMException $e) { + echo $e->getMessage(), "\n"; +} +try { + var_dump($text->replaceData(1, 1, "")); +} catch (DOMException $e) { + echo $e->getMessage(), "\n"; +} +?> +--EXPECT-- +Index Size Error +Index Size Error +Index Size Error +Index Size Error diff --git a/ext/dom/text.c b/ext/dom/text.c index ef0d5d0cef6f7..46b6f9d9dcc9d 100644 --- a/ext/dom/text.c +++ b/ext/dom/text.c @@ -100,7 +100,6 @@ Modern spec URL: https://dom.spec.whatwg.org/#dom-text-splittext PHP_METHOD(DOMText, splitText) { zval *id; - xmlChar *cur; xmlChar *first; xmlChar *second; xmlNodePtr node; @@ -120,11 +119,7 @@ PHP_METHOD(DOMText, splitText) RETURN_THROWS(); } - cur = node->content; - if (cur == NULL) { - /* TODO: is this even possible? */ - cur = BAD_CAST ""; - } + const xmlChar *cur = php_dom_get_content_or_empty(node); length = xmlUTF8Strlen(cur); if (ZEND_LONG_INT_OVFL(offset) || (int)offset > length) {