diff --git a/NEWS b/NEWS
index 53e4fe73519b2..808f3ed806101 100644
--- a/NEWS
+++ b/NEWS
@@ -44,6 +44,9 @@ PHP NEWS
- Date:
. Implement More Appropriate Date/Time Exceptions RFC. (Derick)
+- DOM:
+ . Fix bug GH-11308 (getElementsByTagName() is O(N^2)). (nielsdos)
+
- Exif:
. Removed unneeded codepaths in exif_process_TIFF_in_JPEG(). (nielsdos)
diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS
index 99b609a115b00..6db2d99ec59b9 100644
--- a/UPGRADING.INTERNALS
+++ b/UPGRADING.INTERNALS
@@ -120,6 +120,24 @@ PHP 8.3 INTERNALS UPGRADE NOTES
- A new function dom_get_doc_props_read_only() is added to gather the document
properties in a read-only way. This function avoids allocation when there are
no document properties changed yet.
+ - The node list returned by DOMNode::getElementsByTagName() and
+ DOMNode::getElementsByTagNameNS() now caches the length and the last requested item.
+ This means that the length and the last requested item are not recalculated
+ when the node list is iterated over multiple times.
+ If you do not use the internal PHP dom APIs to modify the document, you need to
+ manually invalidate the cache using php_libxml_invalidate_node_list_cache_from_doc().
+ Furthermore, the following internal APIs were added to handle the cache:
+ . php_dom_is_cache_tag_stale_from_doc_ptr()
+ . php_dom_is_cache_tag_stale_from_node()
+ . php_dom_mark_cache_tag_up_to_date_from_node()
+ - The function dom_get_elements_by_tag_name_ns_raw() has an additional parameter to indicate
+ the base node of the node list. This function also no longer accepts -1 as the index argument.
+ - The function dom_namednode_iter() has additional arguments to avoid recomputing the length of
+ the strings.
+
+ g. ext/libxml
+ - Two new functions: php_libxml_invalidate_node_list_cache_from_doc() and
+ php_libxml_invalidate_node_list_cache() were added to invalidate the cache of a node list.
========================
4. OpCode changes
diff --git a/ext/dom/document.c b/ext/dom/document.c
index 13324645e987b..06c4b2b97b9e3 100644
--- a/ext/dom/document.c
+++ b/ext/dom/document.c
@@ -777,7 +777,6 @@ PHP_METHOD(DOMDocument, getElementsByTagName)
size_t name_len;
dom_object *intern, *namednode;
char *name;
- xmlChar *local;
id = ZEND_THIS;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
@@ -788,8 +787,7 @@ PHP_METHOD(DOMDocument, getElementsByTagName)
php_dom_create_iterator(return_value, DOM_NODELIST);
namednode = Z_DOMOBJ_P(return_value);
- local = xmlCharStrndup(name, name_len);
- dom_namednode_iter(intern, 0, namednode, NULL, local, NULL);
+ dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, NULL, 0);
}
/* }}} end dom_document_get_elements_by_tag_name */
@@ -847,6 +845,8 @@ PHP_METHOD(DOMDocument, importNode)
}
}
+ php_libxml_invalidate_node_list_cache_from_doc(docp);
+
DOM_RET_OBJ((xmlNodePtr) retnodep, &ret, intern);
}
/* }}} end dom_document_import_node */
@@ -991,7 +991,6 @@ PHP_METHOD(DOMDocument, getElementsByTagNameNS)
size_t uri_len, name_len;
dom_object *intern, *namednode;
char *uri, *name;
- xmlChar *local, *nsuri;
id = ZEND_THIS;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s!s", &uri, &uri_len, &name, &name_len) == FAILURE) {
@@ -1002,9 +1001,7 @@ PHP_METHOD(DOMDocument, getElementsByTagNameNS)
php_dom_create_iterator(return_value, DOM_NODELIST);
namednode = Z_DOMOBJ_P(return_value);
- local = xmlCharStrndup(name, name_len);
- nsuri = xmlCharStrndup(uri ? uri : "", uri_len);
- dom_namednode_iter(intern, 0, namednode, NULL, local, nsuri);
+ dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, uri ? uri : "", uri_len);
}
/* }}} end dom_document_get_elements_by_tag_name_ns */
@@ -1070,6 +1067,8 @@ PHP_METHOD(DOMDocument, normalizeDocument)
DOM_GET_OBJ(docp, id, xmlDocPtr, intern);
+ php_libxml_invalidate_node_list_cache_from_doc(docp);
+
dom_normalize((xmlNodePtr) docp);
}
/* }}} end dom_document_normalize_document */
@@ -1328,10 +1327,14 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) {
if (id != NULL) {
intern = Z_DOMOBJ_P(id);
+ size_t old_modification_nr = 0;
if (intern != NULL) {
docp = (xmlDocPtr) dom_object_get_node(intern);
doc_prop = NULL;
if (docp != NULL) {
+ const php_libxml_doc_ptr *doc_ptr = docp->_private;
+ ZEND_ASSERT(doc_ptr != NULL); /* Must exist, we have a document */
+ old_modification_nr = doc_ptr->cache_tag.modification_nr;
php_libxml_decrement_node_ptr((php_libxml_node_object *) intern);
doc_prop = intern->document->doc_props;
intern->document->doc_props = NULL;
@@ -1348,6 +1351,12 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) {
}
php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern);
+ /* Since iterators should invalidate, we need to start the modification number from the old counter */
+ if (old_modification_nr != 0) {
+ php_libxml_doc_ptr* doc_ptr = (php_libxml_doc_ptr*) ((php_libxml_node_object*) intern)->node; /* downcast */
+ doc_ptr->cache_tag.modification_nr = old_modification_nr;
+ php_libxml_invalidate_node_list_cache(doc_ptr);
+ }
RETURN_TRUE;
} else {
@@ -1563,6 +1572,8 @@ PHP_METHOD(DOMDocument, xinclude)
php_dom_remove_xinclude_nodes(root);
}
+ php_libxml_invalidate_node_list_cache_from_doc(docp);
+
if (err) {
RETVAL_LONG(err);
} else {
@@ -1871,10 +1882,14 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */
if (id != NULL && instanceof_function(Z_OBJCE_P(id), dom_document_class_entry)) {
intern = Z_DOMOBJ_P(id);
+ size_t old_modification_nr = 0;
if (intern != NULL) {
docp = (xmlDocPtr) dom_object_get_node(intern);
doc_prop = NULL;
if (docp != NULL) {
+ const php_libxml_doc_ptr *doc_ptr = docp->_private;
+ ZEND_ASSERT(doc_ptr != NULL); /* Must exist, we have a document */
+ old_modification_nr = doc_ptr->cache_tag.modification_nr;
php_libxml_decrement_node_ptr((php_libxml_node_object *) intern);
doc_prop = intern->document->doc_props;
intern->document->doc_props = NULL;
@@ -1891,6 +1906,12 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */
}
php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern);
+ /* Since iterators should invalidate, we need to start the modification number from the old counter */
+ if (old_modification_nr != 0) {
+ php_libxml_doc_ptr* doc_ptr = (php_libxml_doc_ptr*) ((php_libxml_node_object*) intern)->node; /* downcast */
+ doc_ptr->cache_tag.modification_nr = old_modification_nr;
+ php_libxml_invalidate_node_list_cache(doc_ptr);
+ }
RETURN_TRUE;
} else {
diff --git a/ext/dom/documenttype.c b/ext/dom/documenttype.c
index b046b05f80eff..cfc4b043edb22 100644
--- a/ext/dom/documenttype.c
+++ b/ext/dom/documenttype.c
@@ -65,7 +65,7 @@ int dom_documenttype_entities_read(dom_object *obj, zval *retval)
entityht = (xmlHashTable *) doctypep->entities;
intern = Z_DOMOBJ_P(retval);
- dom_namednode_iter(obj, XML_ENTITY_NODE, intern, entityht, NULL, NULL);
+ dom_namednode_iter(obj, XML_ENTITY_NODE, intern, entityht, NULL, 0, NULL, 0);
return SUCCESS;
}
@@ -93,7 +93,7 @@ int dom_documenttype_notations_read(dom_object *obj, zval *retval)
notationht = (xmlHashTable *) doctypep->notations;
intern = Z_DOMOBJ_P(retval);
- dom_namednode_iter(obj, XML_NOTATION_NODE, intern, notationht, NULL, NULL);
+ dom_namednode_iter(obj, XML_NOTATION_NODE, intern, notationht, NULL, 0, NULL, 0);
return SUCCESS;
}
diff --git a/ext/dom/dom_iterators.c b/ext/dom/dom_iterators.c
index 72c97104db04d..2cf2c7bb6e7ce 100644
--- a/ext/dom/dom_iterators.c
+++ b/ext/dom/dom_iterators.c
@@ -179,7 +179,7 @@ static void php_dom_iterator_move_forward(zend_object_iterator *iter) /* {{{ */
dom_object *intern;
dom_object *nnmap;
dom_nnodemap_object *objmap;
- int previndex=0;
+ int previndex;
HashTable *nodeht;
zval *entry;
bool do_curobj_undef = 1;
@@ -205,23 +205,32 @@ static void php_dom_iterator_move_forward(zend_object_iterator *iter) /* {{{ */
do_curobj_undef = 0;
}
} else {
- curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node;
if (objmap->nodetype == XML_ATTRIBUTE_NODE ||
objmap->nodetype == XML_ELEMENT_NODE) {
+ curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node;
curnode = curnode->next;
} else {
- /* Nav the tree evey time as this is LIVE */
+ /* The collection is live, we nav the tree from the base object if we cannot
+ * use the cache to restart from the last point. */
basenode = dom_object_get_node(objmap->baseobj);
- if (basenode && (basenode->type == XML_DOCUMENT_NODE ||
- basenode->type == XML_HTML_DOCUMENT_NODE)) {
- basenode = xmlDocGetRootElement((xmlDoc *) basenode);
- } else if (basenode) {
- basenode = basenode->children;
- } else {
+ if (UNEXPECTED(!basenode)) {
goto err;
}
+ if (php_dom_is_cache_tag_stale_from_node(&iterator->cache_tag, basenode)) {
+ php_dom_mark_cache_tag_up_to_date_from_node(&iterator->cache_tag, basenode);
+ previndex = 0;
+ if (basenode && (basenode->type == XML_DOCUMENT_NODE ||
+ basenode->type == XML_HTML_DOCUMENT_NODE)) {
+ curnode = xmlDocGetRootElement((xmlDoc *) basenode);
+ } else {
+ curnode = basenode->children;
+ }
+ } else {
+ previndex = iter->index - 1;
+ curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node;
+ }
curnode = dom_get_elements_by_tag_name_ns_raw(
- basenode, (char *) objmap->ns, (char *) objmap->local, &previndex, iter->index);
+ basenode, curnode, (char *) objmap->ns, (char *) objmap->local, &previndex, iter->index);
}
}
} else {
@@ -258,7 +267,7 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i
{
dom_object *intern;
dom_nnodemap_object *objmap;
- xmlNodePtr nodep, curnode=NULL;
+ xmlNodePtr curnode=NULL;
int curindex = 0;
HashTable *nodeht;
zval *entry;
@@ -270,6 +279,7 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i
}
iterator = emalloc(sizeof(php_dom_iterator));
zend_iterator_init(&iterator->intern);
+ iterator->cache_tag.modification_nr = 0;
ZVAL_OBJ_COPY(&iterator->intern.data, Z_OBJ_P(object));
iterator->intern.funcs = &php_dom_iterator_funcs;
@@ -288,24 +298,25 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i
ZVAL_COPY(&iterator->curobj, entry);
}
} else {
- nodep = (xmlNode *)dom_object_get_node(objmap->baseobj);
- if (!nodep) {
+ xmlNodePtr basep = (xmlNode *)dom_object_get_node(objmap->baseobj);
+ if (!basep) {
goto err;
}
if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) {
if (objmap->nodetype == XML_ATTRIBUTE_NODE) {
- curnode = (xmlNodePtr) nodep->properties;
+ curnode = (xmlNodePtr) basep->properties;
} else {
- curnode = (xmlNodePtr) nodep->children;
+ curnode = (xmlNodePtr) basep->children;
}
} else {
+ xmlNodePtr nodep = basep;
if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) {
nodep = xmlDocGetRootElement((xmlDoc *) nodep);
} else {
nodep = nodep->children;
}
curnode = dom_get_elements_by_tag_name_ns_raw(
- nodep, (char *) objmap->ns, (char *) objmap->local, &curindex, 0);
+ basep, nodep, (char *) objmap->ns, (char *) objmap->local, &curindex, 0);
}
}
} else {
diff --git a/ext/dom/element.c b/ext/dom/element.c
index 19cef5834657a..93d9ad5fb910a 100644
--- a/ext/dom/element.c
+++ b/ext/dom/element.c
@@ -511,7 +511,6 @@ PHP_METHOD(DOMElement, getElementsByTagName)
size_t name_len;
dom_object *intern, *namednode;
char *name;
- xmlChar *local;
id = ZEND_THIS;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
@@ -522,8 +521,7 @@ PHP_METHOD(DOMElement, getElementsByTagName)
php_dom_create_iterator(return_value, DOM_NODELIST);
namednode = Z_DOMOBJ_P(return_value);
- local = xmlCharStrndup(name, name_len);
- dom_namednode_iter(intern, 0, namednode, NULL, local, NULL);
+ dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, NULL, 0);
}
/* }}} end dom_element_get_elements_by_tag_name */
@@ -930,7 +928,6 @@ PHP_METHOD(DOMElement, getElementsByTagNameNS)
size_t uri_len, name_len;
dom_object *intern, *namednode;
char *uri, *name;
- xmlChar *local, *nsuri;
id = ZEND_THIS;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s!s", &uri, &uri_len, &name, &name_len) == FAILURE) {
@@ -941,9 +938,7 @@ PHP_METHOD(DOMElement, getElementsByTagNameNS)
php_dom_create_iterator(return_value, DOM_NODELIST);
namednode = Z_DOMOBJ_P(return_value);
- local = xmlCharStrndup(name, name_len);
- nsuri = xmlCharStrndup(uri ? uri : "", uri_len);
- dom_namednode_iter(intern, 0, namednode, NULL, local, nsuri);
+ dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, uri ? uri : "", uri_len);
}
/* }}} end dom_element_get_elements_by_tag_name_ns */
diff --git a/ext/dom/node.c b/ext/dom/node.c
index fdb51bf51092f..78c9b2dca1802 100644
--- a/ext/dom/node.c
+++ b/ext/dom/node.c
@@ -195,6 +195,8 @@ int dom_node_node_value_write(dom_object *obj, zval *newval)
break;
}
+ php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
+
zend_string_release_ex(str, 0);
return SUCCESS;
}
@@ -274,7 +276,7 @@ int dom_node_child_nodes_read(dom_object *obj, zval *retval)
php_dom_create_iterator(retval, DOM_NODELIST);
intern = Z_DOMOBJ_P(retval);
- dom_namednode_iter(obj, XML_ELEMENT_NODE, intern, NULL, NULL, NULL);
+ dom_namednode_iter(obj, XML_ELEMENT_NODE, intern, NULL, NULL, 0, NULL, 0);
return SUCCESS;
}
@@ -482,7 +484,7 @@ int dom_node_attributes_read(dom_object *obj, zval *retval)
if (nodep->type == XML_ELEMENT_NODE) {
php_dom_create_iterator(retval, DOM_NAMEDNODEMAP);
intern = Z_DOMOBJ_P(retval);
- dom_namednode_iter(obj, XML_ATTRIBUTE_NODE, intern, NULL, NULL, NULL);
+ dom_namednode_iter(obj, XML_ATTRIBUTE_NODE, intern, NULL, NULL, 0, NULL, 0);
} else {
ZVAL_NULL(retval);
}
@@ -769,6 +771,8 @@ int dom_node_text_content_write(dom_object *obj, zval *newval)
return FAILURE;
}
+ php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
+
const xmlChar *xmlChars = (const xmlChar *) ZSTR_VAL(str);
int type = nodep->type;
@@ -897,6 +901,8 @@ PHP_METHOD(DOMNode, insertBefore)
php_libxml_increment_doc_ref((php_libxml_node_object *)childobj, NULL);
}
+ php_libxml_invalidate_node_list_cache_from_doc(parentp->doc);
+
if (ref != NULL) {
DOM_GET_OBJ(refp, ref, xmlNodePtr, refpobj);
if (refp->parent != parentp) {
@@ -1086,6 +1092,7 @@ PHP_METHOD(DOMNode, replaceChild)
nodep->doc->intSubset = (xmlDtd *) newchild;
}
}
+ php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
DOM_RET_OBJ(oldchild, &ret, intern);
}
/* }}} end dom_node_replace_child */
@@ -1127,6 +1134,7 @@ PHP_METHOD(DOMNode, removeChild)
}
xmlUnlinkNode(child);
+ php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
DOM_RET_OBJ(child, &ret, intern);
}
/* }}} end dom_node_remove_child */
@@ -1230,6 +1238,8 @@ PHP_METHOD(DOMNode, appendChild)
dom_reconcile_ns(nodep->doc, new_child);
+ php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
+
DOM_RET_OBJ(new_child, &ret, intern);
}
/* }}} end dom_node_append_child */
@@ -1339,6 +1349,8 @@ PHP_METHOD(DOMNode, normalize)
DOM_GET_OBJ(nodep, id, xmlNodePtr, intern);
+ php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
+
dom_normalize(nodep);
}
@@ -1571,6 +1583,8 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
RETURN_THROWS();
}
+ php_libxml_invalidate_node_list_cache_from_doc(docp);
+
if (xpath_array == NULL) {
if (nodep->type != XML_DOCUMENT_NODE) {
ctxp = xmlXPathNewContext(docp);
diff --git a/ext/dom/nodelist.c b/ext/dom/nodelist.c
index b03ebe1acd90a..20e3b18bee883 100644
--- a/ext/dom/nodelist.c
+++ b/ext/dom/nodelist.c
@@ -31,6 +31,24 @@
* Since:
*/
+static zend_always_inline void objmap_cache_release_cached_obj(dom_nnodemap_object *objmap)
+{
+ if (objmap->cached_obj) {
+ /* Since the DOM is a tree there can be no cycles. */
+ if (GC_DELREF(&objmap->cached_obj->std) == 0) {
+ zend_objects_store_del(&objmap->cached_obj->std);
+ }
+ objmap->cached_obj = NULL;
+ objmap->cached_obj_index = 0;
+ }
+}
+
+static zend_always_inline void reset_objmap_cache(dom_nnodemap_object *objmap)
+{
+ objmap_cache_release_cached_obj(objmap);
+ objmap->cached_length = -1;
+}
+
static int get_nodelist_length(dom_object *obj)
{
dom_nnodemap_object *objmap = (dom_nnodemap_object *) obj->ptr;
@@ -52,6 +70,17 @@ static int get_nodelist_length(dom_object *obj)
return 0;
}
+ if (!php_dom_is_cache_tag_stale_from_node(&objmap->cache_tag, nodep)) {
+ if (objmap->cached_length >= 0) {
+ return objmap->cached_length;
+ }
+ /* Only the length is out-of-date, the cache tag is still valid.
+ * Therefore, only overwrite the length and keep the currently cached object. */
+ } else {
+ php_dom_mark_cache_tag_up_to_date_from_node(&objmap->cache_tag, nodep);
+ reset_objmap_cache(objmap);
+ }
+
int count = 0;
if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) {
xmlNodePtr curnode = nodep->children;
@@ -63,15 +92,18 @@ static int get_nodelist_length(dom_object *obj)
}
}
} else {
+ xmlNodePtr basep = nodep;
if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) {
nodep = xmlDocGetRootElement((xmlDoc *) nodep);
} else {
nodep = nodep->children;
}
dom_get_elements_by_tag_name_ns_raw(
- nodep, (char *) objmap->ns, (char *) objmap->local, &count, -1);
+ basep, nodep, (char *) objmap->ns, (char *) objmap->local, &count, INT_MAX - 1 /* because of <= */);
}
+ objmap->cached_length = count;
+
return count;
}
@@ -113,11 +145,12 @@ PHP_METHOD(DOMNodeList, item)
zval *id;
zend_long index;
int ret;
+ bool cache_itemnode = false;
dom_object *intern;
xmlNodePtr itemnode = NULL;
dom_nnodemap_object *objmap;
- xmlNodePtr nodep, curnode;
+ xmlNodePtr basep;
int count = 0;
id = ZEND_THIS;
@@ -145,23 +178,51 @@ PHP_METHOD(DOMNodeList, item)
return;
}
} else if (objmap->baseobj) {
- nodep = dom_object_get_node(objmap->baseobj);
- if (nodep) {
+ basep = dom_object_get_node(objmap->baseobj);
+ if (basep) {
+ xmlNodePtr nodep = basep;
+ /* For now we're only able to use cache for forward search.
+ * TODO: in the future we could extend the logic of the node list such that backwards searches
+ * are also possible. */
+ bool restart = true;
+ int relative_index = index;
+ if (index >= objmap->cached_obj_index && objmap->cached_obj && !php_dom_is_cache_tag_stale_from_node(&objmap->cache_tag, nodep)) {
+ xmlNodePtr cached_obj_xml_node = dom_object_get_node(objmap->cached_obj);
+
+ /* The node cannot be NULL if the cache is valid. If it is NULL, then it means we
+ * forgot an invalidation somewhere. Take the defensive programming approach and invalidate
+ * it here if it's NULL (except in debug mode where we would want to catch this). */
+ if (UNEXPECTED(cached_obj_xml_node == NULL)) {
+#if ZEND_DEBUG
+ ZEND_UNREACHABLE();
+#endif
+ reset_objmap_cache(objmap);
+ } else {
+ restart = false;
+ relative_index -= objmap->cached_obj_index;
+ nodep = cached_obj_xml_node;
+ }
+ }
if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) {
- curnode = nodep->children;
- while (count < index && curnode != NULL) {
+ if (restart) {
+ nodep = nodep->children;
+ }
+ while (count < relative_index && nodep != NULL) {
count++;
- curnode = curnode->next;
+ nodep = nodep->next;
}
- itemnode = curnode;
+ itemnode = nodep;
} else {
- if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) {
- nodep = xmlDocGetRootElement((xmlDoc *) nodep);
- } else {
- nodep = nodep->children;
+ if (restart) {
+ if (basep->type == XML_DOCUMENT_NODE || basep->type == XML_HTML_DOCUMENT_NODE) {
+ nodep = xmlDocGetRootElement((xmlDoc*) basep);
+ } else {
+ nodep = basep->children;
+ }
}
- itemnode = dom_get_elements_by_tag_name_ns_raw(nodep, (char *) objmap->ns, (char *) objmap->local, &count, index);
+ itemnode = dom_get_elements_by_tag_name_ns_raw(basep, nodep, (char *) objmap->ns, (char *) objmap->local, &count, relative_index);
}
+ cache_itemnode = true;
}
}
}
@@ -169,6 +230,25 @@ PHP_METHOD(DOMNodeList, item)
if (itemnode) {
DOM_RET_OBJ(itemnode, &ret, objmap->baseobj);
+ if (cache_itemnode) {
+ /* Hold additional reference for the cache, must happen before releasing the cache
+ * because we might be the last reference holder.
+ * Instead of storing and copying zvals, we store the object pointer directly.
+ * This saves us some bytes because a pointer is smaller than a zval.
+ * This also means we have to manually refcount the objects here, and remove the reference count
+ * in reset_objmap_cache() and the destructor. */
+ dom_object *cached_obj = Z_DOMOBJ_P(return_value);
+ GC_ADDREF(&cached_obj->std);
+ /* If the tag is stale, all cached data is useless. Otherwise only the cached object is useless. */
+ if (php_dom_is_cache_tag_stale_from_node(&objmap->cache_tag, itemnode)) {
+ php_dom_mark_cache_tag_up_to_date_from_node(&objmap->cache_tag, itemnode);
+ reset_objmap_cache(objmap);
+ } else {
+ objmap_cache_release_cached_obj(objmap);
+ }
+ objmap->cached_obj_index = index;
+ objmap->cached_obj = cached_obj;
+ }
return;
}
}
diff --git a/ext/dom/parentnode.c b/ext/dom/parentnode.c
index 4d0fffeb9e058..36cd6104f38a4 100644
--- a/ext/dom/parentnode.c
+++ b/ext/dom/parentnode.c
@@ -280,6 +280,8 @@ void dom_parent_node_append(dom_object *context, zval *nodes, int nodesc)
return;
}
+ php_libxml_invalidate_node_list_cache_from_doc(parentNode->doc);
+
xmlNode *fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc);
if (fragment == NULL) {
@@ -322,6 +324,8 @@ void dom_parent_node_prepend(dom_object *context, zval *nodes, int nodesc)
return;
}
+ php_libxml_invalidate_node_list_cache_from_doc(parentNode->doc);
+
xmlNodePtr newchild, nextsib;
xmlNode *fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc);
@@ -402,6 +406,8 @@ void dom_parent_node_after(dom_object *context, zval *nodes, int nodesc)
doc = prevsib->doc;
+ php_libxml_invalidate_node_list_cache_from_doc(doc);
+
/* Spec step 4: convert nodes into fragment */
fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc);
@@ -451,6 +457,8 @@ void dom_parent_node_before(dom_object *context, zval *nodes, int nodesc)
doc = nextsib->doc;
+ php_libxml_invalidate_node_list_cache_from_doc(doc);
+
/* Spec step 4: convert nodes into fragment */
fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc);
@@ -506,6 +514,8 @@ void dom_child_node_remove(dom_object *context)
return;
}
+ php_libxml_invalidate_node_list_cache_from_doc(context->document->ptr);
+
while (children) {
if (children == child) {
xmlUnlinkNode(child);
diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c
index e02b0973291c5..44c10ea6a8aec 100644
--- a/ext/dom/php_dom.c
+++ b/ext/dom/php_dom.c
@@ -942,7 +942,7 @@ void dom_objects_free_storage(zend_object *object)
}
/* }}} */
-void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, xmlChar *local, xmlChar *ns) /* {{{ */
+void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, const char *local, size_t local_len, const char *ns, size_t ns_len) /* {{{ */
{
dom_nnodemap_object *mapptr = (dom_nnodemap_object *) intern->ptr;
@@ -950,11 +950,33 @@ void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xml
ZVAL_OBJ_COPY(&mapptr->baseobj_zv, &basenode->std);
+ xmlDocPtr doc = basenode->document ? basenode->document->ptr : NULL;
+
mapptr->baseobj = basenode;
mapptr->nodetype = ntype;
mapptr->ht = ht;
- mapptr->local = local;
- mapptr->ns = ns;
+
+ const xmlChar* tmp;
+
+ if (local) {
+ int len = local_len > INT_MAX ? -1 : (int) local_len;
+ if (doc != NULL && (tmp = xmlDictExists(doc->dict, (const xmlChar *)local, len)) != NULL) {
+ mapptr->local = (xmlChar*) tmp;
+ } else {
+ mapptr->local = xmlCharStrndup(local, len);
+ mapptr->free_local = true;
+ }
+ }
+
+ if (ns) {
+ int len = ns_len > INT_MAX ? -1 : (int) ns_len;
+ if (doc != NULL && (tmp = xmlDictExists(doc->dict, (const xmlChar *)ns, len)) != NULL) {
+ mapptr->ns = (xmlChar*) tmp;
+ } else {
+ mapptr->ns = xmlCharStrndup(ns, len);
+ mapptr->free_ns = true;
+ }
+ }
}
/* }}} */
@@ -1010,10 +1032,13 @@ void dom_nnodemap_objects_free_storage(zend_object *object) /* {{{ */
dom_nnodemap_object *objmap = (dom_nnodemap_object *)intern->ptr;
if (objmap) {
- if (objmap->local) {
+ if (objmap->cached_obj && GC_DELREF(&objmap->cached_obj->std) == 0) {
+ zend_objects_store_del(&objmap->cached_obj->std);
+ }
+ if (objmap->free_local) {
xmlFree(objmap->local);
}
- if (objmap->ns) {
+ if (objmap->free_ns) {
xmlFree(objmap->ns);
}
if (!Z_ISUNDEF(objmap->baseobj_zv)) {
@@ -1042,7 +1067,13 @@ zend_object *dom_nnodemap_objects_new(zend_class_entry *class_type) /* {{{ */
objmap->nodetype = 0;
objmap->ht = NULL;
objmap->local = NULL;
+ objmap->free_local = false;
objmap->ns = NULL;
+ objmap->free_ns = false;
+ objmap->cache_tag.modification_nr = 0;
+ objmap->cached_length = -1;
+ objmap->cached_obj = NULL;
+ objmap->cached_obj_index = 0;
return &intern->std;
}
@@ -1220,19 +1251,25 @@ bool dom_has_feature(zend_string *feature, zend_string *version)
}
/* }}} end dom_has_feature */
-xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *local, int *cur, int index) /* {{{ */
+xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr basep, xmlNodePtr nodep, char *ns, char *local, int *cur, int index) /* {{{ */
{
+ /* Can happen with detached document */
+ if (UNEXPECTED(nodep == NULL)) {
+ return NULL;
+ }
+
xmlNodePtr ret = NULL;
+ bool local_match_any = local[0] == '*' && local[1] == '\0';
/* Note: The spec says that ns == '' must be transformed to ns == NULL. In other words, they are equivalent.
* PHP however does not do this and internally uses the empty string everywhere when the user provides ns == NULL.
* This is because for PHP ns == NULL has another meaning: "match every namespace" instead of "match the empty namespace". */
bool ns_match_any = ns == NULL || (ns[0] == '*' && ns[1] == '\0');
- while (nodep != NULL && (*cur <= index || index == -1)) {
+ while (*cur <= index) {
if (nodep->type == XML_ELEMENT_NODE) {
- if (xmlStrEqual(nodep->name, (xmlChar *)local) || xmlStrEqual((xmlChar *)"*", (xmlChar *)local)) {
- if (ns_match_any || (!strcmp(ns, "") && nodep->ns == NULL) || (nodep->ns != NULL && xmlStrEqual(nodep->ns->href, (xmlChar *)ns))) {
+ if (local_match_any || xmlStrEqual(nodep->name, (xmlChar *)local)) {
+ if (ns_match_any || (ns[0] == '\0' && nodep->ns == NULL) || (nodep->ns != NULL && xmlStrEqual(nodep->ns->href, (xmlChar *)ns))) {
if (*cur == index) {
ret = nodep;
break;
@@ -1240,16 +1277,33 @@ xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *l
(*cur)++;
}
}
- ret = dom_get_elements_by_tag_name_ns_raw(nodep->children, ns, local, cur, index);
- if (ret != NULL) {
- break;
+
+ if (nodep->children) {
+ nodep = nodep->children;
+ continue;
}
}
- nodep = nodep->next;
+
+ if (nodep->next) {
+ nodep = nodep->next;
+ } else {
+ /* Go upwards, until we find a parent node with a next sibling, or until we hit the base. */
+ do {
+ nodep = nodep->parent;
+ if (nodep == basep) {
+ return NULL;
+ }
+ /* This shouldn't happen, unless there's an invalidation bug somewhere. */
+ if (UNEXPECTED(nodep == NULL)) {
+ zend_throw_error(NULL, "Current node in traversal is not in the document. Please report this as a bug in php-src.");
+ return NULL;
+ }
+ } while (nodep->next == NULL);
+ nodep = nodep->next;
+ }
}
return ret;
}
-/* }}} */
/* }}} end dom_element_get_elements_by_tag_name_ns_raw */
static inline bool is_empty_node(xmlNodePtr nodep)
diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h
index a7ae09384cfdc..0602f4166eaa7 100644
--- a/ext/dom/php_dom.h
+++ b/ext/dom/php_dom.h
@@ -82,15 +82,22 @@ typedef struct _dom_nnodemap_object {
dom_object *baseobj;
zval baseobj_zv;
int nodetype;
+ int cached_length;
xmlHashTable *ht;
xmlChar *local;
xmlChar *ns;
+ php_libxml_cache_tag cache_tag;
+ dom_object *cached_obj;
+ int cached_obj_index;
+ bool free_local : 1;
+ bool free_ns : 1;
} dom_nnodemap_object;
typedef struct {
zend_object_iterator intern;
zval curobj;
HashPosition pos;
+ php_libxml_cache_tag cache_tag;
} php_dom_iterator;
#include "domexception.h"
@@ -113,14 +120,14 @@ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns);
void dom_reconcile_ns(xmlDocPtr doc, xmlNodePtr nodep);
xmlNsPtr dom_get_nsdecl(xmlNode *node, xmlChar *localName);
void dom_normalize (xmlNodePtr nodep);
-xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *local, int *cur, int index);
+xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr basep, xmlNodePtr nodep, char *ns, char *local, int *cur, int index);
void php_dom_create_implementation(zval *retval);
int dom_hierarchy(xmlNodePtr parent, xmlNodePtr child);
bool dom_has_feature(zend_string *feature, zend_string *version);
int dom_node_is_read_only(xmlNodePtr node);
int dom_node_children_valid(xmlNodePtr node);
void php_dom_create_iterator(zval *return_value, int ce_type);
-void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, xmlChar *local, xmlChar *ns);
+void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, const char *local, size_t local_len, const char *ns, size_t ns_len);
xmlNodePtr create_notation(const xmlChar *name, const xmlChar *ExternalID, const xmlChar *SystemID);
xmlNode *php_dom_libxml_hash_iter(xmlHashTable *ht, int index);
xmlNode *php_dom_libxml_notation_iter(xmlHashTable *ht, int index);
@@ -153,6 +160,33 @@ void dom_child_node_remove(dom_object *context);
#define DOM_NODELIST 0
#define DOM_NAMEDNODEMAP 1
+static zend_always_inline bool php_dom_is_cache_tag_stale_from_doc_ptr(const php_libxml_cache_tag *cache_tag, const php_libxml_doc_ptr *doc_ptr)
+{
+ ZEND_ASSERT(cache_tag != NULL);
+ ZEND_ASSERT(doc_ptr != NULL);
+ /* See overflow comment in php_libxml_invalidate_node_list_cache(). */
+#if SIZEOF_SIZE_T == 8
+ return cache_tag->modification_nr != doc_ptr->cache_tag.modification_nr;
+#else
+ return cache_tag->modification_nr != doc_ptr->cache_tag.modification_nr || UNEXPECTED(doc_ptr->cache_tag.modification_nr == SIZE_MAX);
+#endif
+}
+
+static zend_always_inline bool php_dom_is_cache_tag_stale_from_node(const php_libxml_cache_tag *cache_tag, const xmlNodePtr node)
+{
+ ZEND_ASSERT(node != NULL);
+ return !node->doc || !node->doc->_private || php_dom_is_cache_tag_stale_from_doc_ptr(cache_tag, node->doc->_private);
+}
+
+static zend_always_inline void php_dom_mark_cache_tag_up_to_date_from_node(php_libxml_cache_tag *cache_tag, const xmlNodePtr node)
+{
+ ZEND_ASSERT(cache_tag != NULL);
+ if (node->doc && node->doc->_private) {
+ const php_libxml_doc_ptr* doc_ptr = node->doc->_private;
+ cache_tag->modification_nr = doc_ptr->cache_tag.modification_nr;
+ }
+}
+
PHP_MINIT_FUNCTION(dom);
PHP_MSHUTDOWN_FUNCTION(dom);
PHP_MINFO_FUNCTION(dom);
diff --git a/ext/dom/processinginstruction.c b/ext/dom/processinginstruction.c
index 465ecb431e73a..c40d24d18ce23 100644
--- a/ext/dom/processinginstruction.c
+++ b/ext/dom/processinginstruction.c
@@ -128,6 +128,8 @@ int dom_processinginstruction_data_write(dom_object *obj, zval *newval)
return FAILURE;
}
+ php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
+
xmlNodeSetContentLen(nodep, (xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str) + 1);
zend_string_release_ex(str, 0);
diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness.phpt
new file mode 100644
index 0000000000000..2b4622d10d389
--- /dev/null
+++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness.phpt
@@ -0,0 +1,47 @@
+--TEST--
+DOMDocument::getElementsByTagName() is live
+--EXTENSIONS--
+dom
+--FILE--
+loadXML( ' Hello xinclude: book.xml not found Hello xinclude: book.xml not found
hello
world
'); + +$elements = $doc->getElementsByTagName('p'); +$elements->item(0); // Activate item cache +$doc->loadHTML('A
B
C
'); +var_dump($elements); +var_dump($elements->item(0)->textContent); // First lookup +var_dump($elements->item(2)->textContent); // Uses cache +var_dump($elements->item(1)->textContent); // Does not use cache + +echo "-- Remove cached item test --\n"; + +$doc = new DOMDocument(); +$doc->loadHTML('hello
world
!
'); + +$elements = $doc->getElementsByTagName('p'); +$item = $elements->item(0); // Activate item cache +var_dump($item->textContent); +$item->remove(); +// Now element 0 means "world", and 1 means "!" +unset($item); +$item = $elements->item(1); +var_dump($item->textContent); + +echo "-- Removal of cached item in loop test --\n"; + +$doc = new DOMDocument; +$doc->loadXML( 'hello
world
!
'); + +$elements = $doc->getElementsByTagName('p'); +$item = $elements->item(0); // Activate item cache +var_dump($elements->length); // Length not cached yet, should still compute +$item->remove(); +// Now element 0 means "world", and 1 means "!" +unset($item); +var_dump($elements->length); +$item = $elements->item(1); +var_dump($item->textContent); +$item = $elements->item(1); +var_dump($item->textContent); +$item = $elements->item(0); +var_dump($item->textContent); +$item = $elements->item(1); +var_dump($item->textContent); + +?> +--EXPECT-- +int(3) +int(2) +string(1) "!" +string(1) "!" +string(5) "world" +string(1) "!" diff --git a/ext/dom/tests/DOMDocument_liveness_caching_invalidation.phpt b/ext/dom/tests/DOMDocument_liveness_caching_invalidation.phpt new file mode 100644 index 0000000000000..e05bd1ac6f646 --- /dev/null +++ b/ext/dom/tests/DOMDocument_liveness_caching_invalidation.phpt @@ -0,0 +1,43 @@ +--TEST-- +DOMDocument liveness caching invalidation by textContent +--EXTENSIONS-- +dom +--FILE-- +loadXML('