diff --git a/NEWS b/NEWS index 53e4fe73519b2..808f3ed806101 100644 --- a/NEWS +++ b/NEWS @@ -44,6 +44,9 @@ PHP NEWS - Date: . Implement More Appropriate Date/Time Exceptions RFC. (Derick) +- DOM: + . Fix bug GH-11308 (getElementsByTagName() is O(N^2)). (nielsdos) + - Exif: . Removed unneeded codepaths in exif_process_TIFF_in_JPEG(). (nielsdos) diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS index 99b609a115b00..6db2d99ec59b9 100644 --- a/UPGRADING.INTERNALS +++ b/UPGRADING.INTERNALS @@ -120,6 +120,24 @@ PHP 8.3 INTERNALS UPGRADE NOTES - A new function dom_get_doc_props_read_only() is added to gather the document properties in a read-only way. This function avoids allocation when there are no document properties changed yet. + - The node list returned by DOMNode::getElementsByTagName() and + DOMNode::getElementsByTagNameNS() now caches the length and the last requested item. + This means that the length and the last requested item are not recalculated + when the node list is iterated over multiple times. + If you do not use the internal PHP dom APIs to modify the document, you need to + manually invalidate the cache using php_libxml_invalidate_node_list_cache_from_doc(). + Furthermore, the following internal APIs were added to handle the cache: + . php_dom_is_cache_tag_stale_from_doc_ptr() + . php_dom_is_cache_tag_stale_from_node() + . php_dom_mark_cache_tag_up_to_date_from_node() + - The function dom_get_elements_by_tag_name_ns_raw() has an additional parameter to indicate + the base node of the node list. This function also no longer accepts -1 as the index argument. + - The function dom_namednode_iter() has additional arguments to avoid recomputing the length of + the strings. + + g. ext/libxml + - Two new functions: php_libxml_invalidate_node_list_cache_from_doc() and + php_libxml_invalidate_node_list_cache() were added to invalidate the cache of a node list. ======================== 4. OpCode changes diff --git a/ext/dom/document.c b/ext/dom/document.c index 13324645e987b..06c4b2b97b9e3 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -777,7 +777,6 @@ PHP_METHOD(DOMDocument, getElementsByTagName) size_t name_len; dom_object *intern, *namednode; char *name; - xmlChar *local; id = ZEND_THIS; if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) { @@ -788,8 +787,7 @@ PHP_METHOD(DOMDocument, getElementsByTagName) php_dom_create_iterator(return_value, DOM_NODELIST); namednode = Z_DOMOBJ_P(return_value); - local = xmlCharStrndup(name, name_len); - dom_namednode_iter(intern, 0, namednode, NULL, local, NULL); + dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, NULL, 0); } /* }}} end dom_document_get_elements_by_tag_name */ @@ -847,6 +845,8 @@ PHP_METHOD(DOMDocument, importNode) } } + php_libxml_invalidate_node_list_cache_from_doc(docp); + DOM_RET_OBJ((xmlNodePtr) retnodep, &ret, intern); } /* }}} end dom_document_import_node */ @@ -991,7 +991,6 @@ PHP_METHOD(DOMDocument, getElementsByTagNameNS) size_t uri_len, name_len; dom_object *intern, *namednode; char *uri, *name; - xmlChar *local, *nsuri; id = ZEND_THIS; if (zend_parse_parameters(ZEND_NUM_ARGS(), "s!s", &uri, &uri_len, &name, &name_len) == FAILURE) { @@ -1002,9 +1001,7 @@ PHP_METHOD(DOMDocument, getElementsByTagNameNS) php_dom_create_iterator(return_value, DOM_NODELIST); namednode = Z_DOMOBJ_P(return_value); - local = xmlCharStrndup(name, name_len); - nsuri = xmlCharStrndup(uri ? uri : "", uri_len); - dom_namednode_iter(intern, 0, namednode, NULL, local, nsuri); + dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, uri ? uri : "", uri_len); } /* }}} end dom_document_get_elements_by_tag_name_ns */ @@ -1070,6 +1067,8 @@ PHP_METHOD(DOMDocument, normalizeDocument) DOM_GET_OBJ(docp, id, xmlDocPtr, intern); + php_libxml_invalidate_node_list_cache_from_doc(docp); + dom_normalize((xmlNodePtr) docp); } /* }}} end dom_document_normalize_document */ @@ -1328,10 +1327,14 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) { if (id != NULL) { intern = Z_DOMOBJ_P(id); + size_t old_modification_nr = 0; if (intern != NULL) { docp = (xmlDocPtr) dom_object_get_node(intern); doc_prop = NULL; if (docp != NULL) { + const php_libxml_doc_ptr *doc_ptr = docp->_private; + ZEND_ASSERT(doc_ptr != NULL); /* Must exist, we have a document */ + old_modification_nr = doc_ptr->cache_tag.modification_nr; php_libxml_decrement_node_ptr((php_libxml_node_object *) intern); doc_prop = intern->document->doc_props; intern->document->doc_props = NULL; @@ -1348,6 +1351,12 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) { } php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern); + /* Since iterators should invalidate, we need to start the modification number from the old counter */ + if (old_modification_nr != 0) { + php_libxml_doc_ptr* doc_ptr = (php_libxml_doc_ptr*) ((php_libxml_node_object*) intern)->node; /* downcast */ + doc_ptr->cache_tag.modification_nr = old_modification_nr; + php_libxml_invalidate_node_list_cache(doc_ptr); + } RETURN_TRUE; } else { @@ -1563,6 +1572,8 @@ PHP_METHOD(DOMDocument, xinclude) php_dom_remove_xinclude_nodes(root); } + php_libxml_invalidate_node_list_cache_from_doc(docp); + if (err) { RETVAL_LONG(err); } else { @@ -1871,10 +1882,14 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */ if (id != NULL && instanceof_function(Z_OBJCE_P(id), dom_document_class_entry)) { intern = Z_DOMOBJ_P(id); + size_t old_modification_nr = 0; if (intern != NULL) { docp = (xmlDocPtr) dom_object_get_node(intern); doc_prop = NULL; if (docp != NULL) { + const php_libxml_doc_ptr *doc_ptr = docp->_private; + ZEND_ASSERT(doc_ptr != NULL); /* Must exist, we have a document */ + old_modification_nr = doc_ptr->cache_tag.modification_nr; php_libxml_decrement_node_ptr((php_libxml_node_object *) intern); doc_prop = intern->document->doc_props; intern->document->doc_props = NULL; @@ -1891,6 +1906,12 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */ } php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern); + /* Since iterators should invalidate, we need to start the modification number from the old counter */ + if (old_modification_nr != 0) { + php_libxml_doc_ptr* doc_ptr = (php_libxml_doc_ptr*) ((php_libxml_node_object*) intern)->node; /* downcast */ + doc_ptr->cache_tag.modification_nr = old_modification_nr; + php_libxml_invalidate_node_list_cache(doc_ptr); + } RETURN_TRUE; } else { diff --git a/ext/dom/documenttype.c b/ext/dom/documenttype.c index b046b05f80eff..cfc4b043edb22 100644 --- a/ext/dom/documenttype.c +++ b/ext/dom/documenttype.c @@ -65,7 +65,7 @@ int dom_documenttype_entities_read(dom_object *obj, zval *retval) entityht = (xmlHashTable *) doctypep->entities; intern = Z_DOMOBJ_P(retval); - dom_namednode_iter(obj, XML_ENTITY_NODE, intern, entityht, NULL, NULL); + dom_namednode_iter(obj, XML_ENTITY_NODE, intern, entityht, NULL, 0, NULL, 0); return SUCCESS; } @@ -93,7 +93,7 @@ int dom_documenttype_notations_read(dom_object *obj, zval *retval) notationht = (xmlHashTable *) doctypep->notations; intern = Z_DOMOBJ_P(retval); - dom_namednode_iter(obj, XML_NOTATION_NODE, intern, notationht, NULL, NULL); + dom_namednode_iter(obj, XML_NOTATION_NODE, intern, notationht, NULL, 0, NULL, 0); return SUCCESS; } diff --git a/ext/dom/dom_iterators.c b/ext/dom/dom_iterators.c index 72c97104db04d..2cf2c7bb6e7ce 100644 --- a/ext/dom/dom_iterators.c +++ b/ext/dom/dom_iterators.c @@ -179,7 +179,7 @@ static void php_dom_iterator_move_forward(zend_object_iterator *iter) /* {{{ */ dom_object *intern; dom_object *nnmap; dom_nnodemap_object *objmap; - int previndex=0; + int previndex; HashTable *nodeht; zval *entry; bool do_curobj_undef = 1; @@ -205,23 +205,32 @@ static void php_dom_iterator_move_forward(zend_object_iterator *iter) /* {{{ */ do_curobj_undef = 0; } } else { - curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node; if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) { + curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node; curnode = curnode->next; } else { - /* Nav the tree evey time as this is LIVE */ + /* The collection is live, we nav the tree from the base object if we cannot + * use the cache to restart from the last point. */ basenode = dom_object_get_node(objmap->baseobj); - if (basenode && (basenode->type == XML_DOCUMENT_NODE || - basenode->type == XML_HTML_DOCUMENT_NODE)) { - basenode = xmlDocGetRootElement((xmlDoc *) basenode); - } else if (basenode) { - basenode = basenode->children; - } else { + if (UNEXPECTED(!basenode)) { goto err; } + if (php_dom_is_cache_tag_stale_from_node(&iterator->cache_tag, basenode)) { + php_dom_mark_cache_tag_up_to_date_from_node(&iterator->cache_tag, basenode); + previndex = 0; + if (basenode && (basenode->type == XML_DOCUMENT_NODE || + basenode->type == XML_HTML_DOCUMENT_NODE)) { + curnode = xmlDocGetRootElement((xmlDoc *) basenode); + } else { + curnode = basenode->children; + } + } else { + previndex = iter->index - 1; + curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node; + } curnode = dom_get_elements_by_tag_name_ns_raw( - basenode, (char *) objmap->ns, (char *) objmap->local, &previndex, iter->index); + basenode, curnode, (char *) objmap->ns, (char *) objmap->local, &previndex, iter->index); } } } else { @@ -258,7 +267,7 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i { dom_object *intern; dom_nnodemap_object *objmap; - xmlNodePtr nodep, curnode=NULL; + xmlNodePtr curnode=NULL; int curindex = 0; HashTable *nodeht; zval *entry; @@ -270,6 +279,7 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i } iterator = emalloc(sizeof(php_dom_iterator)); zend_iterator_init(&iterator->intern); + iterator->cache_tag.modification_nr = 0; ZVAL_OBJ_COPY(&iterator->intern.data, Z_OBJ_P(object)); iterator->intern.funcs = &php_dom_iterator_funcs; @@ -288,24 +298,25 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i ZVAL_COPY(&iterator->curobj, entry); } } else { - nodep = (xmlNode *)dom_object_get_node(objmap->baseobj); - if (!nodep) { + xmlNodePtr basep = (xmlNode *)dom_object_get_node(objmap->baseobj); + if (!basep) { goto err; } if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) { if (objmap->nodetype == XML_ATTRIBUTE_NODE) { - curnode = (xmlNodePtr) nodep->properties; + curnode = (xmlNodePtr) basep->properties; } else { - curnode = (xmlNodePtr) nodep->children; + curnode = (xmlNodePtr) basep->children; } } else { + xmlNodePtr nodep = basep; if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) { nodep = xmlDocGetRootElement((xmlDoc *) nodep); } else { nodep = nodep->children; } curnode = dom_get_elements_by_tag_name_ns_raw( - nodep, (char *) objmap->ns, (char *) objmap->local, &curindex, 0); + basep, nodep, (char *) objmap->ns, (char *) objmap->local, &curindex, 0); } } } else { diff --git a/ext/dom/element.c b/ext/dom/element.c index 19cef5834657a..93d9ad5fb910a 100644 --- a/ext/dom/element.c +++ b/ext/dom/element.c @@ -511,7 +511,6 @@ PHP_METHOD(DOMElement, getElementsByTagName) size_t name_len; dom_object *intern, *namednode; char *name; - xmlChar *local; id = ZEND_THIS; if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) { @@ -522,8 +521,7 @@ PHP_METHOD(DOMElement, getElementsByTagName) php_dom_create_iterator(return_value, DOM_NODELIST); namednode = Z_DOMOBJ_P(return_value); - local = xmlCharStrndup(name, name_len); - dom_namednode_iter(intern, 0, namednode, NULL, local, NULL); + dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, NULL, 0); } /* }}} end dom_element_get_elements_by_tag_name */ @@ -930,7 +928,6 @@ PHP_METHOD(DOMElement, getElementsByTagNameNS) size_t uri_len, name_len; dom_object *intern, *namednode; char *uri, *name; - xmlChar *local, *nsuri; id = ZEND_THIS; if (zend_parse_parameters(ZEND_NUM_ARGS(), "s!s", &uri, &uri_len, &name, &name_len) == FAILURE) { @@ -941,9 +938,7 @@ PHP_METHOD(DOMElement, getElementsByTagNameNS) php_dom_create_iterator(return_value, DOM_NODELIST); namednode = Z_DOMOBJ_P(return_value); - local = xmlCharStrndup(name, name_len); - nsuri = xmlCharStrndup(uri ? uri : "", uri_len); - dom_namednode_iter(intern, 0, namednode, NULL, local, nsuri); + dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, uri ? uri : "", uri_len); } /* }}} end dom_element_get_elements_by_tag_name_ns */ diff --git a/ext/dom/node.c b/ext/dom/node.c index fdb51bf51092f..78c9b2dca1802 100644 --- a/ext/dom/node.c +++ b/ext/dom/node.c @@ -195,6 +195,8 @@ int dom_node_node_value_write(dom_object *obj, zval *newval) break; } + php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + zend_string_release_ex(str, 0); return SUCCESS; } @@ -274,7 +276,7 @@ int dom_node_child_nodes_read(dom_object *obj, zval *retval) php_dom_create_iterator(retval, DOM_NODELIST); intern = Z_DOMOBJ_P(retval); - dom_namednode_iter(obj, XML_ELEMENT_NODE, intern, NULL, NULL, NULL); + dom_namednode_iter(obj, XML_ELEMENT_NODE, intern, NULL, NULL, 0, NULL, 0); return SUCCESS; } @@ -482,7 +484,7 @@ int dom_node_attributes_read(dom_object *obj, zval *retval) if (nodep->type == XML_ELEMENT_NODE) { php_dom_create_iterator(retval, DOM_NAMEDNODEMAP); intern = Z_DOMOBJ_P(retval); - dom_namednode_iter(obj, XML_ATTRIBUTE_NODE, intern, NULL, NULL, NULL); + dom_namednode_iter(obj, XML_ATTRIBUTE_NODE, intern, NULL, NULL, 0, NULL, 0); } else { ZVAL_NULL(retval); } @@ -769,6 +771,8 @@ int dom_node_text_content_write(dom_object *obj, zval *newval) return FAILURE; } + php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + const xmlChar *xmlChars = (const xmlChar *) ZSTR_VAL(str); int type = nodep->type; @@ -897,6 +901,8 @@ PHP_METHOD(DOMNode, insertBefore) php_libxml_increment_doc_ref((php_libxml_node_object *)childobj, NULL); } + php_libxml_invalidate_node_list_cache_from_doc(parentp->doc); + if (ref != NULL) { DOM_GET_OBJ(refp, ref, xmlNodePtr, refpobj); if (refp->parent != parentp) { @@ -1086,6 +1092,7 @@ PHP_METHOD(DOMNode, replaceChild) nodep->doc->intSubset = (xmlDtd *) newchild; } } + php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); DOM_RET_OBJ(oldchild, &ret, intern); } /* }}} end dom_node_replace_child */ @@ -1127,6 +1134,7 @@ PHP_METHOD(DOMNode, removeChild) } xmlUnlinkNode(child); + php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); DOM_RET_OBJ(child, &ret, intern); } /* }}} end dom_node_remove_child */ @@ -1230,6 +1238,8 @@ PHP_METHOD(DOMNode, appendChild) dom_reconcile_ns(nodep->doc, new_child); + php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + DOM_RET_OBJ(new_child, &ret, intern); } /* }}} end dom_node_append_child */ @@ -1339,6 +1349,8 @@ PHP_METHOD(DOMNode, normalize) DOM_GET_OBJ(nodep, id, xmlNodePtr, intern); + php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + dom_normalize(nodep); } @@ -1571,6 +1583,8 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ RETURN_THROWS(); } + php_libxml_invalidate_node_list_cache_from_doc(docp); + if (xpath_array == NULL) { if (nodep->type != XML_DOCUMENT_NODE) { ctxp = xmlXPathNewContext(docp); diff --git a/ext/dom/nodelist.c b/ext/dom/nodelist.c index b03ebe1acd90a..20e3b18bee883 100644 --- a/ext/dom/nodelist.c +++ b/ext/dom/nodelist.c @@ -31,6 +31,24 @@ * Since: */ +static zend_always_inline void objmap_cache_release_cached_obj(dom_nnodemap_object *objmap) +{ + if (objmap->cached_obj) { + /* Since the DOM is a tree there can be no cycles. */ + if (GC_DELREF(&objmap->cached_obj->std) == 0) { + zend_objects_store_del(&objmap->cached_obj->std); + } + objmap->cached_obj = NULL; + objmap->cached_obj_index = 0; + } +} + +static zend_always_inline void reset_objmap_cache(dom_nnodemap_object *objmap) +{ + objmap_cache_release_cached_obj(objmap); + objmap->cached_length = -1; +} + static int get_nodelist_length(dom_object *obj) { dom_nnodemap_object *objmap = (dom_nnodemap_object *) obj->ptr; @@ -52,6 +70,17 @@ static int get_nodelist_length(dom_object *obj) return 0; } + if (!php_dom_is_cache_tag_stale_from_node(&objmap->cache_tag, nodep)) { + if (objmap->cached_length >= 0) { + return objmap->cached_length; + } + /* Only the length is out-of-date, the cache tag is still valid. + * Therefore, only overwrite the length and keep the currently cached object. */ + } else { + php_dom_mark_cache_tag_up_to_date_from_node(&objmap->cache_tag, nodep); + reset_objmap_cache(objmap); + } + int count = 0; if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) { xmlNodePtr curnode = nodep->children; @@ -63,15 +92,18 @@ static int get_nodelist_length(dom_object *obj) } } } else { + xmlNodePtr basep = nodep; if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) { nodep = xmlDocGetRootElement((xmlDoc *) nodep); } else { nodep = nodep->children; } dom_get_elements_by_tag_name_ns_raw( - nodep, (char *) objmap->ns, (char *) objmap->local, &count, -1); + basep, nodep, (char *) objmap->ns, (char *) objmap->local, &count, INT_MAX - 1 /* because of <= */); } + objmap->cached_length = count; + return count; } @@ -113,11 +145,12 @@ PHP_METHOD(DOMNodeList, item) zval *id; zend_long index; int ret; + bool cache_itemnode = false; dom_object *intern; xmlNodePtr itemnode = NULL; dom_nnodemap_object *objmap; - xmlNodePtr nodep, curnode; + xmlNodePtr basep; int count = 0; id = ZEND_THIS; @@ -145,23 +178,51 @@ PHP_METHOD(DOMNodeList, item) return; } } else if (objmap->baseobj) { - nodep = dom_object_get_node(objmap->baseobj); - if (nodep) { + basep = dom_object_get_node(objmap->baseobj); + if (basep) { + xmlNodePtr nodep = basep; + /* For now we're only able to use cache for forward search. + * TODO: in the future we could extend the logic of the node list such that backwards searches + * are also possible. */ + bool restart = true; + int relative_index = index; + if (index >= objmap->cached_obj_index && objmap->cached_obj && !php_dom_is_cache_tag_stale_from_node(&objmap->cache_tag, nodep)) { + xmlNodePtr cached_obj_xml_node = dom_object_get_node(objmap->cached_obj); + + /* The node cannot be NULL if the cache is valid. If it is NULL, then it means we + * forgot an invalidation somewhere. Take the defensive programming approach and invalidate + * it here if it's NULL (except in debug mode where we would want to catch this). */ + if (UNEXPECTED(cached_obj_xml_node == NULL)) { +#if ZEND_DEBUG + ZEND_UNREACHABLE(); +#endif + reset_objmap_cache(objmap); + } else { + restart = false; + relative_index -= objmap->cached_obj_index; + nodep = cached_obj_xml_node; + } + } if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) { - curnode = nodep->children; - while (count < index && curnode != NULL) { + if (restart) { + nodep = nodep->children; + } + while (count < relative_index && nodep != NULL) { count++; - curnode = curnode->next; + nodep = nodep->next; } - itemnode = curnode; + itemnode = nodep; } else { - if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) { - nodep = xmlDocGetRootElement((xmlDoc *) nodep); - } else { - nodep = nodep->children; + if (restart) { + if (basep->type == XML_DOCUMENT_NODE || basep->type == XML_HTML_DOCUMENT_NODE) { + nodep = xmlDocGetRootElement((xmlDoc*) basep); + } else { + nodep = basep->children; + } } - itemnode = dom_get_elements_by_tag_name_ns_raw(nodep, (char *) objmap->ns, (char *) objmap->local, &count, index); + itemnode = dom_get_elements_by_tag_name_ns_raw(basep, nodep, (char *) objmap->ns, (char *) objmap->local, &count, relative_index); } + cache_itemnode = true; } } } @@ -169,6 +230,25 @@ PHP_METHOD(DOMNodeList, item) if (itemnode) { DOM_RET_OBJ(itemnode, &ret, objmap->baseobj); + if (cache_itemnode) { + /* Hold additional reference for the cache, must happen before releasing the cache + * because we might be the last reference holder. + * Instead of storing and copying zvals, we store the object pointer directly. + * This saves us some bytes because a pointer is smaller than a zval. + * This also means we have to manually refcount the objects here, and remove the reference count + * in reset_objmap_cache() and the destructor. */ + dom_object *cached_obj = Z_DOMOBJ_P(return_value); + GC_ADDREF(&cached_obj->std); + /* If the tag is stale, all cached data is useless. Otherwise only the cached object is useless. */ + if (php_dom_is_cache_tag_stale_from_node(&objmap->cache_tag, itemnode)) { + php_dom_mark_cache_tag_up_to_date_from_node(&objmap->cache_tag, itemnode); + reset_objmap_cache(objmap); + } else { + objmap_cache_release_cached_obj(objmap); + } + objmap->cached_obj_index = index; + objmap->cached_obj = cached_obj; + } return; } } diff --git a/ext/dom/parentnode.c b/ext/dom/parentnode.c index 4d0fffeb9e058..36cd6104f38a4 100644 --- a/ext/dom/parentnode.c +++ b/ext/dom/parentnode.c @@ -280,6 +280,8 @@ void dom_parent_node_append(dom_object *context, zval *nodes, int nodesc) return; } + php_libxml_invalidate_node_list_cache_from_doc(parentNode->doc); + xmlNode *fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); if (fragment == NULL) { @@ -322,6 +324,8 @@ void dom_parent_node_prepend(dom_object *context, zval *nodes, int nodesc) return; } + php_libxml_invalidate_node_list_cache_from_doc(parentNode->doc); + xmlNodePtr newchild, nextsib; xmlNode *fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); @@ -402,6 +406,8 @@ void dom_parent_node_after(dom_object *context, zval *nodes, int nodesc) doc = prevsib->doc; + php_libxml_invalidate_node_list_cache_from_doc(doc); + /* Spec step 4: convert nodes into fragment */ fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); @@ -451,6 +457,8 @@ void dom_parent_node_before(dom_object *context, zval *nodes, int nodesc) doc = nextsib->doc; + php_libxml_invalidate_node_list_cache_from_doc(doc); + /* Spec step 4: convert nodes into fragment */ fragment = dom_zvals_to_fragment(context->document, parentNode, nodes, nodesc); @@ -506,6 +514,8 @@ void dom_child_node_remove(dom_object *context) return; } + php_libxml_invalidate_node_list_cache_from_doc(context->document->ptr); + while (children) { if (children == child) { xmlUnlinkNode(child); diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index e02b0973291c5..44c10ea6a8aec 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -942,7 +942,7 @@ void dom_objects_free_storage(zend_object *object) } /* }}} */ -void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, xmlChar *local, xmlChar *ns) /* {{{ */ +void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, const char *local, size_t local_len, const char *ns, size_t ns_len) /* {{{ */ { dom_nnodemap_object *mapptr = (dom_nnodemap_object *) intern->ptr; @@ -950,11 +950,33 @@ void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xml ZVAL_OBJ_COPY(&mapptr->baseobj_zv, &basenode->std); + xmlDocPtr doc = basenode->document ? basenode->document->ptr : NULL; + mapptr->baseobj = basenode; mapptr->nodetype = ntype; mapptr->ht = ht; - mapptr->local = local; - mapptr->ns = ns; + + const xmlChar* tmp; + + if (local) { + int len = local_len > INT_MAX ? -1 : (int) local_len; + if (doc != NULL && (tmp = xmlDictExists(doc->dict, (const xmlChar *)local, len)) != NULL) { + mapptr->local = (xmlChar*) tmp; + } else { + mapptr->local = xmlCharStrndup(local, len); + mapptr->free_local = true; + } + } + + if (ns) { + int len = ns_len > INT_MAX ? -1 : (int) ns_len; + if (doc != NULL && (tmp = xmlDictExists(doc->dict, (const xmlChar *)ns, len)) != NULL) { + mapptr->ns = (xmlChar*) tmp; + } else { + mapptr->ns = xmlCharStrndup(ns, len); + mapptr->free_ns = true; + } + } } /* }}} */ @@ -1010,10 +1032,13 @@ void dom_nnodemap_objects_free_storage(zend_object *object) /* {{{ */ dom_nnodemap_object *objmap = (dom_nnodemap_object *)intern->ptr; if (objmap) { - if (objmap->local) { + if (objmap->cached_obj && GC_DELREF(&objmap->cached_obj->std) == 0) { + zend_objects_store_del(&objmap->cached_obj->std); + } + if (objmap->free_local) { xmlFree(objmap->local); } - if (objmap->ns) { + if (objmap->free_ns) { xmlFree(objmap->ns); } if (!Z_ISUNDEF(objmap->baseobj_zv)) { @@ -1042,7 +1067,13 @@ zend_object *dom_nnodemap_objects_new(zend_class_entry *class_type) /* {{{ */ objmap->nodetype = 0; objmap->ht = NULL; objmap->local = NULL; + objmap->free_local = false; objmap->ns = NULL; + objmap->free_ns = false; + objmap->cache_tag.modification_nr = 0; + objmap->cached_length = -1; + objmap->cached_obj = NULL; + objmap->cached_obj_index = 0; return &intern->std; } @@ -1220,19 +1251,25 @@ bool dom_has_feature(zend_string *feature, zend_string *version) } /* }}} end dom_has_feature */ -xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *local, int *cur, int index) /* {{{ */ +xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr basep, xmlNodePtr nodep, char *ns, char *local, int *cur, int index) /* {{{ */ { + /* Can happen with detached document */ + if (UNEXPECTED(nodep == NULL)) { + return NULL; + } + xmlNodePtr ret = NULL; + bool local_match_any = local[0] == '*' && local[1] == '\0'; /* Note: The spec says that ns == '' must be transformed to ns == NULL. In other words, they are equivalent. * PHP however does not do this and internally uses the empty string everywhere when the user provides ns == NULL. * This is because for PHP ns == NULL has another meaning: "match every namespace" instead of "match the empty namespace". */ bool ns_match_any = ns == NULL || (ns[0] == '*' && ns[1] == '\0'); - while (nodep != NULL && (*cur <= index || index == -1)) { + while (*cur <= index) { if (nodep->type == XML_ELEMENT_NODE) { - if (xmlStrEqual(nodep->name, (xmlChar *)local) || xmlStrEqual((xmlChar *)"*", (xmlChar *)local)) { - if (ns_match_any || (!strcmp(ns, "") && nodep->ns == NULL) || (nodep->ns != NULL && xmlStrEqual(nodep->ns->href, (xmlChar *)ns))) { + if (local_match_any || xmlStrEqual(nodep->name, (xmlChar *)local)) { + if (ns_match_any || (ns[0] == '\0' && nodep->ns == NULL) || (nodep->ns != NULL && xmlStrEqual(nodep->ns->href, (xmlChar *)ns))) { if (*cur == index) { ret = nodep; break; @@ -1240,16 +1277,33 @@ xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *l (*cur)++; } } - ret = dom_get_elements_by_tag_name_ns_raw(nodep->children, ns, local, cur, index); - if (ret != NULL) { - break; + + if (nodep->children) { + nodep = nodep->children; + continue; } } - nodep = nodep->next; + + if (nodep->next) { + nodep = nodep->next; + } else { + /* Go upwards, until we find a parent node with a next sibling, or until we hit the base. */ + do { + nodep = nodep->parent; + if (nodep == basep) { + return NULL; + } + /* This shouldn't happen, unless there's an invalidation bug somewhere. */ + if (UNEXPECTED(nodep == NULL)) { + zend_throw_error(NULL, "Current node in traversal is not in the document. Please report this as a bug in php-src."); + return NULL; + } + } while (nodep->next == NULL); + nodep = nodep->next; + } } return ret; } -/* }}} */ /* }}} end dom_element_get_elements_by_tag_name_ns_raw */ static inline bool is_empty_node(xmlNodePtr nodep) diff --git a/ext/dom/php_dom.h b/ext/dom/php_dom.h index a7ae09384cfdc..0602f4166eaa7 100644 --- a/ext/dom/php_dom.h +++ b/ext/dom/php_dom.h @@ -82,15 +82,22 @@ typedef struct _dom_nnodemap_object { dom_object *baseobj; zval baseobj_zv; int nodetype; + int cached_length; xmlHashTable *ht; xmlChar *local; xmlChar *ns; + php_libxml_cache_tag cache_tag; + dom_object *cached_obj; + int cached_obj_index; + bool free_local : 1; + bool free_ns : 1; } dom_nnodemap_object; typedef struct { zend_object_iterator intern; zval curobj; HashPosition pos; + php_libxml_cache_tag cache_tag; } php_dom_iterator; #include "domexception.h" @@ -113,14 +120,14 @@ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns); void dom_reconcile_ns(xmlDocPtr doc, xmlNodePtr nodep); xmlNsPtr dom_get_nsdecl(xmlNode *node, xmlChar *localName); void dom_normalize (xmlNodePtr nodep); -xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr nodep, char *ns, char *local, int *cur, int index); +xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr basep, xmlNodePtr nodep, char *ns, char *local, int *cur, int index); void php_dom_create_implementation(zval *retval); int dom_hierarchy(xmlNodePtr parent, xmlNodePtr child); bool dom_has_feature(zend_string *feature, zend_string *version); int dom_node_is_read_only(xmlNodePtr node); int dom_node_children_valid(xmlNodePtr node); void php_dom_create_iterator(zval *return_value, int ce_type); -void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, xmlChar *local, xmlChar *ns); +void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, const char *local, size_t local_len, const char *ns, size_t ns_len); xmlNodePtr create_notation(const xmlChar *name, const xmlChar *ExternalID, const xmlChar *SystemID); xmlNode *php_dom_libxml_hash_iter(xmlHashTable *ht, int index); xmlNode *php_dom_libxml_notation_iter(xmlHashTable *ht, int index); @@ -153,6 +160,33 @@ void dom_child_node_remove(dom_object *context); #define DOM_NODELIST 0 #define DOM_NAMEDNODEMAP 1 +static zend_always_inline bool php_dom_is_cache_tag_stale_from_doc_ptr(const php_libxml_cache_tag *cache_tag, const php_libxml_doc_ptr *doc_ptr) +{ + ZEND_ASSERT(cache_tag != NULL); + ZEND_ASSERT(doc_ptr != NULL); + /* See overflow comment in php_libxml_invalidate_node_list_cache(). */ +#if SIZEOF_SIZE_T == 8 + return cache_tag->modification_nr != doc_ptr->cache_tag.modification_nr; +#else + return cache_tag->modification_nr != doc_ptr->cache_tag.modification_nr || UNEXPECTED(doc_ptr->cache_tag.modification_nr == SIZE_MAX); +#endif +} + +static zend_always_inline bool php_dom_is_cache_tag_stale_from_node(const php_libxml_cache_tag *cache_tag, const xmlNodePtr node) +{ + ZEND_ASSERT(node != NULL); + return !node->doc || !node->doc->_private || php_dom_is_cache_tag_stale_from_doc_ptr(cache_tag, node->doc->_private); +} + +static zend_always_inline void php_dom_mark_cache_tag_up_to_date_from_node(php_libxml_cache_tag *cache_tag, const xmlNodePtr node) +{ + ZEND_ASSERT(cache_tag != NULL); + if (node->doc && node->doc->_private) { + const php_libxml_doc_ptr* doc_ptr = node->doc->_private; + cache_tag->modification_nr = doc_ptr->cache_tag.modification_nr; + } +} + PHP_MINIT_FUNCTION(dom); PHP_MSHUTDOWN_FUNCTION(dom); PHP_MINFO_FUNCTION(dom); diff --git a/ext/dom/processinginstruction.c b/ext/dom/processinginstruction.c index 465ecb431e73a..c40d24d18ce23 100644 --- a/ext/dom/processinginstruction.c +++ b/ext/dom/processinginstruction.c @@ -128,6 +128,8 @@ int dom_processinginstruction_data_write(dom_object *obj, zval *newval) return FAILURE; } + php_libxml_invalidate_node_list_cache_from_doc(nodep->doc); + xmlNodeSetContentLen(nodep, (xmlChar *) ZSTR_VAL(str), ZSTR_LEN(str) + 1); zend_string_release_ex(str, 0); diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness.phpt new file mode 100644 index 0000000000000..2b4622d10d389 --- /dev/null +++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness.phpt @@ -0,0 +1,47 @@ +--TEST-- +DOMDocument::getElementsByTagName() is live +--EXTENSIONS-- +dom +--FILE-- +loadXML( '' ); +$root = $doc->documentElement; + +$i = 0; + +/* Note that the list is live. The explanation for the output is as follows: + Before the loop we have the following (writing only the attributes): + 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + + Now the loop starts, the current element is marked with a V. $i == 0: + V + 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + 1 gets printed. $i == 0, which is even, so 1 gets removed, which results in: + V + 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + Note that everything shifted to the left. + Because the list is live, the current element pointer still refers to the first index, which now corresponds to element with attribute 2. + Now the foreach body ends, which means we go to the next element, which is now 3 instead of 2. + V + 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + 3 gets printed. $i == 1, which is odd, so nothing happens and we move on to the next element: + V + 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + 4 gets printed. $i == 2, which is even, so 4 gets removed, which results in: + V + 2 3 5 6 7 8 9 10 11 12 13 14 15 + Note again everything shifted to the left. + Now the foreach body ends, which means we go to the next element, which is now 6 instead of 5. + V + 2 3 5 6 7 8 9 10 11 12 13 14 15 + 6 gets printed, etc... */ +foreach ($doc->getElementsByTagName('e') as $node) { + print $node->getAttribute('i') . ' '; + if ($i++ % 2 == 0) + $root->removeChild($node); +} +print "\n"; +?> +--EXPECT-- +1 3 4 6 7 9 10 12 13 15 diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_simplexml.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_simplexml.phpt new file mode 100644 index 0000000000000..0ac52cd5d662f --- /dev/null +++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_simplexml.phpt @@ -0,0 +1,29 @@ +--TEST-- +DOMDocument::getElementsByTagName() liveness with simplexml_import_dom +--EXTENSIONS-- +dom +simplexml +--FILE-- +loadXML( '' ); +$list = $doc->getElementsByTagName('e'); +print $list->item(5)->getAttribute('i')."\n"; +echo "before import\n"; +$s = simplexml_import_dom($doc->documentElement); +echo "after import\n"; + +unset($s->e[5]); +print $list->item(5)->getAttribute('i')."\n"; + +unset($s->e[5]); +print $list->item(5)->getAttribute('i')."\n"; + +?> +--EXPECT-- +6 +before import +after import +7 +8 diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_tree_walk.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_tree_walk.phpt new file mode 100644 index 0000000000000..91d810df51bc6 --- /dev/null +++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_tree_walk.phpt @@ -0,0 +1,89 @@ +--TEST-- +DOMDocument::getElementsByTagName() liveness tree walk +--EXTENSIONS-- +dom +--FILE-- +loadXML(''); + +echo "-- On first child, for --\n"; +$list = $doc->documentElement->firstChild->getElementsByTagName('b'); +var_dump($list->length); +for ($i = 0; $i < $list->length; $i++) { + echo $i, " ", $list->item($i)->getAttribute('i'), "\n"; +} +// Try to access one beyond to check if we don't get excess elements +var_dump($list->item($i)); + +echo "-- On first child, foreach --\n"; +foreach ($list as $item) { + echo $item->getAttribute('i'), "\n"; +} + +echo "-- On document, for --\n"; +$list = $doc->getElementsByTagName('b'); +var_dump($list->length); +for ($i = 0; $i < $list->length; $i++) { + echo $i, " ", $list->item($i)->getAttribute('i'), "\n"; +} +// Try to access one beyond to check if we don't get excess elements +var_dump($list->item($i)); + +echo "-- On document, foreach --\n"; +foreach ($list as $item) { + echo $item->getAttribute('i'), "\n"; +} + +echo "-- On document, after caching followed by removing --\n"; + +$list = $doc->documentElement->firstChild->getElementsByTagName('b'); +$list->item(0); // Activate item cache +$list->item(0)->remove(); +$list->item(0)->remove(); +$list->item(0)->remove(); +var_dump($list->length); +var_dump($list->item(0)); +foreach ($list as $item) { + echo "Should not execute\n"; +} + +echo "-- On document, clean list after removal --\n"; +$list = $doc->documentElement->firstChild->getElementsByTagName('b'); +var_dump($list->length); +var_dump($list->item(0)); +foreach ($list as $item) { + echo "Should not execute\n"; +} + +?> +--EXPECT-- +-- On first child, for -- +int(3) +0 1 +1 2 +2 3 +NULL +-- On first child, foreach -- +1 +2 +3 +-- On document, for -- +int(4) +0 1 +1 2 +2 3 +3 4 +NULL +-- On document, foreach -- +1 +2 +3 +4 +-- On document, after caching followed by removing -- +int(0) +NULL +-- On document, clean list after removal -- +int(0) +NULL diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_write_properties.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_write_properties.phpt new file mode 100644 index 0000000000000..af8af51844c9d --- /dev/null +++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_write_properties.phpt @@ -0,0 +1,43 @@ +--TEST-- +DOMDocument::getElementsByTagName() liveness affected by writing properties +--EXTENSIONS-- +dom +--FILE-- +'; +$fields = ['nodeValue', 'textContent']; + +foreach ($fields as $field) { + $doc = new DOMDocument; + $doc->loadXML($xml); + $list = $doc->getElementsByTagName('a'); + var_dump($list->item(0) === NULL); + $doc->documentElement->{$field} = 'new_content'; + var_dump($list->item(0) === NULL); + print $doc->saveXML(); +} + +// Shouldn't be affected +$doc = new DOMDocument; +$doc->loadXML($xml); +$list = $doc->getElementsByTagNameNS('foo', 'a'); +var_dump($list->item(0) === NULL); +$doc->documentElement->firstChild->prefix = 'ns2'; +var_dump($list->item(0) === NULL); +print $doc->saveXML(); + +?> +--EXPECT-- +bool(false) +bool(true) + +new_content +bool(false) +bool(true) + +new_content +bool(false) +bool(false) + + diff --git a/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_xinclude.phpt b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_xinclude.phpt new file mode 100644 index 0000000000000..2c14a2080569e --- /dev/null +++ b/ext/dom/tests/DOMDocument_getElementsByTagName_liveness_xinclude.phpt @@ -0,0 +1,43 @@ +--TEST-- +DOMDocument::getElementsByTagName() liveness with DOMDocument::xinclude() +--EXTENSIONS-- +dom +--FILE-- + + +

Hello

+ + + +

xinclude: book.xml not found

+
+
+
+
+EOD; + +$dom = new DOMDocument; +$dom->loadXML($xml); +$elements = $dom->getElementsByTagName('p'); +var_dump($elements->item(0)->textContent); +@$dom->xinclude(); +var_dump($elements->item(1)->textContent); +echo $dom->saveXML(); + +?> +--EXPECT-- +string(5) "Hello" +string(28) "xinclude: book.xml not found" + + +

Hello

+ + +

xinclude: book.xml not found

+ +
+
diff --git a/ext/dom/tests/DOMDocument_item_cache_invalidation.phpt b/ext/dom/tests/DOMDocument_item_cache_invalidation.phpt new file mode 100644 index 0000000000000..dad532b8167fe --- /dev/null +++ b/ext/dom/tests/DOMDocument_item_cache_invalidation.phpt @@ -0,0 +1,69 @@ +--TEST-- +DOMDocument node list item cache invalidation +--EXTENSIONS-- +dom +--FILE-- +loadHTML('

hello

world

'); + +$elements = $doc->getElementsByTagName('p'); +$elements->item(0); // Activate item cache +$doc->loadHTML('

A

B

C

'); +var_dump($elements); +var_dump($elements->item(0)->textContent); // First lookup +var_dump($elements->item(2)->textContent); // Uses cache +var_dump($elements->item(1)->textContent); // Does not use cache + +echo "-- Remove cached item test --\n"; + +$doc = new DOMDocument(); +$doc->loadHTML('

hello

world

!

'); + +$elements = $doc->getElementsByTagName('p'); +$item = $elements->item(0); // Activate item cache +var_dump($item->textContent); +$item->remove(); +// Now element 0 means "world", and 1 means "!" +unset($item); +$item = $elements->item(1); +var_dump($item->textContent); + +echo "-- Removal of cached item in loop test --\n"; + +$doc = new DOMDocument; +$doc->loadXML( '' ); +$root = $doc->documentElement; + +$i = 0; +$elements = $root->getElementsByTagName('e'); +for ($i = 0; $i < 11; $i++) { + $node = $elements->item($i); + print $node->getAttribute('i') . ' '; + if ($i++ % 2 == 0) + $root->removeChild( $node ); +} +print "\n"; + +?> +--EXPECTF-- +-- Switch document test -- +object(DOMNodeList)#2 (1) { + ["length"]=> + int(3) +} +string(1) "A" +string(1) "C" +string(1) "B" +-- Remove cached item test -- +string(5) "hello" +string(1) "!" +-- Removal of cached item in loop test -- +1 4 7 10 13 +Fatal error: Uncaught Error: Call to a member function getAttribute() on null in %s:%d +Stack trace: +#0 {main} + thrown in %s on line %d diff --git a/ext/dom/tests/DOMDocument_length_cache_invalidation.phpt b/ext/dom/tests/DOMDocument_length_cache_invalidation.phpt new file mode 100644 index 0000000000000..7a3633894a381 --- /dev/null +++ b/ext/dom/tests/DOMDocument_length_cache_invalidation.phpt @@ -0,0 +1,34 @@ +--TEST-- +DOMDocument node list length cache invalidation +--EXTENSIONS-- +dom +--FILE-- +loadHTML('

hello

world

!

'); + +$elements = $doc->getElementsByTagName('p'); +$item = $elements->item(0); // Activate item cache +var_dump($elements->length); // Length not cached yet, should still compute +$item->remove(); +// Now element 0 means "world", and 1 means "!" +unset($item); +var_dump($elements->length); +$item = $elements->item(1); +var_dump($item->textContent); +$item = $elements->item(1); +var_dump($item->textContent); +$item = $elements->item(0); +var_dump($item->textContent); +$item = $elements->item(1); +var_dump($item->textContent); + +?> +--EXPECT-- +int(3) +int(2) +string(1) "!" +string(1) "!" +string(5) "world" +string(1) "!" diff --git a/ext/dom/tests/DOMDocument_liveness_caching_invalidation.phpt b/ext/dom/tests/DOMDocument_liveness_caching_invalidation.phpt new file mode 100644 index 0000000000000..e05bd1ac6f646 --- /dev/null +++ b/ext/dom/tests/DOMDocument_liveness_caching_invalidation.phpt @@ -0,0 +1,43 @@ +--TEST-- +DOMDocument liveness caching invalidation by textContent +--EXTENSIONS-- +dom +--FILE-- +loadXML(''); +$root = $doc->documentElement; + +$i = 0; + +echo "-- Overwrite during iteration --\n"; + +foreach ($doc->getElementsByTagName('e') as $node) { + if ($i++ == 2) { + $root->textContent = 'overwrite'; + } + var_dump($node->tagName, $node->getAttribute('id')); +} + +echo "-- Empty iteration --\n"; +foreach ($doc->getElementsByTagName('e') as $node) { + echo "Should not execute\n"; +} + +echo "-- After adding an element again --\n"; +$root->appendChild(new DOMElement('e')); +foreach ($doc->getElementsByTagName('e') as $node) { + echo "Should execute once\n"; +} +?> +--EXPECT-- +-- Overwrite during iteration -- +string(1) "e" +string(1) "1" +string(1) "e" +string(1) "2" +string(1) "e" +string(1) "3" +-- Empty iteration -- +-- After adding an element again -- +Should execute once diff --git a/ext/dom/tests/DOMElement_getElementsByTagName_without_document.phpt b/ext/dom/tests/DOMElement_getElementsByTagName_without_document.phpt new file mode 100644 index 0000000000000..9aebf3139cdf9 --- /dev/null +++ b/ext/dom/tests/DOMElement_getElementsByTagName_without_document.phpt @@ -0,0 +1,16 @@ +--TEST-- +Node list cache should not break on DOMElement::getElementsByTagName() without document +--EXTENSIONS-- +dom +--FILE-- +getElementsByTagName("b") as $x) { + var_dump($x); +} + +?> +Done +--EXPECT-- +Done diff --git a/ext/libxml/libxml.c b/ext/libxml/libxml.c index 71ed3f911cca7..5af3443069aba 100644 --- a/ext/libxml/libxml.c +++ b/ext/libxml/libxml.c @@ -1163,8 +1163,14 @@ PHP_LIBXML_API int php_libxml_increment_node_ptr(php_libxml_node_object *object, object->node->_private = private_data; } } else { + if (UNEXPECTED(node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE)) { + php_libxml_doc_ptr *doc_ptr = emalloc(sizeof(php_libxml_doc_ptr)); + doc_ptr->cache_tag.modification_nr = 1; /* iterators start at 0, such that they will start in an uninitialised state */ + object->node = (php_libxml_node_ptr *) doc_ptr; /* downcast */ + } else { + object->node = emalloc(sizeof(php_libxml_node_ptr)); + } ret_refcount = 1; - object->node = emalloc(sizeof(php_libxml_node_ptr)); object->node->node = node; object->node->refcount = 1; object->node->_private = private_data; diff --git a/ext/libxml/php_libxml.h b/ext/libxml/php_libxml.h index de9b49d2ce3b6..a23ff6ee57c13 100644 --- a/ext/libxml/php_libxml.h +++ b/ext/libxml/php_libxml.h @@ -69,6 +69,16 @@ typedef struct _php_libxml_node_ptr { void *_private; } php_libxml_node_ptr; +typedef struct { + size_t modification_nr; +} php_libxml_cache_tag; + +/* extends php_libxml_node_ptr */ +typedef struct { + php_libxml_node_ptr node_ptr; + php_libxml_cache_tag cache_tag; +} php_libxml_doc_ptr; + typedef struct _php_libxml_node_object { php_libxml_node_ptr *node; php_libxml_ref_obj *document; @@ -81,6 +91,27 @@ static inline php_libxml_node_object *php_libxml_node_fetch_object(zend_object * return (php_libxml_node_object *)((char*)(obj) - obj->handlers->offset); } +static zend_always_inline void php_libxml_invalidate_node_list_cache(php_libxml_doc_ptr *doc_ptr) +{ +#if SIZEOF_SIZE_T == 8 + /* If one operation happens every nanosecond, then it would still require 584 years to overflow + * the counter. So we'll just assume this never happens. */ + doc_ptr->cache_tag.modification_nr++; +#else + size_t new_modification_nr = doc_ptr->cache_tag.modification_nr + 1; + if (EXPECTED(new_modification_nr > 0)) { /* unsigned overflow; checking after addition results in one less instruction */ + doc_ptr->cache_tag.modification_nr = new_modification_nr; + } +#endif +} + +static zend_always_inline void php_libxml_invalidate_node_list_cache_from_doc(xmlDocPtr docp) +{ + if (docp && docp->_private) { /* docp is NULL for detached nodes */ + php_libxml_invalidate_node_list_cache(docp->_private); + } +} + #define Z_LIBXML_NODE_P(zv) php_libxml_node_fetch_object(Z_OBJ_P((zv))) typedef void * (*php_libxml_export_node) (zval *object); diff --git a/ext/simplexml/simplexml.c b/ext/simplexml/simplexml.c index d3f2865e12036..e0340b2e3a68a 100644 --- a/ext/simplexml/simplexml.c +++ b/ext/simplexml/simplexml.c @@ -442,6 +442,8 @@ static zval *sxe_prop_dim_write(zend_object *object, zval *member, zval *value, GET_NODE(sxe, node); + php_libxml_invalidate_node_list_cache_from_doc(node->doc); + if (sxe->iter.type == SXE_ITER_ATTRLIST) { attribs = 1; elements = 0; @@ -813,6 +815,8 @@ static void sxe_prop_dim_delete(zend_object *object, zval *member, bool elements GET_NODE(sxe, node); + php_libxml_invalidate_node_list_cache_from_doc(node->doc); + if (Z_TYPE_P(member) == IS_LONG) { if (sxe->iter.type != SXE_ITER_ATTRLIST) { attribs = 0; @@ -1686,6 +1690,8 @@ PHP_METHOD(SimpleXMLElement, addChild) sxe = Z_SXEOBJ_P(ZEND_THIS); GET_NODE(sxe, node); + php_libxml_invalidate_node_list_cache_from_doc(node->doc); + if (sxe->iter.type == SXE_ITER_ATTRLIST) { php_error_docref(NULL, E_WARNING, "Cannot add element to attributes"); return;