Skip to content

Implement iteration cache, item cache and length cache for node list iteration #11330

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Jun 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ PHP NEWS
- Date:
. Implement More Appropriate Date/Time Exceptions RFC. (Derick)

- DOM:
. Fix bug GH-11308 (getElementsByTagName() is O(N^2)). (nielsdos)

- Exif:
. Removed unneeded codepaths in exif_process_TIFF_in_JPEG(). (nielsdos)

Expand Down
18 changes: 18 additions & 0 deletions UPGRADING.INTERNALS
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,24 @@ PHP 8.3 INTERNALS UPGRADE NOTES
- A new function dom_get_doc_props_read_only() is added to gather the document
properties in a read-only way. This function avoids allocation when there are
no document properties changed yet.
- The node list returned by DOMNode::getElementsByTagName() and
DOMNode::getElementsByTagNameNS() now caches the length and the last requested item.
This means that the length and the last requested item are not recalculated
when the node list is iterated over multiple times.
If you do not use the internal PHP dom APIs to modify the document, you need to
manually invalidate the cache using php_libxml_invalidate_node_list_cache_from_doc().
Furthermore, the following internal APIs were added to handle the cache:
. php_dom_is_cache_tag_stale_from_doc_ptr()
. php_dom_is_cache_tag_stale_from_node()
. php_dom_mark_cache_tag_up_to_date_from_node()
- The function dom_get_elements_by_tag_name_ns_raw() has an additional parameter to indicate
the base node of the node list. This function also no longer accepts -1 as the index argument.
- The function dom_namednode_iter() has additional arguments to avoid recomputing the length of
the strings.

g. ext/libxml
- Two new functions: php_libxml_invalidate_node_list_cache_from_doc() and
php_libxml_invalidate_node_list_cache() were added to invalidate the cache of a node list.

========================
4. OpCode changes
Expand Down
35 changes: 28 additions & 7 deletions ext/dom/document.c
Original file line number Diff line number Diff line change
Expand Up @@ -777,7 +777,6 @@ PHP_METHOD(DOMDocument, getElementsByTagName)
size_t name_len;
dom_object *intern, *namednode;
char *name;
xmlChar *local;

id = ZEND_THIS;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
Expand All @@ -788,8 +787,7 @@ PHP_METHOD(DOMDocument, getElementsByTagName)

php_dom_create_iterator(return_value, DOM_NODELIST);
namednode = Z_DOMOBJ_P(return_value);
local = xmlCharStrndup(name, name_len);
dom_namednode_iter(intern, 0, namednode, NULL, local, NULL);
dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, NULL, 0);
}
/* }}} end dom_document_get_elements_by_tag_name */

Expand Down Expand Up @@ -847,6 +845,8 @@ PHP_METHOD(DOMDocument, importNode)
}
}

php_libxml_invalidate_node_list_cache_from_doc(docp);

DOM_RET_OBJ((xmlNodePtr) retnodep, &ret, intern);
}
/* }}} end dom_document_import_node */
Expand Down Expand Up @@ -991,7 +991,6 @@ PHP_METHOD(DOMDocument, getElementsByTagNameNS)
size_t uri_len, name_len;
dom_object *intern, *namednode;
char *uri, *name;
xmlChar *local, *nsuri;

id = ZEND_THIS;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s!s", &uri, &uri_len, &name, &name_len) == FAILURE) {
Expand All @@ -1002,9 +1001,7 @@ PHP_METHOD(DOMDocument, getElementsByTagNameNS)

php_dom_create_iterator(return_value, DOM_NODELIST);
namednode = Z_DOMOBJ_P(return_value);
local = xmlCharStrndup(name, name_len);
nsuri = xmlCharStrndup(uri ? uri : "", uri_len);
dom_namednode_iter(intern, 0, namednode, NULL, local, nsuri);
dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, uri ? uri : "", uri_len);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't uri_len potentially undefined here? Or does ZPP set the value to 0 even if the uri pointer is set to NULL?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

uri_len will be 0 if uri is not provided. Check out zend_parse_arg_string(): If the string is NULL then the length gets set to 0.

}
/* }}} end dom_document_get_elements_by_tag_name_ns */

Expand Down Expand Up @@ -1070,6 +1067,8 @@ PHP_METHOD(DOMDocument, normalizeDocument)

DOM_GET_OBJ(docp, id, xmlDocPtr, intern);

php_libxml_invalidate_node_list_cache_from_doc(docp);

dom_normalize((xmlNodePtr) docp);
}
/* }}} end dom_document_normalize_document */
Expand Down Expand Up @@ -1328,10 +1327,14 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) {

if (id != NULL) {
intern = Z_DOMOBJ_P(id);
size_t old_modification_nr = 0;
if (intern != NULL) {
docp = (xmlDocPtr) dom_object_get_node(intern);
doc_prop = NULL;
if (docp != NULL) {
const php_libxml_doc_ptr *doc_ptr = docp->_private;
ZEND_ASSERT(doc_ptr != NULL); /* Must exist, we have a document */
old_modification_nr = doc_ptr->cache_tag.modification_nr;
php_libxml_decrement_node_ptr((php_libxml_node_object *) intern);
doc_prop = intern->document->doc_props;
intern->document->doc_props = NULL;
Expand All @@ -1348,6 +1351,12 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) {
}

php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern);
/* Since iterators should invalidate, we need to start the modification number from the old counter */
if (old_modification_nr != 0) {
php_libxml_doc_ptr* doc_ptr = (php_libxml_doc_ptr*) ((php_libxml_node_object*) intern)->node; /* downcast */
doc_ptr->cache_tag.modification_nr = old_modification_nr;
php_libxml_invalidate_node_list_cache(doc_ptr);
}

RETURN_TRUE;
} else {
Expand Down Expand Up @@ -1563,6 +1572,8 @@ PHP_METHOD(DOMDocument, xinclude)
php_dom_remove_xinclude_nodes(root);
}

php_libxml_invalidate_node_list_cache_from_doc(docp);

if (err) {
RETVAL_LONG(err);
} else {
Expand Down Expand Up @@ -1871,10 +1882,14 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */

if (id != NULL && instanceof_function(Z_OBJCE_P(id), dom_document_class_entry)) {
intern = Z_DOMOBJ_P(id);
size_t old_modification_nr = 0;
if (intern != NULL) {
docp = (xmlDocPtr) dom_object_get_node(intern);
doc_prop = NULL;
if (docp != NULL) {
const php_libxml_doc_ptr *doc_ptr = docp->_private;
ZEND_ASSERT(doc_ptr != NULL); /* Must exist, we have a document */
old_modification_nr = doc_ptr->cache_tag.modification_nr;
php_libxml_decrement_node_ptr((php_libxml_node_object *) intern);
doc_prop = intern->document->doc_props;
intern->document->doc_props = NULL;
Expand All @@ -1891,6 +1906,12 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */
}

php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern);
/* Since iterators should invalidate, we need to start the modification number from the old counter */
if (old_modification_nr != 0) {
php_libxml_doc_ptr* doc_ptr = (php_libxml_doc_ptr*) ((php_libxml_node_object*) intern)->node; /* downcast */
doc_ptr->cache_tag.modification_nr = old_modification_nr;
php_libxml_invalidate_node_list_cache(doc_ptr);
}

RETURN_TRUE;
} else {
Expand Down
4 changes: 2 additions & 2 deletions ext/dom/documenttype.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ int dom_documenttype_entities_read(dom_object *obj, zval *retval)
entityht = (xmlHashTable *) doctypep->entities;

intern = Z_DOMOBJ_P(retval);
dom_namednode_iter(obj, XML_ENTITY_NODE, intern, entityht, NULL, NULL);
dom_namednode_iter(obj, XML_ENTITY_NODE, intern, entityht, NULL, 0, NULL, 0);

return SUCCESS;
}
Expand Down Expand Up @@ -93,7 +93,7 @@ int dom_documenttype_notations_read(dom_object *obj, zval *retval)
notationht = (xmlHashTable *) doctypep->notations;

intern = Z_DOMOBJ_P(retval);
dom_namednode_iter(obj, XML_NOTATION_NODE, intern, notationht, NULL, NULL);
dom_namednode_iter(obj, XML_NOTATION_NODE, intern, notationht, NULL, 0, NULL, 0);

return SUCCESS;
}
Expand Down
43 changes: 27 additions & 16 deletions ext/dom/dom_iterators.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ static void php_dom_iterator_move_forward(zend_object_iterator *iter) /* {{{ */
dom_object *intern;
dom_object *nnmap;
dom_nnodemap_object *objmap;
int previndex=0;
int previndex;
HashTable *nodeht;
zval *entry;
bool do_curobj_undef = 1;
Expand All @@ -205,23 +205,32 @@ static void php_dom_iterator_move_forward(zend_object_iterator *iter) /* {{{ */
do_curobj_undef = 0;
}
} else {
curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node;
if (objmap->nodetype == XML_ATTRIBUTE_NODE ||
objmap->nodetype == XML_ELEMENT_NODE) {
curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node;
curnode = curnode->next;
} else {
/* Nav the tree evey time as this is LIVE */
/* The collection is live, we nav the tree from the base object if we cannot
* use the cache to restart from the last point. */
basenode = dom_object_get_node(objmap->baseobj);
if (basenode && (basenode->type == XML_DOCUMENT_NODE ||
basenode->type == XML_HTML_DOCUMENT_NODE)) {
basenode = xmlDocGetRootElement((xmlDoc *) basenode);
} else if (basenode) {
basenode = basenode->children;
} else {
if (UNEXPECTED(!basenode)) {
goto err;
}
if (php_dom_is_cache_tag_stale_from_node(&iterator->cache_tag, basenode)) {
php_dom_mark_cache_tag_up_to_date_from_node(&iterator->cache_tag, basenode);
previndex = 0;
if (basenode && (basenode->type == XML_DOCUMENT_NODE ||
basenode->type == XML_HTML_DOCUMENT_NODE)) {
curnode = xmlDocGetRootElement((xmlDoc *) basenode);
} else {
curnode = basenode->children;
}
} else {
previndex = iter->index - 1;
curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node;
}
curnode = dom_get_elements_by_tag_name_ns_raw(
basenode, (char *) objmap->ns, (char *) objmap->local, &previndex, iter->index);
basenode, curnode, (char *) objmap->ns, (char *) objmap->local, &previndex, iter->index);
}
}
} else {
Expand Down Expand Up @@ -258,7 +267,7 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i
{
dom_object *intern;
dom_nnodemap_object *objmap;
xmlNodePtr nodep, curnode=NULL;
xmlNodePtr curnode=NULL;
int curindex = 0;
HashTable *nodeht;
zval *entry;
Expand All @@ -270,6 +279,7 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i
}
iterator = emalloc(sizeof(php_dom_iterator));
zend_iterator_init(&iterator->intern);
iterator->cache_tag.modification_nr = 0;

ZVAL_OBJ_COPY(&iterator->intern.data, Z_OBJ_P(object));
iterator->intern.funcs = &php_dom_iterator_funcs;
Expand All @@ -288,24 +298,25 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i
ZVAL_COPY(&iterator->curobj, entry);
}
} else {
nodep = (xmlNode *)dom_object_get_node(objmap->baseobj);
if (!nodep) {
xmlNodePtr basep = (xmlNode *)dom_object_get_node(objmap->baseobj);
if (!basep) {
goto err;
}
if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) {
if (objmap->nodetype == XML_ATTRIBUTE_NODE) {
curnode = (xmlNodePtr) nodep->properties;
curnode = (xmlNodePtr) basep->properties;
} else {
curnode = (xmlNodePtr) nodep->children;
curnode = (xmlNodePtr) basep->children;
}
} else {
xmlNodePtr nodep = basep;
if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) {
nodep = xmlDocGetRootElement((xmlDoc *) nodep);
} else {
nodep = nodep->children;
}
curnode = dom_get_elements_by_tag_name_ns_raw(
nodep, (char *) objmap->ns, (char *) objmap->local, &curindex, 0);
basep, nodep, (char *) objmap->ns, (char *) objmap->local, &curindex, 0);
}
}
} else {
Expand Down
9 changes: 2 additions & 7 deletions ext/dom/element.c
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,6 @@ PHP_METHOD(DOMElement, getElementsByTagName)
size_t name_len;
dom_object *intern, *namednode;
char *name;
xmlChar *local;

id = ZEND_THIS;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &name, &name_len) == FAILURE) {
Expand All @@ -522,8 +521,7 @@ PHP_METHOD(DOMElement, getElementsByTagName)

php_dom_create_iterator(return_value, DOM_NODELIST);
namednode = Z_DOMOBJ_P(return_value);
local = xmlCharStrndup(name, name_len);
dom_namednode_iter(intern, 0, namednode, NULL, local, NULL);
dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, NULL, 0);
}
/* }}} end dom_element_get_elements_by_tag_name */

Expand Down Expand Up @@ -930,7 +928,6 @@ PHP_METHOD(DOMElement, getElementsByTagNameNS)
size_t uri_len, name_len;
dom_object *intern, *namednode;
char *uri, *name;
xmlChar *local, *nsuri;

id = ZEND_THIS;
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s!s", &uri, &uri_len, &name, &name_len) == FAILURE) {
Expand All @@ -941,9 +938,7 @@ PHP_METHOD(DOMElement, getElementsByTagNameNS)

php_dom_create_iterator(return_value, DOM_NODELIST);
namednode = Z_DOMOBJ_P(return_value);
local = xmlCharStrndup(name, name_len);
nsuri = xmlCharStrndup(uri ? uri : "", uri_len);
dom_namednode_iter(intern, 0, namednode, NULL, local, nsuri);
dom_namednode_iter(intern, 0, namednode, NULL, name, name_len, uri ? uri : "", uri_len);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto uri_len comment


}
/* }}} end dom_element_get_elements_by_tag_name_ns */
Expand Down
18 changes: 16 additions & 2 deletions ext/dom/node.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,8 @@ int dom_node_node_value_write(dom_object *obj, zval *newval)
break;
}

php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);

zend_string_release_ex(str, 0);
return SUCCESS;
}
Expand Down Expand Up @@ -274,7 +276,7 @@ int dom_node_child_nodes_read(dom_object *obj, zval *retval)

php_dom_create_iterator(retval, DOM_NODELIST);
intern = Z_DOMOBJ_P(retval);
dom_namednode_iter(obj, XML_ELEMENT_NODE, intern, NULL, NULL, NULL);
dom_namednode_iter(obj, XML_ELEMENT_NODE, intern, NULL, NULL, 0, NULL, 0);

return SUCCESS;
}
Expand Down Expand Up @@ -482,7 +484,7 @@ int dom_node_attributes_read(dom_object *obj, zval *retval)
if (nodep->type == XML_ELEMENT_NODE) {
php_dom_create_iterator(retval, DOM_NAMEDNODEMAP);
intern = Z_DOMOBJ_P(retval);
dom_namednode_iter(obj, XML_ATTRIBUTE_NODE, intern, NULL, NULL, NULL);
dom_namednode_iter(obj, XML_ATTRIBUTE_NODE, intern, NULL, NULL, 0, NULL, 0);
} else {
ZVAL_NULL(retval);
}
Expand Down Expand Up @@ -769,6 +771,8 @@ int dom_node_text_content_write(dom_object *obj, zval *newval)
return FAILURE;
}

php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);

const xmlChar *xmlChars = (const xmlChar *) ZSTR_VAL(str);
int type = nodep->type;

Expand Down Expand Up @@ -897,6 +901,8 @@ PHP_METHOD(DOMNode, insertBefore)
php_libxml_increment_doc_ref((php_libxml_node_object *)childobj, NULL);
}

php_libxml_invalidate_node_list_cache_from_doc(parentp->doc);

if (ref != NULL) {
DOM_GET_OBJ(refp, ref, xmlNodePtr, refpobj);
if (refp->parent != parentp) {
Expand Down Expand Up @@ -1086,6 +1092,7 @@ PHP_METHOD(DOMNode, replaceChild)
nodep->doc->intSubset = (xmlDtd *) newchild;
}
}
php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
DOM_RET_OBJ(oldchild, &ret, intern);
}
/* }}} end dom_node_replace_child */
Expand Down Expand Up @@ -1127,6 +1134,7 @@ PHP_METHOD(DOMNode, removeChild)
}

xmlUnlinkNode(child);
php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
DOM_RET_OBJ(child, &ret, intern);
}
/* }}} end dom_node_remove_child */
Expand Down Expand Up @@ -1230,6 +1238,8 @@ PHP_METHOD(DOMNode, appendChild)

dom_reconcile_ns(nodep->doc, new_child);

php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);

DOM_RET_OBJ(new_child, &ret, intern);
}
/* }}} end dom_node_append_child */
Expand Down Expand Up @@ -1339,6 +1349,8 @@ PHP_METHOD(DOMNode, normalize)

DOM_GET_OBJ(nodep, id, xmlNodePtr, intern);

php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);

dom_normalize(nodep);

}
Expand Down Expand Up @@ -1571,6 +1583,8 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
RETURN_THROWS();
}

php_libxml_invalidate_node_list_cache_from_doc(docp);

if (xpath_array == NULL) {
if (nodep->type != XML_DOCUMENT_NODE) {
ctxp = xmlXPathNewContext(docp);
Expand Down
Loading