mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-07-12 08:59:34 +03:00
Work around lxml API abuse
Make xmlNodeDumpOutput and htmlNodeDumpFormatOutput work with corrupted parent pointers. This used to work with the old recursive code, but the non-recursive rewrite required parent pointers to be set correctly. Unfortunately, lxml relies on the old behavior and passes subtrees with a corrupted structure. Fall back to a recursive function call if an invalid parent pointer is detected. Fixes #255.
This commit is contained in:
46
HTMLtree.c
46
HTMLtree.c
@ -744,7 +744,7 @@ void
|
|||||||
htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
||||||
xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
|
xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
|
||||||
int format) {
|
int format) {
|
||||||
xmlNodePtr root;
|
xmlNodePtr root, parent;
|
||||||
xmlAttrPtr attr;
|
xmlAttrPtr attr;
|
||||||
const htmlElemDesc * info;
|
const htmlElemDesc * info;
|
||||||
|
|
||||||
@ -755,6 +755,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
|||||||
}
|
}
|
||||||
|
|
||||||
root = cur;
|
root = cur;
|
||||||
|
parent = cur->parent;
|
||||||
while (1) {
|
while (1) {
|
||||||
switch (cur->type) {
|
switch (cur->type) {
|
||||||
case XML_HTML_DOCUMENT_NODE:
|
case XML_HTML_DOCUMENT_NODE:
|
||||||
@ -762,13 +763,25 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
|||||||
if (((xmlDocPtr) cur)->intSubset != NULL) {
|
if (((xmlDocPtr) cur)->intSubset != NULL) {
|
||||||
htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
|
htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
|
||||||
}
|
}
|
||||||
if (cur->children != NULL) {
|
/* Always validate cur->parent when descending. */
|
||||||
|
if ((cur->parent == parent) && (cur->children != NULL)) {
|
||||||
|
parent = cur;
|
||||||
cur = cur->children;
|
cur = cur->children;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case XML_ELEMENT_NODE:
|
case XML_ELEMENT_NODE:
|
||||||
|
/*
|
||||||
|
* Some users like lxml are known to pass nodes with a corrupted
|
||||||
|
* tree structure. Fall back to a recursive call to handle this
|
||||||
|
* case.
|
||||||
|
*/
|
||||||
|
if ((cur->parent != parent) && (cur->children != NULL)) {
|
||||||
|
htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Get specific HTML info for that node.
|
* Get specific HTML info for that node.
|
||||||
*/
|
*/
|
||||||
@ -817,6 +830,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
|||||||
(cur->name != NULL) &&
|
(cur->name != NULL) &&
|
||||||
(cur->name[0] != 'p')) /* p, pre, param */
|
(cur->name[0] != 'p')) /* p, pre, param */
|
||||||
xmlOutputBufferWriteString(buf, "\n");
|
xmlOutputBufferWriteString(buf, "\n");
|
||||||
|
parent = cur;
|
||||||
cur = cur->children;
|
cur = cur->children;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -825,9 +839,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
|||||||
(info != NULL) && (!info->isinline)) {
|
(info != NULL) && (!info->isinline)) {
|
||||||
if ((cur->next->type != HTML_TEXT_NODE) &&
|
if ((cur->next->type != HTML_TEXT_NODE) &&
|
||||||
(cur->next->type != HTML_ENTITY_REF_NODE) &&
|
(cur->next->type != HTML_ENTITY_REF_NODE) &&
|
||||||
(cur->parent != NULL) &&
|
(parent != NULL) &&
|
||||||
(cur->parent->name != NULL) &&
|
(parent->name != NULL) &&
|
||||||
(cur->parent->name[0] != 'p')) /* p, pre, param */
|
(parent->name[0] != 'p')) /* p, pre, param */
|
||||||
xmlOutputBufferWriteString(buf, "\n");
|
xmlOutputBufferWriteString(buf, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -842,9 +856,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
|||||||
break;
|
break;
|
||||||
if (((cur->name == (const xmlChar *)xmlStringText) ||
|
if (((cur->name == (const xmlChar *)xmlStringText) ||
|
||||||
(cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
|
(cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
|
||||||
((cur->parent == NULL) ||
|
((parent == NULL) ||
|
||||||
((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
|
((xmlStrcasecmp(parent->name, BAD_CAST "script")) &&
|
||||||
(xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
|
(xmlStrcasecmp(parent->name, BAD_CAST "style"))))) {
|
||||||
xmlChar *buffer;
|
xmlChar *buffer;
|
||||||
|
|
||||||
buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
|
buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
|
||||||
@ -902,13 +916,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
cur = parent;
|
||||||
* The parent should never be NULL here but we want to handle
|
/* cur->parent was validated when descending. */
|
||||||
* corrupted documents gracefully.
|
parent = cur->parent;
|
||||||
*/
|
|
||||||
if (cur->parent == NULL)
|
|
||||||
return;
|
|
||||||
cur = cur->parent;
|
|
||||||
|
|
||||||
if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
|
if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
|
||||||
(cur->type == XML_DOCUMENT_NODE)) {
|
(cur->type == XML_DOCUMENT_NODE)) {
|
||||||
@ -939,9 +949,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
|||||||
(cur->next != NULL)) {
|
(cur->next != NULL)) {
|
||||||
if ((cur->next->type != HTML_TEXT_NODE) &&
|
if ((cur->next->type != HTML_TEXT_NODE) &&
|
||||||
(cur->next->type != HTML_ENTITY_REF_NODE) &&
|
(cur->next->type != HTML_ENTITY_REF_NODE) &&
|
||||||
(cur->parent != NULL) &&
|
(parent != NULL) &&
|
||||||
(cur->parent->name != NULL) &&
|
(parent->name != NULL) &&
|
||||||
(cur->parent->name[0] != 'p')) /* p, pre, param */
|
(parent->name[0] != 'p')) /* p, pre, param */
|
||||||
xmlOutputBufferWriteString(buf, "\n");
|
xmlOutputBufferWriteString(buf, "\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
31
xmlsave.c
31
xmlsave.c
@ -847,7 +847,7 @@ htmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
|
|||||||
static void
|
static void
|
||||||
xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
|
xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
|
||||||
int format = ctxt->format;
|
int format = ctxt->format;
|
||||||
xmlNodePtr tmp, root, unformattedNode = NULL;
|
xmlNodePtr tmp, root, unformattedNode = NULL, parent;
|
||||||
xmlAttrPtr attr;
|
xmlAttrPtr attr;
|
||||||
xmlChar *start, *end;
|
xmlChar *start, *end;
|
||||||
xmlOutputBufferPtr buf;
|
xmlOutputBufferPtr buf;
|
||||||
@ -856,6 +856,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
|
|||||||
buf = ctxt->buf;
|
buf = ctxt->buf;
|
||||||
|
|
||||||
root = cur;
|
root = cur;
|
||||||
|
parent = cur->parent;
|
||||||
while (1) {
|
while (1) {
|
||||||
switch (cur->type) {
|
switch (cur->type) {
|
||||||
case XML_DOCUMENT_NODE:
|
case XML_DOCUMENT_NODE:
|
||||||
@ -868,7 +869,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case XML_DOCUMENT_FRAG_NODE:
|
case XML_DOCUMENT_FRAG_NODE:
|
||||||
if (cur->children != NULL) {
|
/* Always validate cur->parent when descending. */
|
||||||
|
if ((cur->parent == parent) && (cur->children != NULL)) {
|
||||||
|
parent = cur;
|
||||||
cur = cur->children;
|
cur = cur->children;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -887,7 +890,18 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case XML_ELEMENT_NODE:
|
case XML_ELEMENT_NODE:
|
||||||
if ((cur != root) && (ctxt->format == 1) && (xmlIndentTreeOutput))
|
/*
|
||||||
|
* Some users like lxml are known to pass nodes with a corrupted
|
||||||
|
* tree structure. Fall back to a recursive call to handle this
|
||||||
|
* case.
|
||||||
|
*/
|
||||||
|
if ((cur->parent != parent) && (cur->children != NULL)) {
|
||||||
|
xmlNodeDumpOutputInternal(ctxt, cur);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((ctxt->level > 0) && (ctxt->format == 1) &&
|
||||||
|
(xmlIndentTreeOutput))
|
||||||
xmlOutputBufferWrite(buf, ctxt->indent_size *
|
xmlOutputBufferWrite(buf, ctxt->indent_size *
|
||||||
(ctxt->level > ctxt->indent_nr ?
|
(ctxt->level > ctxt->indent_nr ?
|
||||||
ctxt->indent_nr : ctxt->level),
|
ctxt->indent_nr : ctxt->level),
|
||||||
@ -942,6 +956,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
|
|||||||
xmlOutputBufferWrite(buf, 1, ">");
|
xmlOutputBufferWrite(buf, 1, ">");
|
||||||
if (ctxt->format == 1) xmlOutputBufferWrite(buf, 1, "\n");
|
if (ctxt->format == 1) xmlOutputBufferWrite(buf, 1, "\n");
|
||||||
if (ctxt->level >= 0) ctxt->level++;
|
if (ctxt->level >= 0) ctxt->level++;
|
||||||
|
parent = cur;
|
||||||
cur = cur->children;
|
cur = cur->children;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -1058,13 +1073,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
cur = parent;
|
||||||
* The parent should never be NULL here but we want to handle
|
/* cur->parent was validated when descending. */
|
||||||
* corrupted documents gracefully.
|
parent = cur->parent;
|
||||||
*/
|
|
||||||
if (cur->parent == NULL)
|
|
||||||
return;
|
|
||||||
cur = cur->parent;
|
|
||||||
|
|
||||||
if (cur->type == XML_ELEMENT_NODE) {
|
if (cur->type == XML_ELEMENT_NODE) {
|
||||||
if (ctxt->level > 0) ctxt->level--;
|
if (ctxt->level > 0) ctxt->level--;
|
||||||
|
Reference in New Issue
Block a user