mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-08-24 09:49:22 +03:00
Make htmlNodeDumpFormatOutput non-recursive
Fixes stack overflow with deeply nested HTML documents. Found by OSS-Fuzz.
This commit is contained in:
406
HTMLtree.c
406
HTMLtree.c
@ -759,50 +759,6 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* htmlAttrListDumpOutput:
|
||||
* @buf: the HTML buffer output
|
||||
* @doc: the document
|
||||
* @cur: the first attribute pointer
|
||||
* @encoding: the encoding string
|
||||
*
|
||||
* Dump a list of HTML attributes
|
||||
*/
|
||||
static void
|
||||
htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
|
||||
if (cur == NULL) {
|
||||
return;
|
||||
}
|
||||
while (cur != NULL) {
|
||||
htmlAttrDumpOutput(buf, doc, cur, encoding);
|
||||
cur = cur->next;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* htmlNodeListDumpOutput:
|
||||
* @buf: the HTML buffer output
|
||||
* @doc: the document
|
||||
* @cur: the first node
|
||||
* @encoding: the encoding string
|
||||
* @format: should formatting spaces been added
|
||||
*
|
||||
* Dump an HTML node list, recursive behaviour,children are printed too.
|
||||
*/
|
||||
static void
|
||||
htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
||||
xmlNodePtr cur, const char *encoding, int format) {
|
||||
if (cur == NULL) {
|
||||
return;
|
||||
}
|
||||
while (cur != NULL) {
|
||||
htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
|
||||
cur = cur->next;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* htmlNodeDumpFormatOutput:
|
||||
* @buf: the HTML buffer output
|
||||
@ -816,6 +772,8 @@ htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
||||
void
|
||||
htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
||||
xmlNodePtr cur, const char *encoding, int format) {
|
||||
xmlNodePtr root;
|
||||
xmlAttrPtr attr;
|
||||
const htmlElemDesc * info;
|
||||
|
||||
xmlInitParser();
|
||||
@ -823,172 +781,193 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
||||
if ((cur == NULL) || (buf == NULL)) {
|
||||
return;
|
||||
}
|
||||
/*
|
||||
* Special cases.
|
||||
*/
|
||||
if (cur->type == XML_DTD_NODE)
|
||||
return;
|
||||
if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
|
||||
(cur->type == XML_DOCUMENT_NODE)){
|
||||
htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
|
||||
return;
|
||||
}
|
||||
if (cur->type == XML_ATTRIBUTE_NODE) {
|
||||
htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
|
||||
return;
|
||||
}
|
||||
if (cur->type == HTML_TEXT_NODE) {
|
||||
if (cur->content != NULL) {
|
||||
if (((cur->name == (const xmlChar *)xmlStringText) ||
|
||||
(cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
|
||||
((cur->parent == NULL) ||
|
||||
((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
|
||||
(xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
|
||||
xmlChar *buffer;
|
||||
|
||||
buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
|
||||
if (buffer != NULL) {
|
||||
xmlOutputBufferWriteString(buf, (const char *)buffer);
|
||||
xmlFree(buffer);
|
||||
}
|
||||
} else {
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->content);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (cur->type == HTML_COMMENT_NODE) {
|
||||
if (cur->content != NULL) {
|
||||
xmlOutputBufferWriteString(buf, "<!--");
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->content);
|
||||
xmlOutputBufferWriteString(buf, "-->");
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (cur->type == HTML_PI_NODE) {
|
||||
if (cur->name == NULL)
|
||||
return;
|
||||
xmlOutputBufferWriteString(buf, "<?");
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
||||
if (cur->content != NULL) {
|
||||
xmlOutputBufferWriteString(buf, " ");
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->content);
|
||||
}
|
||||
xmlOutputBufferWriteString(buf, ">");
|
||||
return;
|
||||
}
|
||||
if (cur->type == HTML_ENTITY_REF_NODE) {
|
||||
xmlOutputBufferWriteString(buf, "&");
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
||||
xmlOutputBufferWriteString(buf, ";");
|
||||
return;
|
||||
}
|
||||
if (cur->type == HTML_PRESERVE_NODE) {
|
||||
if (cur->content != NULL) {
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->content);
|
||||
}
|
||||
return;
|
||||
}
|
||||
root = cur;
|
||||
while (1) {
|
||||
switch (cur->type) {
|
||||
case XML_HTML_DOCUMENT_NODE:
|
||||
case XML_DOCUMENT_NODE:
|
||||
if (((xmlDocPtr) cur)->intSubset != NULL) {
|
||||
htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
|
||||
}
|
||||
if (cur->children != NULL) {
|
||||
cur = cur->children;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
|
||||
/*
|
||||
* Get specific HTML info for that node.
|
||||
*/
|
||||
if (cur->ns == NULL)
|
||||
info = htmlTagLookup(cur->name);
|
||||
else
|
||||
info = NULL;
|
||||
case XML_ELEMENT_NODE:
|
||||
/*
|
||||
* Get specific HTML info for that node.
|
||||
*/
|
||||
if (cur->ns == NULL)
|
||||
info = htmlTagLookup(cur->name);
|
||||
else
|
||||
info = NULL;
|
||||
|
||||
xmlOutputBufferWriteString(buf, "<");
|
||||
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
|
||||
xmlOutputBufferWriteString(buf, ":");
|
||||
}
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
||||
if (cur->nsDef)
|
||||
xmlNsListDumpOutput(buf, cur->nsDef);
|
||||
if (cur->properties != NULL)
|
||||
htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
|
||||
|
||||
if ((info != NULL) && (info->empty)) {
|
||||
xmlOutputBufferWriteString(buf, ">");
|
||||
if ((format) && (!info->isinline) && (cur->next != NULL)) {
|
||||
if ((cur->next->type != HTML_TEXT_NODE) &&
|
||||
(cur->next->type != HTML_ENTITY_REF_NODE) &&
|
||||
(cur->parent != NULL) &&
|
||||
(cur->parent->name != NULL) &&
|
||||
(cur->parent->name[0] != 'p')) /* p, pre, param */
|
||||
xmlOutputBufferWriteString(buf, "\n");
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
|
||||
(cur->children == NULL)) {
|
||||
if ((info != NULL) && (info->saveEndTag != 0) &&
|
||||
(xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
|
||||
(xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
|
||||
xmlOutputBufferWriteString(buf, ">");
|
||||
} else {
|
||||
xmlOutputBufferWriteString(buf, "></");
|
||||
xmlOutputBufferWriteString(buf, "<");
|
||||
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
|
||||
xmlOutputBufferWriteString(buf, ":");
|
||||
}
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
||||
xmlOutputBufferWriteString(buf, ">");
|
||||
}
|
||||
if ((format) && (cur->next != NULL) &&
|
||||
(info != NULL) && (!info->isinline)) {
|
||||
if ((cur->next->type != HTML_TEXT_NODE) &&
|
||||
(cur->next->type != HTML_ENTITY_REF_NODE) &&
|
||||
(cur->parent != NULL) &&
|
||||
(cur->parent->name != NULL) &&
|
||||
(cur->parent->name[0] != 'p')) /* p, pre, param */
|
||||
xmlOutputBufferWriteString(buf, "\n");
|
||||
}
|
||||
return;
|
||||
}
|
||||
xmlOutputBufferWriteString(buf, ">");
|
||||
if ((cur->type != XML_ELEMENT_NODE) &&
|
||||
(cur->content != NULL)) {
|
||||
/*
|
||||
* Uses the OutputBuffer property to automatically convert
|
||||
* invalids to charrefs
|
||||
*/
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
||||
if (cur->nsDef)
|
||||
xmlNsListDumpOutput(buf, cur->nsDef);
|
||||
attr = cur->properties;
|
||||
while (attr != NULL) {
|
||||
htmlAttrDumpOutput(buf, doc, attr, encoding);
|
||||
attr = attr->next;
|
||||
}
|
||||
|
||||
xmlOutputBufferWriteString(buf, (const char *) cur->content);
|
||||
}
|
||||
if (cur->children != NULL) {
|
||||
if ((format) && (info != NULL) && (!info->isinline) &&
|
||||
(cur->children->type != HTML_TEXT_NODE) &&
|
||||
(cur->children->type != HTML_ENTITY_REF_NODE) &&
|
||||
(cur->children != cur->last) &&
|
||||
(cur->name != NULL) &&
|
||||
(cur->name[0] != 'p')) /* p, pre, param */
|
||||
xmlOutputBufferWriteString(buf, "\n");
|
||||
htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
|
||||
if ((format) && (info != NULL) && (!info->isinline) &&
|
||||
(cur->last->type != HTML_TEXT_NODE) &&
|
||||
(cur->last->type != HTML_ENTITY_REF_NODE) &&
|
||||
(cur->children != cur->last) &&
|
||||
(cur->name != NULL) &&
|
||||
(cur->name[0] != 'p')) /* p, pre, param */
|
||||
xmlOutputBufferWriteString(buf, "\n");
|
||||
}
|
||||
xmlOutputBufferWriteString(buf, "</");
|
||||
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
|
||||
xmlOutputBufferWriteString(buf, ":");
|
||||
}
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
||||
xmlOutputBufferWriteString(buf, ">");
|
||||
if ((format) && (info != NULL) && (!info->isinline) &&
|
||||
(cur->next != NULL)) {
|
||||
if ((cur->next->type != HTML_TEXT_NODE) &&
|
||||
(cur->next->type != HTML_ENTITY_REF_NODE) &&
|
||||
(cur->parent != NULL) &&
|
||||
(cur->parent->name != NULL) &&
|
||||
(cur->parent->name[0] != 'p')) /* p, pre, param */
|
||||
xmlOutputBufferWriteString(buf, "\n");
|
||||
if ((info != NULL) && (info->empty)) {
|
||||
xmlOutputBufferWriteString(buf, ">");
|
||||
} else if (cur->children == NULL) {
|
||||
if ((info != NULL) && (info->saveEndTag != 0) &&
|
||||
(xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
|
||||
(xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
|
||||
xmlOutputBufferWriteString(buf, ">");
|
||||
} else {
|
||||
xmlOutputBufferWriteString(buf, "></");
|
||||
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
|
||||
xmlOutputBufferWriteString(buf,
|
||||
(const char *)cur->ns->prefix);
|
||||
xmlOutputBufferWriteString(buf, ":");
|
||||
}
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
||||
xmlOutputBufferWriteString(buf, ">");
|
||||
}
|
||||
} else {
|
||||
xmlOutputBufferWriteString(buf, ">");
|
||||
if ((format) && (info != NULL) && (!info->isinline) &&
|
||||
(cur->children->type != HTML_TEXT_NODE) &&
|
||||
(cur->children->type != HTML_ENTITY_REF_NODE) &&
|
||||
(cur->children != cur->last) &&
|
||||
(cur->name != NULL) &&
|
||||
(cur->name[0] != 'p')) /* p, pre, param */
|
||||
xmlOutputBufferWriteString(buf, "\n");
|
||||
cur = cur->children;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((format) && (cur->next != NULL) &&
|
||||
(info != NULL) && (!info->isinline)) {
|
||||
if ((cur->next->type != HTML_TEXT_NODE) &&
|
||||
(cur->next->type != HTML_ENTITY_REF_NODE) &&
|
||||
(cur->parent != NULL) &&
|
||||
(cur->parent->name != NULL) &&
|
||||
(cur->parent->name[0] != 'p')) /* p, pre, param */
|
||||
xmlOutputBufferWriteString(buf, "\n");
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case XML_ATTRIBUTE_NODE:
|
||||
htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
|
||||
break;
|
||||
|
||||
case HTML_TEXT_NODE:
|
||||
if (cur->content == NULL)
|
||||
break;
|
||||
if (((cur->name == (const xmlChar *)xmlStringText) ||
|
||||
(cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
|
||||
((cur->parent == NULL) ||
|
||||
((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
|
||||
(xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
|
||||
xmlChar *buffer;
|
||||
|
||||
buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
|
||||
if (buffer != NULL) {
|
||||
xmlOutputBufferWriteString(buf, (const char *)buffer);
|
||||
xmlFree(buffer);
|
||||
}
|
||||
} else {
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->content);
|
||||
}
|
||||
break;
|
||||
|
||||
case HTML_COMMENT_NODE:
|
||||
if (cur->content != NULL) {
|
||||
xmlOutputBufferWriteString(buf, "<!--");
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->content);
|
||||
xmlOutputBufferWriteString(buf, "-->");
|
||||
}
|
||||
break;
|
||||
|
||||
case HTML_PI_NODE:
|
||||
if (cur->name != NULL) {
|
||||
xmlOutputBufferWriteString(buf, "<?");
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
||||
if (cur->content != NULL) {
|
||||
xmlOutputBufferWriteString(buf, " ");
|
||||
xmlOutputBufferWriteString(buf,
|
||||
(const char *)cur->content);
|
||||
}
|
||||
xmlOutputBufferWriteString(buf, ">");
|
||||
}
|
||||
break;
|
||||
|
||||
case HTML_ENTITY_REF_NODE:
|
||||
xmlOutputBufferWriteString(buf, "&");
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
||||
xmlOutputBufferWriteString(buf, ";");
|
||||
break;
|
||||
|
||||
case HTML_PRESERVE_NODE:
|
||||
if (cur->content != NULL) {
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->content);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
while (1) {
|
||||
if (cur == root)
|
||||
return;
|
||||
if (cur->next != NULL) {
|
||||
cur = cur->next;
|
||||
break;
|
||||
}
|
||||
|
||||
cur = cur->parent;
|
||||
|
||||
if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
|
||||
(cur->type == XML_DOCUMENT_NODE)) {
|
||||
xmlOutputBufferWriteString(buf, "\n");
|
||||
} else {
|
||||
if ((format) && (cur->ns == NULL))
|
||||
info = htmlTagLookup(cur->name);
|
||||
else
|
||||
info = NULL;
|
||||
|
||||
if ((format) && (info != NULL) && (!info->isinline) &&
|
||||
(cur->last->type != HTML_TEXT_NODE) &&
|
||||
(cur->last->type != HTML_ENTITY_REF_NODE) &&
|
||||
(cur->children != cur->last) &&
|
||||
(cur->name != NULL) &&
|
||||
(cur->name[0] != 'p')) /* p, pre, param */
|
||||
xmlOutputBufferWriteString(buf, "\n");
|
||||
|
||||
xmlOutputBufferWriteString(buf, "</");
|
||||
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
|
||||
xmlOutputBufferWriteString(buf, ":");
|
||||
}
|
||||
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
||||
xmlOutputBufferWriteString(buf, ">");
|
||||
|
||||
if ((format) && (info != NULL) && (!info->isinline) &&
|
||||
(cur->next != NULL)) {
|
||||
if ((cur->next->type != HTML_TEXT_NODE) &&
|
||||
(cur->next->type != HTML_ENTITY_REF_NODE) &&
|
||||
(cur->parent != NULL) &&
|
||||
(cur->parent->name != NULL) &&
|
||||
(cur->parent->name[0] != 'p')) /* p, pre, param */
|
||||
xmlOutputBufferWriteString(buf, "\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1020,26 +999,7 @@ htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
||||
void
|
||||
htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
|
||||
const char *encoding, int format) {
|
||||
int type;
|
||||
|
||||
xmlInitParser();
|
||||
|
||||
if ((buf == NULL) || (cur == NULL))
|
||||
return;
|
||||
|
||||
/*
|
||||
* force to output the stuff as HTML, especially for entities
|
||||
*/
|
||||
type = cur->type;
|
||||
cur->type = XML_HTML_DOCUMENT_NODE;
|
||||
if (cur->intSubset != NULL) {
|
||||
htmlDtdDumpOutput(buf, cur, NULL);
|
||||
}
|
||||
if (cur->children != NULL) {
|
||||
htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
|
||||
}
|
||||
xmlOutputBufferWriteString(buf, "\n");
|
||||
cur->type = (xmlElementType) type;
|
||||
htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, encoding, format);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1053,7 +1013,7 @@ htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
|
||||
void
|
||||
htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
|
||||
const char *encoding) {
|
||||
htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
|
||||
htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, encoding, 1);
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
|
Reference in New Issue
Block a user