From d99ddd9bd57a08fad891795de3b3f7910e4aec55 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sat, 5 Mar 2022 21:46:40 +0100 Subject: [PATCH] Improve buffer allocation scheme In most places, we really need the double-it scheme to avoid quadratic behavior. The hybrid scheme still can cause many reallocations and the bounded scheme doesn't seem to provide meaningful protection in xmlreader.c. --- parser.c | 1 + runsuite.c | 3 +++ tree.c | 7 +++++-- xmlIO.c | 5 +---- xmlreader.c | 13 +++++++++---- xmlsave.c | 4 ++++ 6 files changed, 23 insertions(+), 10 deletions(-) diff --git a/parser.c b/parser.c index f4707cbb..712328ca 100644 --- a/parser.c +++ b/parser.c @@ -8068,6 +8068,7 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { "xmlLoadEntityContent parameter error"); return(-1); } + xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT); input = xmlNewEntityInputStream(ctxt, entity); if (input == NULL) { diff --git a/runsuite.c b/runsuite.c index f7957ce1..483490a2 100644 --- a/runsuite.c +++ b/runsuite.c @@ -325,6 +325,7 @@ xsdIncorrectTestCase(xmlNodePtr cur) { fprintf(stderr, "out of memory !\n"); fatalError(); } + xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT); xmlNodeDump(buf, test->doc, test, 0, 0); pctxt = xmlRelaxNGNewMemParserCtxt((const char *)buf->content, buf->use); xmlRelaxNGSetParserErrors(pctxt, testErrorHandler, testErrorHandler, @@ -363,6 +364,7 @@ installResources(xmlNodePtr tst, const xmlChar *base) { fprintf(stderr, "out of memory !\n"); fatalError(); } + xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT); xmlNodeDump(buf, tst->doc, tst, 0, 0); while (tst != NULL) { @@ -458,6 +460,7 @@ xsdTestCase(xmlNodePtr tst) { fprintf(stderr, "out of memory !\n"); fatalError(); } + xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT); xmlNodeDump(buf, test->doc, test, 0, 0); pctxt = xmlRelaxNGNewMemParserCtxt((const char *)buf->content, buf->use); xmlRelaxNGSetParserErrors(pctxt, testErrorHandler, testErrorHandler, diff --git a/tree.c b/tree.c index 583a7ea7..e4425f10 100644 --- a/tree.c +++ b/tree.c @@ -1284,7 +1284,7 @@ xmlStringLenGetNodeList(const xmlDoc *doc, const xmlChar *value, int len) { buf = xmlBufCreateSize(0); if (buf == NULL) return(NULL); - xmlBufSetAllocationScheme(buf, XML_BUFFER_ALLOC_HYBRID); + xmlBufSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT); q = cur; while ((cur < end) && (*cur != 0)) { @@ -1505,7 +1505,7 @@ xmlStringGetNodeList(const xmlDoc *doc, const xmlChar *value) { buf = xmlBufCreateSize(0); if (buf == NULL) return(NULL); - xmlBufSetAllocationScheme(buf, XML_BUFFER_ALLOC_HYBRID); + xmlBufSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT); q = cur; while (*cur != 0) { @@ -5559,6 +5559,7 @@ xmlNodeGetContent(const xmlNode *cur) buf = xmlBufCreateSize(64); if (buf == NULL) return (NULL); + xmlBufSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT); xmlBufGetNodeContent(buf, cur); ret = xmlBufDetach(buf); xmlBufFree(buf); @@ -5584,6 +5585,7 @@ xmlNodeGetContent(const xmlNode *cur) buf = xmlBufCreate(); if (buf == NULL) return (NULL); + xmlBufSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT); xmlBufGetNodeContent(buf, cur); @@ -5606,6 +5608,7 @@ xmlNodeGetContent(const xmlNode *cur) buf = xmlBufCreate(); if (buf == NULL) return (NULL); + xmlBufSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT); xmlBufGetNodeContent(buf, (xmlNodePtr) cur); diff --git a/xmlIO.c b/xmlIO.c index a80ef11f..172044a5 100644 --- a/xmlIO.c +++ b/xmlIO.c @@ -2389,10 +2389,7 @@ xmlAllocOutputBuffer(xmlCharEncodingHandlerPtr encoder) { xmlFree(ret); return(NULL); } - - /* try to avoid a performance problem with Windows realloc() */ - if (xmlBufGetAllocationScheme(ret->buffer) == XML_BUFFER_ALLOC_EXACT) - xmlBufSetAllocationScheme(ret->buffer, XML_BUFFER_ALLOC_DOUBLEIT); + xmlBufSetAllocationScheme(ret->buffer, XML_BUFFER_ALLOC_DOUBLEIT); ret->encoder = encoder; if (encoder != NULL) { diff --git a/xmlreader.c b/xmlreader.c index 5d2250a6..ba95813b 100644 --- a/xmlreader.c +++ b/xmlreader.c @@ -1180,6 +1180,7 @@ xmlTextReaderCollectSiblings(xmlNodePtr node) buffer = xmlBufferCreate(); if (buffer == NULL) return NULL; + xmlBufferSetAllocationScheme(buffer, XML_BUFFER_ALLOC_DOUBLEIT); for ( ; node != NULL; node = node->next) { switch (node->type) { @@ -1633,11 +1634,14 @@ xmlTextReaderReadInnerXml(xmlTextReaderPtr reader ATTRIBUTE_UNUSED) buff = xmlBufferCreate(); if (buff == NULL) return NULL; + xmlBufferSetAllocationScheme(buff, XML_BUFFER_ALLOC_DOUBLEIT); for (cur_node = reader->node->children; cur_node != NULL; cur_node = cur_node->next) { /* XXX: Why is the node copied? */ node = xmlDocCopyNode(cur_node, doc, 1); + /* XXX: Why do we need a second buffer? */ buff2 = xmlBufferCreate(); + xmlBufferSetAllocationScheme(buff2, XML_BUFFER_ALLOC_DOUBLEIT); if (xmlNodeDump(buff2, doc, node, 0, 0) == -1) { xmlFreeNode(node); xmlBufferFree(buff2); @@ -1687,6 +1691,7 @@ xmlTextReaderReadOuterXml(xmlTextReaderPtr reader ATTRIBUTE_UNUSED) node = xmlDocCopyNode(node, doc, 1); } buff = xmlBufferCreate(); + xmlBufferSetAllocationScheme(buff, XML_BUFFER_ALLOC_DOUBLEIT); if (xmlNodeDump(buff, doc, node, 0, 0) == -1) { xmlFreeNode(node); xmlBufferFree(buff); @@ -2017,7 +2022,7 @@ xmlNewTextReader(xmlParserInputBufferPtr input, const char *URI) { } /* no operation on a reader should require a huge buffer */ xmlBufSetAllocationScheme(ret->buffer, - XML_BUFFER_ALLOC_BOUNDED); + XML_BUFFER_ALLOC_DOUBLEIT); ret->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); if (ret->sax == NULL) { xmlBufFree(ret->buffer); @@ -3555,7 +3560,7 @@ xmlTextReaderConstValue(xmlTextReaderPtr reader) { return (NULL); } xmlBufSetAllocationScheme(reader->buffer, - XML_BUFFER_ALLOC_BOUNDED); + XML_BUFFER_ALLOC_DOUBLEIT); } else xmlBufEmpty(reader->buffer); xmlBufGetNodeContent(reader->buffer, node); @@ -3565,7 +3570,7 @@ xmlTextReaderConstValue(xmlTextReaderPtr reader) { xmlBufFree(reader->buffer); reader->buffer = xmlBufCreateSize(100); xmlBufSetAllocationScheme(reader->buffer, - XML_BUFFER_ALLOC_BOUNDED); + XML_BUFFER_ALLOC_DOUBLEIT); ret = BAD_CAST ""; } return(ret); @@ -5079,7 +5084,7 @@ xmlTextReaderSetup(xmlTextReaderPtr reader, } /* no operation on a reader should require a huge buffer */ xmlBufSetAllocationScheme(reader->buffer, - XML_BUFFER_ALLOC_BOUNDED); + XML_BUFFER_ALLOC_DOUBLEIT); if (reader->sax == NULL) reader->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler)); if (reader->sax == NULL) { diff --git a/xmlsave.c b/xmlsave.c index 3addd652..7b557494 100644 --- a/xmlsave.c +++ b/xmlsave.c @@ -474,6 +474,7 @@ xmlBufDumpNotationTable(xmlBufPtr buf, xmlNotationTablePtr table) { */ return; } + xmlBufferSetAllocationScheme(buffer, XML_BUFFER_ALLOC_DOUBLEIT); xmlDumpNotationTable(buffer, table); xmlBufMergeBuffer(buf, buffer); } @@ -497,6 +498,7 @@ xmlBufDumpElementDecl(xmlBufPtr buf, xmlElementPtr elem) { */ return; } + xmlBufferSetAllocationScheme(buffer, XML_BUFFER_ALLOC_DOUBLEIT); xmlDumpElementDecl(buffer, elem); xmlBufMergeBuffer(buf, buffer); } @@ -520,6 +522,7 @@ xmlBufDumpAttributeDecl(xmlBufPtr buf, xmlAttributePtr attr) { */ return; } + xmlBufferSetAllocationScheme(buffer, XML_BUFFER_ALLOC_DOUBLEIT); xmlDumpAttributeDecl(buffer, attr); xmlBufMergeBuffer(buf, buffer); } @@ -542,6 +545,7 @@ xmlBufDumpEntityDecl(xmlBufPtr buf, xmlEntityPtr ent) { */ return; } + xmlBufferSetAllocationScheme(buffer, XML_BUFFER_ALLOC_DOUBLEIT); xmlDumpEntityDecl(buffer, ent); xmlBufMergeBuffer(buf, buffer); }