1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2024-10-26 12:25:09 +03:00

parser: Use hash tables to avoid quadratic behavior

Use a hash table to look up namespaces by prefix. The hash table stores
an index into the namespace table. Auxiliary data for namespaces is
stored in a separate array alongside the main namespace table.

Use a hash table to verify attribute uniqueness. The hash table stores
an index into the attribute table.

Reuse hash values from the dictionary to avoid computing them twice.

See #346.
This commit is contained in:
Nick Wellnhofer 2023-09-29 00:18:44 +02:00
parent e48f3d8e0a
commit e0dd330b8f
5 changed files with 1051 additions and 374 deletions

11
SAX2.c
View File

@ -1870,8 +1870,12 @@ xmlSAX2AttributeNs(xmlParserCtxtPtr ctxt,
/*
* Note: if prefix == NULL, the attribute is not in the default namespace
*/
if (prefix != NULL)
if (prefix != NULL) {
namespace = xmlParserNsLookupSax(ctxt, prefix);
if ((namespace == NULL) && (xmlStrEqual(prefix, BAD_CAST "xml"))) {
namespace = xmlSearchNs(ctxt->myDoc, ctxt->node, prefix);
}
}
/*
* allocate the node
@ -2201,6 +2205,9 @@ xmlSAX2StartElementNs(void *ctx,
*/
continue;
}
xmlParserNsUpdateSax(ctxt, pref, ns);
#ifdef LIBXML_VALID_ENABLED
if ((!ctxt->html) && ctxt->validate && ctxt->wellFormed &&
ctxt->myDoc && ctxt->myDoc->intSubset) {
@ -2242,7 +2249,7 @@ xmlSAX2StartElementNs(void *ctx,
* Note that, if prefix is NULL, this searches for the default Ns
*/
if ((URI != NULL) && (ret->ns == NULL)) {
ret->ns = xmlSearchNs(ctxt->myDoc, parent, prefix);
ret->ns = xmlParserNsLookupSax(ctxt, prefix);
if ((ret->ns == NULL) && (xmlStrEqual(prefix, BAD_CAST "xml"))) {
ret->ns = xmlSearchNs(ctxt->myDoc, ret, prefix);
}

View File

@ -172,6 +172,8 @@ typedef enum {
} xmlParserMode;
typedef struct _xmlStartTag xmlStartTag;
typedef struct _xmlParserNsData xmlParserNsData;
typedef struct _xmlAttrHashBucket xmlAttrHashBucket;
/**
* xmlParserCtxt:
@ -282,7 +284,7 @@ struct _xmlParserCtxt {
int nsNr; /* the number of inherited namespaces */
int nsMax; /* the size of the arrays */
const xmlChar * *nsTab; /* the array of prefix/namespace name */
int *attallocs; /* which attribute were allocated */
unsigned *attallocs; /* which attribute were allocated */
xmlStartTag *pushTab; /* array of data for push */
xmlHashTablePtr attsDefault; /* defaulted attributes if any */
xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */
@ -319,6 +321,10 @@ struct _xmlParserCtxt {
unsigned short nbErrors; /* number of errors */
unsigned short nbWarnings; /* number of warnings */
unsigned maxAmpl; /* maximum amplification factor */
xmlParserNsData *nsdb; /* namespace database */
unsigned attrHashMax; /* allocated size */
xmlAttrHashBucket *attrHash; /* attribute hash table */
};
/**

View File

@ -49,4 +49,16 @@ xmlDetectEncoding(xmlParserCtxtPtr ctxt);
XML_HIDDEN void
xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding);
/*
 * Free a namespace database. xmlFreeParserCtxt calls this on ctxt->nsdb
 * when that pointer is non-NULL.
 *
 * Library-internal, so it is marked XML_HIDDEN like every other
 * declaration in this private header.
 */
XML_HIDDEN void
xmlParserNsFree(xmlParserNsData *nsdb);
/*
 * These functions allow SAX handlers to attach extra data to namespaces
 * efficiently and should be made public.
 */
/*
 * Attach saxData to the namespace identified by prefix in ctxt.
 * The SAX2 element handler passes the xmlNs node it has just created.
 * NOTE(review): the meaning of the int return value is not visible from
 * this header -- presumably a status code; confirm in parser.c.
 */
XML_HIDDEN int
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
void *saxData);
/*
 * Return the data attached to the namespace identified by prefix.
 * SAX2 callers treat a NULL result as "not found" and fall back to
 * xmlSearchNs for the "xml" prefix.
 * NOTE(review): callers may pass a NULL prefix, presumably meaning the
 * default namespace -- confirm in parser.c.
 */
XML_HIDDEN void *
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix);
#endif /* XML_PARSER_H_PRIVATE__ */

1370
parser.c

File diff suppressed because it is too large Load Diff

View File

@ -2061,7 +2061,9 @@ xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb);
if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash);
if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
if (ctxt->attsDefault != NULL)