diff --git a/HTMLparser.c b/HTMLparser.c
index 3c5e08ca..bc543303 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -36,6 +36,7 @@
#include
#include "private/buf.h"
+#include "private/dict.h"
#include "private/enc.h"
#include "private/error.h"
#include "private/html.h"
@@ -2356,10 +2357,11 @@ htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
* Returns the Tag Name parsed or NULL
*/
-static const xmlChar *
+static xmlHashedString
htmlParseHTMLName(htmlParserCtxtPtr ctxt, int attr) {
+ xmlHashedString ret;
xmlChar buf[HTML_PARSER_BUFFER_SIZE];
- const xmlChar *ret, *in;
+ const xmlChar *in;
size_t avail;
int eof = PARSER_PROGRESSIVE(ctxt);
int nbchar = 0;
@@ -2436,8 +2438,8 @@ htmlParseHTMLName(htmlParserCtxtPtr ctxt, int attr) {
SHRINK;
- ret = xmlDictLookup(ctxt->dict, buf, nbchar);
- if (ret == NULL)
+ ret = xmlDictLookupHashed(ctxt->dict, buf, nbchar);
+ if (ret.name == NULL)
htmlErrMemory(ctxt);
return(ret);
@@ -3514,15 +3516,15 @@ bogus:
* Returns the attribute name, and the value in *value.
*/
-static const xmlChar *
+static xmlHashedString
htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
- const xmlChar *name;
+ xmlHashedString hname;
xmlChar *val = NULL;
*value = NULL;
- name = htmlParseHTMLName(ctxt, 1);
- if (name == NULL)
- return(NULL);
+ hname = htmlParseHTMLName(ctxt, 1);
+ if (hname.name == NULL)
+ return(hname);
/*
* read the value
@@ -3535,7 +3537,7 @@ htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
}
*value = val;
- return(name);
+ return(hname);
}
/**
@@ -3617,6 +3619,48 @@ htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {
}
+/**
+ * htmlAttrHashInsert:
+ * @ctxt: parser context (provides the attrHash table and the atts array)
+ * @size: number of buckets in the hash table (assumed to be a power of two)
+ * @name: attribute name (compared by pointer, not by content)
+ * @hashValue: hash value of name
+ * @aindex: index of the name in ctxt->atts (a multiple of 2)
+ *
+ * Looks up @name in the hash table and records @aindex if it is absent.
+ *
+ * Returns INT_MAX if no attribute with this name was found (@aindex is
+ * then stored), otherwise the index of the attribute found earlier.
+ */
+static int
+htmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
+ unsigned hashValue, int aindex) {
+ xmlAttrHashBucket *table = ctxt->attrHash;
+ xmlAttrHashBucket *bucket;
+ unsigned hindex;
+
+ hindex = hashValue & (size - 1); /* mask works because the caller sizes the table as 4, 8, 16, ... */
+ bucket = &table[hindex];
+
+ while (bucket->index >= 0) { /* a negative index marks an empty bucket */
+ const xmlChar **atts = &ctxt->atts[bucket->index];
+
+ if (name == atts[0]) /* names come from the parser dictionary, so pointers are compared */
+ return(bucket->index);
+
+ hindex++; /* linear probing: try the next bucket */
+ bucket++;
+ if (hindex >= size) { /* wrap around to the start of the table */
+ hindex = 0;
+ bucket = table;
+ }
+ }
+
+ bucket->index = aindex; /* claim the first empty bucket for this attribute */
+
+ return(INT_MAX);
+}
+
/**
* htmlParseStartTag:
* @ctxt: an HTML parser context
@@ -3657,7 +3701,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
maxatts = ctxt->maxatts;
GROW;
- name = htmlParseHTMLName(ctxt, 0);
+ name = htmlParseHTMLName(ctxt, 0).name;
if (name == NULL)
return;
if (xmlStrEqual(name, BAD_CAST"meta"))
@@ -3717,6 +3761,8 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
(CUR != '>') &&
((CUR != '/') || (NXT(1) != '>')) &&
(PARSER_STOPPED(ctxt) == 0)) {
+ xmlHashedString hattname;
+
/* unexpected-solidus-in-tag */
if (CUR == '/') {
SKIP(1);
@@ -3724,55 +3770,50 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
continue;
}
GROW;
- attname = htmlParseAttribute(ctxt, &attvalue);
+ hattname = htmlParseAttribute(ctxt, &attvalue);
+ attname = hattname.name;
+
if (attname != NULL) {
-
- /*
- * Well formedness requires at most one declaration of an attribute
- */
- for (i = 0; i < nbatts;i += 2) {
- if (xmlStrEqual(atts[i], attname)) {
- if (attvalue != NULL)
- xmlFree(attvalue);
- goto failed;
- }
- }
-
/*
* Add the pair to atts
*/
- if (atts == NULL) {
- maxatts = 22; /* allow for 10 attrs by default */
- atts = (const xmlChar **)
- xmlMalloc(maxatts * sizeof(xmlChar *));
- if (atts == NULL) {
- htmlErrMemory(ctxt);
- if (attvalue != NULL)
- xmlFree(attvalue);
- goto failed;
- }
- ctxt->atts = atts;
- ctxt->maxatts = maxatts;
- } else if (nbatts + 4 > maxatts) {
- const xmlChar **n;
+ if (nbatts + 4 > maxatts) {
+ const xmlChar **tmp;
+ unsigned *utmp;
+ size_t newSize = maxatts ? maxatts * 2 : 22;
- maxatts *= 2;
- n = (const xmlChar **) xmlRealloc((void *) atts,
- maxatts * sizeof(const xmlChar *));
- if (n == NULL) {
+ tmp = xmlMalloc(newSize * sizeof(tmp[0]));
+ if (tmp == NULL) {
htmlErrMemory(ctxt);
if (attvalue != NULL)
xmlFree(attvalue);
goto failed;
}
- atts = n;
+
+ utmp = xmlRealloc(ctxt->attallocs,
+ newSize / 2 * sizeof(utmp[0]));
+ if (utmp == NULL) {
+ htmlErrMemory(ctxt);
+ if (attvalue != NULL)
+ xmlFree(attvalue);
+ xmlFree(tmp);
+ goto failed;
+ }
+
+ if (maxatts > 0)
+ memcpy(tmp, atts, maxatts * sizeof(tmp[0]));
+ xmlFree(atts);
+
+ atts = tmp;
+ maxatts = newSize;
ctxt->atts = atts;
+ ctxt->attallocs = utmp;
ctxt->maxatts = maxatts;
}
+
+ ctxt->attallocs[nbatts/2] = hattname.hashValue;
atts[nbatts++] = attname;
atts[nbatts++] = attvalue;
- atts[nbatts] = NULL;
- atts[nbatts + 1] = NULL;
}
else {
if (attvalue != NULL)
@@ -3789,10 +3830,65 @@ failed:
}
/*
- * Handle specific association to the META tag
+ * Remove duplicate attributes, keeping the first occurrence of each name.
*/
- if (meta && (nbatts != 0))
- htmlCheckMeta(ctxt, atts);
+ if (nbatts > 2) {
+ unsigned attrHashSize;
+ int j, k;
+
+ attrHashSize = 4;
+ while (attrHashSize / 2 < (unsigned) nbatts / 2)
+ attrHashSize *= 2;
+
+ if (attrHashSize > ctxt->attrHashMax) {
+ xmlAttrHashBucket *tmp;
+
+ tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
+ if (tmp == NULL) {
+ htmlErrMemory(ctxt);
+ goto done;
+ }
+
+ ctxt->attrHash = tmp;
+ ctxt->attrHashMax = attrHashSize;
+ }
+
+ memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
+
+ for (i = 0, j = 0, k = 0; i < nbatts; i += 2, k++) {
+ unsigned hashValue;
+ int res;
+
+ attname = atts[i];
+ hashValue = ctxt->attallocs[k] | 0x80000000;
+
+ res = htmlAttrHashInsert(ctxt, attrHashSize, attname,
+ hashValue, j);
+ if (res < 0)
+ continue;
+
+ if (res == INT_MAX) {
+ atts[j] = atts[i];
+ atts[j+1] = atts[i+1];
+ j += 2;
+ } else {
+ xmlFree((xmlChar *) atts[i+1]);
+ }
+ }
+
+ nbatts = j;
+ }
+
+ if (nbatts > 0) {
+ atts[nbatts] = NULL;
+ atts[nbatts + 1] = NULL;
+
+ /*
+ * Handle specific association to the META tag
+ */
+ if (meta)
+ htmlCheckMeta(ctxt, atts);
+ }
/*
* SAX: Start of Element !
@@ -3857,7 +3953,7 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt)
return;
}
- name = htmlParseHTMLName(ctxt, 0);
+ name = htmlParseHTMLName(ctxt, 0).name;
if (name == NULL)
return;
diff --git a/include/private/parser.h b/include/private/parser.h
index 6b585200..79a1bf67 100644
--- a/include/private/parser.h
+++ b/include/private/parser.h
@@ -46,6 +46,10 @@
(((ctxt)->input->entity != NULL) && \
((ctxt)->input->entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)))
+struct _xmlAttrHashBucket {
+ int index; /* position in the attribute array; the HTML parser treats a negative value as an empty bucket */
+};
+
XML_HIDDEN void
xmlCtxtVErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
xmlParserErrors code, xmlErrorLevel level,
diff --git a/parser.c b/parser.c
index 222410f2..8bb5fee1 100644
--- a/parser.c
+++ b/parser.c
@@ -116,10 +116,6 @@ struct _xmlParserNsData {
int minNsIndex;
};
-struct _xmlAttrHashBucket {
- int index;
-};
-
static int
xmlParseElementStart(xmlParserCtxtPtr ctxt);