1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2024-12-23 17:33:50 +03:00

SAX2: Split out legacy SAX1 handling

Split xmlSAX2StartElement into two functions handling legacy SAX1 and
HTML.
This commit is contained in:
Nick Wellnhofer 2024-06-16 23:21:55 +02:00
parent 2b0c4abb1f
commit faae3a91ce
2 changed files with 252 additions and 192 deletions

440
SAX2.c
View File

@ -905,7 +905,7 @@ xmlSAX2AppendChild(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
}
}
#if defined(LIBXML_SAX1_ENABLED) || defined(LIBXML_HTML_ENABLED) || defined(LIBXML_WRITER_ENABLED) || defined(LIBXML_LEGACY_ENABLED)
#if defined(LIBXML_SAX1_ENABLED)
/**
* xmlNsErrMsg:
* @ctxt: an XML parser context
@ -925,95 +925,55 @@ xmlNsErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
}
/**
* xmlSAX2AttributeInternal:
* xmlSAX1Attribute:
* @ctx: the user data (XML parser context)
* @fullname: The attribute name, including namespace prefix
* @value: The attribute value
* @prefix: the prefix on the element node
*
* Handle an attribute that has been read by the parser.
* The default handling is to convert the attribute into a
* DOM subtree and paste it in a new xmlAttr element added to
* the element.
*
* Deprecated SAX1 interface.
*/
static void
xmlSAX2AttributeInternal(void *ctx, const xmlChar *fullname,
const xmlChar *value, const xmlChar *prefix ATTRIBUTE_UNUSED)
xmlSAX1Attribute(xmlParserCtxtPtr ctxt, const xmlChar *fullname,
const xmlChar *value, const xmlChar *prefix)
{
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
xmlAttrPtr ret;
xmlChar *name;
xmlChar *ns;
xmlChar *nval;
xmlNsPtr namespace;
if (ctxt->html) {
name = xmlStrdup(fullname);
ns = NULL;
namespace = NULL;
} else {
/*
* Split the full name into a namespace prefix and the tag name
*/
name = xmlSplitQName(ctxt, fullname, &ns);
if ((name != NULL) && (name[0] == 0)) {
if (xmlStrEqual(ns, BAD_CAST "xmlns")) {
xmlNsErrMsg(ctxt, XML_ERR_NS_DECL_ERROR,
"invalid namespace declaration '%s'\n",
fullname, NULL);
} else {
xmlNsWarnMsg(ctxt, XML_WAR_NS_COLUMN,
"Avoid attribute ending with ':' like '%s'\n",
fullname, NULL);
}
if (ns != NULL)
xmlFree(ns);
ns = NULL;
xmlFree(name);
name = xmlStrdup(fullname);
}
}
if (name == NULL) {
xmlSAX2ErrMemory(ctxt);
if (ns != NULL)
xmlFree(ns);
return;
}
#ifdef LIBXML_HTML_ENABLED
if ((ctxt->html) &&
(value == NULL) && (htmlIsBooleanAttr(fullname))) {
nval = xmlStrdup(fullname);
if (nval == NULL)
xmlSAX2ErrMemory(ctxt);
value = (const xmlChar *) nval;
} else
#endif
{
#ifdef LIBXML_VALID_ENABLED
/*
* Do the last stage of the attribute normalization
* Needed for HTML too:
* http://www.w3.org/TR/html4/types.html#h-6.2
*/
ctxt->vctxt.valid = 1;
nval = xmlValidCtxtNormalizeAttributeValue(&ctxt->vctxt,
ctxt->myDoc, ctxt->node,
fullname, value);
if (ctxt->vctxt.valid != 1) {
ctxt->valid = 0;
/*
* Split the full name into a namespace prefix and the tag name
*/
name = xmlSplitQName(ctxt, fullname, &ns);
if ((name != NULL) && (name[0] == 0)) {
if (xmlStrEqual(ns, BAD_CAST "xmlns")) {
xmlNsErrMsg(ctxt, XML_ERR_NS_DECL_ERROR,
"invalid namespace declaration '%s'\n",
fullname, NULL);
} else {
xmlNsWarnMsg(ctxt, XML_WAR_NS_COLUMN,
"Avoid attribute ending with ':' like '%s'\n",
fullname, NULL);
}
if (ns != NULL)
xmlFree(ns);
ns = NULL;
xmlFree(name);
name = xmlStrdup(fullname);
if (name == NULL) {
xmlSAX2ErrMemory(ctxt);
if (ns != NULL)
xmlFree(ns);
return;
}
if (nval != NULL)
value = nval;
#else
nval = NULL;
#endif /* LIBXML_VALID_ENABLED */
}
/*
* Check whether it's a namespace definition
*/
if ((!ctxt->html) && (ns == NULL) &&
if ((ns == NULL) &&
(name[0] == 'x') && (name[1] == 'm') && (name[2] == 'l') &&
(name[3] == 'n') && (name[4] == 's') && (name[5] == 0)) {
xmlNsPtr nsret;
@ -1029,8 +989,6 @@ xmlSAX2AttributeInternal(void *ctx, const xmlChar *fullname,
xmlSAX2ErrMemory(ctxt);
if (name != NULL)
xmlFree(name);
if (nval != NULL)
xmlFree(nval);
return;
}
} else {
@ -1073,14 +1031,11 @@ xmlSAX2AttributeInternal(void *ctx, const xmlChar *fullname,
#endif /* LIBXML_VALID_ENABLED */
if (name != NULL)
xmlFree(name);
if (nval != NULL)
xmlFree(nval);
if (val != value)
xmlFree(val);
return;
}
if ((!ctxt->html) &&
(ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') &&
if ((ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') &&
(ns[3] == 'n') && (ns[4] == 's') && (ns[5] == 0)) {
xmlNsPtr nsret;
xmlChar *val;
@ -1096,8 +1051,6 @@ xmlSAX2AttributeInternal(void *ctx, const xmlChar *fullname,
xmlFree(ns);
if (name != NULL)
xmlFree(name);
if (nval != NULL)
xmlFree(nval);
return;
}
} else {
@ -1145,8 +1098,6 @@ xmlSAX2AttributeInternal(void *ctx, const xmlChar *fullname,
#endif /* LIBXML_VALID_ENABLED */
if (name != NULL)
xmlFree(name);
if (nval != NULL)
xmlFree(nval);
if (val != value)
xmlFree(val);
return;
@ -1196,7 +1147,7 @@ xmlSAX2AttributeInternal(void *ctx, const xmlChar *fullname,
goto error;
}
if ((ctxt->replaceEntities == 0) && (!ctxt->html)) {
if (ctxt->replaceEntities == 0) {
if (xmlNodeParseContent((xmlNodePtr) ret, value, INT_MAX) < 0)
xmlSAX2ErrMemory(ctxt);
} else if (value != NULL) {
@ -1210,7 +1161,7 @@ xmlSAX2AttributeInternal(void *ctx, const xmlChar *fullname,
}
#ifdef LIBXML_VALID_ENABLED
if ((!ctxt->html) && ctxt->validate && ctxt->wellFormed &&
if (ctxt->validate && ctxt->wellFormed &&
ctxt->myDoc && ctxt->myDoc->intSubset) {
/*
@ -1287,8 +1238,6 @@ xmlSAX2AttributeInternal(void *ctx, const xmlChar *fullname,
}
error:
if (nval != NULL)
xmlFree(nval);
if (ns != NULL)
xmlFree(ns);
}
@ -1297,6 +1246,8 @@ error:
* xmlCheckDefaultedAttributes:
*
* Check defaulted attributes from the DTD
*
* Deprecated SAX1 interface.
*/
static void
xmlCheckDefaultedAttributes(xmlParserCtxtPtr ctxt, const xmlChar *name,
@ -1429,8 +1380,8 @@ process_external_subset:
}
}
if (att == NULL) {
xmlSAX2AttributeInternal(ctxt, fulln,
attr->defaultValue, prefix);
xmlSAX1Attribute(ctxt, fulln,
attr->defaultValue, prefix);
}
if ((fulln != fn) && (fulln != attr->name))
xmlFree(fulln);
@ -1449,15 +1400,17 @@ process_external_subset:
}
/**
* xmlSAX2StartElement:
* xmlSAX1StartElement:
* @ctx: the user data (XML parser context)
* @fullname: The element name, including namespace prefix
* @atts: An array of name/value attributes pairs, NULL terminated
*
* called when an opening tag has been processed.
*
* Deprecated SAX1 interface.
*/
void
xmlSAX2StartElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
static void
xmlSAX1StartElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
{
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
xmlNodePtr ret;
@ -1467,7 +1420,7 @@ xmlSAX2StartElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
xmlChar *prefix;
const xmlChar *att;
const xmlChar *value;
int i;
int i, res;
if ((ctx == NULL) || (fullname == NULL) || (ctxt->myDoc == NULL)) return;
@ -1485,18 +1438,13 @@ xmlSAX2StartElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
ctxt->validate = 0;
}
if (ctxt->html) {
prefix = NULL;
name = xmlStrdup(fullname);
} else {
/*
* Split the full name into a namespace prefix and the tag name
*/
name = xmlSplitQName(ctxt, fullname, &prefix);
if (name == NULL) {
xmlSAX2ErrMemory(ctxt);
return;
}
/*
* Split the full name into a namespace prefix and the tag name
*/
name = xmlSplitQName(ctxt, fullname, &prefix);
if (name == NULL) {
xmlSAX2ErrMemory(ctxt);
return;
}
/*
@ -1533,64 +1481,60 @@ xmlSAX2StartElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
return;
}
if (!ctxt->html) {
int res;
/*
* Insert all the defaulted attributes from the DTD especially
* namespaces
*/
if ((ctxt->myDoc->intSubset != NULL) ||
(ctxt->myDoc->extSubset != NULL)) {
xmlCheckDefaultedAttributes(ctxt, name, prefix, atts);
}
/*
* Insert all the defaulted attributes from the DTD especially
* namespaces
*/
if ((ctxt->myDoc->intSubset != NULL) ||
(ctxt->myDoc->extSubset != NULL)) {
xmlCheckDefaultedAttributes(ctxt, name, prefix, atts);
}
/*
* process all the attributes whose name start with "xmlns"
*/
if (atts != NULL) {
i = 0;
att = atts[i++];
value = atts[i++];
while ((att != NULL) && (value != NULL)) {
if ((att[0] == 'x') && (att[1] == 'm') && (att[2] == 'l') &&
(att[3] == 'n') && (att[4] == 's'))
xmlSAX1Attribute(ctxt, att, value, prefix);
/*
* process all the attributes whose name start with "xmlns"
*/
if (atts != NULL) {
i = 0;
att = atts[i++];
value = atts[i++];
while ((att != NULL) && (value != NULL)) {
if ((att[0] == 'x') && (att[1] == 'm') && (att[2] == 'l') &&
(att[3] == 'n') && (att[4] == 's'))
xmlSAX2AttributeInternal(ctxt, att, value, prefix);
att = atts[i++];
value = atts[i++];
}
}
}
/*
* Search the namespace, note that since the attributes have been
* processed, the local namespaces are available.
*/
res = xmlSearchNsSafe(ret, prefix, &ns);
/*
* Search the namespace, note that since the attributes have been
* processed, the local namespaces are available.
*/
res = xmlSearchNsSafe(ret, prefix, &ns);
if (res < 0)
xmlSAX2ErrMemory(ctxt);
if ((ns == NULL) && (parent != NULL)) {
res = xmlSearchNsSafe(parent, prefix, &ns);
if (res < 0)
xmlSAX2ErrMemory(ctxt);
if ((ns == NULL) && (parent != NULL)) {
res = xmlSearchNsSafe(parent, prefix, &ns);
if (res < 0)
xmlSAX2ErrMemory(ctxt);
}
if ((prefix != NULL) && (ns == NULL)) {
xmlNsWarnMsg(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
"Namespace prefix %s is not defined\n",
prefix, NULL);
ns = xmlNewNs(ret, NULL, prefix);
if (ns == NULL)
xmlSAX2ErrMemory(ctxt);
}
/*
* set the namespace node, making sure that if the default namespace
* is unbound on a parent we simply keep it NULL
*/
if ((ns != NULL) && (ns->href != NULL) &&
((ns->href[0] != 0) || (ns->prefix != NULL)))
xmlSetNs(ret, ns);
}
if ((prefix != NULL) && (ns == NULL)) {
xmlNsWarnMsg(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
"Namespace prefix %s is not defined\n",
prefix, NULL);
ns = xmlNewNs(ret, NULL, prefix);
if (ns == NULL)
xmlSAX2ErrMemory(ctxt);
}
/*
* set the namespace node, making sure that if the default namespace
* is unbound on a parent we simply keep it NULL
*/
if ((ns != NULL) && (ns->href != NULL) &&
((ns->href[0] != 0) || (ns->prefix != NULL)))
xmlSetNs(ret, ns);
/*
* process all the other attributes
@ -1599,25 +1543,17 @@ xmlSAX2StartElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
i = 0;
att = atts[i++];
value = atts[i++];
if (ctxt->html) {
while (att != NULL) {
xmlSAX2AttributeInternal(ctxt, att, value, NULL);
att = atts[i++];
value = atts[i++];
}
} else {
while ((att != NULL) && (value != NULL)) {
if ((att[0] != 'x') || (att[1] != 'm') || (att[2] != 'l') ||
(att[3] != 'n') || (att[4] != 's'))
xmlSAX2AttributeInternal(ctxt, att, value, NULL);
while ((att != NULL) && (value != NULL)) {
if ((att[0] != 'x') || (att[1] != 'm') || (att[2] != 'l') ||
(att[3] != 'n') || (att[4] != 's'))
xmlSAX1Attribute(ctxt, att, value, NULL);
/*
* Next ones
*/
att = atts[i++];
value = atts[i++];
}
}
/*
* Next ones
*/
att = atts[i++];
value = atts[i++];
}
}
#ifdef LIBXML_VALID_ENABLED
@ -1643,6 +1579,143 @@ xmlSAX2StartElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
xmlFree(prefix);
}
#endif /* LIBXML_SAX1_ENABLED */
#ifdef LIBXML_HTML_ENABLED
/*
 * xmlSAX2HtmlAttribute:
 * @ctxt:  parser context
 * @fullname:  the attribute name
 * @value:  the attribute value, may be NULL
 *
 * Create an attribute on the node currently being parsed (ctxt->node)
 * and, if a value is available, attach it as a single text child.
 *
 * When no value is given and htmlIsBooleanAttr() accepts the name,
 * the attribute name itself is used as the value. When no value is
 * given otherwise, the attribute is left without children.
 *
 * Allocation failures are reported through xmlSAX2ErrMemory().
 */
static void
xmlSAX2HtmlAttribute(xmlParserCtxtPtr ctxt, const xmlChar *fullname,
                     const xmlChar *value) {
    xmlAttrPtr attr;
    xmlNodePtr text;
    xmlChar *attrName;
    xmlChar *implied = NULL;

    attrName = xmlStrdup(fullname);
    if (attrName == NULL)
        goto mem_error;

    /*
     * attrName is not freed here on either path.
     * NOTE(review): presumably the "EatName" variant takes ownership
     * of the string even when it fails - confirm against tree.c.
     */
    attr = xmlNewNsPropEatName(ctxt->node, NULL, attrName, NULL);
    if (attr == NULL)
        goto mem_error;

    if (value == NULL) {
        /* Only boolean attributes get an implied value. */
        if (!htmlIsBooleanAttr(fullname))
            return;

        implied = xmlStrdup(fullname);
        if (implied == NULL)
            goto mem_error;
        value = implied;
    }

    text = xmlNewDocText(ctxt->myDoc, value);
    attr->children = text;
    if (text == NULL) {
        xmlSAX2ErrMemory(ctxt);
    } else {
        attr->last = text;
        text->parent = (xmlNodePtr) attr;
    }

    /* The temporary copy is released once the text node exists. */
    if (implied != NULL)
        xmlFree(implied);
    return;

mem_error:
    xmlSAX2ErrMemory(ctxt);
}
/**
 * xmlSAX2StartHtmlElement:
 * @ctxt:  parser context
 * @fullname:  The element name, including namespace prefix
 * @atts:  An array of name/value attributes pairs, NULL terminated
 *
 * Called when an opening tag has been processed.
 *
 * Creates a new element node named @fullname in ctxt->myDoc, links it
 * into the tree with xmlSAX2AppendChild(), pushes it on the parser's
 * node stack and then adds one attribute per name/value pair in @atts.
 * A pair may carry a NULL value; the array ends at the first NULL name.
 *
 * Allocation failures are reported through xmlSAX2ErrMemory().
 */
static void
xmlSAX2StartHtmlElement(xmlParserCtxtPtr ctxt, const xmlChar *fullname,
                        const xmlChar **atts) {
    xmlNodePtr ret;
    xmlChar *name;
    int i;

    name = xmlStrdup(fullname);
    if (name == NULL) {
        xmlSAX2ErrMemory(ctxt);
        return;
    }

    /*
     * name is not freed here on either path.
     * NOTE(review): presumably the "EatName" variant takes ownership
     * of the string even when it fails - confirm against tree.c.
     */
    ret = xmlNewDocNodeEatName(ctxt->myDoc, NULL, name, NULL);
    if (ret == NULL) {
        xmlSAX2ErrMemory(ctxt);
        return;
    }
    ctxt->nodemem = -1;

    /*
     * Link the child element
     */
    xmlSAX2AppendChild(ctxt, ret);

    /*
     * We are parsing a new node. If it cannot be pushed, remove it
     * from the tree again so no half-registered element is left behind.
     */
    if (nodePush(ctxt, ret) < 0) {
        xmlUnlinkNode(ret);
        xmlFreeNode(ret);
        return;
    }

    if (atts != NULL) {
        /*
         * Read the value slot only after the name slot is known to be
         * non-NULL, so nothing past the terminating NULL is accessed.
         */
        for (i = 0; atts[i] != NULL; i += 2)
            xmlSAX2HtmlAttribute(ctxt, atts[i], atts[i + 1]);
    }
}
#endif /* LIBXML_HTML_ENABLED */
/**
 * xmlSAX2StartElement:
 * @ctx:  the user data (XML parser context)
 * @fullname:  The element name, including namespace prefix
 * @atts:  An array of name/value attributes pairs, NULL terminated
 *
 * Called when an opening tag has been processed.
 *
 * Used for HTML and SAX1. Does nothing when @ctx, @fullname or the
 * context's document is NULL. Otherwise the call is forwarded to the
 * HTML handler when ctxt->html is set, or to the SAX1 handler when it
 * is not, provided the matching support was compiled in.
 */
void
xmlSAX2StartElement(void *ctx, const xmlChar *fullname, const xmlChar **atts) {
    xmlParserCtxtPtr parser = (xmlParserCtxtPtr) ctx;

    /* atts is unused when neither handler is compiled in. */
    (void) atts;

    if (parser == NULL)
        return;
    if (fullname == NULL)
        return;
    if (parser->myDoc == NULL)
        return;

    if (parser->html) {
#ifdef LIBXML_HTML_ENABLED
        xmlSAX2StartHtmlElement(parser, fullname, atts);
#endif
    } else {
#ifdef LIBXML_SAX1_ENABLED
        xmlSAX1StartElement(parser, fullname, atts);
#endif
    }
}
/**
* xmlSAX2EndElement:
@ -1650,42 +1723,33 @@ xmlSAX2StartElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
* @name: The element name
*
* called when the end of an element has been detected.
*
* Used for HTML and SAX1.
*/
void
xmlSAX2EndElement(void *ctx, const xmlChar *name ATTRIBUTE_UNUSED)
{
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
if (ctx == NULL) return;
if (ctxt == NULL)
return;
ctxt->nodemem = -1;
#ifdef LIBXML_VALID_ENABLED
if (ctxt->validate && ctxt->wellFormed &&
#if defined(LIBXML_SAX1_ENABLED) && defined(LIBXML_VALID_ENABLED)
if (!ctxt->html && ctxt->validate && ctxt->wellFormed &&
ctxt->myDoc && ctxt->myDoc->intSubset)
ctxt->valid &= xmlValidateOneElement(&ctxt->vctxt, ctxt->myDoc,
ctxt->node);
#endif /* LIBXML_VALID_ENABLED */
#if defined(LIBXML_SAX1_ENABLED) || defined(LIBXML_HTML_ENABLED)
ctxt->nodemem = -1;
/*
* end of parsing of this node.
*/
nodePop(ctxt);
#endif
}
#else /* LIBXML_SAX1_ENABLED || LIBXML_HTML_ENABLED || LIBXML_LEGACY_ENABLED */
/** DOC_DISABLE */
void
xmlSAX2StartElement(void *ctx ATTRIBUTE_UNUSED,
const xmlChar *fullname ATTRIBUTE_UNUSED,
const xmlChar **atts ATTRIBUTE_UNUSED) {
}
void
xmlSAX2EndElement(void *ctx ATTRIBUTE_UNUSED,
const xmlChar *name ATTRIBUTE_UNUSED) {
}
/** DOC_ENABLE */
#endif /* LIBXML_SAX1_ENABLED || LIBXML_HTML_ENABLED || LIBXML_LEGACY_ENABLED */
/*
* xmlSAX2TextNode:

View File

@ -355,8 +355,6 @@ xmlNewTextWriterDoc(xmlDocPtr * doc, int compression)
memset(&saxHandler, '\0', sizeof(saxHandler));
xmlSAX2InitDefaultSAXHandler(&saxHandler, 1);
saxHandler.startDocument = xmlTextWriterStartDocumentCallback;
saxHandler.startElement = xmlSAX2StartElement;
saxHandler.endElement = xmlSAX2EndElement;
ctxt = xmlCreatePushParserCtxt(&saxHandler, NULL, NULL, 0, NULL);
if (ctxt == NULL) {
@ -424,8 +422,6 @@ xmlNewTextWriterTree(xmlDocPtr doc, xmlNodePtr node, int compression)
memset(&saxHandler, '\0', sizeof(saxHandler));
xmlSAX2InitDefaultSAXHandler(&saxHandler, 1);
saxHandler.startDocument = xmlTextWriterStartDocumentCallback;
saxHandler.startElement = xmlSAX2StartElement;
saxHandler.endElement = xmlSAX2EndElement;
ctxt = xmlCreatePushParserCtxt(&saxHandler, NULL, NULL, 0, NULL);
if (ctxt == NULL) {