1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-03-09 04:58:16 +03:00

Merge duplicate code paths handling PE references

xmlParsePEReference is essentially a subset of
xmlParserHandlePEReference, so make xmlParserHandlePEReference call
xmlParsePEReference. The code paths in these functions differed
slighty, but the code from xmlParserHandlePEReference seems more solid
and tested.
This commit is contained in:
Nick Wellnhofer 2017-06-05 21:16:00 +02:00
parent dbaab1f369
commit 03904159f8

189
parser.c
View File

@ -2533,11 +2533,6 @@ xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
*/
void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
const xmlChar *name;
xmlEntityPtr entity = NULL;
xmlParserInputPtr input;
if (RAW != '%') return;
switch(ctxt->instate) {
case XML_PARSER_CDATA_SECTION:
return;
@ -2592,128 +2587,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
return;
}
NEXT;
name = xmlParseName(ctxt);
if (xmlParserDebugEntities)
xmlGenericError(xmlGenericErrorContext,
"PEReference: %s\n", name);
if (name == NULL) {
xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
} else {
if (RAW == ';') {
NEXT;
if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
if (ctxt->instate == XML_PARSER_EOF)
return;
if (entity == NULL) {
/*
* [ WFC: Entity Declared ]
* In a document without any DTD, a document with only an
* internal DTD subset which contains no parameter entity
* references, or a document with "standalone='yes'", ...
* ... The declaration of a parameter entity must precede
* any reference to it...
*/
if ((ctxt->standalone == 1) ||
((ctxt->hasExternalSubset == 0) &&
(ctxt->hasPErefs == 0))) {
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
"PEReference: %%%s; not found\n", name);
} else {
/*
* [ VC: Entity Declared ]
* In a document with an external subset or external
* parameter entities with "standalone='no'", ...
* ... The declaration of a parameter entity must precede
* any reference to it...
*/
if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
"PEReference: %%%s; not found\n",
name, NULL);
} else
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
"PEReference: %%%s; not found\n",
name, NULL);
ctxt->valid = 0;
}
xmlParserEntityCheck(ctxt, 0, NULL, 0);
} else if (ctxt->input->free != deallocblankswrapper) {
input = xmlNewBlanksWrapperInputStream(ctxt, entity);
if (xmlPushInput(ctxt, input) < 0)
return;
} else {
if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
(entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
xmlChar start[4];
xmlCharEncoding enc;
/*
* Note: external parameter entities will not be loaded, it
* is not required for a non-validating parser, unless the
* option of validating, or substituting entities were
* given. Doing so is far more secure as the parser will
* only process data coming from the document entity by
* default.
*/
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
((ctxt->options & XML_PARSE_NOENT) == 0) &&
((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
(ctxt->replaceEntities == 0) &&
(ctxt->validate == 0))
return;
/*
* handle the extra spaces added before and after
* c.f. http://www.w3.org/TR/REC-xml#as-PE
* this is done independently.
*/
input = xmlNewEntityInputStream(ctxt, entity);
if (xmlPushInput(ctxt, input) < 0)
return;
/*
* Get the 4 first bytes and decode the charset
* if enc != XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines.
* Note that, since we may have some non-UTF8
* encoding (like UTF16, bug 135229), the 'length'
* is not known, but we can calculate based upon
* the amount of data in the buffer.
*/
GROW
if (ctxt->instate == XML_PARSER_EOF)
return;
if ((ctxt->input->end - ctxt->input->cur)>=4) {
start[0] = RAW;
start[1] = NXT(1);
start[2] = NXT(2);
start[3] = NXT(3);
enc = xmlDetectCharEncoding(start, 4);
if (enc != XML_CHAR_ENCODING_NONE) {
xmlSwitchEncoding(ctxt, enc);
}
}
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
(IS_BLANK_CH(NXT(5)))) {
xmlParseTextDecl(ctxt);
}
} else {
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
"PEReference: %s is not a parameter entity\n",
name);
}
}
} else {
xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
}
}
xmlParsePEReference(ctxt);
}
/*
@ -8057,12 +7931,14 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt)
NEXT;
name = xmlParseName(ctxt);
if (name == NULL) {
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
"xmlParsePEReference: no name\n");
xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
return;
}
if (xmlParserDebugEntities)
xmlGenericError(xmlGenericErrorContext,
"PEReference: %s\n", name);
if (RAW != ';') {
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
return;
}
@ -8104,10 +7980,15 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt)
* ... The declaration of a parameter entity must
* precede any reference to it...
*/
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
"PEReference: %%%s; not found\n",
name, NULL);
ctxt->valid = 0;
if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
"PEReference: %%%s; not found\n",
name, NULL);
} else
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
"PEReference: %%%s; not found\n",
name, NULL);
ctxt->valid = 0;
}
xmlParserEntityCheck(ctxt, 0, NULL, 0);
} else {
@ -8124,6 +8005,9 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt)
if (xmlPushInput(ctxt, input) < 0)
return;
} else {
xmlChar start[4];
xmlCharEncoding enc;
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
((ctxt->options & XML_PARSE_NOENT) == 0) &&
((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
@ -8133,27 +8017,36 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt)
(ctxt->validate == 0))
return;
/*
* TODO !!!
* handle the extra spaces added before and after
* c.f. http://www.w3.org/TR/REC-xml#as-PE
*/
input = xmlNewEntityInputStream(ctxt, entity);
if (xmlPushInput(ctxt, input) < 0)
return;
/*
* Get the 4 first bytes and decode the charset
* if enc != XML_CHAR_ENCODING_NONE
* plug some encoding conversion routines.
* Note that, since we may have some non-UTF8
* encoding (like UTF16, bug 135229), the 'length'
* is not known, but we can calculate based upon
* the amount of data in the buffer.
*/
GROW
if (ctxt->instate == XML_PARSER_EOF)
return;
if ((ctxt->input->end - ctxt->input->cur)>=4) {
start[0] = RAW;
start[1] = NXT(1);
start[2] = NXT(2);
start[3] = NXT(3);
enc = xmlDetectCharEncoding(start, 4);
if (enc != XML_CHAR_ENCODING_NONE) {
xmlSwitchEncoding(ctxt, enc);
}
}
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
(CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
(IS_BLANK_CH(NXT(5)))) {
xmlParseTextDecl(ctxt);
if (ctxt->errNo ==
XML_ERR_UNSUPPORTED_ENCODING) {
/*
* The XML REC instructs us to stop parsing
* right here
*/
xmlHaltParser(ctxt);
return;
}
}
}
}