mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2024-10-26 12:25:09 +03:00
parser: Rework parsing of attribute and entity values
Don't use a separate function to handle "complex" attributes. Validate UTF-8 byte sequences without decoding. This should improve performance considerably when parsing multi-byte UTF-8 sequences. Use a string buffer to avoid unnecessary allocations and copying when expanding entities. Normalize attribute values in a single pass while expanding entities. Be more lenient in recovery mode. If no entity substitution was requested, validate entities without expanding. Fixes #596. Also fixes #655.
This commit is contained in:
parent
4dcc2d743e
commit
37c6618be5
30
SAX2.c
30
SAX2.c
@ -970,10 +970,8 @@ xmlSAX2AttributeInternal(void *ctx, const xmlChar *fullname,
|
||||
(void) nsret;
|
||||
|
||||
if (!ctxt->replaceEntities) {
|
||||
ctxt->depth++;
|
||||
val = xmlStringDecodeEntities(ctxt, value, XML_SUBSTITUTE_REF,
|
||||
0,0,0);
|
||||
ctxt->depth--;
|
||||
/* TODO: normalize if needed */
|
||||
val = xmlExpandEntitiesInAttValue(ctxt, value, /* normalize */ 0);
|
||||
if (val == NULL) {
|
||||
xmlSAX2ErrMemory(ctxt);
|
||||
if (name != NULL)
|
||||
@ -1038,10 +1036,8 @@ xmlSAX2AttributeInternal(void *ctx, const xmlChar *fullname,
|
||||
(void) nsret;
|
||||
|
||||
if (!ctxt->replaceEntities) {
|
||||
ctxt->depth++;
|
||||
val = xmlStringDecodeEntities(ctxt, value, XML_SUBSTITUTE_REF,
|
||||
0,0,0);
|
||||
ctxt->depth--;
|
||||
/* TODO: normalize if needed */
|
||||
val = xmlExpandEntitiesInAttValue(ctxt, value, /* normalize */ 0);
|
||||
if (val == NULL) {
|
||||
xmlSAX2ErrMemory(ctxt);
|
||||
xmlFree(ns);
|
||||
@ -1179,10 +1175,8 @@ xmlSAX2AttributeInternal(void *ctx, const xmlChar *fullname,
|
||||
if (!ctxt->replaceEntities) {
|
||||
xmlChar *val;
|
||||
|
||||
ctxt->depth++;
|
||||
val = xmlStringDecodeEntities(ctxt, value, XML_SUBSTITUTE_REF,
|
||||
0,0,0);
|
||||
ctxt->depth--;
|
||||
/* TODO: normalize if needed */
|
||||
val = xmlExpandEntitiesInAttValue(ctxt, value, /* normalize */ 0);
|
||||
|
||||
if (val == NULL)
|
||||
ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt,
|
||||
@ -1736,7 +1730,6 @@ static xmlChar *
|
||||
xmlSAX2DecodeAttrEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
|
||||
const xmlChar *end) {
|
||||
const xmlChar *in;
|
||||
xmlChar *ret;
|
||||
|
||||
in = str;
|
||||
while (in < end)
|
||||
@ -1744,11 +1737,12 @@ xmlSAX2DecodeAttrEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
|
||||
goto decode;
|
||||
return(NULL);
|
||||
decode:
|
||||
ctxt->depth++;
|
||||
ret = xmlStringLenDecodeEntities(ctxt, str, end - str,
|
||||
XML_SUBSTITUTE_REF, 0,0,0);
|
||||
ctxt->depth--;
|
||||
return(ret);
|
||||
/*
|
||||
* If the value contains '&', we can be sure it was allocated and is
|
||||
* zero-terminated.
|
||||
*/
|
||||
/* TODO: normalize if needed */
|
||||
return(xmlExpandEntitiesInAttValue(ctxt, str, /* normalize */ 0));
|
||||
}
|
||||
#endif /* LIBXML_VALID_ENABLED */
|
||||
|
||||
|
@ -9,13 +9,17 @@
|
||||
*
|
||||
* XML_ENT_PARSED: The entity was parsed and `children` points to the
|
||||
* content.
|
||||
* XML_ENT_CHECKED: The entity was checked for loops.
|
||||
*
|
||||
* XML_ENT_CHECKED: The entity was checked for loops and amplification.
|
||||
* expandedSize was set.
|
||||
*
|
||||
* XML_ENT_VALIDATED: The entity contains a valid attribute value.
|
||||
* Only used when entities aren't substituted.
|
||||
*/
|
||||
#define XML_ENT_PARSED (1<<0)
|
||||
#define XML_ENT_CHECKED (1<<1)
|
||||
#define XML_ENT_EXPANDING (1<<2)
|
||||
#define XML_ENT_CHECKED_LT (1<<3)
|
||||
#define XML_ENT_CONTAINS_LT (1<<4)
|
||||
#define XML_ENT_PARSED (1u << 0)
|
||||
#define XML_ENT_CHECKED (1u << 1)
|
||||
#define XML_ENT_VALIDATED (1u << 2)
|
||||
#define XML_ENT_EXPANDING (1u << 3)
|
||||
|
||||
XML_HIDDEN xmlChar *
|
||||
xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input);
|
||||
|
@ -87,4 +87,8 @@ XML_HIDDEN xmlParserInputPtr
|
||||
xmlNewInputPush(xmlParserCtxtPtr ctxt, const char *url,
|
||||
const char *chunk, int size, const char *encoding);
|
||||
|
||||
XML_HIDDEN xmlChar *
|
||||
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
|
||||
int normalize);
|
||||
|
||||
#endif /* XML_PARSER_H_PRIVATE__ */
|
||||
|
@ -1,9 +1,3 @@
|
||||
./test/errors/attr4.xml:1: parser error : invalid character in attribute value
|
||||
<ROOT attr="XY"/>
|
||||
^
|
||||
./test/errors/attr4.xml:1: parser error : attributes construct error
|
||||
<ROOT attr="XY"/>
|
||||
^
|
||||
./test/errors/attr4.xml:1: parser error : Couldn't find end of Start Tag ROOT line 1
|
||||
<ROOT attr="XY"/>
|
||||
^
|
||||
|
@ -1,9 +1,3 @@
|
||||
./test/errors/attr4.xml:1: parser error : invalid character in attribute value
|
||||
<ROOT attr="XY"/>
|
||||
^
|
||||
./test/errors/attr4.xml:1: parser error : attributes construct error
|
||||
<ROOT attr="XY"/>
|
||||
^
|
||||
./test/errors/attr4.xml:1: parser error : Couldn't find end of Start Tag ROOT line 1
|
||||
<ROOT attr="XY"/>
|
||||
^
|
||||
|
@ -1,10 +1,4 @@
|
||||
./test/errors/attr4.xml:1: parser error : invalid character in attribute value
|
||||
<ROOT attr="XY"/>
|
||||
^
|
||||
./test/errors/attr4.xml:1: parser error : attributes construct error
|
||||
<ROOT attr="XY"/>
|
||||
^
|
||||
./test/errors/attr4.xml:1: parser error : Couldn't find end of Start Tag ROOT
|
||||
<ROOT attr="XY"/>
|
||||
^
|
||||
./test/errors/attr4.xml : failed to parse
|
||||
|
5
result/issue655.xml
Normal file
5
result/issue655.xml
Normal file
@ -0,0 +1,5 @@
|
||||
<?xml version="1.0"?>
|
||||
<!DOCTYPE test [
|
||||
<!ENTITY newline "&#xA;">
|
||||
]>
|
||||
<test newline="&newline;"/>
|
2
result/issue655.xml.rde
Normal file
2
result/issue655.xml.rde
Normal file
@ -0,0 +1,2 @@
|
||||
0 10 test 0 0
|
||||
0 1 test 1 0
|
2
result/issue655.xml.rdr
Normal file
2
result/issue655.xml.rdr
Normal file
@ -0,0 +1,2 @@
|
||||
0 10 test 0 0
|
||||
0 1 test 1 0
|
10
result/issue655.xml.sax
Normal file
10
result/issue655.xml.sax
Normal file
@ -0,0 +1,10 @@
|
||||
SAX.setDocumentLocator()
|
||||
SAX.startDocument()
|
||||
SAX.internalSubset(test, , )
|
||||
SAX.entityDecl(newline, 1, (null), (null), 
)
|
||||
SAX.getEntity(newline)
|
||||
SAX.externalSubset(test, , )
|
||||
SAX.getEntity(newline)
|
||||
SAX.startElement(test, newline='&newline;')
|
||||
SAX.endElement(test)
|
||||
SAX.endDocument()
|
10
result/issue655.xml.sax2
Normal file
10
result/issue655.xml.sax2
Normal file
@ -0,0 +1,10 @@
|
||||
SAX.setDocumentLocator()
|
||||
SAX.startDocument()
|
||||
SAX.internalSubset(test, , )
|
||||
SAX.entityDecl(newline, 1, (null), (null), 
)
|
||||
SAX.getEntity(newline)
|
||||
SAX.externalSubset(test, , )
|
||||
SAX.getEntity(newline)
|
||||
SAX.startElementNs(test, NULL, NULL, 0, 1, 0, newline='&new...', 9)
|
||||
SAX.endElementNs(test, NULL, NULL)
|
||||
SAX.endDocument()
|
5
result/noent/issue655.xml
Normal file
5
result/noent/issue655.xml
Normal file
@ -0,0 +1,5 @@
|
||||
<?xml version="1.0"?>
|
||||
<!DOCTYPE test [
|
||||
<!ENTITY newline "&#xA;">
|
||||
]>
|
||||
<test newline="&#10;"/>
|
11
result/noent/issue655.xml.sax2
Normal file
11
result/noent/issue655.xml.sax2
Normal file
@ -0,0 +1,11 @@
|
||||
SAX.setDocumentLocator()
|
||||
SAX.startDocument()
|
||||
SAX.internalSubset(test, , )
|
||||
SAX.entityDecl(newline, 1, (null), (null), 
)
|
||||
SAX.getEntity(newline)
|
||||
SAX.externalSubset(test, , )
|
||||
SAX.getEntity(newline)
|
||||
SAX.startElementNs(test, NULL, NULL, 0, 1, 0, newline='
|
||||
...', 1)
|
||||
SAX.endElementNs(test, NULL, NULL)
|
||||
SAX.endDocument()
|
@ -17,50 +17,50 @@
|
||||
|
||||
For the list of proposed modifications, see:
|
||||
http://www.nitf.org/proposed-changes.html
|
||||
--><!ENTITY % enriched-text "
|
||||
#PCDATA
|
||||
| chron
|
||||
| copyrite
|
||||
| event
|
||||
| function
|
||||
| location
|
||||
| money
|
||||
| num
|
||||
| object.title
|
||||
| org
|
||||
| person
|
||||
| postaddr
|
||||
| virtloc
|
||||
| a
|
||||
| br
|
||||
| em
|
||||
| lang
|
||||
| pronounce
|
||||
| q
|
||||
--><!ENTITY % enriched-text "
|
||||
#PCDATA
|
||||
| chron
|
||||
| copyrite
|
||||
| event
|
||||
| function
|
||||
| location
|
||||
| money
|
||||
| num
|
||||
| object.title
|
||||
| org
|
||||
| person
|
||||
| postaddr
|
||||
| virtloc
|
||||
| a
|
||||
| br
|
||||
| em
|
||||
| lang
|
||||
| pronounce
|
||||
| q
|
||||
">
|
||||
<!ENTITY % block.head "dateline?, copyrite?, abstract?">
|
||||
<!ENTITY % block.content "p | hl2 | table | media | ol | ul | dl | bq | fn | note | pre | hr">
|
||||
<!ENTITY % block.end "datasource?">
|
||||
<!ENTITY % global-attributes "
|
||||
id ID #IMPLIED
|
||||
<!ENTITY % global-attributes "
|
||||
id ID #IMPLIED
|
||||
">
|
||||
<!ENTITY % common-attributes "
|
||||
%global-attributes;
|
||||
class NMTOKENS #IMPLIED
|
||||
style CDATA #IMPLIED
|
||||
lang NMTOKEN #IMPLIED
|
||||
dir (ltr | rtl) #IMPLIED
|
||||
<!ENTITY % common-attributes "
|
||||
%global-attributes;
|
||||
class NMTOKENS #IMPLIED
|
||||
style CDATA #IMPLIED
|
||||
lang NMTOKEN #IMPLIED
|
||||
dir (ltr | rtl) #IMPLIED
|
||||
">
|
||||
<!ENTITY % cell.align "
|
||||
align (left | center | right | justify | char) #IMPLIED
|
||||
char CDATA #IMPLIED
|
||||
charoff CDATA #IMPLIED
|
||||
<!ENTITY % cell.align "
|
||||
align (left | center | right | justify | char) #IMPLIED
|
||||
char CDATA #IMPLIED
|
||||
charoff CDATA #IMPLIED
|
||||
">
|
||||
<!ENTITY % cell.valign "
|
||||
valign (top | middle | bottom | baseline) #IMPLIED
|
||||
<!ENTITY % cell.valign "
|
||||
valign (top | middle | bottom | baseline) #IMPLIED
|
||||
">
|
||||
<!ENTITY % url.link "
|
||||
md CDATA #IMPLIED
|
||||
<!ENTITY % url.link "
|
||||
md CDATA #IMPLIED
|
||||
">
|
||||
<!ENTITY % boolean "(true | false)">
|
||||
<!ELEMENT nitf (head , body)>
|
||||
|
4
test/issue655.xml
Normal file
4
test/issue655.xml
Normal file
@ -0,0 +1,4 @@
|
||||
<!DOCTYPE test [
|
||||
<!ENTITY newline "&#xA;">
|
||||
]>
|
||||
<test newline="&newline;"/>
|
Loading…
Reference in New Issue
Block a user