1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2024-10-26 12:25:09 +03:00

parser: Rework parsing of attribute and entity values

Don't use a separate function to handle "complex" attributes. Validate
UTF-8 byte sequences without decoding. This should improve performance
considerably when parsing multi-byte UTF-8 sequences.

Use a string buffer to avoid unnecessary allocations and copying when
expanding entities.

Normalize attribute values in a single pass while expanding entities.

Be more lenient in recovery mode.

If no entity substitution was requested, validate entities without
expanding them. Fixes #596.

Also fixes #655.
This commit is contained in:
Nick Wellnhofer 2023-12-30 02:50:34 +01:00
parent 4dcc2d743e
commit 37c6618be5
16 changed files with 1080 additions and 1033 deletions

30
SAX2.c
View File

@ -970,10 +970,8 @@ xmlSAX2AttributeInternal(void *ctx, const xmlChar *fullname,
(void) nsret;
if (!ctxt->replaceEntities) {
ctxt->depth++;
val = xmlStringDecodeEntities(ctxt, value, XML_SUBSTITUTE_REF,
0,0,0);
ctxt->depth--;
/* TODO: normalize if needed */
val = xmlExpandEntitiesInAttValue(ctxt, value, /* normalize */ 0);
if (val == NULL) {
xmlSAX2ErrMemory(ctxt);
if (name != NULL)
@ -1038,10 +1036,8 @@ xmlSAX2AttributeInternal(void *ctx, const xmlChar *fullname,
(void) nsret;
if (!ctxt->replaceEntities) {
ctxt->depth++;
val = xmlStringDecodeEntities(ctxt, value, XML_SUBSTITUTE_REF,
0,0,0);
ctxt->depth--;
/* TODO: normalize if needed */
val = xmlExpandEntitiesInAttValue(ctxt, value, /* normalize */ 0);
if (val == NULL) {
xmlSAX2ErrMemory(ctxt);
xmlFree(ns);
@ -1179,10 +1175,8 @@ xmlSAX2AttributeInternal(void *ctx, const xmlChar *fullname,
if (!ctxt->replaceEntities) {
xmlChar *val;
ctxt->depth++;
val = xmlStringDecodeEntities(ctxt, value, XML_SUBSTITUTE_REF,
0,0,0);
ctxt->depth--;
/* TODO: normalize if needed */
val = xmlExpandEntitiesInAttValue(ctxt, value, /* normalize */ 0);
if (val == NULL)
ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt,
@ -1736,7 +1730,6 @@ static xmlChar *
xmlSAX2DecodeAttrEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
const xmlChar *end) {
const xmlChar *in;
xmlChar *ret;
in = str;
while (in < end)
@ -1744,11 +1737,12 @@ xmlSAX2DecodeAttrEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
goto decode;
return(NULL);
decode:
ctxt->depth++;
ret = xmlStringLenDecodeEntities(ctxt, str, end - str,
XML_SUBSTITUTE_REF, 0,0,0);
ctxt->depth--;
return(ret);
/*
* If the value contains '&', we can be sure it was allocated and is
* zero-terminated.
*/
/* TODO: normalize if needed */
return(xmlExpandEntitiesInAttValue(ctxt, str, /* normalize */ 0));
}
#endif /* LIBXML_VALID_ENABLED */

View File

@ -9,13 +9,17 @@
*
* XML_ENT_PARSED: The entity was parsed and `children` points to the
* content.
* XML_ENT_CHECKED: The entity was checked for loops.
*
* XML_ENT_CHECKED: The entity was checked for loops and amplification.
* expandedSize was set.
*
* XML_ENT_VALIDATED: The entity contains a valid attribute value.
* Only used when entities aren't substituted.
*/
#define XML_ENT_PARSED (1<<0)
#define XML_ENT_CHECKED (1<<1)
#define XML_ENT_EXPANDING (1<<2)
#define XML_ENT_CHECKED_LT (1<<3)
#define XML_ENT_CONTAINS_LT (1<<4)
#define XML_ENT_PARSED (1u << 0)
#define XML_ENT_CHECKED (1u << 1)
#define XML_ENT_VALIDATED (1u << 2)
#define XML_ENT_EXPANDING (1u << 3)
XML_HIDDEN xmlChar *
xmlEncodeAttributeEntities(xmlDocPtr doc, const xmlChar *input);

View File

@ -87,4 +87,8 @@ XML_HIDDEN xmlParserInputPtr
xmlNewInputPush(xmlParserCtxtPtr ctxt, const char *url,
const char *chunk, int size, const char *encoding);
XML_HIDDEN xmlChar *
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
int normalize);
#endif /* XML_PARSER_H_PRIVATE__ */

1924
parser.c

File diff suppressed because it is too large Load Diff

View File

@ -1,9 +1,3 @@
./test/errors/attr4.xml:1: parser error : invalid character in attribute value
<ROOT attr="XY"/>
^
./test/errors/attr4.xml:1: parser error : attributes construct error
<ROOT attr="XY"/>
^
./test/errors/attr4.xml:1: parser error : Couldn't find end of Start Tag ROOT line 1
<ROOT attr="XY"/>
^

View File

@ -1,9 +1,3 @@
./test/errors/attr4.xml:1: parser error : invalid character in attribute value
<ROOT attr="XY"/>
^
./test/errors/attr4.xml:1: parser error : attributes construct error
<ROOT attr="XY"/>
^
./test/errors/attr4.xml:1: parser error : Couldn't find end of Start Tag ROOT line 1
<ROOT attr="XY"/>
^

View File

@ -1,10 +1,4 @@
./test/errors/attr4.xml:1: parser error : invalid character in attribute value
<ROOT attr="XY"/>
^
./test/errors/attr4.xml:1: parser error : attributes construct error
<ROOT attr="XY"/>
^
./test/errors/attr4.xml:1: parser error : Couldn't find end of Start Tag ROOT
<ROOT attr="XY"/>
^
./test/errors/attr4.xml : failed to parse

5
result/issue655.xml Normal file
View File

@ -0,0 +1,5 @@
<?xml version="1.0"?>
<!DOCTYPE test [
<!ENTITY newline "&#x26;#xA;">
]>
<test newline="&newline;"/>

2
result/issue655.xml.rde Normal file
View File

@ -0,0 +1,2 @@
0 10 test 0 0
0 1 test 1 0

2
result/issue655.xml.rdr Normal file
View File

@ -0,0 +1,2 @@
0 10 test 0 0
0 1 test 1 0

10
result/issue655.xml.sax Normal file
View File

@ -0,0 +1,10 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.internalSubset(test, , )
SAX.entityDecl(newline, 1, (null), (null), &#xA;)
SAX.getEntity(newline)
SAX.externalSubset(test, , )
SAX.getEntity(newline)
SAX.startElement(test, newline='&newline;')
SAX.endElement(test)
SAX.endDocument()

10
result/issue655.xml.sax2 Normal file
View File

@ -0,0 +1,10 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.internalSubset(test, , )
SAX.entityDecl(newline, 1, (null), (null), &#xA;)
SAX.getEntity(newline)
SAX.externalSubset(test, , )
SAX.getEntity(newline)
SAX.startElementNs(test, NULL, NULL, 0, 1, 0, newline='&new...', 9)
SAX.endElementNs(test, NULL, NULL)
SAX.endDocument()

View File

@ -0,0 +1,5 @@
<?xml version="1.0"?>
<!DOCTYPE test [
<!ENTITY newline "&#x26;#xA;">
]>
<test newline="&#10;"/>

View File

@ -0,0 +1,11 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.internalSubset(test, , )
SAX.entityDecl(newline, 1, (null), (null), &#xA;)
SAX.getEntity(newline)
SAX.externalSubset(test, , )
SAX.getEntity(newline)
SAX.startElementNs(test, NULL, NULL, 0, 1, 0, newline='
...', 1)
SAX.endElementNs(test, NULL, NULL)
SAX.endDocument()

View File

@ -17,50 +17,50 @@
For the list of proposed modifications, see:
http://www.nitf.org/proposed-changes.html
--><!ENTITY % enriched-text "
#PCDATA
| chron
| copyrite
| event
| function
| location
| money
| num
| object.title
| org
| person
| postaddr
| virtloc
| a
| br
| em
| lang
| pronounce
| q
--><!ENTITY % enriched-text "
#PCDATA
| chron
| copyrite
| event
| function
| location
| money
| num
| object.title
| org
| person
| postaddr
| virtloc
| a
| br
| em
| lang
| pronounce
| q
">
<!ENTITY % block.head "dateline?, copyrite?, abstract?">
<!ENTITY % block.content "p | hl2 | table | media | ol | ul | dl | bq | fn | note | pre | hr">
<!ENTITY % block.end "datasource?">
<!ENTITY % global-attributes "
id ID #IMPLIED
<!ENTITY % global-attributes "
id ID #IMPLIED
">
<!ENTITY % common-attributes "
%global-attributes;
class NMTOKENS #IMPLIED
style CDATA #IMPLIED
lang NMTOKEN #IMPLIED
dir (ltr | rtl) #IMPLIED
<!ENTITY % common-attributes "
%global-attributes;
class NMTOKENS #IMPLIED
style CDATA #IMPLIED
lang NMTOKEN #IMPLIED
dir (ltr | rtl) #IMPLIED
">
<!ENTITY % cell.align "
align (left | center | right | justify | char) #IMPLIED
char CDATA #IMPLIED
charoff CDATA #IMPLIED
<!ENTITY % cell.align "
align (left | center | right | justify | char) #IMPLIED
char CDATA #IMPLIED
charoff CDATA #IMPLIED
">
<!ENTITY % cell.valign "
valign (top | middle | bottom | baseline) #IMPLIED
<!ENTITY % cell.valign "
valign (top | middle | bottom | baseline) #IMPLIED
">
<!ENTITY % url.link "
md CDATA #IMPLIED
<!ENTITY % url.link "
md CDATA #IMPLIED
">
<!ENTITY % boolean "(true | false)">
<!ELEMENT nitf (head , body)>

4
test/issue655.xml Normal file
View File

@ -0,0 +1,4 @@
<!DOCTYPE test [
<!ENTITY newline "&#x26;#xA;">
]>
<test newline="&newline;"/>