mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-02-05 05:47:00 +03:00
parser: Align push parsing of CDATA sections with pull parser
Remove special handling of CDATA sections in push parser. This makes sure that only a single callback is generated for large sections. Fixes #22 and needed for #412.
This commit is contained in:
parent
4d10e53af1
commit
dde62ae5d5
164
parser.c
164
parser.c
@ -11013,74 +11013,6 @@ xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
|
||||
return(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* xmlCheckCdataPush:
|
||||
* @cur: pointer to the block of characters
|
||||
* @len: length of the block in bytes
|
||||
* @complete: 1 if complete CDATA block is passed in, 0 if partial block
|
||||
*
|
||||
* Check that the block of characters is okay as SCdata content [20]
|
||||
*
|
||||
* Returns the number of bytes to pass if okay, a negative index where an
|
||||
* UTF-8 error occurred otherwise
|
||||
*/
|
||||
static int
|
||||
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
|
||||
int ix;
|
||||
unsigned char c;
|
||||
int codepoint;
|
||||
|
||||
if ((utf == NULL) || (len <= 0))
|
||||
return(0);
|
||||
|
||||
for (ix = 0; ix < len;) { /* string is 0-terminated */
|
||||
c = utf[ix];
|
||||
if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
|
||||
if (c >= 0x20)
|
||||
ix++;
|
||||
else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
|
||||
ix++;
|
||||
else
|
||||
return(-ix);
|
||||
} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
|
||||
if (ix + 2 > len) return(complete ? -ix : ix);
|
||||
if ((utf[ix+1] & 0xc0 ) != 0x80)
|
||||
return(-ix);
|
||||
codepoint = (utf[ix] & 0x1f) << 6;
|
||||
codepoint |= utf[ix+1] & 0x3f;
|
||||
if (!xmlIsCharQ(codepoint))
|
||||
return(-ix);
|
||||
ix += 2;
|
||||
} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
|
||||
if (ix + 3 > len) return(complete ? -ix : ix);
|
||||
if (((utf[ix+1] & 0xc0) != 0x80) ||
|
||||
((utf[ix+2] & 0xc0) != 0x80))
|
||||
return(-ix);
|
||||
codepoint = (utf[ix] & 0xf) << 12;
|
||||
codepoint |= (utf[ix+1] & 0x3f) << 6;
|
||||
codepoint |= utf[ix+2] & 0x3f;
|
||||
if (!xmlIsCharQ(codepoint))
|
||||
return(-ix);
|
||||
ix += 3;
|
||||
} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
|
||||
if (ix + 4 > len) return(complete ? -ix : ix);
|
||||
if (((utf[ix+1] & 0xc0) != 0x80) ||
|
||||
((utf[ix+2] & 0xc0) != 0x80) ||
|
||||
((utf[ix+3] & 0xc0) != 0x80))
|
||||
return(-ix);
|
||||
codepoint = (utf[ix] & 0x7) << 18;
|
||||
codepoint |= (utf[ix+1] & 0x3f) << 12;
|
||||
codepoint |= (utf[ix+2] & 0x3f) << 6;
|
||||
codepoint |= utf[ix+3] & 0x3f;
|
||||
if (!xmlIsCharQ(codepoint))
|
||||
return(-ix);
|
||||
ix += 4;
|
||||
} else /* unknown encoding */
|
||||
return(-ix);
|
||||
}
|
||||
return(ix);
|
||||
}
|
||||
|
||||
/**
|
||||
* xmlParseTryOrFinish:
|
||||
* @ctxt: an XML parser context
|
||||
@ -11308,8 +11240,12 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
||||
(ctxt->input->cur[6] == 'T') &&
|
||||
(ctxt->input->cur[7] == 'A') &&
|
||||
(ctxt->input->cur[8] == '[')) {
|
||||
SKIP(9);
|
||||
if ((!terminate) &&
|
||||
(!xmlParseLookupString(ctxt, 9, "]]>", 3)))
|
||||
goto done;
|
||||
ctxt->instate = XML_PARSER_CDATA_SECTION;
|
||||
xmlParseCDSect(ctxt);
|
||||
ctxt->instate = XML_PARSER_CONTENT;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -11361,89 +11297,6 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
||||
ctxt->instate = XML_PARSER_CONTENT;
|
||||
}
|
||||
break;
|
||||
case XML_PARSER_CDATA_SECTION: {
|
||||
/*
|
||||
* The Push mode needs to have the SAX callback for
|
||||
* cdataBlock merge back contiguous callbacks.
|
||||
*/
|
||||
const xmlChar *term;
|
||||
|
||||
if (terminate) {
|
||||
/*
|
||||
* Don't call xmlParseLookupString. If 'terminate'
|
||||
* is set, checkIndex is invalid.
|
||||
*/
|
||||
term = BAD_CAST strstr((const char *) ctxt->input->cur,
|
||||
"]]>");
|
||||
} else {
|
||||
term = xmlParseLookupString(ctxt, 0, "]]>", 3);
|
||||
}
|
||||
|
||||
if (term == NULL) {
|
||||
int tmp, size;
|
||||
|
||||
if (terminate) {
|
||||
/* Unfinished CDATA section */
|
||||
size = ctxt->input->end - ctxt->input->cur;
|
||||
} else {
|
||||
if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
|
||||
goto done;
|
||||
ctxt->checkIndex = 0;
|
||||
/* XXX: Why don't we pass the full buffer? */
|
||||
size = XML_PARSER_BIG_BUFFER_SIZE;
|
||||
}
|
||||
tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
|
||||
if (tmp <= 0) {
|
||||
tmp = -tmp;
|
||||
ctxt->input->cur += tmp;
|
||||
goto encoding_error;
|
||||
}
|
||||
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
|
||||
if (ctxt->sax->cdataBlock != NULL)
|
||||
ctxt->sax->cdataBlock(ctxt->userData,
|
||||
ctxt->input->cur, tmp);
|
||||
else if (ctxt->sax->characters != NULL)
|
||||
ctxt->sax->characters(ctxt->userData,
|
||||
ctxt->input->cur, tmp);
|
||||
}
|
||||
SKIPL(tmp);
|
||||
} else {
|
||||
int base = term - CUR_PTR;
|
||||
int tmp;
|
||||
|
||||
tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
|
||||
if ((tmp < 0) || (tmp != base)) {
|
||||
tmp = -tmp;
|
||||
ctxt->input->cur += tmp;
|
||||
goto encoding_error;
|
||||
}
|
||||
if ((ctxt->sax != NULL) && (base == 0) &&
|
||||
(ctxt->sax->cdataBlock != NULL) &&
|
||||
(!ctxt->disableSAX)) {
|
||||
/*
|
||||
* Special case to provide identical behaviour
|
||||
* between pull and push parsers on empty CDATA
|
||||
* sections
|
||||
*/
|
||||
if ((ctxt->input->cur - ctxt->input->base >= 9) &&
|
||||
(!strncmp((const char *)&ctxt->input->cur[-9],
|
||||
"<![CDATA[", 9)))
|
||||
ctxt->sax->cdataBlock(ctxt->userData,
|
||||
BAD_CAST "", 0);
|
||||
} else if ((ctxt->sax != NULL) && (base > 0) &&
|
||||
(!ctxt->disableSAX)) {
|
||||
if (ctxt->sax->cdataBlock != NULL)
|
||||
ctxt->sax->cdataBlock(ctxt->userData,
|
||||
ctxt->input->cur, base);
|
||||
else if (ctxt->sax->characters != NULL)
|
||||
ctxt->sax->characters(ctxt->userData,
|
||||
ctxt->input->cur, base);
|
||||
}
|
||||
SKIPL(base + 3);
|
||||
ctxt->instate = XML_PARSER_CONTENT;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case XML_PARSER_MISC:
|
||||
case XML_PARSER_PROLOG:
|
||||
case XML_PARSER_EPILOG:
|
||||
@ -11546,13 +11399,6 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
||||
}
|
||||
done:
|
||||
return(ret);
|
||||
encoding_error:
|
||||
/* Only report the first error */
|
||||
if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
|
||||
xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
|
||||
ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
Loading…
x
Reference in New Issue
Block a user