1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-03-25 10:50:08 +03:00

parser: Recover more input from encoding errors

Don't halt the parser in xmlParserGrow to allow more input to be
recovered in case of encoding errors.

Fixes #543.
This commit is contained in:
Nick Wellnhofer 2023-06-07 14:05:34 +02:00
parent db21cd5db9
commit 20f5c73457
2 changed files with 16 additions and 12 deletions

View File

@ -411,9 +411,11 @@ htmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
return(ctxt->token);
}
-if ((ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) &&
-(xmlParserGrow(ctxt) < 0))
-return(0);
+if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {
+xmlParserGrow(ctxt);
+if (ctxt->instate == XML_PARSER_EOF)
+return(0);
+}
if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
xmlChar * guess;

View File

@ -549,6 +549,8 @@ xmlParserGrow(xmlParserCtxtPtr ctxt) {
/* Don't grow memory buffers. */
if ((buf->encoder == NULL) && (buf->readcallback == NULL))
return(0);
+if (buf->error != 0)
+return(-1);
if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
(curBase > XML_MAX_LOOKUP_LIMIT)) &&
@ -564,10 +566,8 @@ xmlParserGrow(xmlParserCtxtPtr ctxt) {
ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
xmlBufSetInputBaseCur(buf->buffer, in, 0, curBase);
-if (ret < 0) {
+if (ret < 0)
xmlFatalErr(ctxt, buf->error, NULL);
-xmlHaltParser(ctxt);
-}
return(ret);
}
@ -755,9 +755,9 @@ xmlNextChar(xmlParserCtxtPtr ctxt)
}
if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {
-if (xmlParserGrow(ctxt) < 0)
-return;
-if (ctxt->input->cur >= ctxt->input->end)
+xmlParserGrow(ctxt);
+if ((ctxt->instate == XML_PARSER_EOF) ||
+(ctxt->input->cur >= ctxt->input->end))
return;
}
@ -903,9 +903,11 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
if (ctxt->instate == XML_PARSER_EOF)
return(0);
-if ((ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) &&
-(xmlParserGrow(ctxt) < 0))
-return(0);
+if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {
+xmlParserGrow(ctxt);
+if (ctxt->instate == XML_PARSER_EOF)
+return(0);
+}
if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
*len = 1;