1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-03-25 10:50:08 +03:00

parser: Revert change to doc->encoding

Fixes #579.
This commit is contained in:
Nick Wellnhofer 2023-08-16 19:43:02 +02:00
parent 61b8e097b9
commit f1c1f5c6b4
4 changed files with 29 additions and 16 deletions

32
SAX2.c
View File

@ -977,10 +977,6 @@ xmlSAX2StartDocument(void *ctx)
if (ctxt->options & XML_PARSE_OLD10)
doc->properties |= XML_DOC_OLD10;
doc->parseFlags = ctxt->options;
if (ctxt->encoding != NULL)
doc->encoding = xmlStrdup(ctxt->encoding);
else
doc->encoding = NULL;
doc->standalone = ctxt->standalone;
} else {
xmlSAX2ErrMemory(ctxt, "xmlSAX2StartDocument");
@ -1009,6 +1005,8 @@ void
xmlSAX2EndDocument(void *ctx)
{
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
xmlDocPtr doc;
#ifdef DEBUG_SAX
xmlGenericError(xmlGenericErrorContext,
"SAX.xmlSAX2EndDocument()\n");
@ -1020,13 +1018,25 @@ xmlSAX2EndDocument(void *ctx)
ctxt->valid &= xmlValidateDocumentFinal(&ctxt->vctxt, ctxt->myDoc);
#endif /* LIBXML_VALID_ENABLED */
/*
* Grab the encoding if it was added on-the-fly
*/
if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
(ctxt->myDoc->encoding == NULL)) {
ctxt->myDoc->encoding = ctxt->encoding;
ctxt->encoding = NULL;
doc = ctxt->myDoc;
if ((doc != NULL) && (doc->encoding == NULL)) {
const xmlChar *encoding = NULL;
if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) ||
(ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) {
/* Preserve encoding exactly */
encoding = ctxt->encoding;
} else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) {
encoding = BAD_CAST ctxt->input->buf->encoder->name;
} else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
encoding = BAD_CAST "UTF-8";
}
if (encoding != NULL) {
doc->encoding = xmlStrdup(encoding);
if (doc->encoding == NULL)
xmlSAX2ErrMemory(ctxt, "xmlSAX2EndDocument");
}
}
}

View File

@ -573,7 +573,7 @@ struct _xmlDoc {
struct _xmlDtd *extSubset; /* the document external subset */
struct _xmlNs *oldNs; /* Global namespace, the old way */
const xmlChar *version; /* the XML version string */
const xmlChar *encoding; /* encoding from XML declaration, if any */
const xmlChar *encoding; /* actual encoding, if any */
void *ids; /* Hash table for ID attributes if any */
void *refs; /* Hash table for IDREFs attributes if any */
const xmlChar *URL; /* The URI for that document */

View File

@ -23,7 +23,8 @@
#define XML_INPUT_AUTO_UTF16LE (2u << 1)
#define XML_INPUT_AUTO_UTF16BE (3u << 1)
#define XML_INPUT_AUTO_OTHER (4u << 1)
#define XML_INPUT_8_BIT (1u << 4)
#define XML_INPUT_USES_ENC_DECL (1u << 4)
#define XML_INPUT_8_BIT (1u << 5)
XML_HIDDEN void
xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra);

View File

@ -1590,13 +1590,15 @@ xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding) {
xmlCharEncodingHandlerPtr handler;
handler = xmlFindCharEncodingHandler((const char *) encoding);
if (handler != NULL) {
xmlSwitchToEncoding(ctxt, handler);
} else {
if (handler == NULL) {
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
"Unsupported encoding: %s\n",
encoding, NULL);
return;
}
xmlSwitchToEncoding(ctxt, handler);
ctxt->input->flags |= XML_INPUT_USES_ENC_DECL;
} else if (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
static const char *allowedUTF8[] = {
"UTF-8", "UTF8", NULL