1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2024-12-25 23:21:26 +03:00

encoding: Fix encoding lookup with xmlOpenCharEncodingHandler

Make xmlOpenCharEncodingHandler call xmlParseCharEncoding first so we
prefer our own handlers for names like "UTF8". Only UTF-16 needs an
exception.

Make callers check the return value. For UTF-8, a NULL encoding handler
doesn't mean an error.

Remove unnecessary UTF-8 check from htmlFindOutputEncoder. Don't try to
look up ASCII handler since the HTML handler is always available.

Fix return code of xmlParseCharEncoding.

Should fix #744.
This commit is contained in:
Nick Wellnhofer 2024-06-22 19:15:17 +02:00
parent b8903b9e0d
commit 5b893fa999
3 changed files with 34 additions and 32 deletions

View File

@ -385,22 +385,17 @@ htmlFindOutputEncoder(const char *encoding) {
xmlCharEncodingHandler *handler = NULL; xmlCharEncodingHandler *handler = NULL;
if (encoding != NULL) { if (encoding != NULL) {
xmlCharEncoding enc; int res;
enc = xmlParseCharEncoding(encoding); res = xmlOpenCharEncodingHandler(encoding, /* output */ 1,
if (enc != XML_CHAR_ENCODING_UTF8) { &handler);
xmlOpenCharEncodingHandler(encoding, /* output */ 1, &handler); if (res != XML_ERR_OK)
if (handler == NULL) htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
}
} else { } else {
/* /*
* Fallback to HTML or ASCII when the encoding is unspecified * Fallback to HTML when the encoding is unspecified
*/ */
if (handler == NULL) xmlOpenCharEncodingHandler("HTML", /* output */ 1, &handler);
xmlOpenCharEncodingHandler("HTML", /* output */ 1, &handler);
if (handler == NULL)
xmlOpenCharEncodingHandler("ascii", /* output */ 1, &handler);
} }
return(handler); return(handler);

View File

@ -1161,7 +1161,7 @@ xmlParseCharEncoding(const char* name)
if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS); if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP); if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
return(XML_CHAR_ENCODING_ERROR); return(XML_CHAR_ENCODING_NONE);
} }
/** /**
@ -1931,9 +1931,7 @@ int
xmlOpenCharEncodingHandler(const char *name, int output, xmlOpenCharEncodingHandler(const char *name, int output,
xmlCharEncodingHandler **out) { xmlCharEncodingHandler **out) {
const char *nalias; const char *nalias;
const char *norig;
xmlCharEncoding enc; xmlCharEncoding enc;
int ret;
if (out == NULL) if (out == NULL)
return(XML_ERR_ARGUMENT); return(XML_ERR_ARGUMENT);
@ -1945,22 +1943,27 @@ xmlOpenCharEncodingHandler(const char *name, int output,
/* /*
* Do the alias resolution * Do the alias resolution
*/ */
norig = name;
nalias = xmlGetEncodingAlias(name); nalias = xmlGetEncodingAlias(name);
if (nalias != NULL) if (nalias != NULL)
name = nalias; name = nalias;
ret = xmlFindHandler(name, output, out);
if (*out != NULL)
return(0);
if (ret != XML_ERR_UNSUPPORTED_ENCODING)
return(ret);
/* /*
* Fallback using the canonical names * UTF-16 needs the built-in handler which is only available via
* xmlFindHandler.
*/ */
enc = xmlParseCharEncoding(norig); if (xmlStrcasecmp(BAD_CAST name, BAD_CAST "UTF16") == 0) {
return(xmlLookupCharEncodingHandler(enc, out)); name = "UTF-16";
} else if (xmlStrcasecmp(BAD_CAST name, BAD_CAST "UTF-16") != 0) {
enc = xmlParseCharEncoding(name);
if (enc != XML_CHAR_ENCODING_NONE) {
int res = xmlLookupCharEncodingHandler(enc, out);
if (res != XML_ERR_UNSUPPORTED_ENCODING)
return(res);
}
}
return(xmlFindHandler(name, output, out));
} }
/** /**

View File

@ -343,7 +343,7 @@ xmlNewSaveCtxt(const char *encoding, int options)
res = xmlOpenCharEncodingHandler(encoding, /* output */ 1, res = xmlOpenCharEncodingHandler(encoding, /* output */ 1,
&ret->handler); &ret->handler);
if (ret->handler == NULL) { if (res != XML_ERR_OK) {
xmlSaveErr(NULL, res, NULL, encoding); xmlSaveErr(NULL, res, NULL, encoding);
xmlFreeSaveCtxt(ret); xmlFreeSaveCtxt(ret);
return(NULL); return(NULL);
@ -801,7 +801,7 @@ static int xmlSaveSwitchEncoding(xmlSaveCtxtPtr ctxt, const char *encoding) {
int res; int res;
res = xmlOpenCharEncodingHandler(encoding, /* output */ 1, &handler); res = xmlOpenCharEncodingHandler(encoding, /* output */ 1, &handler);
if (handler == NULL) { if (res != XML_ERR_OK) {
xmlSaveErr(buf, res, NULL, encoding); xmlSaveErr(buf, res, NULL, encoding);
return(-1); return(-1);
} }
@ -2669,7 +2669,7 @@ xmlDocDumpFormatMemoryEnc(xmlDocPtr out_doc, xmlChar **doc_txt_ptr,
res = xmlOpenCharEncodingHandler(txt_encoding, /* output */ 1, res = xmlOpenCharEncodingHandler(txt_encoding, /* output */ 1,
&conv_hdlr); &conv_hdlr);
if (conv_hdlr == NULL) { if (res != XML_ERR_OK) {
xmlSaveErr(NULL, res, NULL, txt_encoding); xmlSaveErr(NULL, res, NULL, txt_encoding);
return; return;
} }
@ -2784,8 +2784,10 @@ xmlDocFormatDump(FILE *f, xmlDocPtr cur, int format) {
encoding = (const char *) cur->encoding; encoding = (const char *) cur->encoding;
if (encoding != NULL) { if (encoding != NULL) {
xmlOpenCharEncodingHandler(encoding, /* output */ 1, &handler); int res;
if (handler == NULL) {
res = xmlOpenCharEncodingHandler(encoding, /* output */ 1, &handler);
if (res != XML_ERR_OK) {
xmlFree((char *) cur->encoding); xmlFree((char *) cur->encoding);
cur->encoding = NULL; cur->encoding = NULL;
encoding = NULL; encoding = NULL;
@ -2921,8 +2923,10 @@ xmlSaveFormatFileEnc( const char * filename, xmlDocPtr cur,
encoding = (const char *) cur->encoding; encoding = (const char *) cur->encoding;
if (encoding != NULL) { if (encoding != NULL) {
xmlOpenCharEncodingHandler(encoding, /* output */ 1, &handler); int res;
if (handler == NULL)
res = xmlOpenCharEncodingHandler(encoding, /* output */ 1, &handler);
if (res != XML_ERR_OK)
return(-1); return(-1);
} }