mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-03-09 04:58:16 +03:00
io: Rearrange code in xmlSwitchInputEncodingInt
No functional change.
This commit is contained in:
parent
1ef4938fd0
commit
6b57061909
@ -1021,118 +1021,14 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
|
||||
xmlCharEncodingHandlerPtr handler, int len)
|
||||
{
|
||||
int nbchars;
|
||||
xmlParserInputBufferPtr in;
|
||||
|
||||
if (handler == NULL)
|
||||
return (-1);
|
||||
if (input == NULL)
|
||||
return (-1);
|
||||
if (input->buf != NULL) {
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
|
||||
if (input->buf->encoder != NULL) {
|
||||
/*
|
||||
* Check in case the auto encoding detection triggered
|
||||
* in already.
|
||||
*/
|
||||
if (input->buf->encoder == handler)
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* "UTF-16" can be used for both LE and BE
|
||||
if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
|
||||
BAD_CAST "UTF-16", 6)) &&
|
||||
(!xmlStrncmp(BAD_CAST handler->name,
|
||||
BAD_CAST "UTF-16", 6))) {
|
||||
return(0);
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
* Note: this is a bit dangerous, but that's what it
|
||||
* takes to use nearly compatible signature for different
|
||||
* encodings.
|
||||
*
|
||||
* FIXME: Encoders might buffer partial byte sequences, so
|
||||
* this probably can't work. We should return an error and
|
||||
* make sure that callers never try to switch the encoding
|
||||
* twice.
|
||||
*/
|
||||
xmlCharEncCloseFunc(input->buf->encoder);
|
||||
input->buf->encoder = handler;
|
||||
return (0);
|
||||
}
|
||||
input->buf->encoder = handler;
|
||||
|
||||
/*
|
||||
* Is there already some content down the pipe to convert ?
|
||||
*/
|
||||
if (xmlBufIsEmpty(input->buf->buffer) == 0) {
|
||||
int processed;
|
||||
unsigned int use;
|
||||
|
||||
/*
|
||||
* Specific handling of the Byte Order Mark for
|
||||
* UTF-16
|
||||
*/
|
||||
if ((handler->name != NULL) &&
|
||||
(!strcmp(handler->name, "UTF-16LE") ||
|
||||
!strcmp(handler->name, "UTF-16")) &&
|
||||
(input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
|
||||
input->cur += 2;
|
||||
}
|
||||
if ((handler->name != NULL) &&
|
||||
(!strcmp(handler->name, "UTF-16BE")) &&
|
||||
(input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
|
||||
input->cur += 2;
|
||||
}
|
||||
/*
|
||||
* Errata on XML-1.0 June 20 2001
|
||||
* Specific handling of the Byte Order Mark for
|
||||
* UTF-8
|
||||
*/
|
||||
if ((handler->name != NULL) &&
|
||||
(!strcmp(handler->name, "UTF-8")) &&
|
||||
(input->cur[0] == 0xEF) &&
|
||||
(input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
|
||||
input->cur += 3;
|
||||
}
|
||||
|
||||
/*
|
||||
* Shrink the current input buffer.
|
||||
* Move it as the raw buffer and create a new input buffer
|
||||
*/
|
||||
processed = input->cur - input->base;
|
||||
xmlBufShrink(input->buf->buffer, processed);
|
||||
input->buf->raw = input->buf->buffer;
|
||||
input->buf->buffer = xmlBufCreate();
|
||||
input->buf->rawconsumed = processed;
|
||||
use = xmlBufUse(input->buf->raw);
|
||||
|
||||
if (ctxt->html) {
|
||||
/*
|
||||
* convert as much as possible of the buffer
|
||||
*/
|
||||
nbchars = xmlCharEncInput(input->buf, 1);
|
||||
} else {
|
||||
/*
|
||||
* convert just enough to get
|
||||
* '<?xml version="1.0" encoding="xxx"?>'
|
||||
* parsed with the autodetected encoding
|
||||
* into the parser reading buffer.
|
||||
*/
|
||||
nbchars = xmlCharEncFirstLineInput(input->buf, len);
|
||||
}
|
||||
xmlBufResetInput(input->buf->buffer, input);
|
||||
if (nbchars < 0) {
|
||||
xmlErrInternal(ctxt,
|
||||
"switching encoding: encoder error\n",
|
||||
NULL);
|
||||
return (-1);
|
||||
}
|
||||
input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
|
||||
}
|
||||
return (0);
|
||||
} else {
|
||||
in = input->buf;
|
||||
if (in == NULL) {
|
||||
xmlErrInternal(ctxt,
|
||||
"static memory buffer doesn't support encoding\n", NULL);
|
||||
/*
|
||||
@ -1143,6 +1039,102 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
|
||||
xmlCharEncCloseFunc(handler);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
|
||||
if (in->encoder != NULL) {
|
||||
/*
|
||||
* Check in case the auto encoding detection triggered
|
||||
* in already.
|
||||
*/
|
||||
if (in->encoder == handler)
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* Note: this is a bit dangerous, but that's what it
|
||||
* takes to use nearly compatible signature for different
|
||||
* encodings.
|
||||
*
|
||||
* FIXME: Encoders might buffer partial byte sequences, so
|
||||
* this probably can't work. We should return an error and
|
||||
* make sure that callers never try to switch the encoding
|
||||
* twice.
|
||||
*/
|
||||
xmlCharEncCloseFunc(in->encoder);
|
||||
in->encoder = handler;
|
||||
return (0);
|
||||
}
|
||||
in->encoder = handler;
|
||||
|
||||
/*
|
||||
* Is there already some content down the pipe to convert ?
|
||||
*/
|
||||
if (xmlBufIsEmpty(in->buffer) == 0) {
|
||||
int processed;
|
||||
unsigned int use;
|
||||
|
||||
/*
|
||||
* Specific handling of the Byte Order Mark for
|
||||
* UTF-16
|
||||
*/
|
||||
if ((handler->name != NULL) &&
|
||||
(!strcmp(handler->name, "UTF-16LE") ||
|
||||
!strcmp(handler->name, "UTF-16")) &&
|
||||
(input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
|
||||
input->cur += 2;
|
||||
}
|
||||
if ((handler->name != NULL) &&
|
||||
(!strcmp(handler->name, "UTF-16BE")) &&
|
||||
(input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
|
||||
input->cur += 2;
|
||||
}
|
||||
/*
|
||||
* Errata on XML-1.0 June 20 2001
|
||||
* Specific handling of the Byte Order Mark for
|
||||
* UTF-8
|
||||
*/
|
||||
if ((handler->name != NULL) &&
|
||||
(!strcmp(handler->name, "UTF-8")) &&
|
||||
(input->cur[0] == 0xEF) &&
|
||||
(input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
|
||||
input->cur += 3;
|
||||
}
|
||||
|
||||
/*
|
||||
* Shrink the current input buffer.
|
||||
* Move it as the raw buffer and create a new input buffer
|
||||
*/
|
||||
processed = input->cur - input->base;
|
||||
xmlBufShrink(in->buffer, processed);
|
||||
in->raw = in->buffer;
|
||||
in->buffer = xmlBufCreate();
|
||||
in->rawconsumed = processed;
|
||||
use = xmlBufUse(in->raw);
|
||||
|
||||
if (ctxt->html) {
|
||||
/*
|
||||
* convert as much as possible of the buffer
|
||||
*/
|
||||
nbchars = xmlCharEncInput(in, 1);
|
||||
} else {
|
||||
/*
|
||||
* convert just enough to get
|
||||
* '<?xml version="1.0" encoding="xxx"?>'
|
||||
* parsed with the autodetected encoding
|
||||
* into the parser reading buffer.
|
||||
*/
|
||||
nbchars = xmlCharEncFirstLineInput(in, len);
|
||||
}
|
||||
xmlBufResetInput(in->buffer, input);
|
||||
if (nbchars < 0) {
|
||||
xmlErrInternal(ctxt,
|
||||
"switching encoding: encoder error\n",
|
||||
NULL);
|
||||
return (-1);
|
||||
}
|
||||
in->rawconsumed += use - xmlBufUse(in->raw);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
Loading…
x
Reference in New Issue
Block a user