io: Rearrange code in xmlSwitchInputEncodingInt

No functional change.
2025-03-09 04:58:16 +03:00 · 2022-11-13 19:44:00 +01:00 · 2022-11-13 19:44:00 +01:00 · 6b57061909
commit 6b57061909
parent 1ef4938fd0
1 changed file with 99 additions and 107 deletions
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -1021,118 +1021,14 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
                          xmlCharEncodingHandlerPtr handler, int len)
 {
    int nbchars;
+    xmlParserInputBufferPtr in;

    if (handler == NULL)
        return (-1);
    if (input == NULL)
        return (-1);
-    if (input->buf != NULL) {
-	ctxt->charset = XML_CHAR_ENCODING_UTF8;
-
-        if (input->buf->encoder != NULL) {
-            /*
-             * Check in case the auto encoding detection triggered
-             * in already.
-             */
-            if (input->buf->encoder == handler)
-                return (0);
-
-            /*
-             * "UTF-16" can be used for both LE and BE
-             if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
-             BAD_CAST "UTF-16", 6)) &&
-             (!xmlStrncmp(BAD_CAST handler->name,
-             BAD_CAST "UTF-16", 6))) {
-             return(0);
-             }
-             */
-
-            /*
-             * Note: this is a bit dangerous, but that's what it
-             * takes to use nearly compatible signature for different
-             * encodings.
-             *
-             * FIXME: Encoders might buffer partial byte sequences, so
-             * this probably can't work. We should return an error and
-             * make sure that callers never try to switch the encoding
-             * twice.
-             */
-            xmlCharEncCloseFunc(input->buf->encoder);
-            input->buf->encoder = handler;
-            return (0);
-        }
-        input->buf->encoder = handler;
-
-        /*
-         * Is there already some content down the pipe to convert ?
-         */
-        if (xmlBufIsEmpty(input->buf->buffer) == 0) {
-            int processed;
-	    unsigned int use;
-
-            /*
-             * Specific handling of the Byte Order Mark for
-             * UTF-16
-             */
-            if ((handler->name != NULL) &&
-                (!strcmp(handler->name, "UTF-16LE") ||
-                 !strcmp(handler->name, "UTF-16")) &&
-                (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
-                input->cur += 2;
-            }
-            if ((handler->name != NULL) &&
-                (!strcmp(handler->name, "UTF-16BE")) &&
-                (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
-                input->cur += 2;
-            }
-            /*
-             * Errata on XML-1.0 June 20 2001
-             * Specific handling of the Byte Order Mark for
-             * UTF-8
-             */
-            if ((handler->name != NULL) &&
-                (!strcmp(handler->name, "UTF-8")) &&
-                (input->cur[0] == 0xEF) &&
-                (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
-                input->cur += 3;
-            }
-
-            /*
-             * Shrink the current input buffer.
-             * Move it as the raw buffer and create a new input buffer
-             */
-            processed = input->cur - input->base;
-            xmlBufShrink(input->buf->buffer, processed);
-            input->buf->raw = input->buf->buffer;
-            input->buf->buffer = xmlBufCreate();
-	    input->buf->rawconsumed = processed;
-	    use = xmlBufUse(input->buf->raw);
-
-            if (ctxt->html) {
-                /*
-                 * convert as much as possible of the buffer
-                 */
-                nbchars = xmlCharEncInput(input->buf, 1);
-            } else {
-                /*
-                 * convert just enough to get
-                 * '<?xml version="1.0" encoding="xxx"?>'
-                 * parsed with the autodetected encoding
-                 * into the parser reading buffer.
-                 */
-                nbchars = xmlCharEncFirstLineInput(input->buf, len);
-            }
-            xmlBufResetInput(input->buf->buffer, input);
-            if (nbchars < 0) {
-                xmlErrInternal(ctxt,
-                               "switching encoding: encoder error\n",
-                               NULL);
-                return (-1);
-            }
-	    input->buf->rawconsumed += use - xmlBufUse(input->buf->raw);
-        }
-        return (0);
-    } else {
+    in = input->buf;
+    if (in == NULL) {
 	xmlErrInternal(ctxt,
                "static memory buffer doesn't support encoding\n", NULL);
        /*
@@ -1143,6 +1039,102 @@ xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
        xmlCharEncCloseFunc(handler);
 	return (-1);
    }
+
+    ctxt->charset = XML_CHAR_ENCODING_UTF8;
+
+    if (in->encoder != NULL) {
+        /*
+         * Check in case the auto encoding detection triggered
+         * in already.
+         */
+        if (in->encoder == handler)
+            return (0);
+
+        /*
+         * Note: this is a bit dangerous, but that's what it
+         * takes to use nearly compatible signature for different
+         * encodings.
+         *
+         * FIXME: Encoders might buffer partial byte sequences, so
+         * this probably can't work. We should return an error and
+         * make sure that callers never try to switch the encoding
+         * twice.
+         */
+        xmlCharEncCloseFunc(in->encoder);
+        in->encoder = handler;
+        return (0);
+    }
+    in->encoder = handler;
+
+    /*
+     * Is there already some content down the pipe to convert ?
+     */
+    if (xmlBufIsEmpty(in->buffer) == 0) {
+        int processed;
+        unsigned int use;
+
+        /*
+         * Specific handling of the Byte Order Mark for
+         * UTF-16
+         */
+        if ((handler->name != NULL) &&
+            (!strcmp(handler->name, "UTF-16LE") ||
+             !strcmp(handler->name, "UTF-16")) &&
+            (input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
+            input->cur += 2;
+        }
+        if ((handler->name != NULL) &&
+            (!strcmp(handler->name, "UTF-16BE")) &&
+            (input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
+            input->cur += 2;
+        }
+        /*
+         * Errata on XML-1.0 June 20 2001
+         * Specific handling of the Byte Order Mark for
+         * UTF-8
+         */
+        if ((handler->name != NULL) &&
+            (!strcmp(handler->name, "UTF-8")) &&
+            (input->cur[0] == 0xEF) &&
+            (input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
+            input->cur += 3;
+        }
+
+        /*
+         * Shrink the current input buffer.
+         * Move it as the raw buffer and create a new input buffer
+         */
+        processed = input->cur - input->base;
+        xmlBufShrink(in->buffer, processed);
+        in->raw = in->buffer;
+        in->buffer = xmlBufCreate();
+        in->rawconsumed = processed;
+        use = xmlBufUse(in->raw);
+
+        if (ctxt->html) {
+            /*
+             * convert as much as possible of the buffer
+             */
+            nbchars = xmlCharEncInput(in, 1);
+        } else {
+            /*
+             * convert just enough to get
+             * '<?xml version="1.0" encoding="xxx"?>'
+             * parsed with the autodetected encoding
+             * into the parser reading buffer.
+             */
+            nbchars = xmlCharEncFirstLineInput(in, len);
+        }
+        xmlBufResetInput(in->buffer, input);
+        if (nbchars < 0) {
+            xmlErrInternal(ctxt,
+                           "switching encoding: encoder error\n",
+                           NULL);
+            return (-1);
+        }
+        in->rawconsumed += use - xmlBufUse(in->raw);
+    }
+    return (0);
 }

 /**