diff --git a/HTMLparser.c b/HTMLparser.c
index fa1fe380..9c3359f8 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -4851,6 +4851,14 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
 
     xmlDetectEncoding(ctxt);
 
+    /*
+     * This is wrong but matches long-standing behavior. In most cases,
+     * a document starting with an XML declaration will specify UTF-8.
+     */
+    if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
+        (xmlStrncmp(ctxt->input->cur, BAD_CAST "<?xm", 4) == 0))
+        xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_UTF8);
+
     /*
      * Wipe out everything which is before the first '<'
      */
@@ -5408,6 +5416,16 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
 		 */
 	        goto done;
             case XML_PARSER_START:
+                /*
+                 * This is wrong but matches long-standing behavior. In most
+                 * cases, a document starting with an XML declaration will
+                 * specify UTF-8.
+                 */
+                if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
+                    (xmlStrncmp(ctxt->input->cur, BAD_CAST "<?xm", 4) == 0)) {
+                    xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_UTF8);
+                }
+
 	        /*
 		 * Very first chars read from the document flow.
 		 */
diff --git a/result/HTML/xml-declaration-1.html b/result/HTML/xml-declaration-1.html
new file mode 100644
index 00000000..8c9ebe39
--- /dev/null
+++ b/result/HTML/xml-declaration-1.html
@@ -0,0 +1,4 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+<?xml encoding="UTF-8"><html><body>
+<p>&ouml;&auml;&uuml;&szlig;</p>
+</body></html>
diff --git a/result/HTML/xml-declaration-1.html.sax b/result/HTML/xml-declaration-1.html.sax
new file mode 100644
index 00000000..83fe8eb6
--- /dev/null
+++ b/result/HTML/xml-declaration-1.html.sax
@@ -0,0 +1,13 @@
+SAX.setDocumentLocator()
+SAX.startDocument()
+SAX.processingInstruction(xml, encoding="UTF-8")
+SAX.startElement(html)
+SAX.startElement(body)
+SAX.startElement(p)
+SAX.characters(&ouml;&auml;&uuml;&szlig;, 8)
+SAX.endElement(p)
+SAX.characters(
+, 1)
+SAX.endElement(body)
+SAX.endElement(html)
+SAX.endDocument()
diff --git a/runtest.c b/runtest.c
index ff65fe86..c78eec81 100644
--- a/runtest.c
+++ b/runtest.c
@@ -2140,6 +2140,12 @@ pushBoundaryTest(const char *filename, const char *result,
     int cur = 0;
     unsigned long avail, oldConsumed, consumed;
 
+    /*
+     * HTML encoding detection doesn't work when data is fed bytewise.
+     */
+    if (strcmp(filename, "./test/HTML/xml-declaration-1.html") == 0)
+        return(0);
+
     /*
      * If the parser made progress, check that exactly one construct was
      * processed and that the input buffer is (almost) empty.
diff --git a/test/HTML/xml-declaration-1.html b/test/HTML/xml-declaration-1.html
new file mode 100644
index 00000000..1950be71
--- /dev/null
+++ b/test/HTML/xml-declaration-1.html
@@ -0,0 +1,2 @@
+<?xml encoding="UTF-8">
+<p>öäüß</p>