diff --git a/HTMLparser.c b/HTMLparser.c
index 1af0190c..fa1fe380 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -321,7 +321,6 @@ htmlNodeInfoPop(htmlParserCtxtPtr ctxt)
************/
#define CUR_CHAR(l) htmlCurrentChar(ctxt, &l)
-#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
#define COPY_BUF(l,b,i,v) \
if (l == 1) b[i++] = v; \
diff --git a/parserInternals.c b/parserInternals.c
index 51a472ee..6c3fb786 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -1029,104 +1029,18 @@ incomplete_sequence:
*/
int
-xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
-{
- if ((len == NULL) || (cur == NULL)) return(0);
- if ((ctxt == NULL) || (ctxt->input == NULL) ||
- ((ctxt->input->flags & XML_INPUT_8_BIT) == 0)) {
- /*
- * We are supposed to handle UTF8, check it's valid
- * From rfc2044: encoding of the Unicode values on UTF-8:
- *
- * UCS-4 range (hex.) UTF-8 octet sequence (binary)
- * 0000 0000-0000 007F 0xxxxxxx
- * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
- * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
- *
- * Check for the 0x110000 limit too
- */
- unsigned char c;
- unsigned int val;
+xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,
+ const xmlChar *cur, int *len) {
+ int c;
- c = *cur;
- if (c & 0x80) {
- if ((cur[1] & 0xc0) != 0x80)
- goto encoding_error;
- if ((c & 0xe0) == 0xe0) {
+ if ((cur == NULL) || (len == NULL))
+ return(0);
- if ((cur[2] & 0xc0) != 0x80)
- goto encoding_error;
- if ((c & 0xf0) == 0xf0) {
- if (((c & 0xf8) != 0xf0) || ((cur[3] & 0xc0) != 0x80))
- goto encoding_error;
- /* 4-byte code */
- *len = 4;
- val = (cur[0] & 0x7) << 18;
- val |= (cur[1] & 0x3f) << 12;
- val |= (cur[2] & 0x3f) << 6;
- val |= cur[3] & 0x3f;
- } else {
- /* 3-byte code */
- *len = 3;
- val = (cur[0] & 0xf) << 12;
- val |= (cur[1] & 0x3f) << 6;
- val |= cur[2] & 0x3f;
- }
- } else {
- /* 2-byte code */
- *len = 2;
- val = (cur[0] & 0x1f) << 6;
- val |= cur[1] & 0x3f;
- }
- if (!IS_CHAR(val)) {
- xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
- "Char 0x%X out of allowed range\n", val);
- }
- return (val);
- } else {
- /* 1-byte code */
- *len = 1;
- return (*cur);
- }
- }
- /*
- * Assume it's a fixed length encoding (1) with
- * a compatible encoding for the ASCII set, since
- * XML constructs only use < 128 chars
- */
- *len = 1;
- return (*cur);
-encoding_error:
+ /* cur is zero-terminated, so we can lie about its length. */
+ *len = 4;
+ c = xmlGetUTF8Char(cur, len);
- /*
- * An encoding problem may arise from a truncated input buffer
- * splitting a character in the middle. In that case do not raise
- * an error but return 0 to indicate an end of stream problem
- */
- if ((ctxt == NULL) || (ctxt->input == NULL) ||
- (ctxt->input->end - ctxt->input->cur < 4)) {
- *len = 0;
- return(0);
- }
- /*
- * If we detect an UTF8 error that probably mean that the
- * input encoding didn't get properly advertised in the
- * declaration header. Report the error and switch the encoding
- * to ISO-Latin-1 (if you don't like this policy, just declare the
- * encoding !)
- */
- {
- char buffer[150];
-
- snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
- ctxt->input->cur[0], ctxt->input->cur[1],
- ctxt->input->cur[2], ctxt->input->cur[3]);
- __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
- "Input is not proper UTF-8, indicate encoding !\n%s",
- BAD_CAST buffer, NULL);
- }
- *len = 1;
- return (*cur);
+ return((c < 0) ? 0 : c);
}
/**
diff --git a/xinclude.c b/xinclude.c
index 8eaf4d85..9c144a1e 100644
--- a/xinclude.c
+++ b/xinclude.c
@@ -1715,8 +1715,9 @@ xmlXIncludeLoadTxt(xmlXIncludeCtxtPtr ctxt, const xmlChar *url,
int cur;
int l;
- cur = xmlStringCurrentChar(NULL, &content[i], &l);
- if (!IS_CHAR(cur)) {
+ l = len - i;
+ cur = xmlGetUTF8Char(&content[i], &l);
+ if ((cur < 0) || (!IS_CHAR(cur))) {
xmlXIncludeErr(ctxt, ref->elem, XML_XINCLUDE_INVALID_CHAR,
"%s contains invalid char\n", URL);
goto error;