1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2024-10-26 12:25:09 +03:00

Avoid potential integer overflow in xmlstring.c

For historical reasons, the string API operates with int indices which
can overflow, especially on 64-bit systems. libxml2 has always made the
tacit assumption that strings will never be larger than INT_MAX bytes.
It should be considered a bug if any part of the code can produce
larger strings, whether they are externally visible or not.

Likewise, API users are expected not to supply strings larger than
INT_MAX bytes. This requirement isn't documented. But even if it were,
we must handle larger strings passed in by accident without causing
memory errors.

- xmlStrndup, xmlCharStrndup, xmlUTF8Strndup
  Avoid integer overflow if len == INT_MAX.

- xmlStrlen, xmlUTF8Strlen, xmlUTF8Strsize, xmlUTF8Strloc
  Avoid integer overflow by using size_t for index. If an input string
  larger than INT_MAX bytes is detected, these functions now return 0
  instead of a wrong and possibly negative value.

- xmlCheckUTF8
  Avoid integer overflow by limiting index range.

- xmlStrncat, xmlStrncatNew, xmlEscapeFormatString
  Avoid integer overflow. Return NULL instead of producing strings
  larger than INT_MAX bytes.
This commit is contained in:
Nick Wellnhofer 2022-01-28 16:27:12 +01:00
parent 8f5ccada05
commit 6010a5369f

View File

@ -18,6 +18,7 @@
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <libxml/xmlmemory.h>
#include <libxml/parserInternals.h>
#include <libxml/xmlstring.h>
@ -42,7 +43,7 @@ xmlStrndup(const xmlChar *cur, int len) {
xmlChar *ret;
if ((cur == NULL) || (len < 0)) return(NULL);
ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
ret = (xmlChar *) xmlMallocAtomic(((size_t) len + 1) * sizeof(xmlChar));
if (ret == NULL) {
xmlErrMemory(NULL, NULL);
return(NULL);
@ -87,7 +88,7 @@ xmlCharStrndup(const char *cur, int len) {
xmlChar *ret;
if ((cur == NULL) || (len < 0)) return(NULL);
ret = (xmlChar *) xmlMallocAtomic((len + 1) * sizeof(xmlChar));
ret = (xmlChar *) xmlMallocAtomic(((size_t) len + 1) * sizeof(xmlChar));
if (ret == NULL) {
xmlErrMemory(NULL, NULL);
return(NULL);
@ -423,14 +424,14 @@ xmlStrsub(const xmlChar *str, int start, int len) {
int
xmlStrlen(const xmlChar *str) {
int len = 0;
size_t len = 0;
if (str == NULL) return(0);
while (*str != 0) { /* non input consuming */
str++;
len++;
}
return(len);
return(len > INT_MAX ? 0 : len);
}
/**
@ -460,9 +461,9 @@ xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
return(xmlStrndup(add, len));
size = xmlStrlen(cur);
if (size < 0)
if ((size < 0) || (size > INT_MAX - len))
return(NULL);
ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
ret = (xmlChar *) xmlRealloc(cur, ((size_t) size + len + 1) * sizeof(xmlChar));
if (ret == NULL) {
xmlErrMemory(NULL, NULL);
return(cur);
@ -500,9 +501,9 @@ xmlStrncatNew(const xmlChar *str1, const xmlChar *str2, int len) {
return(xmlStrndup(str2, len));
size = xmlStrlen(str1);
if (size < 0)
if ((size < 0) || (size > INT_MAX - len))
return(NULL);
ret = (xmlChar *) xmlMalloc((size + len + 1) * sizeof(xmlChar));
ret = (xmlChar *) xmlMalloc(((size_t) size + len + 1) * sizeof(xmlChar));
if (ret == NULL) {
xmlErrMemory(NULL, NULL);
return(xmlStrndup(str1, size));
@ -667,7 +668,7 @@ xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
*/
int
xmlUTF8Strlen(const xmlChar *utf) {
int ret = 0;
size_t ret = 0;
if (utf == NULL)
return(-1);
@ -694,7 +695,7 @@ xmlUTF8Strlen(const xmlChar *utf) {
}
ret++;
}
return(ret);
return(ret > INT_MAX ? 0 : ret);
}
/**
@ -796,26 +797,28 @@ xmlCheckUTF8(const unsigned char *utf)
* 1110xxxx 10xxxxxx 10xxxxxx valid 3-byte
* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx valid 4-byte
*/
for (ix = 0; (c = utf[ix]);) { /* string is 0-terminated */
while ((c = utf[0])) { /* string is 0-terminated */
ix = 0;
if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 0 */
ix++;
ix = 1;
} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
if ((utf[ix+1] & 0xc0 ) != 0x80)
if ((utf[1] & 0xc0 ) != 0x80)
return 0;
ix += 2;
ix = 2;
} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
if (((utf[ix+1] & 0xc0) != 0x80) ||
((utf[ix+2] & 0xc0) != 0x80))
if (((utf[1] & 0xc0) != 0x80) ||
((utf[2] & 0xc0) != 0x80))
return 0;
ix += 3;
ix = 3;
} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
if (((utf[ix+1] & 0xc0) != 0x80) ||
((utf[ix+2] & 0xc0) != 0x80) ||
((utf[ix+3] & 0xc0) != 0x80))
if (((utf[1] & 0xc0) != 0x80) ||
((utf[2] & 0xc0) != 0x80) ||
((utf[3] & 0xc0) != 0x80))
return 0;
ix += 4;
ix = 4;
} else /* unknown encoding */
return 0;
utf += ix;
}
return(1);
}
@ -836,6 +839,7 @@ int
xmlUTF8Strsize(const xmlChar *utf, int len) {
const xmlChar *ptr=utf;
int ch;
size_t ret;
if (utf == NULL)
return(0);
@ -852,7 +856,8 @@ xmlUTF8Strsize(const xmlChar *utf, int len) {
ptr++;
}
}
return (ptr - utf);
ret = ptr - utf;
return (ret > INT_MAX ? 0 : ret);
}
@ -872,11 +877,8 @@ xmlUTF8Strndup(const xmlChar *utf, int len) {
if ((utf == NULL) || (len < 0)) return(NULL);
i = xmlUTF8Strsize(utf, len);
ret = (xmlChar *) xmlMallocAtomic((i + 1) * sizeof(xmlChar));
ret = (xmlChar *) xmlMallocAtomic(((size_t) i + 1) * sizeof(xmlChar));
if (ret == NULL) {
xmlGenericError(xmlGenericErrorContext,
"malloc of %ld byte failed\n",
(len + 1) * (long)sizeof(xmlChar));
return(NULL);
}
memcpy(ret, utf, i * sizeof(xmlChar));
@ -928,14 +930,15 @@ xmlUTF8Strpos(const xmlChar *utf, int pos) {
*/
int
xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
int i, size;
size_t i;
int size;
int ch;
if (utf==NULL || utfchar==NULL) return -1;
size = xmlUTF8Strsize(utfchar, 1);
for(i=0; (ch=*utf) != 0; i++) {
if (xmlStrncmp(utf, utfchar, size)==0)
return(i);
return(i > INT_MAX ? 0 : i);
utf++;
if ( ch & 0x80 ) {
/* if not simple ascii, verify proper format */
@ -1022,6 +1025,8 @@ xmlEscapeFormatString(xmlChar **msg)
if (count == 0)
return(*msg);
if ((count > INT_MAX) || (msgLen > INT_MAX - count))
return(NULL);
resultLen = msgLen + count + 1;
result = (xmlChar *) xmlMallocAtomic(resultLen * sizeof(xmlChar));
if (result == NULL) {