1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2024-12-24 21:33:51 +03:00

string: Don't validate UTF-8 in xmlUTF8Strsub

Most string functions can assume valid UTF-8. In order to detect malloc
failures reliably, xmlUTF8Strsub should only return NULL if the start
index is out of bounds or a memory allocation failed.
This commit is contained in:
Nick Wellnhofer 2024-04-02 15:25:28 +02:00
parent 86c27206f9
commit 650f842dbe

View File

@ -1106,8 +1106,9 @@ xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
* Create a substring from a given UTF-8 string
* Note: positions are given in units of UTF-8 chars
*
* Returns a pointer to a newly created string
* or NULL if any problem
* Returns a pointer to a newly created string or NULL if the
* start index is out of bounds or a memory allocation failed.
* If len is too large, the result is truncated.
*/
xmlChar *
@ -1122,16 +1123,18 @@ xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
/*
* Skip over any leading chars
*/
for (i = 0;i < start;i++) {
if ((ch=*utf++) == 0) return(NULL);
if ( ch & 0x80 ) {
/* if not simple ascii, verify proper format */
if ( (ch & 0xc0) != 0xc0 )
return(NULL);
/* then skip over remaining bytes for this char */
while ( (ch <<= 1) & 0x80 )
if ( (*utf++ & 0xc0) != 0x80 )
for (i = 0; i < start; i++) {
ch = *utf++;
if (ch == 0)
return(NULL);
/* skip over remaining bytes for this char */
if (ch & 0x80) {
ch <<= 1;
while (ch & 0x80) {
if (*utf++ == 0)
return(NULL);
ch <<= 1;
}
}
}