From 650f842dbe131496a28284a26ed0646cd7e4af80 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Tue, 2 Apr 2024 15:25:28 +0200 Subject: [PATCH] string: Don't validate UTF-8 in xmlUTF8Strsub Most string functions can assume valid UTF-8. In order to detect malloc failures reliably, xmlUTF8Strsub should only return NULL if the start index is out of bounds or a memory allocation failed. --- xmlstring.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/xmlstring.c b/xmlstring.c index 8b088025..72e3453e 100644 --- a/xmlstring.c +++ b/xmlstring.c @@ -1106,8 +1106,9 @@ xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) { * Create a substring from a given UTF-8 string * Note: positions are given in units of UTF-8 chars * - * Returns a pointer to a newly created string - * or NULL if any problem + * Returns a pointer to a newly created string or NULL if the + * start index is out of bounds or a memory allocation failed. + * If len is too large, the result is truncated. */ xmlChar * @@ -1122,16 +1123,18 @@ xmlUTF8Strsub(const xmlChar *utf, int start, int len) { /* * Skip over any leading chars */ - for (i = 0;i < start;i++) { - if ((ch=*utf++) == 0) return(NULL); - if ( ch & 0x80 ) { - /* if not simple ascii, verify proper format */ - if ( (ch & 0xc0) != 0xc0 ) - return(NULL); - /* then skip over remaining bytes for this char */ - while ( (ch <<= 1) & 0x80 ) - if ( (*utf++ & 0xc0) != 0x80 ) + for (i = 0; i < start; i++) { + ch = *utf++; + if (ch == 0) + return(NULL); + /* skip over remaining bytes for this char */ + if (ch & 0x80) { + ch <<= 1; + while (ch & 0x80) { + if (*utf++ == 0) return(NULL); + ch <<= 1; + } } }