mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-02-05 05:47:00 +03:00
fixed a problem with xmlCheckUTF8 reported on the mailing list by Julius Mittenzwei
* xmlstring.c: fixed a problem with xmlCheckUTF8 reported on the mailing list by Julius Mittenzwei
This commit is contained in:
parent
da3336f2d1
commit
3ffe90ea1c
@ -1,3 +1,8 @@
|
|||||||
|
Fri Aug 27 18:32:24 PST 2004 William Brack <wbrack@mmm.com.hk>
|
||||||
|
|
||||||
|
* xmlstring.c: fixed a problem with xmlCheckUTF8 reported on the
|
||||||
|
mailing list by Julius Mittenzwei
|
||||||
|
|
||||||
Fri Aug 27 00:13:39 CEST 2004 Daniel Veillard <daniel@veillard.com>
|
Fri Aug 27 00:13:39 CEST 2004 Daniel Veillard <daniel@veillard.com>
|
||||||
|
|
||||||
* libxml.spec.in: added BuildRequires: zlib-devel, fixes
|
* libxml.spec.in: added BuildRequires: zlib-devel, fixes
|
||||||
|
29
xmlstring.c
29
xmlstring.c
@ -764,13 +764,36 @@ xmlCheckUTF8(const unsigned char *utf)
|
|||||||
int ix;
|
int ix;
|
||||||
unsigned char c;
|
unsigned char c;
|
||||||
|
|
||||||
for (ix = 0; (c = utf[ix]);) {
|
/*
|
||||||
if (c & 0x80) {
|
* utf is a string of 1, 2, 3 or 4 bytes. The valid strings
|
||||||
if ((c & 0xc0) != 0x80 || (utf[ix + 1] & 0xc0) != 0x80)
|
* are as follows (in "bit format"):
|
||||||
|
* 0xxxxxxx valid 1-byte
|
||||||
|
* 110xxxxx 10xxxxxx valid 2-byte
|
||||||
|
* 1110xxxx 10xxxxxx 10xxxxxx valid 3-byte
|
||||||
|
* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx valid 4-byte
|
||||||
|
*/
|
||||||
|
for (ix = 0; (c = utf[ix]);) { /* string is 0-terminated */
|
||||||
|
if (c & 0x80) { /* if it is not a single byte */
|
||||||
|
/*
|
||||||
|
* We know the first byte starts with '1', so check
|
||||||
|
* the following bits and bytes.
|
||||||
|
*
|
||||||
|
* if the first byte does *not* start with 1 1, or the
|
||||||
|
* second byte does *not* start with 1 0 it's an error
|
||||||
|
*/
|
||||||
|
if (((c & 0xc0) != 0xc0) || ((utf[ix + 1] & 0xc0) != 0x80))
|
||||||
return(0);
|
return(0);
|
||||||
|
/*
|
||||||
|
* if the first three bits are set then the 3rd byte *must* start
|
||||||
|
* with 1 0
|
||||||
|
*/
|
||||||
if ((c & 0xe0) == 0xe0) {
|
if ((c & 0xe0) == 0xe0) {
|
||||||
if ((utf[ix + 2] & 0xc0) != 0x80)
|
if ((utf[ix + 2] & 0xc0) != 0x80)
|
||||||
return(0);
|
return(0);
|
||||||
|
/*
|
||||||
|
* if the first four bits are set then the fifth bit
|
||||||
|
* must not be set, and the 4th byte *must* start with 1 0
|
||||||
|
*/
|
||||||
if ((c & 0xf0) == 0xf0) {
|
if ((c & 0xf0) == 0xf0) {
|
||||||
if ((c & 0xf8) != 0xf0 || (utf[ix + 3] & 0xc0) != 0x80)
|
if ((c & 0xf8) != 0xf0 || (utf[ix + 3] & 0xc0) != 0x80)
|
||||||
return(0);
|
return(0);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user