mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-03-24 06:50:08 +03:00
entities: Rework serialization of numeric character references
This commit is contained in:
parent
8d1606265d
commit
1cfc5b8089
@ -43,6 +43,7 @@
|
||||
|
||||
#include "private/buf.h"
|
||||
#include "private/enc.h"
|
||||
#include "private/entities.h"
|
||||
#include "private/error.h"
|
||||
|
||||
#ifdef LIBXML_ICU_ENABLED
|
||||
@ -1744,8 +1745,7 @@ retry:
|
||||
* and continue the transcoding phase, hoping the error
|
||||
* did not mangle the encoder state.
|
||||
*/
|
||||
charrefLen = snprintf((char *) &charref[0], sizeof(charref),
|
||||
"&#%d;", cur);
|
||||
charrefLen = xmlSerializeDecCharRef((char *) charref, cur);
|
||||
xmlBufGrow(out, charrefLen * 4);
|
||||
c_out = xmlBufAvail(out);
|
||||
c_in = charrefLen;
|
||||
@ -1856,8 +1856,7 @@ retry:
|
||||
* and continue the transcoding phase, hoping the error
|
||||
* did not mangle the encoder state.
|
||||
*/
|
||||
charrefLen = snprintf((char *) &charref[0], sizeof(charref),
|
||||
"&#%d;", cur);
|
||||
charrefLen = xmlSerializeDecCharRef((char *) charref, cur);
|
||||
xmlBufferShrink(in, len);
|
||||
xmlBufferGrow(out, charrefLen * 4);
|
||||
written = out->size - out->use - 1;
|
||||
|
69
entities.c
69
entities.c
@ -512,6 +512,71 @@ xmlGetDocEntity(const xmlDoc *doc, const xmlChar *name) {
|
||||
return(xmlGetPredefinedEntity(name));
|
||||
}
|
||||
|
||||
/**
 * xmlSerializeHexCharRef:
 * @buf:  output buffer with room for at least 10 bytes
 * @val:  code point to serialize
 *
 * Writes a hexadecimal character reference such as "&#xA0;" to @buf,
 * using uppercase digits and no leading zeros. A value of zero is
 * written as "&#x0;".
 *
 * Only the low 24 bits of @val are looked at, so at most six hex digits
 * (10 bytes including "&#x" and ";") are produced. No terminating zero
 * byte is written; callers must use the returned length.
 *
 * Returns the number of bytes written to @buf.
 */
int
xmlSerializeHexCharRef(char *buf, int val) {
    char *out = buf;
    int shift = 0, bits;

    *out++ = '&';
    *out++ = '#';
    *out++ = 'x';

    /*
     * Locate the most significant non-zero nibble: first select the
     * highest non-zero byte (bit offset 16, 8 or 0), then add 4 if the
     * upper nibble of that byte is set.
     */
    bits = val;
    if (bits & 0xFF0000) {
        shift = 16;
        bits &= 0xFF0000;
    } else if (bits & 0x00FF00) {
        shift = 8;
        bits &= 0x00FF00;
    }
    if (bits & 0xF0F0F0) {
        shift += 4;
    }

    /* Emit nibbles from the most significant one down to bit 0. */
    do {
        int d = (val >> shift) & 0x0F;

        if (d < 10)
            *out++ = '0' + d;
        else
            *out++ = 'A' + (d - 10);

        shift -= 4;
    } while (shift >= 0);

    *out++ = ';';

    return(out - buf);
}
|
||||
|
||||
/**
 * xmlSerializeDecCharRef:
 * @buf:  output buffer with room for at least 10 bytes
 * @val:  code point to serialize, must be non-negative
 *
 * Writes a decimal character reference such as "&#160;" to @buf
 * without leading zeros. A value of zero is written as "&#0;".
 *
 * At most seven decimal digits are produced (10 bytes including "&#"
 * and ";"); for values of 10000000 and above only the seven least
 * significant digits are written. No terminating zero byte is written;
 * callers must use the returned length.
 *
 * Returns the number of bytes written to @buf.
 */
int
xmlSerializeDecCharRef(char *buf, int val) {
    char *out = buf;
    int len, i;

    *out++ = '&';
    *out++ = '#';

    /* Determine the number of decimal digits, capped at seven. */
    if (val < 100) {
        len = (val < 10) ? 1 : 2;
    } else if (val < 10000) {
        len = (val < 1000) ? 3 : 4;
    } else if (val < 1000000) {
        len = (val < 100000) ? 5 : 6;
    } else {
        len = 7;
    }

    /* Fill the digits right to left, least significant first. */
    for (i = len - 1; i >= 0; i--) {
        out[i] = '0' + val % 10;
        val /= 10;
    }

    out[len] = ';';

    /* "&#" (2 bytes) + digits + ";" (1 byte) */
    return(len + 3);
}
|
||||
|
||||
static const char xmlEscapeSafe[128] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
@ -540,7 +605,7 @@ xmlEscapeText(const xmlChar *text, int flags) {
|
||||
unescaped = cur;
|
||||
|
||||
while (*cur != '\0') {
|
||||
char buf[13];
|
||||
char buf[12];
|
||||
const xmlChar *end;
|
||||
const xmlChar *repl;
|
||||
size_t used;
|
||||
@ -618,7 +683,7 @@ xmlEscapeText(const xmlChar *text, int flags) {
|
||||
val = 0xFFFD;
|
||||
}
|
||||
|
||||
replSize = snprintf(buf, sizeof(buf), "&#x%X;", val);
|
||||
replSize = xmlSerializeHexCharRef(buf, val);
|
||||
repl = BAD_CAST buf;
|
||||
} else if ((flags & XML_ESCAPE_ALLOW_INVALID) ||
|
||||
(c >= 0x20) ||
|
||||
|
@ -27,6 +27,11 @@
|
||||
#define XML_ESCAPE_QUOT (1u << 3)
|
||||
#define XML_ESCAPE_ALLOW_INVALID (1u << 4)
|
||||
|
||||
XML_HIDDEN int
|
||||
xmlSerializeHexCharRef(char *buf, int val);
|
||||
XML_HIDDEN int
|
||||
xmlSerializeDecCharRef(char *buf, int val);
|
||||
|
||||
XML_HIDDEN xmlChar *
|
||||
xmlEscapeText(const xmlChar *text, int flags);
|
||||
|
||||
|
82
xmlsave.c
82
xmlsave.c
@ -125,51 +125,10 @@ xmlSaveErr(xmlOutputBufferPtr out, int code, xmlNodePtr node,
|
||||
* Special escaping routines *
|
||||
* *
|
||||
************************************************************************/
|
||||
/**
 * xmlSerializeHexCharRef:
 * @out:  output buffer with room for at least 11 bytes
 * @val:  code point to serialize
 *
 * Writes a zero-terminated hexadecimal character reference such as
 * "&#xA0;" to @out, using uppercase digits and no leading zeros.
 *
 * The number of digits is fixed up front (one to six) and exactly that
 * many are written, so a value of zero yields "&#x0;" and writes never
 * leave the buffer. Values that need more than six hex digits,
 * including negative values, are truncated to their low 24 bits.
 *
 * Returns a pointer to the terminating zero byte.
 */
static char *
xmlSerializeHexCharRef(char *out, int val) {
    static const char hexDigits[] = "0123456789ABCDEF";
    unsigned int rest = (unsigned int) val;
    int width, i;

    *out++ = '&';
    *out++ = '#';
    *out++ = 'x';

    if (rest < 0x10) width = 1;
    else if (rest < 0x100) width = 2;
    else if (rest < 0x1000) width = 3;
    else if (rest < 0x10000) width = 4;
    else if (rest < 0x100000) width = 5;
    else width = 6;

    /*
     * Fill exactly 'width' digits from right to left. Unlike a
     * "while (val > 0)" loop, this also emits a digit for zero and can
     * never write in front of the digit area.
     */
    for (i = width - 1; i >= 0; i--) {
        out[i] = hexDigits[rest & 0xF];
        rest >>= 4;
    }
    out += width;

    *out++ = ';';
    *out = 0;
    return(out);
}
|
||||
|
||||
static void
|
||||
xmlSerializeText(xmlOutputBufferPtr buf, const xmlChar *string,
|
||||
unsigned flags) {
|
||||
char tmp[12];
|
||||
const char *base, *cur;
|
||||
|
||||
if (string == NULL)
|
||||
@ -178,33 +137,12 @@ xmlSerializeText(xmlOutputBufferPtr buf, const xmlChar *string,
|
||||
base = cur = (const char *) string;
|
||||
|
||||
while (*cur != 0) {
|
||||
char tempBuf[12];
|
||||
const char *repl = NULL;
|
||||
int replSize = 0;
|
||||
int chunkSize = 1;
|
||||
int c = (unsigned char) *cur;
|
||||
|
||||
if ((c >= 0x80) && (flags & XML_ESCAPE_NON_ASCII)) {
|
||||
int val = 0, l = 4;
|
||||
|
||||
if (base != cur)
|
||||
xmlOutputBufferWrite(buf, cur - base, base);
|
||||
|
||||
val = xmlGetUTF8Char((const xmlChar *) cur, &l);
|
||||
if (val < 0) {
|
||||
val = 0xFFFD;
|
||||
cur++;
|
||||
} else {
|
||||
if (!IS_CHAR(val))
|
||||
val = 0xFFFD;
|
||||
cur += l;
|
||||
}
|
||||
|
||||
xmlSerializeHexCharRef(tmp, val);
|
||||
xmlOutputBufferWriteString(buf, tmp);
|
||||
base = cur;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (c) {
|
||||
case '\t':
|
||||
if (flags & XML_ESCAPE_ATTR) {
|
||||
@ -255,6 +193,20 @@ xmlSerializeText(xmlOutputBufferPtr buf, const xmlChar *string,
|
||||
if (c < 0x20) {
|
||||
repl = "&#xFFFD;";
|
||||
replSize = 8;
|
||||
} else if ((c >= 0x80) && (flags & XML_ESCAPE_NON_ASCII)) {
|
||||
int val = 0, l = 4;
|
||||
|
||||
val = xmlGetUTF8Char((const xmlChar *) cur, &l);
|
||||
if (val < 0) {
|
||||
val = 0xFFFD;
|
||||
} else {
|
||||
if (!IS_CHAR(val))
|
||||
val = 0xFFFD;
|
||||
chunkSize = l;
|
||||
}
|
||||
|
||||
replSize = xmlSerializeHexCharRef(tempBuf, val);
|
||||
repl = tempBuf;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -265,7 +217,7 @@ xmlSerializeText(xmlOutputBufferPtr buf, const xmlChar *string,
|
||||
if (base != cur)
|
||||
xmlOutputBufferWrite(buf, cur - base, base);
|
||||
xmlOutputBufferWrite(buf, replSize, repl);
|
||||
cur++;
|
||||
cur += chunkSize;
|
||||
base = cur;
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user