mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-03-27 18:50:07 +03:00
Enhanced the handling of UTF-16, UTF-16LE and UTF-16BE encodings. Now
* encoding.c, include/libxml/encoding.h: Enhanced the handling of UTF-16, UTF-16LE and UTF-16BE encodings. Now UTF-16 output is handled internally by default, with proper BOM and UTF-16LE encoding. Native UTF-16LE and UTF-16BE encoding will not generate a BOM on output, and will be automatically recognized on input. * test/utf16lebom.xml, test/utf16bebom.xml, result/utf16?ebom*: added regression tests for above.
This commit is contained in:
parent
ae8c9642d8
commit
f9415e4989
10
ChangeLog
10
ChangeLog
@ -1,3 +1,13 @@
|
||||
Fri Nov 28 17:28:47 HKT 2003 William Brack <wbrack@mmm.com.hk>
|
||||
|
||||
* encoding.c, include/libxml/encoding.h: Enhanced the handling of UTF-16,
|
||||
UTF-16LE and UTF-16BE encodings. Now UTF-16 output is handled internally
|
||||
by default, with proper BOM and UTF-16LE encoding. Native UTF-16LE and
|
||||
UTF-16BE encoding will not generate a BOM on output, and will be
|
||||
automatically recognized on input.
|
||||
* test/utf16lebom.xml, test/utf16bebom.xml, result/utf16?ebom*: added
|
||||
regression tests for above.
|
||||
|
||||
Thu Nov 27 19:25:10 CET 2003 Igor Zlatkovic <igor@zlatkovic.com>
|
||||
|
||||
* win32/Makefile.* win32/configure.js: Modified to allow coexistent
|
||||
|
158
encoding.c
158
encoding.c
@ -92,7 +92,7 @@ static int xmlLittleEndian = 1;
|
||||
* xmlUTF8Size:
|
||||
* @utf: pointer to the UTF8 character
|
||||
*
|
||||
* calulates the internal size of a UTF8 character
|
||||
* calculates the internal size of a UTF8 character
|
||||
*
|
||||
* returns the numbers of bytes in the character, -1 on format error
|
||||
*/
|
||||
@ -186,8 +186,8 @@ xmlUTF8Strlen(const xmlChar *utf) {
|
||||
*
|
||||
* Read one UTF8 Char from @utf
|
||||
*
|
||||
* Returns the char value or -1 in case of error and update @len with the
|
||||
* number of bytes used
|
||||
* Returns the char value or -1 in case of error, and updates *len with the
|
||||
* number of bytes consumed
|
||||
*/
|
||||
int
|
||||
xmlGetUTF8Char(const unsigned char *utf, int *len) {
|
||||
@ -248,11 +248,11 @@ error:
|
||||
|
||||
/**
|
||||
* xmlCheckUTF8:
|
||||
* @utf: Pointer to putative utf-8 encoded string.
|
||||
* @utf: Pointer to putative UTF-8 encoded string.
|
||||
*
|
||||
* Checks @utf for being valid utf-8. @utf is assumed to be
|
||||
* Checks @utf for being valid UTF-8. @utf is assumed to be
|
||||
* null-terminated. This function is not super-strict, as it will
|
||||
* allow longer utf-8 sequences than necessary. Note that Java is
|
||||
* allow longer UTF-8 sequences than necessary. Note that Java is
|
||||
* capable of producing these sequences if provoked. Also note, this
|
||||
* routine checks for the 4-byte maximum size, but does not check for
|
||||
* 0x10ffff maximum value.
|
||||
@ -386,7 +386,7 @@ xmlUTF8Strpos(const xmlChar *utf, int pos) {
|
||||
* @utf: the input UTF8 *
|
||||
* @utfchar: the UTF8 character to be found
|
||||
*
|
||||
* a function to provide relative location of a UTF8 char
|
||||
* a function to provide the relative location of a UTF8 char
|
||||
*
|
||||
* Returns the relative character position of the desired char
|
||||
* or -1 if not found
|
||||
@ -421,6 +421,7 @@ xmlUTF8Strloc(const xmlChar *utf, const xmlChar *utfchar) {
|
||||
* @start: relative pos of first char
|
||||
* @len: total number to copy
|
||||
*
|
||||
* Create a substring from a given UTF-8 string
|
||||
* Note: positions are given in units of UTF-8 chars
|
||||
*
|
||||
* Returns a pointer to a newly created string
|
||||
@ -472,8 +473,8 @@ xmlUTF8Strsub(const xmlChar *utf, int start, int len) {
|
||||
* block of chars out.
|
||||
* Returns 0 if success, or -1 otherwise
|
||||
* The value of @inlen after return is the number of octets consumed
|
||||
* as the return value is positive, else unpredictable.
|
||||
* The value of @outlen after return is the number of ocetes consumed.
|
||||
* if the return value is positive, else unpredictable.
|
||||
* The value of @outlen after return is the number of octets consumed.
|
||||
*/
|
||||
static int
|
||||
asciiToUTF8(unsigned char* out, int *outlen,
|
||||
@ -525,8 +526,8 @@ asciiToUTF8(unsigned char* out, int *outlen,
|
||||
*
|
||||
* Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
|
||||
* The value of @inlen after return is the number of octets consumed
|
||||
* as the return value is positive, else unpredictable.
|
||||
* The value of @outlen after return is the number of ocetes consumed.
|
||||
* if the return value is positive, else unpredictable.
|
||||
* The value of @outlen after return is the number of octets consumed.
|
||||
*/
|
||||
static int
|
||||
UTF8Toascii(unsigned char* out, int *outlen,
|
||||
@ -608,8 +609,8 @@ UTF8Toascii(unsigned char* out, int *outlen,
|
||||
* block of chars out.
|
||||
* Returns 0 if success, or -1 otherwise
|
||||
* The value of @inlen after return is the number of octets consumed
|
||||
* as the return value is positive, else unpredictable.
|
||||
* The value of @outlen after return is the number of ocetes consumed.
|
||||
* if the return value is positive, else unpredictable.
|
||||
* The value of @outlen after return is the number of octets consumed.
|
||||
*/
|
||||
int
|
||||
isolat1ToUTF8(unsigned char* out, int *outlen,
|
||||
@ -656,10 +657,9 @@ isolat1ToUTF8(unsigned char* out, int *outlen,
|
||||
*
|
||||
* No op copy operation for UTF8 handling.
|
||||
*
|
||||
* Returns the number of byte written, or -1 by lack of space, or -2
|
||||
* if the transcoding fails (for *in is not valid utf16 string)
|
||||
* Returns the number of bytes written, or -1 if lack of space.
|
||||
* The value of *inlen after return is the number of octets consumed
|
||||
* as the return value is positive, else unpredictable.
|
||||
* if the return value is positive, else unpredictable.
|
||||
*/
|
||||
static int
|
||||
UTF8ToUTF8(unsigned char* out, int *outlen,
|
||||
@ -698,8 +698,8 @@ UTF8ToUTF8(unsigned char* out, int *outlen,
|
||||
*
|
||||
* Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
|
||||
* The value of @inlen after return is the number of octets consumed
|
||||
* as the return value is positive, else unpredictable.
|
||||
* The value of @outlen after return is the number of ocetes consumed.
|
||||
* if the return value is positive, else unpredictable.
|
||||
* The value of @outlen after return is the number of octets consumed.
|
||||
*/
|
||||
int
|
||||
UTF8Toisolat1(unsigned char* out, int *outlen,
|
||||
@ -783,14 +783,14 @@ UTF8Toisolat1(unsigned char* out, int *outlen,
|
||||
* @inlenb: the length of @in in UTF-16LE chars
|
||||
*
|
||||
* Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
|
||||
* block of chars out. This function assume the endian property
|
||||
* block of chars out. This function assumes the endian property
|
||||
* is the same between the native type of this machine and the
|
||||
* inputed one.
|
||||
*
|
||||
* Returns the number of byte written, or -1 by lack of space, or -2
|
||||
* if the transcoding fails (for *in is not valid utf16 string)
|
||||
* Returns the number of bytes written, or -1 if lack of space, or -2
|
||||
* if the transcoding fails (if *in is not a valid utf16 string)
|
||||
* The value of *inlen after return is the number of octets consumed
|
||||
* as the return value is positive, else unpredictable.
|
||||
* if the return value is positive, else unpredictable.
|
||||
*/
|
||||
static int
|
||||
UTF16LEToUTF8(unsigned char* out, int *outlen,
|
||||
@ -874,7 +874,7 @@ UTF16LEToUTF8(unsigned char* out, int *outlen,
|
||||
* Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
|
||||
* block of chars out.
|
||||
*
|
||||
* Returns the number of byte written, or -1 by lack of space, or -2
|
||||
* Returns the number of bytes written, or -1 if lack of space, or -2
|
||||
* if the transcoding failed.
|
||||
*/
|
||||
static int
|
||||
@ -892,21 +892,8 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen,
|
||||
unsigned char *tmp;
|
||||
unsigned short tmp1, tmp2;
|
||||
|
||||
/* UTF16LE encoding has no BOM */
|
||||
if (in == NULL) {
|
||||
/*
|
||||
* initialization, add the Byte Order Mark
|
||||
*/
|
||||
if (*outlen >= 2) {
|
||||
outb[0] = 0xFF;
|
||||
outb[1] = 0xFE;
|
||||
*outlen = 2;
|
||||
*inlen = 0;
|
||||
#ifdef DEBUG_ENCODING
|
||||
xmlGenericError(xmlGenericErrorContext,
|
||||
"Added FFFE Byte Order Mark\n");
|
||||
#endif
|
||||
return(2);
|
||||
}
|
||||
*outlen = 0;
|
||||
*inlen = 0;
|
||||
return(0);
|
||||
@ -985,22 +972,61 @@ UTF8ToUTF16LE(unsigned char* outb, int *outlen,
|
||||
}
|
||||
#endif /* LIBXML_OUTPUT_ENABLED */
|
||||
|
||||
/**
|
||||
* UTF8ToUTF16:
|
||||
* @outb: a pointer to an array of bytes to store the result
|
||||
* @outlen: the length of @outb
|
||||
* @in: a pointer to an array of UTF-8 chars
|
||||
* @inlen: the length of @in
|
||||
*
|
||||
* Take a block of UTF-8 chars in and try to convert it to an UTF-16
|
||||
* block of chars out.
|
||||
*
|
||||
* Returns the number of bytes written, or -1 if lack of space, or -2
|
||||
* if the transcoding failed.
|
||||
*/
|
||||
static int
|
||||
UTF8ToUTF16(unsigned char* outb, int *outlen,
|
||||
const unsigned char* in, int *inlen)
|
||||
{
|
||||
if (in == NULL) {
|
||||
/*
|
||||
* initialization, add the Byte Order Mark for UTF-16LE
|
||||
*/
|
||||
if (*outlen >= 2) {
|
||||
outb[0] = 0xFF;
|
||||
outb[1] = 0xFE;
|
||||
*outlen = 2;
|
||||
*inlen = 0;
|
||||
#ifdef DEBUG_ENCODING
|
||||
xmlGenericError(xmlGenericErrorContext,
|
||||
"Added FFFE Byte Order Mark\n");
|
||||
#endif
|
||||
return(2);
|
||||
}
|
||||
*outlen = 0;
|
||||
*inlen = 0;
|
||||
return(0);
|
||||
}
|
||||
return (UTF8ToUTF16LE(outb, outlen, in, inlen));
|
||||
}
|
||||
|
||||
/**
|
||||
* UTF16BEToUTF8:
|
||||
* @out: a pointer to an array of bytes to store the result
|
||||
* @outlen: the length of @out
|
||||
* @inb: a pointer to an array of UTF-16 passwd as a byte array
|
||||
* @inb: a pointer to an array of UTF-16 passed as a byte array
|
||||
* @inlenb: the length of @in in UTF-16 chars
|
||||
*
|
||||
* Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
|
||||
* block of chars out. This function assume the endian property
|
||||
* block of chars out. This function assumes the endian property
|
||||
* is the same between the native type of this machine and the
|
||||
* inputed one.
|
||||
*
|
||||
* Returns the number of byte written, or -1 by lack of space, or -2
|
||||
* if the transcoding fails (for *in is not valid utf16 string)
|
||||
* Returns the number of bytes written, or -1 if lack of space, or -2
|
||||
* if the transcoding fails (if *in is not a valid utf16 string)
|
||||
* The value of *inlen after return is the number of octets consumed
|
||||
* as the return value is positive, else unpredictable.
|
||||
* if the return value is positive, else unpredictable.
|
||||
*/
|
||||
static int
|
||||
UTF16BEToUTF8(unsigned char* out, int *outlen,
|
||||
@ -1106,21 +1132,8 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen,
|
||||
unsigned char *tmp;
|
||||
unsigned short tmp1, tmp2;
|
||||
|
||||
/* UTF-16BE has no BOM */
|
||||
if (in == NULL) {
|
||||
/*
|
||||
* initialization, add the Byte Order Mark
|
||||
*/
|
||||
if (*outlen >= 2) {
|
||||
outb[0] = 0xFE;
|
||||
outb[1] = 0xFF;
|
||||
*outlen = 2;
|
||||
*inlen = 0;
|
||||
#ifdef DEBUG_ENCODING
|
||||
xmlGenericError(xmlGenericErrorContext,
|
||||
"Added FEFF Byte Order Mark\n");
|
||||
#endif
|
||||
return(2);
|
||||
}
|
||||
*outlen = 0;
|
||||
*inlen = 0;
|
||||
return(0);
|
||||
@ -1205,11 +1218,11 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen,
|
||||
/**
|
||||
* xmlDetectCharEncoding:
|
||||
* @in: a pointer to the first bytes of the XML entity, must be at least
|
||||
* 4 bytes long.
|
||||
* 2 bytes long (at least 4 if encoding is a UCS4 variant).
|
||||
* @len: pointer to the length of the buffer
|
||||
*
|
||||
* Guess the encoding of the entity using the first bytes of the entity content
|
||||
* accordingly of the non-normative appendix F of the XML-1.0 recommendation.
|
||||
* according to the non-normative appendix F of the XML-1.0 recommendation.
|
||||
*
|
||||
* Returns one of the XML_CHAR_ENCODING_... values.
|
||||
*/
|
||||
@ -1235,6 +1248,17 @@ xmlDetectCharEncoding(const unsigned char* in, int len)
|
||||
if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
|
||||
(in[2] == 0x78) && (in[3] == 0x6D))
|
||||
return(XML_CHAR_ENCODING_UTF8);
|
||||
/*
|
||||
* Although not part of the recommendation, we also
|
||||
* attempt an "auto-recognition" of UTF-16LE and
|
||||
* UTF-16BE encodings.
|
||||
*/
|
||||
if ((in[0] == 0x3C) && (in[1] == 0x00) &&
|
||||
(in[2] == 0x3F) && (in[3] == 0x00))
|
||||
return(XML_CHAR_ENCODING_UTF16LE);
|
||||
if ((in[0] == 0x00) && (in[1] == 0x3C) &&
|
||||
(in[2] == 0x00) && (in[3] == 0x3F))
|
||||
return(XML_CHAR_ENCODING_UTF16BE);
|
||||
}
|
||||
if (len >= 3) {
|
||||
/*
|
||||
@ -1245,6 +1269,7 @@ xmlDetectCharEncoding(const unsigned char* in, int len)
|
||||
(in[2] == 0xBF))
|
||||
return(XML_CHAR_ENCODING_UTF8);
|
||||
}
|
||||
/* For UTF-16 we can recognize by the BOM */
|
||||
if (len >= 2) {
|
||||
if ((in[0] == 0xFE) && (in[1] == 0xFF))
|
||||
return(XML_CHAR_ENCODING_UTF16BE);
|
||||
@ -1284,7 +1309,7 @@ xmlCleanupEncodingAliases(void) {
|
||||
*
|
||||
* Lookup an encoding name for the given alias.
|
||||
*
|
||||
* Returns NULL if not found the original name otherwise
|
||||
* Returns NULL if not found, otherwise the original name
|
||||
*/
|
||||
const char *
|
||||
xmlGetEncodingAlias(const char *alias) {
|
||||
@ -1319,7 +1344,7 @@ xmlGetEncodingAlias(const char *alias) {
|
||||
* @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
|
||||
* @alias: the alias name as parsed, in UTF-8 format (ASCII actually)
|
||||
*
|
||||
* Registers and alias @alias for an encoding named @name. Existing alias
|
||||
* Registers an alias @alias for an encoding named @name. Existing alias
|
||||
* will be overwritten.
|
||||
*
|
||||
* Returns 0 in case of success, -1 in case of error
|
||||
@ -1410,7 +1435,7 @@ xmlDelEncodingAlias(const char *alias) {
|
||||
* xmlParseCharEncoding:
|
||||
* @name: the encoding name as parsed, in UTF-8 format (ASCII actually)
|
||||
*
|
||||
* Compare the string to the known encoding schemes already known. Note
|
||||
* Compare the string to the encoding schemes already known. Note
|
||||
* that the comparison is case insensitive accordingly to the section
|
||||
* [XML] 4.3.3 Character Encoding in Entities.
|
||||
*
|
||||
@ -1686,6 +1711,7 @@ xmlInitCharEncodingHandlers(void) {
|
||||
xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
|
||||
xmlUTF16BEHandler =
|
||||
xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
|
||||
xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, UTF8ToUTF16);
|
||||
xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
|
||||
xmlNewCharEncodingHandler("ASCII", asciiToUTF8, UTF8Toascii);
|
||||
xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, UTF8Toascii);
|
||||
@ -1697,6 +1723,7 @@ xmlInitCharEncodingHandlers(void) {
|
||||
xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, NULL);
|
||||
xmlUTF16BEHandler =
|
||||
xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, NULL);
|
||||
xmlNewCharEncodingHandler("UTF-16", UTF16LEToUTF8, NULL);
|
||||
xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, NULL);
|
||||
xmlNewCharEncodingHandler("ASCII", asciiToUTF8, NULL);
|
||||
xmlNewCharEncodingHandler("US-ASCII", asciiToUTF8, NULL);
|
||||
@ -1985,14 +2012,7 @@ xmlFindCharEncodingHandler(const char *name) {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If nothing was found and it is "UTF-16" then use the Little endian
|
||||
* version.
|
||||
*/
|
||||
if ((xmlStrEqual(BAD_CAST upper, BAD_CAST "UTF-16")) ||
|
||||
(xmlStrEqual(BAD_CAST upper, BAD_CAST "UTF16")))
|
||||
return(xmlUTF16LEHandler);
|
||||
|
||||
/* If "none of the above", give up */
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
|
@ -35,19 +35,22 @@ extern "C" {
|
||||
* xmlCharEncoding:
|
||||
*
|
||||
* Predefined values for some standard encodings.
|
||||
* Libxml don't do beforehand translation on UTF8, ISOLatinX.
|
||||
* It also support UTF16 (LE and BE) by default.
|
||||
* Libxml does not do beforehand translation on UTF8 and ISOLatinX.
|
||||
* It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default.
|
||||
*
|
||||
* Anything else would have to be translated to UTF8 before being
|
||||
* given to the parser itself. The BOM for UTF16 and the encoding
|
||||
* declaration are looked at and a converter is looked for at that
|
||||
* point. If not found the parser stops here as asked by the XML REC
|
||||
* Converter can be registered by the user using xmlRegisterCharEncodingHandler
|
||||
* point. If not found the parser stops here as asked by the XML REC. A
|
||||
* converter can be registered by the user using xmlRegisterCharEncodingHandler
|
||||
* but the current form doesn't allow stateful transcoding (a serious
|
||||
* problem agreed !). If iconv has been found it will be used
|
||||
* automatically and allow stateful transcoding, the simplest is then
|
||||
* to be sure to enable icon and to provide iconv libs for the encoding
|
||||
* to be sure to enable iconv and to provide iconv libs for the encoding
|
||||
* support needed.
|
||||
*
|
||||
* Note that the generic "UTF-16" is not a predefined value. Instead, only
|
||||
* the specific UTF-16LE and UTF-16BE are present.
|
||||
*/
|
||||
typedef enum {
|
||||
XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */
|
||||
@ -86,10 +89,10 @@ typedef enum {
|
||||
* Take a block of chars in the original encoding and try to convert
|
||||
* it to an UTF-8 block of chars out.
|
||||
*
|
||||
* Returns the number of byte written, or -1 by lack of space, or -2
|
||||
* Returns the number of bytes written, -1 if lack of space, or -2
|
||||
* if the transcoding failed.
|
||||
* The value of @inlen after return is the number of octets consumed
|
||||
* as the return value is positive, else unpredictiable.
|
||||
* if the return value is positive, else unpredictable.
|
||||
* The value of @outlen after return is the number of octets consumed.
|
||||
*/
|
||||
typedef int (* xmlCharEncodingInputFunc)(unsigned char *out, int *outlen,
|
||||
@ -103,15 +106,15 @@ typedef int (* xmlCharEncodingInputFunc)(unsigned char *out, int *outlen,
|
||||
* @in: a pointer to an array of UTF-8 chars
|
||||
* @inlen: the length of @in
|
||||
*
|
||||
* Take a block of UTF-8 chars in and try to convert it to an other
|
||||
* Take a block of UTF-8 chars in and try to convert it to another
|
||||
* encoding.
|
||||
* Note: a first call designed to produce heading info is called with
|
||||
* in = NULL. If stateful this should also initialize the encoder state.
|
||||
*
|
||||
* Returns the number of byte written, or -1 by lack of space, or -2
|
||||
* Returns the number of bytes written, -1 if lack of space, or -2
|
||||
* if the transcoding failed.
|
||||
* The value of @inlen after return is the number of octets consumed
|
||||
* as the return value is positive, else unpredictiable.
|
||||
* if the return value is positive, else unpredictable.
|
||||
* The value of @outlen after return is the number of octets consumed.
|
||||
*/
|
||||
typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen,
|
||||
@ -120,7 +123,7 @@ typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen,
|
||||
|
||||
/*
|
||||
* Block defining the handlers for non UTF-8 encodings.
|
||||
* If iconv is supported, there is two extra fields.
|
||||
* If iconv is supported, there are two extra fields.
|
||||
*/
|
||||
|
||||
typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
|
||||
@ -218,7 +221,7 @@ XMLPUBFUN int XMLCALL
|
||||
xmlGetUTF8Char (const unsigned char *utf,
|
||||
int *len);
|
||||
/*
|
||||
* exports additional "UTF-8 aware" string routines which are.
|
||||
* Export additional string routines which are "UTF-8 aware".
|
||||
*/
|
||||
|
||||
XMLPUBFUN int XMLCALL
|
||||
|
BIN
result/noent/utf16bebom.xml
Normal file
BIN
result/noent/utf16bebom.xml
Normal file
Binary file not shown.
BIN
result/noent/utf16lebom.xml
Normal file
BIN
result/noent/utf16lebom.xml
Normal file
Binary file not shown.
BIN
result/utf16bebom.xml
Normal file
BIN
result/utf16bebom.xml
Normal file
Binary file not shown.
4
result/utf16bebom.xml.rdr
Normal file
4
result/utf16bebom.xml.rdr
Normal file
@ -0,0 +1,4 @@
|
||||
0 8 #comment 0 1 This file is encoded in UTF-16BE
|
||||
0 1 repository 0 0
|
||||
1 1 namespace 1 0
|
||||
0 15 repository 0 0
|
8
result/utf16bebom.xml.sax
Normal file
8
result/utf16bebom.xml.sax
Normal file
@ -0,0 +1,8 @@
|
||||
SAX.setDocumentLocator()
|
||||
SAX.startDocument()
|
||||
SAX.comment( This file is encoded in UTF-16BE )
|
||||
SAX.startElement(repository, repositroy_id='test')
|
||||
SAX.startElement(namespace, name='test')
|
||||
SAX.endElement(namespace)
|
||||
SAX.endElement(repository)
|
||||
SAX.endDocument()
|
BIN
result/utf16lebom.xml
Normal file
BIN
result/utf16lebom.xml
Normal file
Binary file not shown.
4
result/utf16lebom.xml.rdr
Normal file
4
result/utf16lebom.xml.rdr
Normal file
@ -0,0 +1,4 @@
|
||||
0 8 #comment 0 1 This file is encoded in UTF-16LE
|
||||
0 1 repository 0 0
|
||||
1 1 namespace 1 0
|
||||
0 15 repository 0 0
|
8
result/utf16lebom.xml.sax
Normal file
8
result/utf16lebom.xml.sax
Normal file
@ -0,0 +1,8 @@
|
||||
SAX.setDocumentLocator()
|
||||
SAX.startDocument()
|
||||
SAX.comment( This file is encoded in UTF-16LE )
|
||||
SAX.startElement(repository, repositroy_id='test')
|
||||
SAX.startElement(namespace, name='test')
|
||||
SAX.endElement(namespace)
|
||||
SAX.endElement(repository)
|
||||
SAX.endDocument()
|
BIN
test/utf16bebom.xml
Normal file
BIN
test/utf16bebom.xml
Normal file
Binary file not shown.
BIN
test/utf16lebom.xml
Normal file
BIN
test/utf16lebom.xml
Normal file
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user