mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-01-12 09:17:37 +03:00
revamped the encoding support, added iconv support, so now libxml if
* encoding.[ch], xmlIO.[ch], parser.c, configure.in : revamped the encoding support, added iconv support, so now libxml if compiled with iconv automatically support japanese encodings among others. Work based on initial patch from Yuan-Chen Cheng I may have broken binary compat in the encoding handler registration scheme, but that was so utterly broken I don't expect anybody to have used this feature until now. * parserInternals.h: fixup on the CHAR range macro * xml-error.h, parser.c: catch URL/URI errors using the uri.c code. * tree.[ch]: added xmlBufferGrow(), was needed for iconv * uri.c: added xmlParseURI() I can't believe I forgot to implement this one in 2.0 !!! * SAX.c: moved doc->encoding update in the endDocument() call. * TODO: updated. Iconv rules :-) Daniel
This commit is contained in:
parent
06047432eb
commit
496a1cf592
18
ChangeLog
18
ChangeLog
@ -1,3 +1,21 @@
|
||||
Wed May 3 14:21:25 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
|
||||
|
||||
* encoding.[ch], xmlIO.[ch], parser.c, configure.in : revamped
|
||||
the encoding support, added iconv support, so now libxml if
|
||||
compiled with iconv automatically support japanese encodings
|
||||
among others. Work based on initial patch from Yuan-Chen Cheng
|
||||
I may have broken binary compat in the encoding handler
|
||||
registration scheme, but that was so utterly broken I don't
|
||||
expect anybody to have used this feature until now.
|
||||
* parserInternals.h: fixup on the CHAR range macro
|
||||
* xml-error.h, parser.c: catch URL/URI errors using the uri.c
|
||||
code.
|
||||
* tree.[ch]: added xmlBufferGrow(), was needed for iconv
|
||||
* uri.c: added xmlParseURI() I can't believe I forgot to
|
||||
implement this one in 2.0 !!!
|
||||
* SAX.c: moved doc->encoding update in the endDocument() call.
|
||||
* TODO: updated.
|
||||
|
||||
Mon Apr 24 13:30:13 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
|
||||
|
||||
* tree.h: removed extraneous xmlRemoveProp definition
|
||||
|
9
SAX.c
9
SAX.c
@ -595,6 +595,15 @@ endDocument(void *ctx)
|
||||
if (ctxt->validate && ctxt->wellFormed &&
|
||||
ctxt->myDoc && ctxt->myDoc->intSubset)
|
||||
ctxt->valid &= xmlValidateDocumentFinal(&ctxt->vctxt, ctxt->myDoc);
|
||||
|
||||
/*
|
||||
* Grab the encoding if it was added on-the-fly
|
||||
*/
|
||||
if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
|
||||
(ctxt->myDoc->encoding == NULL)) {
|
||||
ctxt->myDoc->encoding = ctxt->encoding;
|
||||
ctxt->encoding = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
9
TODO
9
TODO
@ -6,6 +6,8 @@
|
||||
TODO:
|
||||
=====
|
||||
|
||||
- xmlSwitchToEncoding() need a rewrite for correct handling of conversion
|
||||
error code conditions.
|
||||
- DOM needs
|
||||
xmlAttrPtr xmlNewDocProp(xmlDocPtr doc, const xmlChar *name, const xmlChar *value)
|
||||
int xmlPruneProp(xmlNodePtr node, xmlAtttrPtr attr);
|
||||
@ -14,7 +16,6 @@ TODO:
|
||||
- add support for the trick from Henry conf/sun/valid/empty.xml
|
||||
- Correct standalone checking/emitting (hard)
|
||||
2.9 Standalone Document Declaration
|
||||
- URI checkings (no fragments) rfc2396.txt
|
||||
- Better checking of external parsed entities TAG 1234
|
||||
- Find way of representing PERefs in the Dtd so that %entity; can
|
||||
be saved back.
|
||||
@ -22,6 +23,7 @@ TODO:
|
||||
http://www.w3.org/XML/xml-19980210-errata ... bummmer
|
||||
- Handle undefined namespaces in entity contents better ... at least
|
||||
issue a warning
|
||||
- Issue warning when using non-absolute namespaces URI.
|
||||
- General checking of DTD validation in presence of namespaces ... hairy
|
||||
- fix --disable-corba configure switch handling, and use XML_WITHOUT_CORBA
|
||||
not WITHOUT_CORBA flag
|
||||
@ -30,7 +32,7 @@ TODO:
|
||||
=====
|
||||
|
||||
- Get OASIS testsuite to a more friendly result, check all the results
|
||||
once stable.
|
||||
once stable. Current state at:
|
||||
http://xmlsoft.org/conf/result.html
|
||||
|
||||
- Optimization of tag strings allocation ?
|
||||
@ -55,11 +57,13 @@ EXTENSIONS:
|
||||
|
||||
- Add Xlink recognition/API
|
||||
=> started adding an xlink.[ch] with a unified API for XML and HTML.
|
||||
it's crap :-(
|
||||
|
||||
- Implement XSLT
|
||||
=> seems that someone volunteered ?!?
|
||||
|
||||
- Implement XSchemas
|
||||
=> Really need to be done <grin/>
|
||||
|
||||
- O2K parsing;
|
||||
=> this is a somewhat ugly mix of HTML and XML, adding a specific
|
||||
@ -88,6 +92,7 @@ EXTENSIONS:
|
||||
Done:
|
||||
=====
|
||||
|
||||
- URI checkings (no fragments) rfc2396.txt
|
||||
- Added a clean mechanism for overload or added input methods:
|
||||
xmlRegisterInputCallbacks()
|
||||
- dynamically adapt the alloc entry point to use g_alloc()/g_free()
|
||||
|
16
configure.in
16
configure.in
@ -4,7 +4,7 @@ AC_INIT(entities.h)
|
||||
AM_CONFIG_HEADER(config.h)
|
||||
|
||||
LIBXML_MAJOR_VERSION=2
|
||||
LIBXML_MINOR_VERSION=0
|
||||
LIBXML_MINOR_VERSION=1
|
||||
LIBXML_MICRO_VERSION=0
|
||||
LIBXML_VERSION=$LIBXML_MAJOR_VERSION.$LIBXML_MINOR_VERSION.$LIBXML_MICRO_VERSION
|
||||
LIBXML_VERSION_INFO=`expr $LIBXML_MAJOR_VERSION + $LIBXML_MINOR_VERSION`:$LIBXML_MICRO_VERSION:$LIBXML_MINOR_VERSION
|
||||
@ -203,6 +203,20 @@ fi
|
||||
AC_SUBST(WITH_XPATH)
|
||||
AC_SUBST(XPATH_OBJ)
|
||||
|
||||
AC_ARG_WITH(iconv, [ --with-iconv Add the ICONV support (on)])
|
||||
if test "$with_iconv" = "no" ; then
|
||||
echo Disabling ICONV support
|
||||
WITH_ICONV=0
|
||||
else
|
||||
if test "$have_iconv" != "" ; then
|
||||
echo Iconv support not found
|
||||
WITH_ICONV=0
|
||||
else
|
||||
WITH_ICONV=1
|
||||
fi
|
||||
fi
|
||||
AC_SUBST(WITH_ICONV)
|
||||
|
||||
AC_ARG_WITH(debug, [ --with-debug Add the debugging module (on)])
|
||||
if test "$with_debug" = "no" ; then
|
||||
echo Disabling DEBUG support
|
||||
|
728
encoding.c
728
encoding.c
File diff suppressed because it is too large
Load Diff
47
encoding.h
47
encoding.h
@ -22,12 +22,30 @@
|
||||
#define __XML_CHAR_ENCODING_H__
|
||||
|
||||
#include <libxml/xmlversion.h>
|
||||
#ifdef LIBXML_ICONV_ENABLED
|
||||
#include <iconv.h>
|
||||
#endif
|
||||
#include <libxml/tree.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Predefined values for some standard encodings
|
||||
* Libxml don't do beforehand translation on UTF8, ISOLatinX
|
||||
* It also support UTF16 (LE and BE) by default.
|
||||
*
|
||||
* Anything else would have to be translated to UTF8 before being
|
||||
* given to the parser itself. The BOM for UTF16 and the encoding
|
||||
* declaration are looked at and a converter is looked for at that
|
||||
* point. If not found the parser stops here as asked by the XML REC
|
||||
* Converter can be registered by the user using xmlRegisterCharEncodingHandler
|
||||
* but the currentl form doesn't allow stateful transcoding (a serious
|
||||
* problem agreed !). If iconv has been found it will be used
|
||||
* automatically and allow stateful transcoding, the simplest is then
|
||||
* to be sure to enable icon and to provide iconv libs for the encoding
|
||||
* support needed.
|
||||
*/
|
||||
typedef enum {
|
||||
XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */
|
||||
@ -65,9 +83,13 @@ typedef enum {
|
||||
* Take a block of chars in the original encoding and try to convert
|
||||
* it to an UTF-8 block of chars out.
|
||||
*
|
||||
* Returns the number of byte written, or -1 by lack of space.
|
||||
* Returns the number of byte written, or -1 by lack of space, or -2
|
||||
* if the transcoding failed.
|
||||
* The value of @inlen after return is the number of octets consumed
|
||||
* as the return value is positive, else unpredictiable.
|
||||
* The value of @outlen after return is the number of ocetes consumed.
|
||||
*/
|
||||
typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
|
||||
typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int *outlen,
|
||||
const unsigned char* in, int *inlen);
|
||||
|
||||
|
||||
@ -83,12 +105,17 @@ typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
|
||||
*
|
||||
* Returns the number of byte written, or -1 by lack of space, or -2
|
||||
* if the transcoding failed.
|
||||
* The value of @inlen after return is the number of octets consumed
|
||||
* as the return value is positive, else unpredictiable.
|
||||
* The value of @outlen after return is the number of ocetes consumed.
|
||||
*/
|
||||
typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int outlen,
|
||||
typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int *outlen,
|
||||
const unsigned char* in, int *inlen);
|
||||
|
||||
|
||||
/*
|
||||
* Block defining the handlers for non UTF-8 encodings.
|
||||
* If iconv is supported, there is two extra fields
|
||||
*/
|
||||
|
||||
typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
|
||||
@ -96,7 +123,11 @@ typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr;
|
||||
struct _xmlCharEncodingHandler {
|
||||
char *name;
|
||||
xmlCharEncodingInputFunc input;
|
||||
xmlCharEncodingOutputFunc output;
|
||||
xmlCharEncodingOutputFunc output;
|
||||
#ifdef LIBXML_ICONV_ENABLED
|
||||
iconv_t iconv_in;
|
||||
iconv_t iconv_out;
|
||||
#endif /* LIBXML_ICONV_ENABLED */
|
||||
};
|
||||
|
||||
void xmlInitCharEncodingHandlers (void);
|
||||
@ -109,6 +140,14 @@ xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc);
|
||||
xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name);
|
||||
int xmlCheckUTF8 (const unsigned char *utf);
|
||||
|
||||
int xmlCharEncOutFunc (xmlCharEncodingHandler *handler,
|
||||
xmlBufferPtr out,
|
||||
xmlBufferPtr in);
|
||||
|
||||
int xmlCharEncInFunc (xmlCharEncodingHandler *handler,
|
||||
xmlBufferPtr out,
|
||||
xmlBufferPtr in);
|
||||
int xmlCharEncCloseFunc (xmlCharEncodingHandler *handler);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -22,12 +22,30 @@
|
||||
#define __XML_CHAR_ENCODING_H__
|
||||
|
||||
#include <libxml/xmlversion.h>
|
||||
#ifdef LIBXML_ICONV_ENABLED
|
||||
#include <iconv.h>
|
||||
#endif
|
||||
#include <libxml/tree.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Predefined values for some standard encodings
|
||||
* Libxml don't do beforehand translation on UTF8, ISOLatinX
|
||||
* It also support UTF16 (LE and BE) by default.
|
||||
*
|
||||
* Anything else would have to be translated to UTF8 before being
|
||||
* given to the parser itself. The BOM for UTF16 and the encoding
|
||||
* declaration are looked at and a converter is looked for at that
|
||||
* point. If not found the parser stops here as asked by the XML REC
|
||||
* Converter can be registered by the user using xmlRegisterCharEncodingHandler
|
||||
* but the currentl form doesn't allow stateful transcoding (a serious
|
||||
* problem agreed !). If iconv has been found it will be used
|
||||
* automatically and allow stateful transcoding, the simplest is then
|
||||
* to be sure to enable icon and to provide iconv libs for the encoding
|
||||
* support needed.
|
||||
*/
|
||||
typedef enum {
|
||||
XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */
|
||||
@ -65,9 +83,13 @@ typedef enum {
|
||||
* Take a block of chars in the original encoding and try to convert
|
||||
* it to an UTF-8 block of chars out.
|
||||
*
|
||||
* Returns the number of byte written, or -1 by lack of space.
|
||||
* Returns the number of byte written, or -1 by lack of space, or -2
|
||||
* if the transcoding failed.
|
||||
* The value of @inlen after return is the number of octets consumed
|
||||
* as the return value is positive, else unpredictiable.
|
||||
* The value of @outlen after return is the number of ocetes consumed.
|
||||
*/
|
||||
typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
|
||||
typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int *outlen,
|
||||
const unsigned char* in, int *inlen);
|
||||
|
||||
|
||||
@ -83,12 +105,17 @@ typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
|
||||
*
|
||||
* Returns the number of byte written, or -1 by lack of space, or -2
|
||||
* if the transcoding failed.
|
||||
* The value of @inlen after return is the number of octets consumed
|
||||
* as the return value is positive, else unpredictiable.
|
||||
* The value of @outlen after return is the number of ocetes consumed.
|
||||
*/
|
||||
typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int outlen,
|
||||
typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int *outlen,
|
||||
const unsigned char* in, int *inlen);
|
||||
|
||||
|
||||
/*
|
||||
* Block defining the handlers for non UTF-8 encodings.
|
||||
* If iconv is supported, there is two extra fields
|
||||
*/
|
||||
|
||||
typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
|
||||
@ -96,7 +123,11 @@ typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr;
|
||||
struct _xmlCharEncodingHandler {
|
||||
char *name;
|
||||
xmlCharEncodingInputFunc input;
|
||||
xmlCharEncodingOutputFunc output;
|
||||
xmlCharEncodingOutputFunc output;
|
||||
#ifdef LIBXML_ICONV_ENABLED
|
||||
iconv_t iconv_in;
|
||||
iconv_t iconv_out;
|
||||
#endif /* LIBXML_ICONV_ENABLED */
|
||||
};
|
||||
|
||||
void xmlInitCharEncodingHandlers (void);
|
||||
@ -109,6 +140,14 @@ xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc);
|
||||
xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name);
|
||||
int xmlCheckUTF8 (const unsigned char *utf);
|
||||
|
||||
int xmlCharEncOutFunc (xmlCharEncodingHandler *handler,
|
||||
xmlBufferPtr out,
|
||||
xmlBufferPtr in);
|
||||
|
||||
int xmlCharEncInFunc (xmlCharEncodingHandler *handler,
|
||||
xmlBufferPtr out,
|
||||
xmlBufferPtr in);
|
||||
int xmlCharEncCloseFunc (xmlCharEncodingHandler *handler);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -28,10 +28,10 @@ extern "C" {
|
||||
* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
|
||||
*/
|
||||
#define IS_CHAR(c) \
|
||||
((((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) || \
|
||||
(((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) && \
|
||||
(((c) <= 0xD7FF) || ((c) >= 0xE000)) && ((c) >= 0) && \
|
||||
((c) <= 0x10FFFF))
|
||||
(((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) || \
|
||||
(((c) >= 0x20) && ((c) <= 0xD7FF)) || \
|
||||
(((c) >= 0xE000) && ((c) <= 0xFFFD)) || \
|
||||
(((c) >= 0x10000) && ((c) <= 0x10FFFF)))
|
||||
|
||||
/*
|
||||
* [3] S ::= (#x20 | #x9 | #xD | #xA)+
|
||||
@ -442,8 +442,10 @@ xmlParserCtxtPtr xmlNewParserCtxt (void);
|
||||
xmlParserCtxtPtr xmlCreateEntityParserCtxt(const xmlChar *URL,
|
||||
const xmlChar *ID,
|
||||
const xmlChar *base);
|
||||
void xmlSwitchEncoding (xmlParserCtxtPtr ctxt,
|
||||
int xmlSwitchEncoding (xmlParserCtxtPtr ctxt,
|
||||
xmlCharEncoding enc);
|
||||
int xmlSwitchToEncoding (xmlParserCtxtPtr ctxt,
|
||||
xmlCharEncodingHandlerPtr handler);
|
||||
void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
|
||||
|
||||
/**
|
||||
|
@ -380,6 +380,8 @@ void xmlBufferCCat (xmlBufferPtr buf,
|
||||
const char *str);
|
||||
int xmlBufferShrink (xmlBufferPtr buf,
|
||||
int len);
|
||||
int xmlBufferGrow (xmlBufferPtr buf,
|
||||
int len);
|
||||
void xmlBufferEmpty (xmlBufferPtr buf);
|
||||
const xmlChar* xmlBufferContent (const xmlBufferPtr buf);
|
||||
int xmlBufferUse (const xmlBufferPtr buf);
|
||||
|
@ -33,6 +33,7 @@ struct _xmlParserInputBuffer {
|
||||
xmlCharEncodingHandlerPtr encoder; /* I18N conversions to UTF-8 */
|
||||
|
||||
xmlBufferPtr buffer; /* Local buffer encoded in UTF-8 */
|
||||
xmlBufferPtr raw; /* if encoder != NULL buffer for raw input */
|
||||
};
|
||||
|
||||
|
||||
|
600
parser.c
600
parser.c
@ -41,6 +41,7 @@
|
||||
#include <libxml/valid.h>
|
||||
#include <libxml/parserInternals.h>
|
||||
#include <libxml/xmlIO.h>
|
||||
#include <libxml/uri.h>
|
||||
#include "xml-error.h"
|
||||
|
||||
#define XML_PARSER_BIG_BUFFER_SIZE 1000
|
||||
@ -483,7 +484,7 @@ xmlNextChar(xmlParserCtxtPtr ctxt) {
|
||||
if ((ctxt->sax != NULL) &&
|
||||
(ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"Char out of allowed range\n");
|
||||
"Char 0x%X out of allowed range\n", val);
|
||||
ctxt->errNo = XML_ERR_INVALID_ENCODING;
|
||||
ctxt->wellFormed = 0;
|
||||
ctxt->disableSAX = 1;
|
||||
@ -612,7 +613,7 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
|
||||
if ((ctxt->sax != NULL) &&
|
||||
(ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"Char out of allowed range\n");
|
||||
"Char 0x%X out of allowed range\n", val);
|
||||
ctxt->errNo = XML_ERR_INVALID_ENCODING;
|
||||
ctxt->wellFormed = 0;
|
||||
ctxt->disableSAX = 1;
|
||||
@ -727,7 +728,7 @@ xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
|
||||
if ((ctxt->sax != NULL) &&
|
||||
(ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"Char out of allowed range\n");
|
||||
"Char 0x%X out of allowed range\n", val);
|
||||
ctxt->errNo = XML_ERR_INVALID_ENCODING;
|
||||
ctxt->wellFormed = 0;
|
||||
ctxt->disableSAX = 1;
|
||||
@ -2278,96 +2279,209 @@ xmlCheckLanguageID(const xmlChar *lang) {
|
||||
*
|
||||
* change the input functions when discovering the character encoding
|
||||
* of a given entity.
|
||||
*
|
||||
* Returns 0 in case of success, -1 otherwise
|
||||
*/
|
||||
void
|
||||
int
|
||||
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
|
||||
{
|
||||
xmlCharEncodingHandlerPtr handler;
|
||||
|
||||
switch (enc) {
|
||||
case XML_CHAR_ENCODING_ERROR:
|
||||
ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData, "encoding unknown\n");
|
||||
ctxt->wellFormed = 0;
|
||||
ctxt->disableSAX = 1;
|
||||
break;
|
||||
case XML_CHAR_ENCODING_NONE:
|
||||
/* let's assume it's UTF-8 without the XML decl */
|
||||
return(0);
|
||||
case XML_CHAR_ENCODING_UTF8:
|
||||
/* default encoding, no conversion should be needed */
|
||||
return(0);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
handler = xmlGetCharEncodingHandler(enc);
|
||||
if (handler == NULL) {
|
||||
/*
|
||||
* Default handlers.
|
||||
*/
|
||||
switch (enc) {
|
||||
case XML_CHAR_ENCODING_ERROR:
|
||||
ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData, "encoding unknown\n");
|
||||
ctxt->wellFormed = 0;
|
||||
ctxt->disableSAX = 1;
|
||||
break;
|
||||
case XML_CHAR_ENCODING_NONE:
|
||||
/* let's assume it's UTF-8 without the XML decl */
|
||||
return(0);
|
||||
case XML_CHAR_ENCODING_UTF8:
|
||||
/* default encoding, no conversion should be needed */
|
||||
return(0);
|
||||
case XML_CHAR_ENCODING_UTF16LE:
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UTF16BE:
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4LE:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding USC4 little endian not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4BE:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding USC4 big endian not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_EBCDIC:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding EBCDIC not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4_2143:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding UCS4 2143 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4_3412:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding UCS4 3412 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS2:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding UCS2 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_8859_1:
|
||||
case XML_CHAR_ENCODING_8859_2:
|
||||
case XML_CHAR_ENCODING_8859_3:
|
||||
case XML_CHAR_ENCODING_8859_4:
|
||||
case XML_CHAR_ENCODING_8859_5:
|
||||
case XML_CHAR_ENCODING_8859_6:
|
||||
case XML_CHAR_ENCODING_8859_7:
|
||||
case XML_CHAR_ENCODING_8859_8:
|
||||
case XML_CHAR_ENCODING_8859_9:
|
||||
/*
|
||||
* Keep the internal content in the document encoding
|
||||
*/
|
||||
if ((ctxt->inputNr == 1) &&
|
||||
(ctxt->encoding == NULL) &&
|
||||
(ctxt->input->encoding != NULL)) {
|
||||
ctxt->encoding = xmlStrdup(ctxt->input->encoding);
|
||||
}
|
||||
return(0);
|
||||
case XML_CHAR_ENCODING_2022_JP:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding ISO-2022-JPnot supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_SHIFT_JIS:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding Shift_JIS not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_EUC_JP:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding EUC-JPnot supported\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (handler == NULL)
|
||||
return(-1);
|
||||
return(xmlSwitchToEncoding(ctxt, handler));
|
||||
}
|
||||
|
||||
/**
|
||||
* xmlSwitchToEncoding:
|
||||
* @ctxt: the parser context
|
||||
* @handler: the encoding handler
|
||||
*
|
||||
* change the input functions when discovering the character encoding
|
||||
* of a given entity.
|
||||
*
|
||||
* Returns 0 in case of success, -1 otherwise
|
||||
*/
|
||||
int
|
||||
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
|
||||
{
|
||||
int nbchars;
|
||||
|
||||
if (handler != NULL) {
|
||||
if (ctxt->input != NULL) {
|
||||
if (ctxt->input->buf != NULL) {
|
||||
if (ctxt->input->buf->encoder != NULL) {
|
||||
if (ctxt->input->buf->encoder == handler)
|
||||
return(0);
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"xmlSwitchEncoding : encoder already regitered\n");
|
||||
return;
|
||||
return(-1);
|
||||
}
|
||||
ctxt->input->buf->encoder = handler;
|
||||
|
||||
/*
|
||||
* Is there already some content down the pipe to convert
|
||||
* Is there already some content down the pipe to convert ?
|
||||
*/
|
||||
if ((ctxt->input->buf->buffer != NULL) &&
|
||||
(ctxt->input->buf->buffer->use > 0)) {
|
||||
xmlChar *buf;
|
||||
int res, len, size;
|
||||
int processed;
|
||||
|
||||
/*
|
||||
* Specific handling of the Byte Order Mark for
|
||||
* UTF-16
|
||||
*/
|
||||
if ((enc == XML_CHAR_ENCODING_UTF16LE) &&
|
||||
if ((handler->name != NULL) &&
|
||||
(!strcmp(handler->name, "UTF-16LE")) &&
|
||||
(ctxt->input->cur[0] == 0xFF) &&
|
||||
(ctxt->input->cur[1] == 0xFE)) {
|
||||
SKIP(2);
|
||||
ctxt->input->cur += 2;
|
||||
}
|
||||
if ((enc == XML_CHAR_ENCODING_UTF16BE) &&
|
||||
if ((handler->name != NULL) &&
|
||||
(!strcmp(handler->name, "UTF-16BE")) &&
|
||||
(ctxt->input->cur[0] == 0xFE) &&
|
||||
(ctxt->input->cur[1] == 0xFF)) {
|
||||
SKIP(2);
|
||||
ctxt->input->cur += 2;
|
||||
}
|
||||
|
||||
/*
|
||||
* convert the non processed part
|
||||
* Shring the current input buffer.
|
||||
* Move it as the raw buffer and create a new input buffer
|
||||
*/
|
||||
processed = ctxt->input->cur - ctxt->input->base;
|
||||
len = ctxt->input->buf->buffer->use - processed;
|
||||
|
||||
if (len <= 0) {
|
||||
return;
|
||||
}
|
||||
size = ctxt->input->buf->buffer->use * 4;
|
||||
if (size < 4000)
|
||||
size = 4000;
|
||||
retry_larger:
|
||||
buf = (xmlChar *) xmlMalloc(size + 1);
|
||||
if (buf == NULL) {
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"xmlSwitchEncoding : out of memory\n");
|
||||
return;
|
||||
}
|
||||
/* TODO !!! Handling of buf too small */
|
||||
res = handler->input(buf, size, ctxt->input->cur, &len);
|
||||
if (res == -1) {
|
||||
size *= 2;
|
||||
xmlFree(buf);
|
||||
goto retry_larger;
|
||||
}
|
||||
if ((res < 0) ||
|
||||
(len != ctxt->input->buf->buffer->use - processed)) {
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"xmlSwitchEncoding : conversion failed\n");
|
||||
xmlFree(buf);
|
||||
return;
|
||||
}
|
||||
xmlBufferShrink(ctxt->input->buf->buffer, processed);
|
||||
ctxt->input->buf->raw = ctxt->input->buf->buffer;
|
||||
ctxt->input->buf->buffer = xmlBufferCreate();
|
||||
|
||||
/*
|
||||
* Conversion succeeded, get rid of the old buffer
|
||||
* convert as much as possible of the raw input
|
||||
* to the parser reading buffer.
|
||||
*/
|
||||
xmlFree(ctxt->input->buf->buffer->content);
|
||||
ctxt->input->buf->buffer->content = buf;
|
||||
ctxt->input->base = buf;
|
||||
ctxt->input->cur = buf;
|
||||
ctxt->input->buf->buffer->size = size;
|
||||
ctxt->input->buf->buffer->use = res;
|
||||
buf[res] = 0;
|
||||
nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
|
||||
ctxt->input->buf->buffer,
|
||||
ctxt->input->buf->raw);
|
||||
if (nbchars < 0) {
|
||||
fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
|
||||
return(-1);
|
||||
}
|
||||
ctxt->input->base =
|
||||
ctxt->input->cur = ctxt->input->buf->buffer->content;
|
||||
}
|
||||
return;
|
||||
return(0);
|
||||
} else {
|
||||
if (ctxt->input->length == 0) {
|
||||
/*
|
||||
@ -2377,191 +2491,59 @@ retry_larger:
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"xmlSwitchEncoding : no input\n");
|
||||
return;
|
||||
return(-1);
|
||||
} else {
|
||||
xmlChar *buf;
|
||||
int res, len;
|
||||
int processed = ctxt->input->cur - ctxt->input->base;
|
||||
int processed;
|
||||
|
||||
/*
|
||||
* convert the non processed part
|
||||
* Shring the current input buffer.
|
||||
* Move it as the raw buffer and create a new input buffer
|
||||
*/
|
||||
len = ctxt->input->length - processed;
|
||||
if (len <= 0) {
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"xmlSwitchEncoding : input fully consumed?\n");
|
||||
return;
|
||||
}
|
||||
buf = (xmlChar *) xmlMalloc(ctxt->input->length * 4);
|
||||
if (buf == NULL) {
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"xmlSwitchEncoding : out of memory\n");
|
||||
return;
|
||||
}
|
||||
res = handler->input(buf, ctxt->input->length * 4,
|
||||
ctxt->input->cur, &len);
|
||||
if ((res < 0) ||
|
||||
(len != ctxt->input->length - processed)) {
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"xmlSwitchEncoding : conversion failed\n");
|
||||
xmlFree(buf);
|
||||
return;
|
||||
processed = ctxt->input->cur - ctxt->input->base;
|
||||
ctxt->input->buf->raw = xmlBufferCreate();
|
||||
xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
|
||||
ctxt->input->length - processed);
|
||||
ctxt->input->buf->buffer = xmlBufferCreate();
|
||||
|
||||
/*
|
||||
* convert as much as possible of the raw input
|
||||
* to the parser reading buffer.
|
||||
*/
|
||||
nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
|
||||
ctxt->input->buf->buffer,
|
||||
ctxt->input->buf->raw);
|
||||
if (nbchars < 0) {
|
||||
fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
|
||||
return(-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Conversion succeeded, get rid of the old buffer
|
||||
*/
|
||||
if ((ctxt->input->free != NULL) &&
|
||||
(ctxt->input->base != NULL))
|
||||
ctxt->input->free((xmlChar *) ctxt->input->base);
|
||||
ctxt->input->base = ctxt->input->cur = buf;
|
||||
ctxt->input->length = res;
|
||||
ctxt->input->base =
|
||||
ctxt->input->cur = ctxt->input->buf->buffer->content;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"xmlSwitchEncoding : no input\n");
|
||||
return(-1);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* The parsing is now done in UTF8 natively
|
||||
*/
|
||||
if (ctxt->encoding != NULL) {
|
||||
xmlFree((xmlChar *) ctxt->encoding);
|
||||
ctxt->encoding = NULL;
|
||||
}
|
||||
} else
|
||||
return(-1);
|
||||
return(0);
|
||||
|
||||
switch (enc) {
|
||||
case XML_CHAR_ENCODING_ERROR:
|
||||
ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData, "encoding unknown\n");
|
||||
ctxt->wellFormed = 0;
|
||||
ctxt->disableSAX = 1;
|
||||
break;
|
||||
case XML_CHAR_ENCODING_NONE:
|
||||
/* let's assume it's UTF-8 without the XML decl */
|
||||
return;
|
||||
case XML_CHAR_ENCODING_UTF8:
|
||||
/* default encoding, no conversion should be needed */
|
||||
return;
|
||||
case XML_CHAR_ENCODING_UTF16LE:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding UTF16 little endian not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UTF16BE:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding UTF16 big endian not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4LE:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding USC4 little endian not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4BE:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding USC4 big endian not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_EBCDIC:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding EBCDIC not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4_2143:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding UCS4 2143 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS4_3412:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding UCS4 3412 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_UCS2:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding UCS2 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_8859_1:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding ISO_8859_1 ISO Latin 1 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_8859_2:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding ISO_8859_2 ISO Latin 2 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_8859_3:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding ISO_8859_3 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_8859_4:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding ISO_8859_4 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_8859_5:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding ISO_8859_5 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_8859_6:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding ISO_8859_6 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_8859_7:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding ISO_8859_7 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_8859_8:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding ISO_8859_8 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_8859_9:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding ISO_8859_9 not supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_2022_JP:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding ISO-2022-JPnot supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_SHIFT_JIS:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding Shift_JISnot supported\n");
|
||||
break;
|
||||
case XML_CHAR_ENCODING_EUC_JP:
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"char encoding EUC-JPnot supported\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
@ -4253,7 +4235,7 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
|
||||
void
|
||||
xmlParseComment(xmlParserCtxtPtr ctxt) {
|
||||
xmlChar *buf = NULL;
|
||||
int len = 0;
|
||||
int len;
|
||||
int size = XML_PARSER_BUFFER_SIZE;
|
||||
int q, ql;
|
||||
int r, rl;
|
||||
@ -4282,10 +4264,11 @@ xmlParseComment(xmlParserCtxtPtr ctxt) {
|
||||
r = CUR_CHAR(rl);
|
||||
NEXTL(rl);
|
||||
cur = CUR_CHAR(l);
|
||||
len = 0;
|
||||
while (IS_CHAR(cur) &&
|
||||
((cur != '>') ||
|
||||
(r != '-') || (q != '-'))) {
|
||||
if ((r == '-') && (q == '-')) {
|
||||
if ((r == '-') && (q == '-') && (len > 1)) {
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"Comment must not contain '--' (double-hyphen)`\n");
|
||||
@ -4732,11 +4715,36 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
|
||||
ctxt->disableSAX = 1;
|
||||
}
|
||||
if (URI) {
|
||||
if ((ctxt->sax != NULL) &&
|
||||
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
|
||||
ctxt->sax->entityDecl(ctxt->userData, name,
|
||||
XML_EXTERNAL_PARAMETER_ENTITY,
|
||||
literal, URI, NULL);
|
||||
xmlURIPtr uri;
|
||||
|
||||
uri = xmlParseURI((const char *) URI);
|
||||
if (uri == NULL) {
|
||||
if ((ctxt->sax != NULL) &&
|
||||
(!ctxt->disableSAX) &&
|
||||
(ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"Invalid URI: %s\n", URI);
|
||||
ctxt->wellFormed = 0;
|
||||
ctxt->errNo = XML_ERR_INVALID_URI;
|
||||
} else {
|
||||
if (uri->fragment != NULL) {
|
||||
if ((ctxt->sax != NULL) &&
|
||||
(!ctxt->disableSAX) &&
|
||||
(ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"Fragment not allowed: %s\n", URI);
|
||||
ctxt->wellFormed = 0;
|
||||
ctxt->errNo = XML_ERR_URI_FRAGMENT;
|
||||
} else {
|
||||
if ((ctxt->sax != NULL) &&
|
||||
(!ctxt->disableSAX) &&
|
||||
(ctxt->sax->entityDecl != NULL))
|
||||
ctxt->sax->entityDecl(ctxt->userData, name,
|
||||
XML_EXTERNAL_PARAMETER_ENTITY,
|
||||
literal, URI, NULL);
|
||||
}
|
||||
xmlFreeURI(uri);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -4757,6 +4765,31 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
|
||||
ctxt->wellFormed = 0;
|
||||
ctxt->disableSAX = 1;
|
||||
}
|
||||
if (URI) {
|
||||
xmlURIPtr uri;
|
||||
|
||||
uri = xmlParseURI((const char *)URI);
|
||||
if (uri == NULL) {
|
||||
if ((ctxt->sax != NULL) &&
|
||||
(!ctxt->disableSAX) &&
|
||||
(ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"Invalid URI: %s\n", URI);
|
||||
ctxt->wellFormed = 0;
|
||||
ctxt->errNo = XML_ERR_INVALID_URI;
|
||||
} else {
|
||||
if (uri->fragment != NULL) {
|
||||
if ((ctxt->sax != NULL) &&
|
||||
(!ctxt->disableSAX) &&
|
||||
(ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"Fragment not allowed: %s\n", URI);
|
||||
ctxt->wellFormed = 0;
|
||||
ctxt->errNo = XML_ERR_URI_FRAGMENT;
|
||||
}
|
||||
xmlFreeURI(uri);
|
||||
}
|
||||
}
|
||||
if ((RAW != '>') && (!IS_BLANK(CUR))) {
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
@ -5973,7 +6006,20 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
|
||||
/*
|
||||
* We know that '<?xml' is here.
|
||||
*/
|
||||
SKIP(5);
|
||||
if ((RAW == '<') && (NXT(1) == '?') &&
|
||||
(NXT(2) == 'x') && (NXT(3) == 'm') &&
|
||||
(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
|
||||
SKIP(5);
|
||||
} else {
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
"Text declaration '<?xml' required\n");
|
||||
ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
|
||||
ctxt->wellFormed = 0;
|
||||
ctxt->disableSAX = 1;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (!IS_BLANK(CUR)) {
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
@ -6003,7 +6049,13 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
|
||||
ctxt->wellFormed = 0;
|
||||
ctxt->disableSAX = 1;
|
||||
}
|
||||
ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
|
||||
xmlParseEncodingDecl(ctxt);
|
||||
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
||||
/*
|
||||
* The XML REC instructs us to stop parsing right here
|
||||
*/
|
||||
return;
|
||||
}
|
||||
|
||||
SKIP_BLANKS;
|
||||
if ((RAW == '?') && (NXT(1) == '>')) {
|
||||
@ -6192,6 +6244,13 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
|
||||
(NXT(2) == 'x') && (NXT(3) == 'm') &&
|
||||
(NXT(4) == 'l')) {
|
||||
xmlParseTextDecl(ctxt);
|
||||
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
||||
/*
|
||||
* The XML REC instructs us to stop parsing right here
|
||||
*/
|
||||
ctxt->instate = XML_PARSER_EOF;
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (ctxt->myDoc == NULL) {
|
||||
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
|
||||
@ -6441,6 +6500,13 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
|
||||
(NXT(2) == 'x') && (NXT(3) == 'm') &&
|
||||
(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
|
||||
xmlParseTextDecl(ctxt);
|
||||
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
||||
/*
|
||||
* The XML REC instructs us to stop parsing right here
|
||||
*/
|
||||
ctxt->instate = XML_PARSER_EOF;
|
||||
return;
|
||||
}
|
||||
if (input->standalone) {
|
||||
if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
|
||||
ctxt->sax->error(ctxt->userData,
|
||||
@ -6947,6 +7013,15 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt) {
|
||||
(NXT(2) == 'x') && (NXT(3) == 'm') &&
|
||||
(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
|
||||
xmlParseTextDecl(ctxt);
|
||||
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
||||
/*
|
||||
* The XML REC instructs us to stop parsing
|
||||
* right here
|
||||
*/
|
||||
ctxt->instate = XML_PARSER_EOF;
|
||||
xmlFree(name);
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (ctxt->token == 0)
|
||||
ctxt->token = ' ';
|
||||
@ -8197,6 +8272,38 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
|
||||
ctxt->disableSAX = 1;
|
||||
ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
|
||||
}
|
||||
if (encoding != NULL) {
|
||||
xmlCharEncoding enc;
|
||||
xmlCharEncodingHandlerPtr handler;
|
||||
|
||||
if (ctxt->input->encoding != NULL)
|
||||
xmlFree((xmlChar *) ctxt->input->encoding);
|
||||
ctxt->input->encoding = encoding;
|
||||
|
||||
enc = xmlParseCharEncoding((const char *) encoding);
|
||||
/*
|
||||
* registered set of known encodings
|
||||
*/
|
||||
if (enc != XML_CHAR_ENCODING_ERROR) {
|
||||
xmlSwitchEncoding(ctxt, enc);
|
||||
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
||||
xmlFree(encoding);
|
||||
return(NULL);
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* fallback for unknown encodings
|
||||
*/
|
||||
handler = xmlFindCharEncodingHandler((const char *) encoding);
|
||||
if (handler != NULL) {
|
||||
xmlSwitchToEncoding(ctxt, handler);
|
||||
} else {
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
xmlFree(encoding);
|
||||
return(NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return(encoding);
|
||||
}
|
||||
@ -8362,7 +8469,13 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
|
||||
ctxt->wellFormed = 0;
|
||||
ctxt->disableSAX = 1;
|
||||
}
|
||||
ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
|
||||
xmlParseEncodingDecl(ctxt);
|
||||
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
||||
/*
|
||||
* The XML REC instructs us to stop parsing right here
|
||||
*/
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* We may have the standalone status.
|
||||
@ -8489,12 +8602,19 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
|
||||
if ((RAW == '<') && (NXT(1) == '?') &&
|
||||
(NXT(2) == 'x') && (NXT(3) == 'm') &&
|
||||
(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
|
||||
|
||||
/*
|
||||
* Note that we will switch encoding on the fly.
|
||||
*/
|
||||
xmlParseXMLDecl(ctxt);
|
||||
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
||||
/*
|
||||
* The XML REC instructs us to stop parsing right here
|
||||
*/
|
||||
return(-1);
|
||||
}
|
||||
ctxt->standalone = ctxt->input->standalone;
|
||||
SKIP_BLANKS;
|
||||
if ((ctxt->encoding == NULL) && (ctxt->input->encoding != NULL))
|
||||
ctxt->encoding = xmlStrdup(ctxt->input->encoding);
|
||||
|
||||
} else {
|
||||
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
|
||||
}
|
||||
@ -8581,14 +8701,6 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
|
||||
(!ctxt->disableSAX))
|
||||
ctxt->sax->endDocument(ctxt->userData);
|
||||
|
||||
/*
|
||||
* Grab the encoding if it was added on-the-fly
|
||||
*/
|
||||
if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
|
||||
(ctxt->myDoc->encoding == NULL)) {
|
||||
ctxt->myDoc->encoding = ctxt->encoding;
|
||||
ctxt->encoding = NULL;
|
||||
}
|
||||
if (! ctxt->wellFormed) return(-1);
|
||||
return(0);
|
||||
}
|
||||
@ -8805,6 +8917,14 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
|
||||
fprintf(stderr, "PP: Parsing XML Decl\n");
|
||||
#endif
|
||||
xmlParseXMLDecl(ctxt);
|
||||
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
||||
/*
|
||||
* The XML REC instructs us to stop parsing right
|
||||
* here
|
||||
*/
|
||||
ctxt->instate = XML_PARSER_EOF;
|
||||
return(0);
|
||||
}
|
||||
ctxt->standalone = ctxt->input->standalone;
|
||||
if ((ctxt->encoding == NULL) &&
|
||||
(ctxt->input->encoding != NULL))
|
||||
|
@ -28,10 +28,10 @@ extern "C" {
|
||||
* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
|
||||
*/
|
||||
#define IS_CHAR(c) \
|
||||
((((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) || \
|
||||
(((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) && \
|
||||
(((c) <= 0xD7FF) || ((c) >= 0xE000)) && ((c) >= 0) && \
|
||||
((c) <= 0x10FFFF))
|
||||
(((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) || \
|
||||
(((c) >= 0x20) && ((c) <= 0xD7FF)) || \
|
||||
(((c) >= 0xE000) && ((c) <= 0xFFFD)) || \
|
||||
(((c) >= 0x10000) && ((c) <= 0x10FFFF)))
|
||||
|
||||
/*
|
||||
* [3] S ::= (#x20 | #x9 | #xD | #xA)+
|
||||
@ -442,8 +442,10 @@ xmlParserCtxtPtr xmlNewParserCtxt (void);
|
||||
xmlParserCtxtPtr xmlCreateEntityParserCtxt(const xmlChar *URL,
|
||||
const xmlChar *ID,
|
||||
const xmlChar *base);
|
||||
void xmlSwitchEncoding (xmlParserCtxtPtr ctxt,
|
||||
int xmlSwitchEncoding (xmlParserCtxtPtr ctxt,
|
||||
xmlCharEncoding enc);
|
||||
int xmlSwitchToEncoding (xmlParserCtxtPtr ctxt,
|
||||
xmlCharEncodingHandlerPtr handler);
|
||||
void xmlFreeParserCtxt (xmlParserCtxtPtr ctxt);
|
||||
|
||||
/**
|
||||
|
25
tree.c
25
tree.c
@ -3771,6 +3771,31 @@ xmlBufferShrink(xmlBufferPtr buf, int len) {
|
||||
return(len);
|
||||
}
|
||||
|
||||
/**
|
||||
* xmlBufferGrow:
|
||||
* @buf: the buffer
|
||||
* @len: the minimum free sie to allocate
|
||||
*
|
||||
* Grow the available space of an XML buffer.
|
||||
*
|
||||
* Returns the new available space or -1 in case of error
|
||||
*/
|
||||
int
|
||||
xmlBufferGrow(xmlBufferPtr buf, int len) {
|
||||
int size;
|
||||
xmlChar *newbuf;
|
||||
|
||||
if (len <= buf->use) return(0);
|
||||
|
||||
size = buf->size + buf->use + len + 100;
|
||||
|
||||
newbuf = xmlRealloc(buf->content, size);
|
||||
if (newbuf == NULL) return(-1);
|
||||
buf->content = newbuf;
|
||||
buf->size = size;
|
||||
return(buf->size - buf->use);
|
||||
}
|
||||
|
||||
/**
|
||||
* xmlBufferDump:
|
||||
* @file: the file output
|
||||
|
2
tree.h
2
tree.h
@ -380,6 +380,8 @@ void xmlBufferCCat (xmlBufferPtr buf,
|
||||
const char *str);
|
||||
int xmlBufferShrink (xmlBufferPtr buf,
|
||||
int len);
|
||||
int xmlBufferGrow (xmlBufferPtr buf,
|
||||
int len);
|
||||
void xmlBufferEmpty (xmlBufferPtr buf);
|
||||
const xmlChar* xmlBufferContent (const xmlBufferPtr buf);
|
||||
int xmlBufferUse (const xmlBufferPtr buf);
|
||||
|
28
uri.c
28
uri.c
@ -1283,6 +1283,34 @@ xmlParseURIReference(xmlURIPtr uri, const char *str) {
|
||||
return(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* xmlParseURI:
|
||||
* @str: the URI string to analyze
|
||||
*
|
||||
* Parse an URI
|
||||
*
|
||||
* URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
|
||||
*
|
||||
* Returns a newly build xmlURIPtr or NULL in case of error
|
||||
*/
|
||||
xmlURIPtr
|
||||
xmlParseURI(const char *str) {
|
||||
xmlURIPtr uri;
|
||||
int ret;
|
||||
|
||||
if (str == NULL)
|
||||
return(NULL);
|
||||
uri = xmlCreateURI();
|
||||
if (uri != NULL) {
|
||||
ret = xmlParseURIReference(uri, str);
|
||||
if (ret) {
|
||||
xmlFreeURI(uri);
|
||||
return(NULL);
|
||||
}
|
||||
}
|
||||
return(uri);
|
||||
}
|
||||
|
||||
/**
|
||||
* xmlNormalizeURIPath:
|
||||
* @path: pointer to the path string
|
||||
|
@ -130,7 +130,9 @@ typedef enum {
|
||||
XML_ERR_ENTITY_CHAR_ERROR, /* 88 */
|
||||
XML_ERR_ENTITY_PE_INTERNAL, /* 88 */
|
||||
XML_ERR_ENTITY_LOOP, /* 89 */
|
||||
XML_ERR_ENTITY_BOUNDARY /* 90 */
|
||||
XML_ERR_ENTITY_BOUNDARY, /* 90 */
|
||||
XML_ERR_INVALID_URI, /* 91 */
|
||||
XML_ERR_URI_FRAGMENT /* 92 */
|
||||
}xmlParserErrors;
|
||||
|
||||
void xmlParserError (void *ctx,
|
||||
|
89
xmlIO.c
89
xmlIO.c
@ -498,6 +498,10 @@ xmlAllocParserInputBuffer(xmlCharEncoding enc) {
|
||||
}
|
||||
ret->buffer->alloc = XML_BUFFER_ALLOC_DOUBLEIT;
|
||||
ret->encoder = xmlGetCharEncodingHandler(enc);
|
||||
if (ret->encoder != NULL)
|
||||
ret->raw = xmlBufferCreate();
|
||||
else
|
||||
ret->raw = NULL;
|
||||
ret->readcallback = NULL;
|
||||
ret->closecallback = NULL;
|
||||
ret->context = NULL;
|
||||
@ -513,13 +517,20 @@ xmlAllocParserInputBuffer(xmlCharEncoding enc) {
|
||||
*/
|
||||
void
|
||||
xmlFreeParserInputBuffer(xmlParserInputBufferPtr in) {
|
||||
if (in->buffer != NULL) {
|
||||
xmlBufferFree(in->buffer);
|
||||
in->buffer = NULL;
|
||||
if (in->raw) {
|
||||
xmlBufferFree(in->raw);
|
||||
in->raw = NULL;
|
||||
}
|
||||
if (in->encoder != NULL) {
|
||||
xmlCharEncCloseFunc(in->encoder);
|
||||
}
|
||||
if (in->closecallback != NULL) {
|
||||
in->closecallback(in->context);
|
||||
}
|
||||
if (in->buffer != NULL) {
|
||||
xmlBufferFree(in->buffer);
|
||||
in->buffer = NULL;
|
||||
}
|
||||
|
||||
memset(in, 0xbe, (size_t) sizeof(xmlParserInputBuffer));
|
||||
xmlFree(in);
|
||||
@ -683,34 +694,22 @@ xmlParserInputBufferPush(xmlParserInputBufferPtr in, int len, const char *buf) {
|
||||
|
||||
if (len < 0) return(0);
|
||||
if (in->encoder != NULL) {
|
||||
xmlChar *buffer;
|
||||
int processed = len;
|
||||
|
||||
buffer = (xmlChar *) xmlMalloc((len + 1) * 2 * sizeof(xmlChar));
|
||||
if (buffer == NULL) {
|
||||
fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
|
||||
return(-1);
|
||||
}
|
||||
nbchars = in->encoder->input(buffer, (len + 1) * 2 * sizeof(xmlChar),
|
||||
(xmlChar *) buf, &processed);
|
||||
/*
|
||||
* TODO : we really need to have something atomic or the
|
||||
* encoder must report the number of bytes read
|
||||
/*
|
||||
* Store the data in the incoming raw buffer
|
||||
*/
|
||||
if (in->raw == NULL) {
|
||||
in->raw = xmlBufferCreate();
|
||||
}
|
||||
xmlBufferAdd(in->raw, (const xmlChar *) buf, len);
|
||||
|
||||
/*
|
||||
* convert as much as possible to the parser reading buffer.
|
||||
*/
|
||||
nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
|
||||
if (nbchars < 0) {
|
||||
fprintf(stderr, "xmlParserInputBufferPush: encoder error\n");
|
||||
xmlFree(buffer);
|
||||
return(-1);
|
||||
}
|
||||
if (processed != len) {
|
||||
fprintf(stderr,
|
||||
"TODO xmlParserInputBufferPush: processed != len\n");
|
||||
xmlFree(buffer);
|
||||
return(-1);
|
||||
}
|
||||
buffer[nbchars] = 0;
|
||||
xmlBufferAdd(in->buffer, (xmlChar *) buffer, nbchars);
|
||||
xmlFree(buffer);
|
||||
} else {
|
||||
nbchars = len;
|
||||
xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
|
||||
@ -730,7 +729,9 @@ xmlParserInputBufferPush(xmlParserInputBufferPtr in, int len, const char *buf) {
|
||||
* Grow up the content of the input buffer, the old data are preserved
|
||||
* This routine handle the I18N transcoding to internal UTF-8
|
||||
* This routine is used when operating the parser in normal (pull) mode
|
||||
* TODO: one should be able to remove one extra copy
|
||||
*
|
||||
* TODO: one should be able to remove one extra copy by copying directy
|
||||
* onto in->buffer or in->raw
|
||||
*
|
||||
* Returns the number of chars read and stored in the buffer, or -1
|
||||
* in case of error.
|
||||
@ -779,34 +780,22 @@ xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
|
||||
return(-1);
|
||||
}
|
||||
if (in->encoder != NULL) {
|
||||
xmlChar *buf;
|
||||
int wrote = res;
|
||||
|
||||
buf = (xmlChar *) xmlMalloc((res + 1) * 2 * sizeof(xmlChar));
|
||||
if (buf == NULL) {
|
||||
fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
|
||||
xmlFree(buffer);
|
||||
return(-1);
|
||||
/*
|
||||
* Store the data in the incoming raw buffer
|
||||
*/
|
||||
if (in->raw == NULL) {
|
||||
in->raw = xmlBufferCreate();
|
||||
}
|
||||
nbchars = in->encoder->input(buf, (res + 1) * 2 * sizeof(xmlChar),
|
||||
BAD_CAST buffer, &wrote);
|
||||
buf[nbchars] = 0;
|
||||
xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
|
||||
xmlFree(buf);
|
||||
xmlBufferAdd(in->raw, (const xmlChar *) buffer, len);
|
||||
|
||||
/*
|
||||
* Check that the encoder was able to process the full input
|
||||
* convert as much as possible to the parser reading buffer.
|
||||
*/
|
||||
if (wrote != res) {
|
||||
fprintf(stderr,
|
||||
"TODO : xmlParserInputBufferGrow wrote %d != res %d\n",
|
||||
wrote, res);
|
||||
/*
|
||||
* TODO !!!
|
||||
* Need to keep the unprocessed input in a buffer in->unprocessed
|
||||
*/
|
||||
nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
|
||||
if (nbchars < 0) {
|
||||
fprintf(stderr, "xmlParserInputBufferGrow: encoder error\n");
|
||||
return(-1);
|
||||
}
|
||||
|
||||
} else {
|
||||
nbchars = res;
|
||||
buffer[nbchars] = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user