/*
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
 *                     XML and HTML parsers.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
2002-03-18 19:37:11 +00:00
# define IN_LIBXML
2001-04-21 16:57:29 +00:00
# include "libxml.h"
2002-01-08 10:36:16 +00:00
/*
 * Directory separator character: a backslash on native Windows builds,
 * a forward slash everywhere else (including Cygwin).
 */
#if defined(WIN32) && !defined(__CYGWIN__)
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif
# include <string.h>
# ifdef HAVE_CTYPE_H
# include <ctype.h>
# endif
# ifdef HAVE_STDLIB_H
# include <stdlib.h>
# endif
# ifdef HAVE_SYS_STAT_H
# include <sys/stat.h>
# endif
# ifdef HAVE_FCNTL_H
# include <fcntl.h>
# endif
# ifdef HAVE_UNISTD_H
# include <unistd.h>
# endif
# ifdef HAVE_ZLIB_H
# include <zlib.h>
# endif
# include <libxml/xmlmemory.h>
# include <libxml/tree.h>
# include <libxml/parser.h>
# include <libxml/parserInternals.h>
# include <libxml/valid.h>
# include <libxml/entities.h>
# include <libxml/xmlerror.h>
# include <libxml/encoding.h>
# include <libxml/valid.h>
# include <libxml/xmlIO.h>
# include <libxml/uri.h>
2003-08-18 12:15:38 +00:00
# include <libxml/dict.h>
2001-09-14 10:29:27 +00:00
# include <libxml/SAX.h>
2001-08-22 14:29:45 +00:00
# ifdef LIBXML_CATALOG_ENABLED
# include <libxml/catalog.h>
# endif
2001-10-17 15:58:35 +00:00
# include <libxml/globals.h>
2003-10-11 15:22:13 +00:00
# include <libxml/chvalid.h>
2001-02-23 17:55:21 +00:00
2001-07-25 17:18:57 +00:00
/*
* Various global defaults for parsing
*/
2001-02-23 17:55:21 +00:00
2001-07-18 19:30:27 +00:00
/**
2001-02-23 17:55:21 +00:00
* xmlCheckVersion :
* @ version : the include version number
*
* check the compiled lib version against the include one .
* This can warn or immediately kill the application
*/
void
xmlCheckVersion ( int version ) {
int myversion = ( int ) LIBXML_VERSION ;
2001-10-14 09:56:15 +00:00
xmlInitParser ( ) ;
2001-05-07 20:50:47 +00:00
2001-02-23 17:55:21 +00:00
if ( ( myversion / 10000 ) ! = ( version / 10000 ) ) {
xmlGenericError ( xmlGenericErrorContext ,
" Fatal: program compiled against libxml %d using libxml %d \n " ,
( version / 10000 ) , ( myversion / 10000 ) ) ;
2001-11-20 08:35:07 +00:00
fprintf ( stderr ,
" Fatal: program compiled against libxml %d using libxml %d \n " ,
( version / 10000 ) , ( myversion / 10000 ) ) ;
2001-02-23 17:55:21 +00:00
}
if ( ( myversion / 100 ) < ( version / 100 ) ) {
xmlGenericError ( xmlGenericErrorContext ,
" Warning: program compiled against libxml %d using older %d \n " ,
( version / 100 ) , ( myversion / 100 ) ) ;
}
}
2003-10-05 21:33:18 +00:00
/************************************************************************
 *                                                                      *
 *              Some factorized error routines                          *
 *                                                                      *
 ************************************************************************/
/**
 * xmlErrMemory:
 * @ctxt:  an XML parser context (may be NULL)
 * @extra:  extra information appended to the message (may be NULL)
 *
 * Handle a memory allocation error.
 *
 * When a context is given, it is flagged with XML_ERR_NO_MEMORY, moved to
 * the XML_PARSER_EOF state and has SAX callbacks disabled. If the context
 * is already in that stopped state (SAX disabled and at EOF) nothing is
 * reported.
 */
void
xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
{
    /* Already stopped: do not report again. */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if (ctxt != NULL) {
        ctxt->errNo = XML_ERR_NO_MEMORY;
        ctxt->instate = XML_PARSER_EOF;
        ctxt->disableSAX = 1;
    }
    if (extra) {
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
                        NULL, NULL, 0, 0,
                        "Memory allocation failed : %s\n", extra);
    } else {
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
                        NULL, NULL, 0, 0, "Memory allocation failed\n");
    }
}
/**
2003-10-19 13:35:37 +00:00
* __xmlErrEncoding :
2003-10-05 21:33:18 +00:00
* @ ctxt : an XML parser context
* @ error : the error number
* @ msg : the error message
* @ str1 : an string info
* @ str2 : an string info
*
* Handle an encoding error
*/
2003-10-19 13:35:37 +00:00
void
__xmlErrEncoding ( xmlParserCtxtPtr ctxt , xmlParserErrors error ,
const char * msg , const xmlChar * str1 , const xmlChar * str2 )
2003-10-05 21:33:18 +00:00
{
2003-10-31 10:36:03 +00:00
if ( ( ctxt ! = NULL ) & & ( ctxt - > disableSAX ! = 0 ) & &
( ctxt - > instate = = XML_PARSER_EOF ) )
return ;
2003-10-05 21:33:18 +00:00
if ( ctxt ! = NULL )
ctxt - > errNo = error ;
2003-10-10 14:10:40 +00:00
__xmlRaiseError ( NULL , NULL , NULL ,
2003-10-05 21:33:18 +00:00
ctxt , NULL , XML_FROM_PARSER , error , XML_ERR_FATAL ,
NULL , 0 , ( const char * ) str1 , ( const char * ) str2 ,
NULL , 0 , 0 , msg , str1 , str2 ) ;
if ( ctxt ! = NULL ) {
ctxt - > wellFormed = 0 ;
if ( ctxt - > recovery = = 0 )
ctxt - > disableSAX = 1 ;
}
}
/**
 * xmlErrInternal:
 * @ctxt:  an XML parser context (may be NULL)
 * @msg:  the error message, used as a format string for @str
 * @str:  error information
 *
 * Handle an internal error.
 *
 * The error is raised as fatal with code XML_ERR_INTERNAL_ERROR. When a
 * context is given, it is marked not well-formed and SAX callbacks are
 * disabled unless the context is in recovery mode. Nothing is reported if
 * the context is already stopped (SAX disabled and in the XML_PARSER_EOF
 * state).
 */
static void
xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar *str)
{
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
        (ctxt->instate == XML_PARSER_EOF))
        return;
    if (ctxt != NULL)
        ctxt->errNo = XML_ERR_INTERNAL_ERROR;
    __xmlRaiseError(NULL, NULL, NULL,
                    ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
                    XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
                    0, 0, msg, str);
    if (ctxt != NULL) {
        ctxt->wellFormed = 0;
        if (ctxt->recovery == 0)
            ctxt->disableSAX = 1;
    }
}
/**
* xmlErrEncodingInt :
* @ ctxt : an XML parser context
* @ error : the error number
* @ msg : the error message
* @ val : an integer value
*
* n encoding error
*/
static void
xmlErrEncodingInt ( xmlParserCtxtPtr ctxt , xmlParserErrors error ,
const char * msg , int val )
{
2003-10-31 10:36:03 +00:00
if ( ( ctxt ! = NULL ) & & ( ctxt - > disableSAX ! = 0 ) & &
( ctxt - > instate = = XML_PARSER_EOF ) )
return ;
2003-10-05 21:33:18 +00:00
if ( ctxt ! = NULL )
ctxt - > errNo = error ;
2003-10-10 14:10:40 +00:00
__xmlRaiseError ( NULL , NULL , NULL ,
2003-10-05 21:33:18 +00:00
ctxt , NULL , XML_FROM_PARSER , error , XML_ERR_FATAL ,
NULL , 0 , NULL , NULL , NULL , val , 0 , msg , val ) ;
if ( ctxt ! = NULL ) {
ctxt - > wellFormed = 0 ;
if ( ctxt - > recovery = = 0 )
ctxt - > disableSAX = 1 ;
}
}
2001-02-23 17:55:21 +00:00
/**
 * xmlIsLetter:
 * @c:  a unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
}
/************************************************************************
 *                                                                      *
 *              Input handling functions for progressive parsing        *
 *                                                                      *
 ************************************************************************/
/* #define DEBUG_INPUT */
/* #define DEBUG_STACK */
/* #define DEBUG_PUSH */
/* we need to keep enough input to show errors in context */
#define LINE_LEN 80

#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debug-only consistency check (compiled when DEBUG_INPUT is defined).
 * Reports through the generic error handler when in->base does not match
 * the buffer content pointer or when in->cur lies outside
 * [base, base + use], then prints a one-line trace of the buffer state.
 */
static
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        xmlGenericError(xmlGenericErrorContext,
                "xmlParserInput: cur > base + use problem\n");
    }
    /*
     * Pointers are printed with %p rather than truncated through an int
     * cast, and the pointer difference is narrowed explicitly to match %d.
     */
    xmlGenericError(xmlGenericErrorContext,
            "buffer %p : content %p, cur %d, use %d, size %d\n",
            (void *) in, (void *) in->buf->buffer->content,
            (int) (in->cur - in->base),
            in->buf->buffer->use, in->buf->buffer->size);
}

#else
#define CHECK_BUFFER(in)
#endif
/**
 * xmlParserInputRead:
 * @in:  an XML parser input
 * @len:  an indicative size for the lookahead
 *
 * This function refreshes the input for the parser. It doesn't try to
 * preserve pointers to the input buffer, and discards already read data.
 *
 * Returns the value returned by xmlParserInputBufferRead() (documented as
 * the number of xmlChars read, 0 indicating the end of this entity), or -1
 * if @in is NULL or lacks a buffer, base/cur pointers or a read callback.
 */
int
xmlParserInputRead(xmlParserInputPtr in, int len) {
    int ret;
    int used;
    int indx;

#ifdef DEBUG_INPUT
    xmlGenericError(xmlGenericErrorContext, "Read\n");
#endif
    if (in == NULL) return(-1);
    if (in->buf == NULL) return(-1);
    if (in->base == NULL) return(-1);
    if (in->cur == NULL) return(-1);
    if (in->buf->buffer == NULL) return(-1);
    if (in->buf->readcallback == NULL) return(-1);

    CHECK_BUFFER(in);

    /* Drop everything before the current position. */
    used = in->cur - in->buf->buffer->content;
    ret = xmlBufferShrink(in->buf->buffer, used);
    if (ret > 0) {
        in->cur -= ret;
        in->consumed += ret;
    }
    ret = xmlParserInputBufferRead(in->buf, len);
    if (in->base != in->buf->buffer->content) {
        /*
         * the buffer has been reallocated
         */
        indx = in->cur - in->base;
        in->base = in->buf->buffer->content;
        in->cur = &in->buf->buffer->content[indx];
    }
    in->end = &in->buf->buffer->content[in->buf->buffer->use];

    CHECK_BUFFER(in);

    return(ret);
}
/**
 * xmlParserInputGrow:
 * @in:  an XML parser input
 * @len:  an indicative size for the lookahead
 *
 * This function increases the input for the parser. It tries to
 * preserve pointers to the input buffer, and keeps already read data.
 *
 * Returns the value returned by xmlParserInputBufferGrow() (documented as
 * the number of xmlChars read), 0 if more than INPUT_CHUNK bytes are
 * already available past the current position or there is no read
 * callback, or -1 if @in is NULL or lacks a buffer or base/cur pointers.
 */
int
xmlParserInputGrow(xmlParserInputPtr in, int len) {
    int ret;
    int indx;

#ifdef DEBUG_INPUT
    xmlGenericError(xmlGenericErrorContext, "Grow\n");
#endif
    if (in == NULL) return(-1);
    if (in->buf == NULL) return(-1);
    if (in->base == NULL) return(-1);
    if (in->cur == NULL) return(-1);
    if (in->buf->buffer == NULL) return(-1);

    CHECK_BUFFER(in);

    /* Enough lookahead is already buffered: nothing to do. */
    indx = in->cur - in->base;
    if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {

        CHECK_BUFFER(in);

        return(0);
    }
    if (in->buf->readcallback != NULL)
        ret = xmlParserInputBufferGrow(in->buf, len);
    else
        return(0);

    /*
     * NOTE : in->base may be a "dangling" i.e. freed pointer in this
     *        block, but we use it really as an integer to do some
     *        pointer arithmetic. Insure will raise it as a bug but in
     *        that specific case, that's not !
     */
    if (in->base != in->buf->buffer->content) {
        /*
         * the buffer has been reallocated
         */
        indx = in->cur - in->base;
        in->base = in->buf->buffer->content;
        in->cur = &in->buf->buffer->content[indx];
    }
    in->end = &in->buf->buffer->content[in->buf->buffer->use];

    CHECK_BUFFER(in);

    return(ret);
}
/**
 * xmlParserInputShrink:
 * @in:  an XML parser input
 *
 * This function removes used input for the parser.
 *
 * Consumed data is only dropped once more than INPUT_CHUNK bytes have been
 * used, and the last LINE_LEN bytes before the current position are kept.
 * If the buffer then holds no more than INPUT_CHUNK bytes, more input is
 * read. Does nothing if @in is NULL or lacks a buffer or base/cur pointers.
 */
void
xmlParserInputShrink(xmlParserInputPtr in) {
    int used;
    int ret;
    int indx;

#ifdef DEBUG_INPUT
    xmlGenericError(xmlGenericErrorContext, "Shrink\n");
#endif
    if (in == NULL) return;
    if (in->buf == NULL) return;
    if (in->base == NULL) return;
    if (in->cur == NULL) return;
    if (in->buf->buffer == NULL) return;

    CHECK_BUFFER(in);

    used = in->cur - in->buf->buffer->content;
    /*
     * Do not shrink on large buffers whose only a tiny fraction
     * was consumed
     */
    if (used > INPUT_CHUNK) {
        ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
        if (ret > 0) {
            in->cur -= ret;
            in->consumed += ret;
        }
        in->end = &in->buf->buffer->content[in->buf->buffer->use];
    }

    CHECK_BUFFER(in);

    if (in->buf->buffer->use > INPUT_CHUNK) {
        return;
    }
    xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
    if (in->base != in->buf->buffer->content) {
        /*
         * the buffer has been reallocated
         */
        indx = in->cur - in->base;
        in->base = in->buf->buffer->content;
        in->cur = &in->buf->buffer->content[indx];
    }
    in->end = &in->buf->buffer->content[in->buf->buffer->use];

    CHECK_BUFFER(in);
}
/************************************************************************
 *                                                                      *
 *              UTF8 character input and related functions              *
 *                                                                      *
 ************************************************************************/
/**
* xmlNextChar :
* @ ctxt : the XML parser context
*
* Skip to the next char input char .
*/
void
2003-03-22 00:04:05 +00:00
xmlNextChar ( xmlParserCtxtPtr ctxt )
{
2001-02-23 17:55:21 +00:00
if ( ctxt - > instate = = XML_PARSER_EOF )
2003-03-22 00:04:05 +00:00
return ;
2001-02-23 17:55:21 +00:00
2002-07-01 21:52:03 +00:00
if ( ctxt - > charset = = XML_CHAR_ENCODING_UTF8 ) {
2003-03-22 00:04:05 +00:00
if ( ( * ctxt - > input - > cur = = 0 ) & &
( xmlParserInputGrow ( ctxt - > input , INPUT_CHUNK ) < = 0 ) & &
( ctxt - > instate ! = XML_PARSER_COMMENT ) ) {
/*
* If we are at the end of the current entity and
* the context allows it , we pop consumed entities
* automatically .
* the auto closing should be blocked in other cases
*/
xmlPopInput ( ctxt ) ;
} else {
const unsigned char * cur ;
unsigned char c ;
/*
* 2.11 End - of - Line Handling
* the literal two - character sequence " #xD#xA " or a standalone
* literal # xD , an XML processor must pass to the application
* the single character # xA .
*/
if ( * ( ctxt - > input - > cur ) = = ' \n ' ) {
ctxt - > input - > line + + ;
ctxt - > input - > col = 1 ;
} else
ctxt - > input - > col + + ;
/*
* We are supposed to handle UTF8 , check it ' s valid
* From rfc2044 : encoding of the Unicode values on UTF - 8 :
*
* UCS - 4 range ( hex . ) UTF - 8 octet sequence ( binary )
* 0000 0000 - 0000 007F 0 xxxxxxx
* 0000 00 80 - 0000 07FF 110 xxxxx 10 xxxxxx
* 0000 0800 - 0000 FFFF 1110 xxxx 10 xxxxxx 10 xxxxxx
*
* Check for the 0x110000 limit too
*/
cur = ctxt - > input - > cur ;
c = * cur ;
if ( c & 0x80 ) {
2003-05-20 12:22:41 +00:00
if ( c = = 0xC0 )
goto encoding_error ;
2003-03-22 00:04:05 +00:00
if ( cur [ 1 ] = = 0 )
xmlParserInputGrow ( ctxt - > input , INPUT_CHUNK ) ;
if ( ( cur [ 1 ] & 0xc0 ) ! = 0x80 )
goto encoding_error ;
if ( ( c & 0xe0 ) = = 0xe0 ) {
unsigned int val ;
if ( cur [ 2 ] = = 0 )
xmlParserInputGrow ( ctxt - > input , INPUT_CHUNK ) ;
if ( ( cur [ 2 ] & 0xc0 ) ! = 0x80 )
goto encoding_error ;
if ( ( c & 0xf0 ) = = 0xf0 ) {
if ( cur [ 3 ] = = 0 )
xmlParserInputGrow ( ctxt - > input , INPUT_CHUNK ) ;
if ( ( ( c & 0xf8 ) ! = 0xf0 ) | |
( ( cur [ 3 ] & 0xc0 ) ! = 0x80 ) )
goto encoding_error ;
/* 4-byte code */
ctxt - > input - > cur + = 4 ;
val = ( cur [ 0 ] & 0x7 ) < < 18 ;
val | = ( cur [ 1 ] & 0x3f ) < < 12 ;
val | = ( cur [ 2 ] & 0x3f ) < < 6 ;
val | = cur [ 3 ] & 0x3f ;
} else {
/* 3-byte code */
ctxt - > input - > cur + = 3 ;
val = ( cur [ 0 ] & 0xf ) < < 12 ;
val | = ( cur [ 1 ] & 0x3f ) < < 6 ;
val | = cur [ 2 ] & 0x3f ;
}
if ( ( ( val > 0xd7ff ) & & ( val < 0xe000 ) ) | |
( ( val > 0xfffd ) & & ( val < 0x10000 ) ) | |
( val > = 0x110000 ) ) {
2003-10-05 21:33:18 +00:00
xmlErrEncodingInt ( ctxt , XML_ERR_INVALID_CHAR ,
" Char 0x%X out of allowed range \n " ,
val ) ;
2003-03-22 00:04:05 +00:00
}
} else
/* 2-byte code */
ctxt - > input - > cur + = 2 ;
} else
/* 1-byte code */
ctxt - > input - > cur + + ;
ctxt - > nbChars + + ;
if ( * ctxt - > input - > cur = = 0 )
xmlParserInputGrow ( ctxt - > input , INPUT_CHUNK ) ;
}
2001-02-23 17:55:21 +00:00
} else {
2003-03-22 00:04:05 +00:00
/*
* Assume it ' s a fixed length encoding ( 1 ) with
* a compatible encoding for the ASCII set , since
* XML constructs only use < 128 chars
*/
if ( * ( ctxt - > input - > cur ) = = ' \n ' ) {
ctxt - > input - > line + + ;
ctxt - > input - > col = 1 ;
} else
ctxt - > input - > col + + ;
ctxt - > input - > cur + + ;
ctxt - > nbChars + + ;
if ( * ctxt - > input - > cur = = 0 )
xmlParserInputGrow ( ctxt - > input , INPUT_CHUNK ) ;
2001-02-23 17:55:21 +00:00
}
2002-03-20 21:55:57 +00:00
if ( ( * ctxt - > input - > cur = = ' % ' ) & & ( ! ctxt - > html ) )
2003-03-22 00:04:05 +00:00
xmlParserHandlePEReference ( ctxt ) ;
2002-03-20 21:55:57 +00:00
if ( ( * ctxt - > input - > cur = = 0 ) & &
2001-02-23 17:55:21 +00:00
( xmlParserInputGrow ( ctxt - > input , INPUT_CHUNK ) < = 0 ) )
2003-03-22 00:04:05 +00:00
xmlPopInput ( ctxt ) ;
2001-02-23 17:55:21 +00:00
return ;
2003-10-05 13:51:35 +00:00
encoding_error :
2001-02-23 17:55:21 +00:00
/*
* If we detect an UTF8 error that probably mean that the
2001-12-31 16:16:02 +00:00
* input encoding didn ' t get properly advertised in the
2001-02-23 17:55:21 +00:00
* declaration header . Report the error and switch the encoding
* to ISO - Latin - 1 ( if you don ' t like this policy , just declare the
* encoding ! )
*/
2003-10-19 13:35:37 +00:00
__xmlErrEncoding ( ctxt , XML_ERR_INVALID_CHAR ,
2003-10-05 21:33:18 +00:00
" Input is not proper UTF-8, indicate encoding ! \n " ,
NULL , NULL ) ;
2001-02-23 17:55:21 +00:00
if ( ( ctxt - > sax ! = NULL ) & & ( ctxt - > sax - > error ! = NULL ) ) {
2003-03-22 00:04:05 +00:00
ctxt - > sax - > error ( ctxt - > userData ,
" Bytes: 0x%02X 0x%02X 0x%02X 0x%02X \n " ,
ctxt - > input - > cur [ 0 ] , ctxt - > input - > cur [ 1 ] ,
ctxt - > input - > cur [ 2 ] , ctxt - > input - > cur [ 3 ] ) ;
2001-02-23 17:55:21 +00:00
}
2003-03-22 00:04:05 +00:00
ctxt - > charset = XML_CHAR_ENCODING_8859_1 ;
2002-03-20 21:55:57 +00:00
ctxt - > input - > cur + + ;
2001-02-23 17:55:21 +00:00
return ;
}
/**
 * xmlCurrentChar:
 * @ctxt:  the XML parser context
 * @len:  pointer to the length of the char read
 *
 * The current char value, if using UTF-8 this may actually span multiple
 * bytes in the input buffer. Implement the end of line normalization:
 * 2.11 End-of-Line Handling
 * Wherever an external parsed entity or the literal entity value
 * of an internal parsed entity contains either the literal two-character
 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
 * must pass to the application the single character #xA.
 * This behavior can conveniently be produced by normalizing all
 * line breaks to #xA on input, before parsing.)
 *
 * Returns the current char value and its length in @len. Returns 0 if
 * @ctxt, @len or the context input is NULL, or if the parser is in the
 * XML_PARSER_EOF state. Returns 0 with *len set to 0 when an invalid
 * sequence is found with fewer than 4 bytes left in the input.
 */
int
xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
    if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL))
        return(0);
    if (ctxt->instate == XML_PARSER_EOF)
        return(0);

    /* Fast path: printable ASCII needs no decoding or normalization. */
    if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
        *len = 1;
        return((int) *ctxt->input->cur);
    }
    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
        /*
         * We are supposed to handle UTF8, check it's valid
         * From rfc2044: encoding of the Unicode values on UTF-8:
         *
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
         * 0000 0000-0000 007F   0xxxxxxx
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
         *
         * Check for the 0x110000 limit too
         */
        const unsigned char *cur = ctxt->input->cur;
        unsigned char c;
        unsigned int val;

        c = *cur;
        if (c & 0x80) {
            if (c == 0xC0)
                goto encoding_error;
            /*
             * Growing the input may reallocate the buffer, so the local
             * pointer is reloaded after every grow to avoid reading
             * through a stale address.
             */
            if (cur[1] == 0) {
                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                cur = ctxt->input->cur;
            }
            if ((cur[1] & 0xc0) != 0x80)
                goto encoding_error;
            if ((c & 0xe0) == 0xe0) {

                if (cur[2] == 0) {
                    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    cur = ctxt->input->cur;
                }
                if ((cur[2] & 0xc0) != 0x80)
                    goto encoding_error;
                if ((c & 0xf0) == 0xf0) {
                    if (cur[3] == 0) {
                        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                        cur = ctxt->input->cur;
                    }
                    if (((c & 0xf8) != 0xf0) ||
                        ((cur[3] & 0xc0) != 0x80))
                        goto encoding_error;
                    /* 4-byte code */
                    *len = 4;
                    val = (cur[0] & 0x7) << 18;
                    val |= (cur[1] & 0x3f) << 12;
                    val |= (cur[2] & 0x3f) << 6;
                    val |= cur[3] & 0x3f;
                } else {
                    /* 3-byte code */
                    *len = 3;
                    val = (cur[0] & 0xf) << 12;
                    val |= (cur[1] & 0x3f) << 6;
                    val |= cur[2] & 0x3f;
                }
            } else {
                /* 2-byte code */
                *len = 2;
                val = (cur[0] & 0x1f) << 6;
                val |= cur[1] & 0x3f;
            }
            if (!IS_CHAR(val)) {
                xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                                  "Char 0x%X out of allowed range\n", val);
            }
            return(val);
        } else {
            /* 1-byte code */
            *len = 1;
            if (*ctxt->input->cur == 0xD) {
                /* "#xD#xA" collapses to #xA: step over the #xD. */
                if (ctxt->input->cur[1] == 0xA) {
                    ctxt->nbChars++;
                    ctxt->input->cur++;
                }
                return(0xA);
            }
            return((int) *ctxt->input->cur);
        }
    }
    /*
     * Assume it's a fixed length encoding (1) with
     * a compatible encoding for the ASCII set, since
     * XML constructs only use < 128 chars
     */
    *len = 1;
    if (*ctxt->input->cur == 0xD) {
        if (ctxt->input->cur[1] == 0xA) {
            ctxt->nbChars++;
            ctxt->input->cur++;
        }
        return(0xA);
    }
    return((int) *ctxt->input->cur);

encoding_error:
    /*
     * An encoding problem may arise from a truncated input buffer
     * splitting a character in the middle. In that case do not raise
     * an error but return 0 to indicate an end of stream problem
     */
    if (ctxt->input->end - ctxt->input->cur < 4) {
        *len = 0;
        return(0);
    }

    /*
     * If we detect an UTF8 error that probably mean that the
     * input encoding didn't get properly advertised in the
     * declaration header. Report the error and switch the encoding
     * to ISO-Latin-1 (if you don't like this policy, just declare the
     * encoding !)
     */
    __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                     "Input is not proper UTF-8, indicate encoding !\n",
                     NULL, NULL);
    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
        ctxt->sax->error(ctxt->userData,
                         "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                         ctxt->input->cur[0], ctxt->input->cur[1],
                         ctxt->input->cur[2], ctxt->input->cur[3]);
    }
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
    *len = 1;
    return((int) *ctxt->input->cur);
}
/**
* xmlStringCurrentChar :
* @ ctxt : the XML parser context
* @ cur : pointer to the beginning of the char
* @ len : pointer to the length of the char read
*
2001-12-31 16:16:02 +00:00
* The current char value , if using UTF - 8 this may actually span multiple
2001-02-23 17:55:21 +00:00
* bytes in the input buffer .
*
2001-10-10 09:45:09 +00:00
* Returns the current char value and its length
2001-02-23 17:55:21 +00:00
*/
int
2002-01-13 15:43:22 +00:00
xmlStringCurrentChar ( xmlParserCtxtPtr ctxt , const xmlChar * cur , int * len )
{
2001-04-27 17:13:01 +00:00
if ( ( ctxt = = NULL ) | | ( ctxt - > charset = = XML_CHAR_ENCODING_UTF8 ) ) {
2002-01-13 15:43:22 +00:00
/*
* We are supposed to handle UTF8 , check it ' s valid
* From rfc2044 : encoding of the Unicode values on UTF - 8 :
*
* UCS - 4 range ( hex . ) UTF - 8 octet sequence ( binary )
* 0000 0000 - 0000 007F 0 xxxxxxx
* 0000 00 80 - 0000 07FF 110 xxxxx 10 xxxxxx
* 0000 0800 - 0000 FFFF 1110 xxxx 10 xxxxxx 10 xxxxxx
*
* Check for the 0x110000 limit too
*/
unsigned char c ;
unsigned int val ;
c = * cur ;
if ( c & 0x80 ) {
if ( ( cur [ 1 ] & 0xc0 ) ! = 0x80 )
goto encoding_error ;
if ( ( c & 0xe0 ) = = 0xe0 ) {
if ( ( cur [ 2 ] & 0xc0 ) ! = 0x80 )
goto encoding_error ;
if ( ( c & 0xf0 ) = = 0xf0 ) {
if ( ( ( c & 0xf8 ) ! = 0xf0 ) | | ( ( cur [ 3 ] & 0xc0 ) ! = 0x80 ) )
goto encoding_error ;
/* 4-byte code */
* len = 4 ;
val = ( cur [ 0 ] & 0x7 ) < < 18 ;
val | = ( cur [ 1 ] & 0x3f ) < < 12 ;
val | = ( cur [ 2 ] & 0x3f ) < < 6 ;
val | = cur [ 3 ] & 0x3f ;
} else {
/* 3-byte code */
* len = 3 ;
val = ( cur [ 0 ] & 0xf ) < < 12 ;
val | = ( cur [ 1 ] & 0x3f ) < < 6 ;
val | = cur [ 2 ] & 0x3f ;
}
} else {
/* 2-byte code */
* len = 2 ;
val = ( cur [ 0 ] & 0x1f ) < < 6 ;
val | = cur [ 1 ] & 0x3f ;
}
if ( ! IS_CHAR ( val ) ) {
2003-10-05 21:33:18 +00:00
xmlErrEncodingInt ( ctxt , XML_ERR_INVALID_CHAR ,
" Char 0x%X out of allowed range \n " , val ) ;
2002-01-13 15:43:22 +00:00
}
return ( val ) ;
} else {
/* 1-byte code */
* len = 1 ;
return ( ( int ) * cur ) ;
}
2001-02-23 17:55:21 +00:00
}
/*
2001-10-10 09:45:09 +00:00
* Assume it ' s a fixed length encoding ( 1 ) with
2001-12-31 16:16:02 +00:00
* a compatible encoding for the ASCII set , since
2001-02-23 17:55:21 +00:00
* XML constructs only use < 128 chars
*/
* len = 1 ;
2002-01-13 15:43:22 +00:00
return ( ( int ) * cur ) ;
2001-02-23 17:55:21 +00:00
encoding_error :
2002-01-13 15:43:22 +00:00
2001-02-23 17:55:21 +00:00
/*
* If we detect an UTF8 error that probably mean that the
2001-12-31 16:16:02 +00:00
* input encoding didn ' t get properly advertised in the
2001-02-23 17:55:21 +00:00
* declaration header . Report the error and switch the encoding
* to ISO - Latin - 1 ( if you don ' t like this policy , just declare the
* encoding ! )
*/
2003-10-19 13:35:37 +00:00
__xmlErrEncoding ( ctxt , XML_ERR_INVALID_CHAR ,
2003-10-05 21:33:18 +00:00
" Input is not proper UTF-8, indicate encoding ! \n " ,
NULL , NULL ) ;
if ( ( ctxt ! = NULL ) & & ( ctxt - > sax ! = NULL ) & & ( ctxt - > sax - > error ! = NULL ) ) {
ctxt - > sax - > error ( ctxt - > userData ,
" Bytes: 0x%02X 0x%02X 0x%02X 0x%02X \n " ,
ctxt - > input - > cur [ 0 ] , ctxt - > input - > cur [ 1 ] ,
ctxt - > input - > cur [ 2 ] , ctxt - > input - > cur [ 3 ] ) ;
2001-02-23 17:55:21 +00:00
}
* len = 1 ;
2002-01-13 15:43:22 +00:00
return ( ( int ) * cur ) ;
2001-02-23 17:55:21 +00:00
}
/**
2001-03-24 17:00:36 +00:00
* xmlCopyCharMultiByte :
2001-12-31 16:16:02 +00:00
* @ out : pointer to an array of xmlChar
2001-02-23 17:55:21 +00:00
* @ val : the char value
*
* append the char value in the array
*
* Returns the number of xmlChar written
*/
int
2001-03-24 17:00:36 +00:00
xmlCopyCharMultiByte ( xmlChar * out , int val ) {
2001-02-23 17:55:21 +00:00
/*
* We are supposed to handle UTF8 , check it ' s valid
* From rfc2044 : encoding of the Unicode values on UTF - 8 :
*
* UCS - 4 range ( hex . ) UTF - 8 octet sequence ( binary )
* 0000 0000 - 0000 007F 0 xxxxxxx
* 0000 00 80 - 0000 07FF 110 xxxxx 10 xxxxxx
* 0000 0800 - 0000 FFFF 1110 xxxx 10 xxxxxx 10 xxxxxx
*/
2001-03-24 17:00:36 +00:00
if ( val > = 0x80 ) {
xmlChar * savedout = out ;
int bits ;
if ( val < 0x800 ) { * out + + = ( val > > 6 ) | 0xC0 ; bits = 0 ; }
else if ( val < 0x10000 ) { * out + + = ( val > > 12 ) | 0xE0 ; bits = 6 ; }
else if ( val < 0x110000 ) { * out + + = ( val > > 18 ) | 0xF0 ; bits = 12 ; }
else {
2003-10-05 21:33:18 +00:00
xmlErrEncodingInt ( NULL , XML_ERR_INVALID_CHAR ,
2001-12-31 16:16:02 +00:00
" Internal error, xmlCopyCharMultiByte 0x%X out of bound \n " ,
2003-10-05 21:33:18 +00:00
val ) ;
2001-02-23 17:55:21 +00:00
return ( 0 ) ;
}
2001-03-24 17:00:36 +00:00
for ( ; bits > = 0 ; bits - = 6 )
* out + + = ( ( val > > bits ) & 0x3F ) | 0x80 ;
return ( out - savedout ) ;
2001-02-23 17:55:21 +00:00
}
2001-03-24 17:00:36 +00:00
* out = ( xmlChar ) val ;
return 1 ;
}
2001-02-23 17:55:21 +00:00
2001-03-24 17:00:36 +00:00
/**
* xmlCopyChar :
* @ len : Ignored , compatibility
2001-12-31 16:16:02 +00:00
* @ out : pointer to an array of xmlChar
2001-03-24 17:00:36 +00:00
* @ val : the char value
*
* append the char value in the array
*
* Returns the number of xmlChar written
*/
2001-02-23 17:55:21 +00:00
2001-03-24 17:00:36 +00:00
int
2001-03-26 16:28:29 +00:00
xmlCopyChar ( int len ATTRIBUTE_UNUSED , xmlChar * out , int val ) {
2001-03-24 17:00:36 +00:00
/* the len parameter is ignored */
if ( val > = 0x80 ) {
return ( xmlCopyCharMultiByte ( out , val ) ) ;
2001-02-23 17:55:21 +00:00
}
* out = ( xmlChar ) val ;
2001-03-24 17:00:36 +00:00
return 1 ;
2001-02-23 17:55:21 +00:00
}
/************************************************************************
* *
* Commodity functions to switch encodings *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
 * xmlSwitchEncoding:
 * @ctxt:  the parser context
 * @enc:  the encoding value (number)
 *
 * change the input functions when discovering the character encoding
 * of a given entity.
 *
 * XML_CHAR_ENCODING_NONE and XML_CHAR_ENCODING_UTF8 only set the context
 * charset to UTF-8 (skipping a leading UTF-8 Byte Order Mark for the
 * latter). For the other values a conversion handler is looked up with
 * xmlGetCharEncodingHandler() and installed through
 * xmlSwitchToEncoding(); when no handler is found, ASCII and the
 * ISO-8859-1..9 values are accepted as is and the remaining named
 * encodings raise an "encoding not supported" error.
 *
 * Returns 0 in case of success, -1 otherwise
 */
int
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
{
    xmlCharEncodingHandlerPtr handler;

    if (ctxt == NULL)
        return(-1);

    switch (enc) {
        case XML_CHAR_ENCODING_ERROR:
            /*
             * Report the error once and fail right away, no handler
             * lookup is attempted for this value.
             */
            __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
                             "encoding unknown\n", NULL, NULL);
            return(-1);
        case XML_CHAR_ENCODING_NONE:
            /* let's assume it's UTF-8 without the XML decl */
            ctxt->charset = XML_CHAR_ENCODING_UTF8;
            return(0);
        case XML_CHAR_ENCODING_UTF8:
            /* default encoding, no conversion should be needed */
            ctxt->charset = XML_CHAR_ENCODING_UTF8;

            /*
             * Errata on XML-1.0 June 20 2001
             * Specific handling of the Byte Order Mark for
             * UTF-8
             */
            if ((ctxt->input != NULL) &&
                (ctxt->input->cur[0] == 0xEF) &&
                (ctxt->input->cur[1] == 0xBB) &&
                (ctxt->input->cur[2] == 0xBF)) {
                ctxt->input->cur += 3;
            }
            return(0);
        case XML_CHAR_ENCODING_UTF16LE:
        case XML_CHAR_ENCODING_UTF16BE:
            /*
             * The raw input characters are encoded in UTF-16. As this
             * function is expected to be called after xmlCharEncInFunc,
             * ctxt->input->cur is expected to contain UTF-8 encoded
             * characters, so the raw UTF-16 Byte Order Mark has also
             * been converted into an UTF-8 BOM. Let's skip that BOM.
             */
            if ((ctxt->input != NULL) &&
                (ctxt->input->cur[0] == 0xEF) &&
                (ctxt->input->cur[1] == 0xBB) &&
                (ctxt->input->cur[2] == 0xBF)) {
                ctxt->input->cur += 3;
            }
            break;
        default:
            break;
    }

    handler = xmlGetCharEncodingHandler(enc);
    if (handler == NULL) {
        /*
         * Default handlers. The ERROR, NONE and UTF8 values never get
         * here, they all returned from the switch above.
         */
        switch (enc) {
            case XML_CHAR_ENCODING_ASCII:
                /* default encoding, no conversion should be needed */
                ctxt->charset = XML_CHAR_ENCODING_UTF8;
                return(0);
            case XML_CHAR_ENCODING_UTF16LE:
                break;
            case XML_CHAR_ENCODING_UTF16BE:
                break;
            case XML_CHAR_ENCODING_UCS4LE:
                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                                 "encoding not supported %s\n",
                                 BAD_CAST "UCS4 little endian", NULL);
                break;
            case XML_CHAR_ENCODING_UCS4BE:
                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                                 "encoding not supported %s\n",
                                 BAD_CAST "UCS4 big endian", NULL);
                break;
            case XML_CHAR_ENCODING_EBCDIC:
                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                                 "encoding not supported %s\n",
                                 BAD_CAST "EBCDIC", NULL);
                break;
            case XML_CHAR_ENCODING_UCS4_2143:
                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                                 "encoding not supported %s\n",
                                 BAD_CAST "UCS4 2143", NULL);
                break;
            case XML_CHAR_ENCODING_UCS4_3412:
                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                                 "encoding not supported %s\n",
                                 BAD_CAST "UCS4 3412", NULL);
                break;
            case XML_CHAR_ENCODING_UCS2:
                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                                 "encoding not supported %s\n",
                                 BAD_CAST "UCS2", NULL);
                break;
            case XML_CHAR_ENCODING_8859_1:
            case XML_CHAR_ENCODING_8859_2:
            case XML_CHAR_ENCODING_8859_3:
            case XML_CHAR_ENCODING_8859_4:
            case XML_CHAR_ENCODING_8859_5:
            case XML_CHAR_ENCODING_8859_6:
            case XML_CHAR_ENCODING_8859_7:
            case XML_CHAR_ENCODING_8859_8:
            case XML_CHAR_ENCODING_8859_9:
                /*
                 * We used to keep the internal content in the
                 * document encoding however this turns being
                 * unmaintainable. So xmlGetCharEncodingHandler() is
                 * expected to return non-null values for these now.
                 */
                if ((ctxt->inputNr == 1) &&
                    (ctxt->encoding == NULL) &&
                    (ctxt->input != NULL) &&
                    (ctxt->input->encoding != NULL)) {
                    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
                }
                ctxt->charset = enc;
                return(0);
            case XML_CHAR_ENCODING_2022_JP:
                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                                 "encoding not supported %s\n",
                                 BAD_CAST "ISO-2022-JP", NULL);
                break;
            case XML_CHAR_ENCODING_SHIFT_JIS:
                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                                 "encoding not supported %s\n",
                                 BAD_CAST "Shift_JIS", NULL);
                break;
            case XML_CHAR_ENCODING_EUC_JP:
                __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                                 "encoding not supported %s\n",
                                 BAD_CAST "EUC-JP", NULL);
                break;
            default:
                break;
        }
        /* no conversion handler and no built-in fallback */
        return(-1);
    }

    ctxt->charset = XML_CHAR_ENCODING_UTF8;
    return(xmlSwitchToEncoding(ctxt, handler));
}
/**
2003-10-19 13:35:37 +00:00
* xmlSwitchInputEncoding :
2001-02-23 17:55:21 +00:00
* @ ctxt : the parser context
2003-10-19 13:35:37 +00:00
* @ input : the input stream
2001-02-23 17:55:21 +00:00
* @ handler : the encoding handler
*
* change the input functions when discovering the character encoding
* of a given entity .
*
* Returns 0 in case of success , - 1 otherwise
*/
int
2003-10-19 13:35:37 +00:00
xmlSwitchInputEncoding ( xmlParserCtxtPtr ctxt , xmlParserInputPtr input ,
xmlCharEncodingHandlerPtr handler )
2001-02-23 17:55:21 +00:00
{
int nbchars ;
2003-10-19 13:35:37 +00:00
if ( handler = = NULL )
return ( - 1 ) ;
if ( input = = NULL )
return ( - 1 ) ;
if ( input - > buf ! = NULL ) {
if ( input - > buf - > encoder ! = NULL ) {
/*
* Check in case the auto encoding detetection triggered
* in already .
*/
if ( input - > buf - > encoder = = handler )
return ( 0 ) ;
2001-02-23 17:55:21 +00:00
2003-10-19 13:35:37 +00:00
/*
* " UTF-16 " can be used for both LE and BE
if ( ( ! xmlStrncmp ( BAD_CAST input - > buf - > encoder - > name ,
BAD_CAST " UTF-16 " , 6 ) ) & &
( ! xmlStrncmp ( BAD_CAST handler - > name ,
BAD_CAST " UTF-16 " , 6 ) ) ) {
return ( 0 ) ;
}
*/
2001-02-23 17:55:21 +00:00
2003-10-19 13:35:37 +00:00
/*
* Note : this is a bit dangerous , but that ' s what it
* takes to use nearly compatible signature for different
* encodings .
*/
xmlCharEncCloseFunc ( input - > buf - > encoder ) ;
input - > buf - > encoder = handler ;
return ( 0 ) ;
}
input - > buf - > encoder = handler ;
/*
* Is there already some content down the pipe to convert ?
*/
if ( ( input - > buf - > buffer ! = NULL ) & & ( input - > buf - > buffer - > use > 0 ) ) {
int processed ;
2004-02-11 13:25:26 +00:00
unsigned int use ;
2003-10-19 13:35:37 +00:00
/*
* Specific handling of the Byte Order Mark for
* UTF - 16
*/
if ( ( handler - > name ! = NULL ) & &
( ! strcmp ( handler - > name , " UTF-16LE " ) ) & &
( input - > cur [ 0 ] = = 0xFF ) & & ( input - > cur [ 1 ] = = 0xFE ) ) {
input - > cur + = 2 ;
}
if ( ( handler - > name ! = NULL ) & &
( ! strcmp ( handler - > name , " UTF-16BE " ) ) & &
( input - > cur [ 0 ] = = 0xFE ) & & ( input - > cur [ 1 ] = = 0xFF ) ) {
input - > cur + = 2 ;
}
/*
* Errata on XML - 1.0 June 20 2001
* Specific handling of the Byte Order Mark for
* UTF - 8
*/
if ( ( handler - > name ! = NULL ) & &
( ! strcmp ( handler - > name , " UTF-8 " ) ) & &
( input - > cur [ 0 ] = = 0xEF ) & &
( input - > cur [ 1 ] = = 0xBB ) & & ( input - > cur [ 2 ] = = 0xBF ) ) {
input - > cur + = 3 ;
}
/*
* Shrink the current input buffer .
* Move it as the raw buffer and create a new input buffer
*/
processed = input - > cur - input - > base ;
xmlBufferShrink ( input - > buf - > buffer , processed ) ;
input - > buf - > raw = input - > buf - > buffer ;
input - > buf - > buffer = xmlBufferCreate ( ) ;
2004-02-11 13:25:26 +00:00
input - > buf - > rawconsumed = processed ;
use = input - > buf - > raw - > use ;
2003-10-19 13:35:37 +00:00
if ( ctxt - > html ) {
/*
* convert as much as possible of the buffer
*/
nbchars = xmlCharEncInFunc ( input - > buf - > encoder ,
input - > buf - > buffer ,
input - > buf - > raw ) ;
} else {
/*
* convert just enough to get
* ' < ? xml version = " 1.0 " encoding = " xxx " ? > '
* parsed with the autodetected encoding
* into the parser reading buffer .
*/
nbchars = xmlCharEncFirstLine ( input - > buf - > encoder ,
input - > buf - > buffer ,
input - > buf - > raw ) ;
}
if ( nbchars < 0 ) {
xmlErrInternal ( ctxt ,
" switching encoding: encoder error \n " ,
NULL ) ;
return ( - 1 ) ;
}
2004-02-11 13:25:26 +00:00
input - > buf - > rawconsumed + = use - input - > buf - > raw - > use ;
2003-10-19 13:35:37 +00:00
input - > base = input - > cur = input - > buf - > buffer - > content ;
input - > end = & input - > base [ input - > buf - > buffer - > use ] ;
}
return ( 0 ) ;
} else {
if ( ( input - > length = = 0 ) | | ( input - > buf = = NULL ) ) {
/*
* When parsing a static memory array one must know the
* size to be able to convert the buffer .
*/
xmlErrInternal ( ctxt , " switching encoding : no input \n " , NULL ) ;
return ( - 1 ) ;
} else {
int processed ;
/*
* Shrink the current input buffer .
* Move it as the raw buffer and create a new input buffer
*/
processed = input - > cur - input - > base ;
input - > buf - > raw = xmlBufferCreate ( ) ;
xmlBufferAdd ( input - > buf - > raw , input - > cur ,
input - > length - processed ) ;
input - > buf - > buffer = xmlBufferCreate ( ) ;
/*
* convert as much as possible of the raw input
* to the parser reading buffer .
*/
nbchars = xmlCharEncInFunc ( input - > buf - > encoder ,
input - > buf - > buffer ,
input - > buf - > raw ) ;
if ( nbchars < 0 ) {
xmlErrInternal ( ctxt ,
" switching encoding: encoder error \n " ,
NULL ) ;
return ( - 1 ) ;
}
/*
* Conversion succeeded , get rid of the old buffer
*/
if ( ( input - > free ! = NULL ) & & ( input - > base ! = NULL ) )
input - > free ( ( xmlChar * ) input - > base ) ;
input - > base = input - > cur = input - > buf - > buffer - > content ;
input - > end = & input - > base [ input - > buf - > buffer - > use ] ;
}
}
return ( 0 ) ;
}
/**
 * xmlSwitchToEncoding:
 * @ctxt:  the parser context
 * @handler:  the encoding handler
 *
 * change the input functions when discovering the character encoding
 * of a given entity. The handler is applied to the current input of
 * the context (ctxt->input) and the context charset is set to UTF-8.
 *
 * Returns 0 in case of success, -1 otherwise (NULL @ctxt or @handler,
 * no current input, or failure to switch the input encoding)
 */
int
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
{
    int ret;

    if ((ctxt == NULL) || (handler == NULL))
        return(-1);
    if (ctxt->input == NULL) {
        xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
                       NULL);
        return(-1);
    }

    /* keep the result so that a failed switch is reported to the caller */
    ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);

    /*
     * The parsing is now done in UTF8 natively
     */
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
    return(ret);
}
/************************************************************************
* *
* Commodity functions to handle entities processing *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
 * xmlFreeInputStream:
 * @input:  an xmlParserInputPtr
 *
 * Free up an input stream: its filename, directory, encoding and
 * version strings, its base content when a free callback is set, the
 * attached parser input buffer and finally the structure itself.
 * A NULL @input is ignored.
 */
void
xmlFreeInputStream(xmlParserInputPtr input) {
    if (input != NULL) {
        if (input->filename != NULL) {
            xmlFree((char *) input->filename);
        }
        if (input->directory != NULL) {
            xmlFree((char *) input->directory);
        }
        if (input->encoding != NULL) {
            xmlFree((char *) input->encoding);
        }
        if (input->version != NULL) {
            xmlFree((char *) input->version);
        }
        /* the content is only released through the stream's own callback */
        if ((input->base != NULL) && (input->free != NULL)) {
            input->free((xmlChar *) input->base);
        }
        if (input->buf != NULL) {
            xmlFreeParserInputBuffer(input->buf);
        }
        xmlFree(input);
    }
}
/**
 * xmlNewInputStream:
 * @ctxt:  an XML parser context
 *
 * Create a new input stream structure. The structure is zeroed, then
 * line and col are set to 1, standalone to -1 and id to the next value
 * of a function-local counter. @ctxt is only used to report an
 * allocation failure.
 *
 * Returns the new input stream or NULL
 */
xmlParserInputPtr
xmlNewInputStream(xmlParserCtxtPtr ctxt) {
    static int id = 0;
    xmlParserInputPtr stream;

    stream = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
    if (stream != NULL) {
        memset(stream, 0, sizeof(xmlParserInput));
        stream->line = 1;
        stream->col = 1;
        stream->standalone = -1;
        /*
         * we don't care about thread reentrancy unicity for a single
         * parser context (and hence thread) is sufficient.
         */
        stream->id = id;
        id += 1;
        return(stream);
    }

    xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
    return(NULL);
}
/**
 * xmlNewIOInputStream:
 * @ctxt:  an XML parser context
 * @input:  an I/O Input
 * @enc:  the charset encoding if known
 *
 * Create a new input stream structure encapsulating the @input into
 * a stream suitable for the parser. The stream points at the content
 * of the buffer of @input. If @enc is not XML_CHAR_ENCODING_NONE,
 * xmlSwitchEncoding() is called on @ctxt with it and its result is
 * ignored.
 *
 * Returns the new input stream or NULL (NULL @input or allocation
 * failure)
 */
xmlParserInputPtr
xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
                    xmlCharEncoding enc) {
    xmlParserInputPtr inputStream;

    /* there is nothing to wrap without an I/O buffer */
    if (input == NULL)
        return(NULL);
    if (xmlParserDebugEntities)
        xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
    inputStream = xmlNewInputStream(ctxt);
    if (inputStream == NULL) {
        return(NULL);
    }
    inputStream->filename = NULL;
    inputStream->buf = input;
    inputStream->base = inputStream->buf->buffer->content;
    inputStream->cur = inputStream->buf->buffer->content;
    inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
    if (enc != XML_CHAR_ENCODING_NONE) {
        xmlSwitchEncoding(ctxt, enc);
    }

    return(inputStream);
}
/**
 * xmlNewEntityInputStream:
 * @ctxt:  an XML parser context
 * @entity:  an Entity pointer
 *
 * Create a new input stream based on an xmlEntityPtr.
 *
 * When the entity has content, the stream reads directly from
 * entity->content (the content is not copied) while the filename is a
 * private copy of entity->URI. When it has no content, external parsed
 * and external parameter entities are loaded through
 * xmlLoadExternalEntity(); all the other entity types raise an
 * internal error.
 *
 * Returns the new input stream or NULL
 */
xmlParserInputPtr
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
    xmlParserInputPtr input;

    if (entity == NULL) {
        xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
                       NULL);
        return(NULL);
    }
    if (xmlParserDebugEntities)
        xmlGenericError(xmlGenericErrorContext,
                "new input from entity: %s\n", entity->name);
    if (entity->content == NULL) {
        switch (entity->etype) {
            case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
                xmlErrInternal(ctxt, "Cannot parse entity %s\n",
                               entity->name);
                break;
            case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
            case XML_EXTERNAL_PARAMETER_ENTITY:
                return(xmlLoadExternalEntity((char *) entity->URI,
                       (char *) entity->ExternalID, ctxt));
            case XML_INTERNAL_GENERAL_ENTITY:
                xmlErrInternal(ctxt,
                      "Internal entity %s without content !\n",
                      entity->name);
                break;
            case XML_INTERNAL_PARAMETER_ENTITY:
                xmlErrInternal(ctxt,
                      "Internal parameter entity %s without content !\n",
                      entity->name);
                break;
            case XML_INTERNAL_PREDEFINED_ENTITY:
                xmlErrInternal(ctxt,
                      "Predefined entity %s without content !\n",
                      entity->name);
                break;
        }
        return(NULL);
    }
    input = xmlNewInputStream(ctxt);
    if (input == NULL) {
        return(NULL);
    }
    /*
     * xmlFreeInputStream() releases input->filename with xmlFree(), so
     * the stream needs its own copy: sharing entity->URI would free
     * the string still referenced by the entity.
     */
    if (entity->URI != NULL)
        input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
    input->base = entity->content;
    input->cur = entity->content;
    input->length = entity->length;
    input->end = &entity->content[input->length];
    return(input);
}
/**
 * xmlNewStringInputStream:
 * @ctxt:  an XML parser context
 * @buffer:  an memory buffer
 *
 * Create a new input stream based on a memory buffer. The stream
 * points directly at @buffer (no copy is made) and its length is
 * computed with xmlStrlen().
 *
 * Returns the new input stream, or NULL if @buffer is NULL or the
 * stream could not be allocated
 */
xmlParserInputPtr
xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
    xmlParserInputPtr stream;

    if (buffer == NULL) {
        xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
                       NULL);
        return(NULL);
    }
    if (xmlParserDebugEntities) {
        xmlGenericError(xmlGenericErrorContext,
                "new fixed input: %.30s\n", buffer);
    }

    stream = xmlNewInputStream(ctxt);
    if (stream != NULL) {
        stream->base = buffer;
        stream->cur = buffer;
        stream->length = xmlStrlen(buffer);
        stream->end = &buffer[stream->length];
        return(stream);
    }

    xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
    return(NULL);
}
/**
 * xmlNewInputFromFile:
 * @ctxt:  an XML parser context
 * @filename:  the filename to use as entity
 *
 * Create a new input stream based on a file or an URL.
 *
 * The resource is opened with xmlParserInputBufferCreateFilename() and
 * the resulting stream is passed through xmlCheckHTTPInput(), whose
 * return value replaces the stream. The stream filename is the
 * canonic path of the stream's own filename if it has one, of
 * @filename otherwise; the stream directory is derived from the same
 * name. If the context has no directory yet it receives a copy of the
 * stream directory.
 *
 * Returns the new input stream or NULL in case of error
 */
xmlParserInputPtr
xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
    xmlParserInputBufferPtr buf;
    xmlParserInputPtr inputStream;
    char *directory = NULL;
    xmlChar *URI = NULL;

    if (xmlParserDebugEntities)
        xmlGenericError(xmlGenericErrorContext,
                "new input from file: %s\n", filename);
    if (ctxt == NULL) return(NULL);
    buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
    if (buf == NULL) {
        __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
                       (const char *) filename);
        return(NULL);
    }

    inputStream = xmlNewInputStream(ctxt);
    if (inputStream == NULL) {
        /* the buffer has no owner yet, release it here */
        xmlFreeParserInputBuffer(buf);
        return(NULL);
    }
    inputStream->buf = buf;
    inputStream = xmlCheckHTTPInput(ctxt, inputStream);
    if (inputStream == NULL)
        return(NULL);

    if (inputStream->filename == NULL)
        URI = xmlStrdup((xmlChar *) filename);
    else
        URI = xmlStrdup((xmlChar *) inputStream->filename);
    directory = xmlParserGetDirectory((const char *) URI);
    inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
    if (URI != NULL) xmlFree((char *) URI);
    inputStream->directory = directory;

    inputStream->base = inputStream->buf->buffer->content;
    inputStream->cur = inputStream->buf->buffer->content;
    inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
    if ((ctxt->directory == NULL) && (directory != NULL))
        ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
    return(inputStream);
}
/************************************************************************
* *
* Commodity functions to handle parser contexts *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
* xmlInitParserCtxt :
* @ ctxt : an XML parser context
*
* Initialize a parser context
2003-04-24 16:06:47 +00:00
*
* Returns 0 in case of success and - 1 in case of error
2001-02-23 17:55:21 +00:00
*/
2003-04-24 16:06:47 +00:00
int
2001-02-23 17:55:21 +00:00
xmlInitParserCtxt ( xmlParserCtxtPtr ctxt )
{
2001-08-31 14:55:30 +00:00
if ( ctxt = = NULL ) {
2003-10-05 21:33:18 +00:00
xmlErrInternal ( NULL , " Got NULL parser context \n " , NULL ) ;
2003-04-24 16:06:47 +00:00
return ( - 1 ) ;
2001-08-31 14:55:30 +00:00
}
2001-02-23 17:55:21 +00:00
xmlDefaultSAXHandlerInit ( ) ;
2003-08-18 12:15:38 +00:00
ctxt - > dict = xmlDictCreate ( ) ;
if ( ctxt - > dict = = NULL ) {
2003-10-05 21:33:18 +00:00
xmlErrMemory ( NULL , " cannot initialize parser context \n " ) ;
2003-08-18 12:15:38 +00:00
return ( - 1 ) ;
}
2002-11-22 05:07:29 +00:00
ctxt - > sax = ( xmlSAXHandler * ) xmlMalloc ( sizeof ( xmlSAXHandler ) ) ;
if ( ctxt - > sax = = NULL ) {
2003-10-05 21:33:18 +00:00
xmlErrMemory ( NULL , " cannot initialize parser context \n " ) ;
2003-04-24 16:06:47 +00:00
return ( - 1 ) ;
2001-02-23 17:55:21 +00:00
}
else
2003-09-25 14:29:29 +00:00
xmlSAXVersion ( ctxt - > sax , 2 ) ;
2001-02-23 17:55:21 +00:00
2003-08-19 15:01:28 +00:00
ctxt - > maxatts = 0 ;
ctxt - > atts = NULL ;
2001-02-23 17:55:21 +00:00
/* Allocate the Input stack */
ctxt - > inputTab = ( xmlParserInputPtr * )
xmlMalloc ( 5 * sizeof ( xmlParserInputPtr ) ) ;
if ( ctxt - > inputTab = = NULL ) {
2003-10-05 21:33:18 +00:00
xmlErrMemory ( NULL , " cannot initialize parser context \n " ) ;
2001-02-23 17:55:21 +00:00
ctxt - > inputNr = 0 ;
ctxt - > inputMax = 0 ;
ctxt - > input = NULL ;
2003-04-24 16:06:47 +00:00
return ( - 1 ) ;
2001-02-23 17:55:21 +00:00
}
ctxt - > inputNr = 0 ;
ctxt - > inputMax = 5 ;
ctxt - > input = NULL ;
ctxt - > version = NULL ;
ctxt - > encoding = NULL ;
ctxt - > standalone = - 1 ;
ctxt - > hasExternalSubset = 0 ;
ctxt - > hasPErefs = 0 ;
ctxt - > html = 0 ;
ctxt - > external = 0 ;
ctxt - > instate = XML_PARSER_START ;
ctxt - > token = 0 ;
ctxt - > directory = NULL ;
/* Allocate the Node stack */
ctxt - > nodeTab = ( xmlNodePtr * ) xmlMalloc ( 10 * sizeof ( xmlNodePtr ) ) ;
if ( ctxt - > nodeTab = = NULL ) {
2003-10-05 21:33:18 +00:00
xmlErrMemory ( NULL , " cannot initialize parser context \n " ) ;
2001-02-23 17:55:21 +00:00
ctxt - > nodeNr = 0 ;
ctxt - > nodeMax = 0 ;
ctxt - > node = NULL ;
ctxt - > inputNr = 0 ;
ctxt - > inputMax = 0 ;
ctxt - > input = NULL ;
2003-04-24 16:06:47 +00:00
return ( - 1 ) ;
2001-02-23 17:55:21 +00:00
}
ctxt - > nodeNr = 0 ;
ctxt - > nodeMax = 10 ;
ctxt - > node = NULL ;
/* Allocate the Name stack */
2003-08-18 12:15:38 +00:00
ctxt - > nameTab = ( const xmlChar * * ) xmlMalloc ( 10 * sizeof ( xmlChar * ) ) ;
2001-02-23 17:55:21 +00:00
if ( ctxt - > nameTab = = NULL ) {
2003-10-05 21:33:18 +00:00
xmlErrMemory ( NULL , " cannot initialize parser context \n " ) ;
2001-02-23 17:55:21 +00:00
ctxt - > nodeNr = 0 ;
ctxt - > nodeMax = 0 ;
ctxt - > node = NULL ;
ctxt - > inputNr = 0 ;
ctxt - > inputMax = 0 ;
ctxt - > input = NULL ;
ctxt - > nameNr = 0 ;
ctxt - > nameMax = 0 ;
ctxt - > name = NULL ;
2003-04-24 16:06:47 +00:00
return ( - 1 ) ;
2001-02-23 17:55:21 +00:00
}
ctxt - > nameNr = 0 ;
ctxt - > nameMax = 10 ;
ctxt - > name = NULL ;
/* Allocate the space stack */
ctxt - > spaceTab = ( int * ) xmlMalloc ( 10 * sizeof ( int ) ) ;
if ( ctxt - > spaceTab = = NULL ) {
2003-10-05 21:33:18 +00:00
xmlErrMemory ( NULL , " cannot initialize parser context \n " ) ;
2001-02-23 17:55:21 +00:00
ctxt - > nodeNr = 0 ;
ctxt - > nodeMax = 0 ;
ctxt - > node = NULL ;
ctxt - > inputNr = 0 ;
ctxt - > inputMax = 0 ;
ctxt - > input = NULL ;
ctxt - > nameNr = 0 ;
ctxt - > nameMax = 0 ;
ctxt - > name = NULL ;
ctxt - > spaceNr = 0 ;
ctxt - > spaceMax = 0 ;
ctxt - > space = NULL ;
2003-04-24 16:06:47 +00:00
return ( - 1 ) ;
2001-02-23 17:55:21 +00:00
}
ctxt - > spaceNr = 1 ;
ctxt - > spaceMax = 10 ;
ctxt - > spaceTab [ 0 ] = - 1 ;
ctxt - > space = & ctxt - > spaceTab [ 0 ] ;
ctxt - > userData = ctxt ;
ctxt - > myDoc = NULL ;
ctxt - > wellFormed = 1 ;
2003-09-11 23:42:01 +00:00
ctxt - > nsWellFormed = 1 ;
2001-02-23 17:55:21 +00:00
ctxt - > valid = 1 ;
ctxt - > loadsubset = xmlLoadExtDtdDefaultValue ;
ctxt - > validate = xmlDoValidityCheckingDefaultValue ;
ctxt - > pedantic = xmlPedanticParserDefaultValue ;
2001-07-25 17:18:57 +00:00
ctxt - > linenumbers = xmlLineNumbersDefaultValue ;
2001-02-23 17:55:21 +00:00
ctxt - > keepBlanks = xmlKeepBlanksDefaultValue ;
2001-09-14 10:29:27 +00:00
if ( ctxt - > keepBlanks = = 0 )
2003-09-26 14:51:39 +00:00
ctxt - > sax - > ignorableWhitespace = xmlSAX2IgnorableWhitespace ;
2001-09-14 10:29:27 +00:00
2001-02-23 17:55:21 +00:00
ctxt - > vctxt . userData = ctxt ;
2002-02-03 20:13:06 +00:00
ctxt - > vctxt . error = xmlParserValidityError ;
ctxt - > vctxt . warning = xmlParserValidityWarning ;
2001-02-23 17:55:21 +00:00
if ( ctxt - > validate ) {
if ( xmlGetWarningsDefaultValue = = 0 )
ctxt - > vctxt . warning = NULL ;
else
ctxt - > vctxt . warning = xmlParserValidityWarning ;
2001-04-21 14:16:10 +00:00
ctxt - > vctxt . nodeMax = 0 ;
2001-02-23 17:55:21 +00:00
}
ctxt - > replaceEntities = xmlSubstituteEntitiesDefaultValue ;
ctxt - > record_info = 0 ;
ctxt - > nbChars = 0 ;
ctxt - > checkIndex = 0 ;
ctxt - > inSubset = 0 ;
ctxt - > errNo = XML_ERR_OK ;
ctxt - > depth = 0 ;
ctxt - > charset = XML_CHAR_ENCODING_UTF8 ;
2001-08-22 14:29:45 +00:00
ctxt - > catalogs = NULL ;
2001-02-23 17:55:21 +00:00
xmlInitNodeInfoSeq ( & ctxt - > node_seq ) ;
2003-04-24 16:06:47 +00:00
return ( 0 ) ;
2001-02-23 17:55:21 +00:00
}
/**
 * xmlFreeParserCtxt:
 * @ctxt:  an XML parser context
 *
 * Free all the memory used by a parser context. However the parsed
 * document in ctxt->myDoc is not freed. A NULL @ctxt is ignored.
 */
void
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
{
    xmlParserInputPtr input;

    if (ctxt == NULL)
        return;

    /* release every input stream still on the input stack */
    for (input = inputPop(ctxt); input != NULL; input = inputPop(ctxt)) {
        xmlFreeInputStream(input);
    }

    if (ctxt->spaceTab != NULL) {
        xmlFree(ctxt->spaceTab);
    }
    if (ctxt->nameTab != NULL) {
        xmlFree((xmlChar **) ctxt->nameTab);
    }
    if (ctxt->nodeTab != NULL) {
        xmlFree(ctxt->nodeTab);
    }
    if (ctxt->inputTab != NULL) {
        xmlFree(ctxt->inputTab);
    }
    if (ctxt->version != NULL) {
        xmlFree((char *) ctxt->version);
    }
    if (ctxt->encoding != NULL) {
        xmlFree((char *) ctxt->encoding);
    }
    if (ctxt->extSubURI != NULL) {
        xmlFree((char *) ctxt->extSubURI);
    }
    if (ctxt->extSubSystem != NULL) {
        xmlFree((char *) ctxt->extSubSystem);
    }

    /* with SAX1 support the shared default handler block is never freed */
#ifdef LIBXML_SAX1_ENABLED
    if ((ctxt->sax != NULL) &&
        (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
#else
    if (ctxt->sax != NULL)
#endif /* LIBXML_SAX1_ENABLED */
        xmlFree(ctxt->sax);

    if (ctxt->directory != NULL) {
        xmlFree((char *) ctxt->directory);
    }
    if (ctxt->vctxt.nodeTab != NULL) {
        xmlFree(ctxt->vctxt.nodeTab);
    }
    if (ctxt->atts != NULL) {
        xmlFree((xmlChar **) ctxt->atts);
    }
    if (ctxt->dict != NULL) {
        xmlDictFree(ctxt->dict);
    }
    if (ctxt->nsTab != NULL) {
        xmlFree((char *) ctxt->nsTab);
    }
    if (ctxt->pushTab != NULL) {
        xmlFree(ctxt->pushTab);
    }
    if (ctxt->attallocs != NULL) {
        xmlFree(ctxt->attallocs);
    }
    /* attsDefault entries are freed with xmlFree, attsSpecial ones are not */
    if (ctxt->attsDefault != NULL) {
        xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
    }
    if (ctxt->attsSpecial != NULL) {
        xmlHashFree(ctxt->attsSpecial, NULL);
    }

    /* walk the lists of recycled element and attribute nodes */
    {
        xmlNodePtr elem, following;

        for (elem = ctxt->freeElems; elem != NULL; elem = following) {
            following = elem->next;
            xmlFree(elem);
        }
    }
    {
        xmlAttrPtr attr, following;

        for (attr = ctxt->freeAttrs; attr != NULL; attr = following) {
            following = attr->next;
            xmlFree(attr);
        }
    }

    /*
     * cleanup the error strings
     */
    if (ctxt->lastError.message != NULL) {
        xmlFree(ctxt->lastError.message);
    }
    if (ctxt->lastError.file != NULL) {
        xmlFree(ctxt->lastError.file);
    }
    if (ctxt->lastError.str1 != NULL) {
        xmlFree(ctxt->lastError.str1);
    }
    if (ctxt->lastError.str2 != NULL) {
        xmlFree(ctxt->lastError.str2);
    }
    if (ctxt->lastError.str3 != NULL) {
        xmlFree(ctxt->lastError.str3);
    }

#ifdef LIBXML_CATALOG_ENABLED
    if (ctxt->catalogs != NULL) {
        xmlCatalogFreeLocal(ctxt->catalogs);
    }
#endif
    xmlFree(ctxt);
}
/**
 * xmlNewParserCtxt:
 *
 * Allocate and initialize a new parser context. The structure is
 * zeroed before xmlInitParserCtxt() runs on it; if the initialization
 * fails the context is released with xmlFreeParserCtxt().
 *
 * Returns the xmlParserCtxtPtr or NULL
 */
xmlParserCtxtPtr
xmlNewParserCtxt()
{
    xmlParserCtxtPtr newCtxt;

    newCtxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
    if (newCtxt == NULL) {
        xmlErrMemory(NULL, "cannot allocate parser context\n");
        return(NULL);
    }
    memset(newCtxt, 0, sizeof(xmlParserCtxt));

    if (xmlInitParserCtxt(newCtxt) >= 0)
        return(newCtxt);

    /* initialization failed: give back whatever was set up */
    xmlFreeParserCtxt(newCtxt);
    return(NULL);
}
/************************************************************************
* *
* Handling of node informations *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
 * xmlClearParserCtxt:
 * @ctxt:  an XML parser context
 *
 * Clear the node info sequence of a parser context and run
 * xmlInitParserCtxt() on the context again. A NULL @ctxt is ignored
 * and the result of the re-initialization is not reported.
 *
 * NOTE(review): only the node info sequence is released here before
 * the context is re-initialized; confirm whether the other resources
 * held by the context are expected to be released by the caller.
 */
void
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
{
    if (ctxt != NULL) {
        xmlClearNodeInfoSeq(&ctxt->node_seq);
        xmlInitParserCtxt(ctxt);
    }
}
/**
* xmlParserFindNodeInfo :
2002-12-10 15:19:08 +00:00
* @ ctx : an XML parser context
2001-02-23 17:55:21 +00:00
* @ node : an XML node within the tree
*
* Find the parser node info struct for a given node
*
* Returns an xmlParserNodeInfo block pointer or NULL
*/
2002-01-20 22:08:18 +00:00
const xmlParserNodeInfo * xmlParserFindNodeInfo ( const xmlParserCtxtPtr ctx ,
const xmlNodePtr node )
2001-02-23 17:55:21 +00:00
{
unsigned long pos ;
/* Find position where node should be at */
pos = xmlParserFindNodeInfoIndex ( & ctx - > node_seq , node ) ;
2001-09-21 09:47:08 +00:00
if ( pos < ctx - > node_seq . length & & ctx - > node_seq . buffer [ pos ] . node = = node )
2001-02-23 17:55:21 +00:00
return & ctx - > node_seq . buffer [ pos ] ;
else
return NULL ;
}
/**
 * xmlInitNodeInfoSeq:
 * @seq:  a node info sequence pointer
 *
 * Initialize (set to initial state) a node info sequence: zero length,
 * zero capacity and no buffer. Any buffer previously referenced by @seq
 * is NOT freed here. A NULL @seq is ignored.
 */
void
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
{
    if (seq == NULL)
        return;

    seq->length = 0;
    seq->maximum = 0;
    seq->buffer = NULL;
}
/**
 * xmlClearNodeInfoSeq:
 * @seq:  a node info sequence pointer
 *
 * Clear a node info sequence: release its buffer (if any) with xmlFree()
 * and reset it through xmlInitNodeInfoSeq(). A NULL @seq is ignored.
 */
void
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
{
    if (seq == NULL)
        return;

    if (seq->buffer != NULL)
        xmlFree(seq->buffer);
    xmlInitNodeInfoSeq(seq);
}
/**
* xmlParserFindNodeInfoIndex :
* @ seq : a node info sequence pointer
* @ node : an XML node pointer
*
*
* xmlParserFindNodeInfoIndex : Find the index that the info record for
* the given node is or should be at in a sorted sequence
*
* Returns a long indicating the position of the record
*/
2002-01-20 22:08:18 +00:00
unsigned long xmlParserFindNodeInfoIndex ( const xmlParserNodeInfoSeqPtr seq ,
const xmlNodePtr node )
2001-02-23 17:55:21 +00:00
{
unsigned long upper , lower , middle ;
int found = 0 ;
/* Do a binary search for the key */
lower = 1 ;
upper = seq - > length ;
middle = 0 ;
while ( lower < = upper & & ! found ) {
middle = lower + ( upper - lower ) / 2 ;
if ( node = = seq - > buffer [ middle - 1 ] . node )
found = 1 ;
else if ( node < seq - > buffer [ middle - 1 ] . node )
upper = middle - 1 ;
else
lower = middle + 1 ;
}
/* Return position */
if ( middle = = 0 | | seq - > buffer [ middle - 1 ] . node < node )
return middle ;
else
return middle - 1 ;
}
/**
* xmlParserAddNodeInfo :
* @ ctxt : an XML parser context
* @ info : a node info sequence pointer
*
* Insert node info record into the sorted sequence
*/
void
2002-01-23 17:53:44 +00:00
xmlParserAddNodeInfo ( xmlParserCtxtPtr ctxt ,
2002-01-20 22:08:18 +00:00
const xmlParserNodeInfoPtr info )
2001-02-23 17:55:21 +00:00
{
2002-01-23 17:53:44 +00:00
unsigned long pos ;
/* Find pos and check to see if node is already in the sequence */
2003-07-31 14:47:38 +00:00
pos = xmlParserFindNodeInfoIndex ( & ctxt - > node_seq , ( xmlNodePtr )
2002-01-23 17:53:44 +00:00
info - > node ) ;
if ( pos < ctxt - > node_seq . length
& & ctxt - > node_seq . buffer [ pos ] . node = = info - > node ) {
ctxt - > node_seq . buffer [ pos ] = * info ;
}
2001-02-23 17:55:21 +00:00
2002-01-23 17:53:44 +00:00
/* Otherwise, we need to add new node to buffer */
else {
if ( ctxt - > node_seq . length + 1 > ctxt - > node_seq . maximum ) {
xmlParserNodeInfo * tmp_buffer ;
unsigned int byte_size ;
if ( ctxt - > node_seq . maximum = = 0 )
ctxt - > node_seq . maximum = 2 ;
byte_size = ( sizeof ( * ctxt - > node_seq . buffer ) *
( 2 * ctxt - > node_seq . maximum ) ) ;
if ( ctxt - > node_seq . buffer = = NULL )
2003-04-21 23:07:45 +00:00
tmp_buffer = ( xmlParserNodeInfo * ) xmlMalloc ( byte_size ) ;
2002-01-23 17:53:44 +00:00
else
tmp_buffer =
( xmlParserNodeInfo * ) xmlRealloc ( ctxt - > node_seq . buffer ,
byte_size ) ;
if ( tmp_buffer = = NULL ) {
2003-10-05 21:33:18 +00:00
xmlErrMemory ( ctxt , " failed to allocate buffer \n " ) ;
2002-01-23 17:53:44 +00:00
return ;
}
ctxt - > node_seq . buffer = tmp_buffer ;
ctxt - > node_seq . maximum * = 2 ;
}
2001-02-23 17:55:21 +00:00
2002-01-23 17:53:44 +00:00
/* If position is not at end, move elements out of the way */
if ( pos ! = ctxt - > node_seq . length ) {
unsigned long i ;
2001-02-23 17:55:21 +00:00
2002-01-23 17:53:44 +00:00
for ( i = ctxt - > node_seq . length ; i > pos ; i - - )
ctxt - > node_seq . buffer [ i ] = ctxt - > node_seq . buffer [ i - 1 ] ;
}
2001-02-23 17:55:21 +00:00
2002-01-23 17:53:44 +00:00
/* Copy element and increase length */
ctxt - > node_seq . buffer [ pos ] = * info ;
ctxt - > node_seq . length + + ;
2001-02-23 17:55:21 +00:00
}
}
2001-07-25 17:18:57 +00:00
/************************************************************************
* *
* Default settings *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
 * xmlPedanticParserDefault:
 * @val:  int 0 or 1
 *
 * Set the global default that enables pedantic warnings and hand back
 * the value it had before the call.
 *
 * Returns the previous value of the setting.
 */
int
xmlPedanticParserDefault(int val)
{
    const int previous = xmlPedanticParserDefaultValue;

    xmlPedanticParserDefaultValue = val;
    return previous;
}
/**
 * xmlLineNumbersDefault:
 * @val:  int 0 or 1
 *
 * Set the global default that enables line numbers in elements contents
 * and hand back the value it had before the call. This may break on old
 * applications and is turned off by default.
 *
 * Returns the previous value of the setting.
 */
int
xmlLineNumbersDefault(int val)
{
    const int previous = xmlLineNumbersDefaultValue;

    xmlLineNumbersDefaultValue = val;
    return previous;
}
/**
 * xmlSubstituteEntitiesDefault:
 * @val:  int 0 or 1
 *
 * Set the global default for entity support and hand back the value it
 * had before the call.
 * Initially the parser always keeps entity references instead of
 * substituting entity values in the output. This function has to be used
 * to change the default parser behavior;
 * SAX::substituteEntities() has to be used for changing that on a file
 * by file basis.
 *
 * Returns the previous value: 0 for no substitution, 1 for substitution.
 */
int
xmlSubstituteEntitiesDefault(int val)
{
    const int previous = xmlSubstituteEntitiesDefaultValue;

    xmlSubstituteEntitiesDefaultValue = val;
    return previous;
}
/**
 * xmlKeepBlanksDefault:
 * @val:  int 0 or 1
 *
 * Set the global default for blank text node support and hand back the
 * value it had before the call.
 * The 1.x version of the parser used a heuristic to try to detect
 * ignorable white spaces. As a result the SAX callback was generating
 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() ones,
 * and when using the DOM output, text nodes containing those blanks were
 * not generated.
 * The 2.x and later versions switch to the XML standard way:
 * ignorableWhitespace() callbacks are only generated when running the
 * parser in validating mode and when the current element doesn't allow
 * CDATA or mixed content.
 * This function is provided as a way to force the standard behavior
 * on 1.X libs and to switch back to the old mode for compatibility when
 * running 1.X client code on 2.X. Upgrade of 1.X code should be done
 * by using the xmlIsBlankNode() commodity function to detect the "empty"
 * nodes generated.
 * This value also affects autogeneration of indentation when saving
 * code: the call sets xmlIndentTreeOutput to the logical negation of
 * @val, so if blank sections are kept, indentation is not generated.
 *
 * Returns the previous value of the setting.
 */
int
xmlKeepBlanksDefault(int val)
{
    const int previous = xmlKeepBlanksDefaultValue;

    /* Keeping blanks and auto-indenting on output are mutually exclusive. */
    xmlIndentTreeOutput = !val;
    xmlKeepBlanksDefaultValue = val;
    return previous;
}