2001-02-23 17:55:21 +00:00
/*
2001-12-31 16:16:02 +00:00
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
 *                     XML and HTML parsers.
2001-02-23 17:55:21 +00:00
*
* See Copyright for the status of this software .
*
2001-06-24 12:13:24 +00:00
* daniel @ veillard . com
2001-02-23 17:55:21 +00:00
*/
2002-03-18 19:37:11 +00:00
# define IN_LIBXML
2001-04-21 16:57:29 +00:00
# include "libxml.h"
2017-10-09 00:20:01 +02:00
/*
 * XML_DIR_SEP: directory separator character, a backslash on native
 * Windows builds (not Cygwin) and a forward slash everywhere else.
 * It must be a single-character constant.
 */
#if defined(_WIN32) && !defined (__CYGWIN__)
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif
# include <string.h>
# ifdef HAVE_CTYPE_H
# include <ctype.h>
# endif
# ifdef HAVE_STDLIB_H
# include <stdlib.h>
# endif
# ifdef HAVE_SYS_STAT_H
# include <sys/stat.h>
# endif
# ifdef HAVE_FCNTL_H
# include <fcntl.h>
# endif
# ifdef HAVE_UNISTD_H
# include <unistd.h>
# endif
2017-11-13 17:08:38 +01:00
# ifdef LIBXML_ZLIB_ENABLED
2001-02-23 17:55:21 +00:00
# include <zlib.h>
# endif
# include <libxml/xmlmemory.h>
# include <libxml/tree.h>
# include <libxml/parser.h>
# include <libxml/parserInternals.h>
# include <libxml/valid.h>
# include <libxml/entities.h>
# include <libxml/xmlerror.h>
# include <libxml/encoding.h>
# include <libxml/valid.h>
# include <libxml/xmlIO.h>
# include <libxml/uri.h>
2003-08-18 12:15:38 +00:00
# include <libxml/dict.h>
2001-09-14 10:29:27 +00:00
# include <libxml/SAX.h>
2001-08-22 14:29:45 +00:00
# ifdef LIBXML_CATALOG_ENABLED
# include <libxml/catalog.h>
# endif
2001-10-17 15:58:35 +00:00
# include <libxml/globals.h>
2003-10-11 15:22:13 +00:00
# include <libxml/chvalid.h>
2001-02-23 17:55:21 +00:00
2016-03-01 15:18:04 -08:00
/*
 * Accessors for the read cursor and the end of the current input of a
 * parser context. The argument and the whole expansion are parenthesized
 * so the macros remain correct when handed an expression.
 */
#define CUR(ctxt) ((ctxt)->input->cur)
#define END(ctxt) ((ctxt)->input->end)
/* Non-zero while the read cursor has not moved past the end pointer. */
#define VALID_CTXT(ctxt) (CUR(ctxt) <= END(ctxt))
2012-07-16 14:19:49 +08:00
# include "buf.h"
# include "enc.h"
2001-07-25 17:18:57 +00:00
/*
* Various global defaults for parsing
*/
2001-02-23 17:55:21 +00:00
2001-07-18 19:30:27 +00:00
/**
2001-02-23 17:55:21 +00:00
* xmlCheckVersion :
* @ version : the include version number
*
* check the compiled lib version against the include one .
* This can warn or immediately kill the application
*/
void
xmlCheckVersion ( int version ) {
int myversion = ( int ) LIBXML_VERSION ;
2001-10-14 09:56:15 +00:00
xmlInitParser ( ) ;
2001-05-07 20:50:47 +00:00
2001-02-23 17:55:21 +00:00
if ( ( myversion / 10000 ) ! = ( version / 10000 ) ) {
2012-09-11 13:26:36 +08:00
xmlGenericError ( xmlGenericErrorContext ,
2001-02-23 17:55:21 +00:00
" Fatal: program compiled against libxml %d using libxml %d \n " ,
( version / 10000 ) , ( myversion / 10000 ) ) ;
2012-09-11 13:26:36 +08:00
fprintf ( stderr ,
2001-11-20 08:35:07 +00:00
" Fatal: program compiled against libxml %d using libxml %d \n " ,
( version / 10000 ) , ( myversion / 10000 ) ) ;
2001-02-23 17:55:21 +00:00
}
if ( ( myversion / 100 ) < ( version / 100 ) ) {
2012-09-11 13:26:36 +08:00
xmlGenericError ( xmlGenericErrorContext ,
2001-02-23 17:55:21 +00:00
" Warning: program compiled against libxml %d using older %d \n " ,
( version / 100 ) , ( myversion / 100 ) ) ;
}
}
2003-10-05 21:33:18 +00:00
/************************************************************************
* *
2012-09-11 13:26:36 +08:00
* Some factorized error routines *
2003-10-05 21:33:18 +00:00
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
 * xmlErrMemory:
 * @ctxt:  an XML parser context
 * @extra:  extra information
 *
 * Handle a memory allocation error. When a context is given it is marked
 * with XML_ERR_NO_MEMORY, moved to the XML_PARSER_EOF state and its SAX
 * callbacks are disabled before the fatal error is raised. Nothing is
 * reported when the context is already in that stopped state.
 */
void
xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
{
    if (ctxt != NULL) {
        /* Parser already stopped with SAX disabled: stay silent. */
        if ((ctxt->disableSAX != 0) && (ctxt->instate == XML_PARSER_EOF))
            return;

        ctxt->errNo = XML_ERR_NO_MEMORY;
        ctxt->instate = XML_PARSER_EOF;
        ctxt->disableSAX = 1;
    }

    if (extra == NULL) {
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
                        NULL, NULL, 0, 0, "Memory allocation failed\n");
    } else {
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
                        NULL, NULL, 0, 0,
                        "Memory allocation failed : %s\n", extra);
    }
}
/**
2003-10-19 13:35:37 +00:00
* __xmlErrEncoding :
2003-10-05 21:33:18 +00:00
* @ ctxt : an XML parser context
2004-08-16 00:39:03 +00:00
* @ xmlerr : the error number
2003-10-05 21:33:18 +00:00
* @ msg : the error message
* @ str1 : an string info
* @ str2 : an string info
*
* Handle an encoding error
*/
2003-10-19 13:35:37 +00:00
void
2004-08-16 00:39:03 +00:00
__xmlErrEncoding ( xmlParserCtxtPtr ctxt , xmlParserErrors xmlerr ,
2003-10-19 13:35:37 +00:00
const char * msg , const xmlChar * str1 , const xmlChar * str2 )
2003-10-05 21:33:18 +00:00
{
2003-10-31 10:36:03 +00:00
if ( ( ctxt ! = NULL ) & & ( ctxt - > disableSAX ! = 0 ) & &
( ctxt - > instate = = XML_PARSER_EOF ) )
return ;
2003-10-05 21:33:18 +00:00
if ( ctxt ! = NULL )
2004-08-16 00:39:03 +00:00
ctxt - > errNo = xmlerr ;
2003-10-10 14:10:40 +00:00
__xmlRaiseError ( NULL , NULL , NULL ,
2004-08-16 00:39:03 +00:00
ctxt , NULL , XML_FROM_PARSER , xmlerr , XML_ERR_FATAL ,
2003-10-05 21:33:18 +00:00
NULL , 0 , ( const char * ) str1 , ( const char * ) str2 ,
NULL , 0 , 0 , msg , str1 , str2 ) ;
if ( ctxt ! = NULL ) {
ctxt - > wellFormed = 0 ;
if ( ctxt - > recovery = = 0 )
ctxt - > disableSAX = 1 ;
}
}
/**
* xmlErrInternal :
* @ ctxt : an XML parser context
* @ msg : the error message
2020-03-08 17:19:42 +01:00
* @ str : error information
2003-10-05 21:33:18 +00:00
*
* Handle an internal error
*/
2016-05-13 15:13:17 +08:00
static void LIBXML_ATTR_FORMAT ( 2 , 0 )
2003-10-05 21:33:18 +00:00
xmlErrInternal ( xmlParserCtxtPtr ctxt , const char * msg , const xmlChar * str )
{
2003-10-31 10:36:03 +00:00
if ( ( ctxt ! = NULL ) & & ( ctxt - > disableSAX ! = 0 ) & &
( ctxt - > instate = = XML_PARSER_EOF ) )
return ;
2003-10-05 21:33:18 +00:00
if ( ctxt ! = NULL )
ctxt - > errNo = XML_ERR_INTERNAL_ERROR ;
2003-10-10 14:10:40 +00:00
__xmlRaiseError ( NULL , NULL , NULL ,
2003-10-05 21:33:18 +00:00
ctxt , NULL , XML_FROM_PARSER , XML_ERR_INTERNAL_ERROR ,
XML_ERR_FATAL , NULL , 0 , ( const char * ) str , NULL , NULL ,
0 , 0 , msg , str ) ;
if ( ctxt ! = NULL ) {
ctxt - > wellFormed = 0 ;
if ( ctxt - > recovery = = 0 )
ctxt - > disableSAX = 1 ;
}
}
/**
* xmlErrEncodingInt :
* @ ctxt : an XML parser context
* @ error : the error number
* @ msg : the error message
* @ val : an integer value
*
* n encoding error
*/
2016-05-13 15:13:17 +08:00
static void LIBXML_ATTR_FORMAT ( 3 , 0 )
2003-10-05 21:33:18 +00:00
xmlErrEncodingInt ( xmlParserCtxtPtr ctxt , xmlParserErrors error ,
const char * msg , int val )
{
2003-10-31 10:36:03 +00:00
if ( ( ctxt ! = NULL ) & & ( ctxt - > disableSAX ! = 0 ) & &
( ctxt - > instate = = XML_PARSER_EOF ) )
return ;
2003-10-05 21:33:18 +00:00
if ( ctxt ! = NULL )
ctxt - > errNo = error ;
2003-10-10 14:10:40 +00:00
__xmlRaiseError ( NULL , NULL , NULL ,
2003-10-05 21:33:18 +00:00
ctxt , NULL , XML_FROM_PARSER , error , XML_ERR_FATAL ,
NULL , 0 , NULL , NULL , NULL , val , 0 , msg , val ) ;
if ( ctxt ! = NULL ) {
ctxt - > wellFormed = 0 ;
if ( ctxt - > recovery = = 0 )
ctxt - > disableSAX = 1 ;
}
}
2001-02-23 17:55:21 +00:00
/**
 * xmlIsLetter:
 * @c:  an unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    return(IS_IDEOGRAPHIC(c) ? 1 : 0);
}
/************************************************************************
* *
2012-07-16 14:19:49 +08:00
* Input handling functions for progressive parsing *
2001-02-23 17:55:21 +00:00
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/* #define DEBUG_INPUT */
/* #define DEBUG_STACK */
/* #define DEBUG_PUSH */
/* we need to keep enough input to show errors in context */
# define LINE_LEN 80
# ifdef DEBUG_INPUT
# define CHECK_BUFFER(in) check_buffer(in)
2002-12-10 15:19:08 +00:00
static
2001-02-23 17:55:21 +00:00
void check_buffer ( xmlParserInputPtr in ) {
2012-07-16 14:19:49 +08:00
if ( in - > base ! = xmlBufContent ( in - > buf - > buffer ) ) {
2001-02-23 17:55:21 +00:00
xmlGenericError ( xmlGenericErrorContext ,
" xmlParserInput: base mismatch problem \n " ) ;
}
if ( in - > cur < in - > base ) {
xmlGenericError ( xmlGenericErrorContext ,
" xmlParserInput: cur < base problem \n " ) ;
}
2012-07-16 14:19:49 +08:00
if ( in - > cur > in - > base + xmlBufUse ( in - > buf - > buffer ) ) {
2001-02-23 17:55:21 +00:00
xmlGenericError ( xmlGenericErrorContext ,
" xmlParserInput: cur > base + use problem \n " ) ;
}
2012-07-16 14:19:49 +08:00
xmlGenericError ( xmlGenericErrorContext , " buffer %x : content %x, cur %d, use %d \n " ,
( int ) in , ( int ) xmlBufContent ( in - > buf - > buffer ) , in - > cur - in - > base ,
xmlBufUse ( in - > buf - > buffer ) ) ;
2001-02-23 17:55:21 +00:00
}
# else
2012-07-16 14:19:49 +08:00
# define CHECK_BUFFER(in)
2001-02-23 17:55:21 +00:00
# endif
/**
* xmlParserInputRead :
* @ in : an XML parser input
* @ len : an indicative size for the lookahead
*
2012-07-16 14:19:49 +08:00
* This function was internal and is deprecated .
2001-02-23 17:55:21 +00:00
*
2012-07-16 14:19:49 +08:00
* Returns - 1 as this is an error to use it .
2001-02-23 17:55:21 +00:00
*/
int
2012-07-16 14:19:49 +08:00
xmlParserInputRead ( xmlParserInputPtr in ATTRIBUTE_UNUSED , int len ATTRIBUTE_UNUSED ) {
return ( - 1 ) ;
2001-02-23 17:55:21 +00:00
}
/**
 * xmlParserInputGrow:
 * @in:  an XML parser input
 * @len:  an indicative size for the lookahead
 *
 * This function increase the input for the parser. It tries to
 * preserve pointers to the input buffer, and keep already read data.
 * Nothing is read when more than INPUT_CHUNK bytes are still available
 * past the cursor, or when the input buffer has no read callback.
 *
 * Returns the amount of char read, or -1 in case of error, 0 indicate the
 * end of this entity
 */
int
xmlParserInputGrow(xmlParserInputPtr in, int len) {
    int ret;
    size_t indx;
    const xmlChar *content;

    if ((in == NULL) || (len < 0)) return(-1);
#ifdef DEBUG_INPUT
    xmlGenericError(xmlGenericErrorContext, "Grow\n");
#endif
    if (in->buf == NULL) return(-1);
    if (in->base == NULL) return(-1);
    if (in->cur == NULL) return(-1);
    if (in->buf->buffer == NULL) return(-1);

    CHECK_BUFFER(in);

    /*
     * Offset of the cursor inside the buffer. It is taken once, while
     * in->base is still guaranteed to point into live storage, and kept
     * as a size_t so that large offsets are not truncated.
     */
    indx = in->cur - in->base;
    if (xmlBufUse(in->buf->buffer) > indx + INPUT_CHUNK) {
        CHECK_BUFFER(in);
        return(0);
    }
    if (in->buf->readcallback == NULL)
        return(0);

    ret = xmlParserInputBufferGrow(in->buf, len);

    /*
     * The buffer may have been reallocated by the read, in which case
     * in->base is a freed pointer that must not be used for arithmetic.
     * Rebase the input on the current content with the saved offset;
     * this is a no-op when the buffer did not move.
     */
    content = xmlBufContent(in->buf->buffer);
    in->base = content;
    in->cur = &content[indx];
    in->end = xmlBufEnd(in->buf->buffer);

    CHECK_BUFFER(in);

    return(ret);
}
/**
 * xmlParserInputShrink:
 * @in:  an XML parser input
 *
 * This function removes used input for the parser. When more than
 * INPUT_CHUNK bytes were consumed, all but the last LINE_LEN of them are
 * dropped from the buffer. If the buffer then holds at most INPUT_CHUNK
 * bytes, more data is read into it and the input pointers are refreshed.
 */
void
xmlParserInputShrink(xmlParserInputPtr in) {
    size_t used;
    size_t ret;
    size_t indx;
    const xmlChar *content;

#ifdef DEBUG_INPUT
    xmlGenericError(xmlGenericErrorContext, "Shrink\n");
#endif
    if (in == NULL) return;
    if (in->buf == NULL) return;
    if (in->base == NULL) return;
    if (in->cur == NULL) return;
    if (in->buf->buffer == NULL) return;

    CHECK_BUFFER(in);

    used = in->cur - xmlBufContent(in->buf->buffer);
    /*
     * Do not shrink on large buffers whose only a tiny fraction
     * was consumed
     */
    if (used > INPUT_CHUNK) {
        /* keep LINE_LEN bytes so errors can be shown in context */
        ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
        if (ret > 0) {
            in->cur -= ret;
            in->consumed += ret;
        }
        in->end = xmlBufEnd(in->buf->buffer);
    }

    CHECK_BUFFER(in);

    if (xmlBufUse(in->buf->buffer) > INPUT_CHUNK) {
        return;
    }

    /*
     * Save the cursor offset before reading: the read may reallocate the
     * buffer, after which in->base would be a freed pointer that must not
     * be used for arithmetic.
     */
    indx = in->cur - in->base;
    xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);

    /* Rebase on the current content; a no-op if nothing moved. */
    content = xmlBufContent(in->buf->buffer);
    in->base = content;
    in->cur = &content[indx];
    in->end = xmlBufEnd(in->buf->buffer);

    CHECK_BUFFER(in);
}
/************************************************************************
* *
2012-09-11 13:26:36 +08:00
* UTF8 character input and related functions *
2001-02-23 17:55:21 +00:00
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
* xmlNextChar :
* @ ctxt : the XML parser context
*
* Skip to the next char input char .
*/
void
2003-03-22 00:04:05 +00:00
xmlNextChar ( xmlParserCtxtPtr ctxt )
{
2004-11-08 14:02:18 +00:00
if ( ( ctxt = = NULL ) | | ( ctxt - > instate = = XML_PARSER_EOF ) | |
( ctxt - > input = = NULL ) )
2003-03-22 00:04:05 +00:00
return ;
2001-02-23 17:55:21 +00:00
2016-03-01 15:18:04 -08:00
if ( ! ( VALID_CTXT ( ctxt ) ) ) {
xmlErrInternal ( ctxt , " Parser input data memory error \n " , NULL ) ;
ctxt - > errNo = XML_ERR_INTERNAL_ERROR ;
xmlStopParser ( ctxt ) ;
return ;
}
if ( ( * ctxt - > input - > cur = = 0 ) & &
( xmlParserInputGrow ( ctxt - > input , INPUT_CHUNK ) < = 0 ) ) {
return ;
}
if ( ctxt - > charset = = XML_CHAR_ENCODING_UTF8 ) {
const unsigned char * cur ;
unsigned char c ;
/*
* 2.11 End - of - Line Handling
* the literal two - character sequence " #xD#xA " or a standalone
* literal # xD , an XML processor must pass to the application
* the single character # xA .
*/
if ( * ( ctxt - > input - > cur ) = = ' \n ' ) {
ctxt - > input - > line + + ; ctxt - > input - > col = 1 ;
} else
ctxt - > input - > col + + ;
/*
* We are supposed to handle UTF8 , check it ' s valid
* From rfc2044 : encoding of the Unicode values on UTF - 8 :
*
* UCS - 4 range ( hex . ) UTF - 8 octet sequence ( binary )
* 0000 0000 - 0000 007F 0 xxxxxxx
* 0000 00 80 - 0000 07FF 110 xxxxx 10 xxxxxx
* 0000 0800 - 0000 FFFF 1110 xxxx 10 xxxxxx 10 xxxxxx
*
* Check for the 0x110000 limit too
*/
cur = ctxt - > input - > cur ;
c = * cur ;
if ( c & 0x80 ) {
2017-08-28 20:40:19 +02:00
if ( c = = 0xC0 )
goto encoding_error ;
2016-03-01 15:18:04 -08:00
if ( cur [ 1 ] = = 0 ) {
xmlParserInputGrow ( ctxt - > input , INPUT_CHUNK ) ;
cur = ctxt - > input - > cur ;
}
if ( ( cur [ 1 ] & 0xc0 ) ! = 0x80 )
goto encoding_error ;
if ( ( c & 0xe0 ) = = 0xe0 ) {
unsigned int val ;
if ( cur [ 2 ] = = 0 ) {
2003-03-22 00:04:05 +00:00
xmlParserInputGrow ( ctxt - > input , INPUT_CHUNK ) ;
2009-08-25 11:30:34 +02:00
cur = ctxt - > input - > cur ;
}
2016-03-01 15:18:04 -08:00
if ( ( cur [ 2 ] & 0xc0 ) ! = 0x80 )
2003-03-22 00:04:05 +00:00
goto encoding_error ;
2016-03-01 15:18:04 -08:00
if ( ( c & 0xf0 ) = = 0xf0 ) {
if ( cur [ 3 ] = = 0 ) {
2003-03-22 00:04:05 +00:00
xmlParserInputGrow ( ctxt - > input , INPUT_CHUNK ) ;
2009-08-25 11:30:34 +02:00
cur = ctxt - > input - > cur ;
}
2016-03-01 15:18:04 -08:00
if ( ( ( c & 0xf8 ) ! = 0xf0 ) | |
( ( cur [ 3 ] & 0xc0 ) ! = 0x80 ) )
2003-03-22 00:04:05 +00:00
goto encoding_error ;
2016-03-01 15:18:04 -08:00
/* 4-byte code */
ctxt - > input - > cur + = 4 ;
val = ( cur [ 0 ] & 0x7 ) < < 18 ;
val | = ( cur [ 1 ] & 0x3f ) < < 12 ;
val | = ( cur [ 2 ] & 0x3f ) < < 6 ;
val | = cur [ 3 ] & 0x3f ;
} else {
/* 3-byte code */
ctxt - > input - > cur + = 3 ;
val = ( cur [ 0 ] & 0xf ) < < 12 ;
val | = ( cur [ 1 ] & 0x3f ) < < 6 ;
val | = cur [ 2 ] & 0x3f ;
}
if ( ( ( val > 0xd7ff ) & & ( val < 0xe000 ) ) | |
( ( val > 0xfffd ) & & ( val < 0x10000 ) ) | |
( val > = 0x110000 ) ) {
xmlErrEncodingInt ( ctxt , XML_ERR_INVALID_CHAR ,
" Char 0x%X out of allowed range \n " ,
val ) ;
}
2003-03-22 00:04:05 +00:00
} else
2016-03-01 15:18:04 -08:00
/* 2-byte code */
ctxt - > input - > cur + = 2 ;
} else
/* 1-byte code */
ctxt - > input - > cur + + ;
ctxt - > nbChars + + ;
2001-02-23 17:55:21 +00:00
} else {
2003-03-22 00:04:05 +00:00
/*
* Assume it ' s a fixed length encoding ( 1 ) with
* a compatible encoding for the ASCII set , since
* XML constructs only use < 128 chars
*/
if ( * ( ctxt - > input - > cur ) = = ' \n ' ) {
2005-01-05 15:37:55 +00:00
ctxt - > input - > line + + ; ctxt - > input - > col = 1 ;
2003-03-22 00:04:05 +00:00
} else
ctxt - > input - > col + + ;
ctxt - > input - > cur + + ;
ctxt - > nbChars + + ;
2001-02-23 17:55:21 +00:00
}
2017-06-19 17:55:20 +02:00
if ( * ctxt - > input - > cur = = 0 )
xmlParserInputGrow ( ctxt - > input , INPUT_CHUNK ) ;
2001-02-23 17:55:21 +00:00
return ;
2003-10-05 13:51:35 +00:00
encoding_error :
2001-02-23 17:55:21 +00:00
/*
* If we detect an UTF8 error that probably mean that the
2001-12-31 16:16:02 +00:00
* input encoding didn ' t get properly advertised in the
2001-02-23 17:55:21 +00:00
* declaration header . Report the error and switch the encoding
* to ISO - Latin - 1 ( if you don ' t like this policy , just declare the
* encoding ! )
*/
2004-11-09 14:59:59 +00:00
if ( ( ctxt = = NULL ) | | ( ctxt - > input = = NULL ) | |
( ctxt - > input - > end - ctxt - > input - > cur < 4 ) ) {
__xmlErrEncoding ( ctxt , XML_ERR_INVALID_CHAR ,
" Input is not proper UTF-8, indicate encoding ! \n " ,
NULL , NULL ) ;
} else {
char buffer [ 150 ] ;
snprintf ( buffer , 149 , " Bytes: 0x%02X 0x%02X 0x%02X 0x%02X \n " ,
ctxt - > input - > cur [ 0 ] , ctxt - > input - > cur [ 1 ] ,
ctxt - > input - > cur [ 2 ] , ctxt - > input - > cur [ 3 ] ) ;
__xmlErrEncoding ( ctxt , XML_ERR_INVALID_CHAR ,
" Input is not proper UTF-8, indicate encoding ! \n %s " ,
BAD_CAST buffer , NULL ) ;
2001-02-23 17:55:21 +00:00
}
2003-03-22 00:04:05 +00:00
ctxt - > charset = XML_CHAR_ENCODING_8859_1 ;
2002-03-20 21:55:57 +00:00
ctxt - > input - > cur + + ;
2001-02-23 17:55:21 +00:00
return ;
}
/**
 * xmlCurrentChar:
 * @ctxt:  the XML parser context
 * @len:  pointer to the length of the char read
 *
 * The current char value, if using UTF-8 this may actually span multiple
 * bytes in the input buffer. Implement the end of line normalization:
 * 2.11 End-of-Line Handling
 * Wherever an external parsed entity or the literal entity value
 * of an internal parsed entity contains either the literal two-character
 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
 * must pass to the application the single character #xA.
 * This behavior can conveniently be produced by normalizing all
 * line breaks to #xA on input, before parsing.)
 *
 * Returns the current char value and its length
 */
int
xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
    if ((ctxt == NULL) || (len == NULL))
        return(0);
    if (ctxt->input == NULL)
        return(0);
    if (ctxt->instate == XML_PARSER_EOF)
        return(0);

    /* Fast path: printable single-byte range needs no decoding at all. */
    if ((*ctxt->input->cur >= 0x20) && (*ctxt->input->cur <= 0x7F)) {
        *len = 1;
        return((int) *ctxt->input->cur);
    }

    if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
        /*
         * We are supposed to handle UTF8, check it's valid
         * From rfc2044: encoding of the Unicode values on UTF-8:
         *
         * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
         * 0000 0000-0000 007F   0xxxxxxx
         * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
         * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
         *
         * Check for the 0x110000 limit too
         */
        const unsigned char *p = ctxt->input->cur;
        unsigned char lead = *p;
        unsigned int code;

        if (lead & 0x80) {
            /* a continuation byte or 0xC0 cannot start a sequence */
            if (((lead & 0x40) == 0) || (lead == 0xC0))
                goto encoding_error;
            if (p[1] == 0) {
                xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                p = ctxt->input->cur;
            }
            if ((p[1] & 0xc0) != 0x80)
                goto encoding_error;

            if ((lead & 0xe0) != 0xe0) {
                /* 2-byte code */
                *len = 2;
                code = (p[0] & 0x1f) << 6;
                code |= p[1] & 0x3f;
                if (code < 0x80)
                    goto encoding_error;
            } else {
                if (p[2] == 0) {
                    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                    p = ctxt->input->cur;
                }
                if ((p[2] & 0xc0) != 0x80)
                    goto encoding_error;

                if ((lead & 0xf0) != 0xf0) {
                    /* 3-byte code */
                    *len = 3;
                    code = (p[0] & 0xf) << 12;
                    code |= (p[1] & 0x3f) << 6;
                    code |= p[2] & 0x3f;
                    if (code < 0x800)
                        goto encoding_error;
                } else {
                    if (p[3] == 0) {
                        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
                        p = ctxt->input->cur;
                    }
                    if (((lead & 0xf8) != 0xf0) ||
                        ((p[3] & 0xc0) != 0x80))
                        goto encoding_error;
                    /* 4-byte code */
                    *len = 4;
                    code = (p[0] & 0x7) << 18;
                    code |= (p[1] & 0x3f) << 12;
                    code |= (p[2] & 0x3f) << 6;
                    code |= p[3] & 0x3f;
                    if (code < 0x10000)
                        goto encoding_error;
                }
            }

            if (!IS_CHAR(code)) {
                xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                                  "Char 0x%X out of allowed range\n", code);
            }
            return(code);
        }

        /* 1-byte code */
        *len = 1;
        if (*ctxt->input->cur == 0)
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
        /* a zero byte that is not the end of the data is an error */
        if ((*ctxt->input->cur == 0) &&
            (ctxt->input->end > ctxt->input->cur)) {
            xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
                              "Char 0x0 out of allowed range\n", 0);
        }
    } else {
        /*
         * Assume it's a fixed length encoding (1) with
         * a compatible encoding for the ASCII set, since
         * XML constructs only use < 128 chars
         */
        *len = 1;
    }

    /*
     * Single-byte character: apply end-of-line normalization. A CR that
     * is followed by LF is stepped over so the pair counts as one #xA.
     */
    if (*ctxt->input->cur == 0xD) {
        if (ctxt->input->cur[1] == 0xA) {
            ctxt->nbChars++;
            ctxt->input->cur++;
        }
        return(0xA);
    }
    return((int) *ctxt->input->cur);

encoding_error:
    /*
     * An encoding problem may arise from a truncated input buffer
     * splitting a character in the middle. In that case do not raise
     * an error but return 0 to indicate an end of stream problem
     */
    if (ctxt->input->end - ctxt->input->cur < 4) {
        *len = 0;
        return(0);
    }

    /*
     * If we detect an UTF8 error that probably mean that the
     * input encoding didn't get properly advertised in the
     * declaration header. Report the error and switch the encoding
     * to ISO-Latin-1 (if you don't like this policy, just declare the
     * encoding !)
     */
    {
        char buffer[150];

        snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
                 ctxt->input->cur[0], ctxt->input->cur[1],
                 ctxt->input->cur[2], ctxt->input->cur[3]);
        __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
                         "Input is not proper UTF-8, indicate encoding !\n%s",
                         BAD_CAST buffer, NULL);
    }
    ctxt->charset = XML_CHAR_ENCODING_8859_1;
    *len = 1;
    return((int) *ctxt->input->cur);
}
/**
* xmlStringCurrentChar :
* @ ctxt : the XML parser context
* @ cur : pointer to the beginning of the char
* @ len : pointer to the length of the char read
*
2001-12-31 16:16:02 +00:00
* The current char value , if using UTF - 8 this may actually span multiple
2001-02-23 17:55:21 +00:00
* bytes in the input buffer .
*
2001-10-10 09:45:09 +00:00
* Returns the current char value and its length
2001-02-23 17:55:21 +00:00
*/
int
2002-01-13 15:43:22 +00:00
xmlStringCurrentChar ( xmlParserCtxtPtr ctxt , const xmlChar * cur , int * len )
{
2004-11-08 14:02:18 +00:00
if ( ( len = = NULL ) | | ( cur = = NULL ) ) return ( 0 ) ;
2001-04-27 17:13:01 +00:00
if ( ( ctxt = = NULL ) | | ( ctxt - > charset = = XML_CHAR_ENCODING_UTF8 ) ) {
2002-01-13 15:43:22 +00:00
/*
* We are supposed to handle UTF8 , check it ' s valid
* From rfc2044 : encoding of the Unicode values on UTF - 8 :
*
* UCS - 4 range ( hex . ) UTF - 8 octet sequence ( binary )
* 0000 0000 - 0000 007F 0 xxxxxxx
* 0000 00 80 - 0000 07FF 110 xxxxx 10 xxxxxx
2012-09-11 13:26:36 +08:00
* 0000 0800 - 0000 FFFF 1110 xxxx 10 xxxxxx 10 xxxxxx
2002-01-13 15:43:22 +00:00
*
* Check for the 0x110000 limit too
*/
unsigned char c ;
unsigned int val ;
c = * cur ;
if ( c & 0x80 ) {
if ( ( cur [ 1 ] & 0xc0 ) ! = 0x80 )
goto encoding_error ;
if ( ( c & 0xe0 ) = = 0xe0 ) {
if ( ( cur [ 2 ] & 0xc0 ) ! = 0x80 )
goto encoding_error ;
if ( ( c & 0xf0 ) = = 0xf0 ) {
if ( ( ( c & 0xf8 ) ! = 0xf0 ) | | ( ( cur [ 3 ] & 0xc0 ) ! = 0x80 ) )
goto encoding_error ;
/* 4-byte code */
* len = 4 ;
val = ( cur [ 0 ] & 0x7 ) < < 18 ;
val | = ( cur [ 1 ] & 0x3f ) < < 12 ;
val | = ( cur [ 2 ] & 0x3f ) < < 6 ;
val | = cur [ 3 ] & 0x3f ;
} else {
/* 3-byte code */
* len = 3 ;
val = ( cur [ 0 ] & 0xf ) < < 12 ;
val | = ( cur [ 1 ] & 0x3f ) < < 6 ;
val | = cur [ 2 ] & 0x3f ;
}
} else {
/* 2-byte code */
* len = 2 ;
val = ( cur [ 0 ] & 0x1f ) < < 6 ;
val | = cur [ 1 ] & 0x3f ;
}
if ( ! IS_CHAR ( val ) ) {
2003-10-05 21:33:18 +00:00
xmlErrEncodingInt ( ctxt , XML_ERR_INVALID_CHAR ,
" Char 0x%X out of allowed range \n " , val ) ;
2002-01-13 15:43:22 +00:00
}
return ( val ) ;
} else {
/* 1-byte code */
* len = 1 ;
return ( ( int ) * cur ) ;
}
2001-02-23 17:55:21 +00:00
}
/*
2001-10-10 09:45:09 +00:00
* Assume it ' s a fixed length encoding ( 1 ) with
2001-12-31 16:16:02 +00:00
* a compatible encoding for the ASCII set , since
2001-02-23 17:55:21 +00:00
* XML constructs only use < 128 chars
*/
* len = 1 ;
2002-01-13 15:43:22 +00:00
return ( ( int ) * cur ) ;
2001-02-23 17:55:21 +00:00
encoding_error :
2017-08-30 14:16:01 +02:00
/*
* An encoding problem may arise from a truncated input buffer
* splitting a character in the middle . In that case do not raise
2019-09-30 17:04:54 +02:00
* an error but return 0 to indicate an end of stream problem
2017-08-30 14:16:01 +02:00
*/
if ( ( ctxt = = NULL ) | | ( ctxt - > input = = NULL ) | |
( ctxt - > input - > end - ctxt - > input - > cur < 4 ) ) {
* len = 0 ;
return ( 0 ) ;
}
2001-02-23 17:55:21 +00:00
/*
* If we detect an UTF8 error that probably mean that the
2001-12-31 16:16:02 +00:00
* input encoding didn ' t get properly advertised in the
2001-02-23 17:55:21 +00:00
* declaration header . Report the error and switch the encoding
* to ISO - Latin - 1 ( if you don ' t like this policy , just declare the
* encoding ! )
*/
2004-11-09 14:59:59 +00:00
{
char buffer [ 150 ] ;
2017-08-30 14:16:01 +02:00
snprintf ( buffer , 149 , " Bytes: 0x%02X 0x%02X 0x%02X 0x%02X \n " ,
ctxt - > input - > cur [ 0 ] , ctxt - > input - > cur [ 1 ] ,
ctxt - > input - > cur [ 2 ] , ctxt - > input - > cur [ 3 ] ) ;
2004-11-09 14:59:59 +00:00
__xmlErrEncoding ( ctxt , XML_ERR_INVALID_CHAR ,
" Input is not proper UTF-8, indicate encoding ! \n %s " ,
BAD_CAST buffer , NULL ) ;
2001-02-23 17:55:21 +00:00
}
* len = 1 ;
2002-01-13 15:43:22 +00:00
return ( ( int ) * cur ) ;
2001-02-23 17:55:21 +00:00
}
/**
2001-03-24 17:00:36 +00:00
* xmlCopyCharMultiByte :
2001-12-31 16:16:02 +00:00
* @ out : pointer to an array of xmlChar
2001-02-23 17:55:21 +00:00
* @ val : the char value
*
2012-09-11 13:26:36 +08:00
* append the char value in the array
2001-02-23 17:55:21 +00:00
*
* Returns the number of xmlChar written
*/
int
2001-03-24 17:00:36 +00:00
xmlCopyCharMultiByte ( xmlChar * out , int val ) {
2004-11-08 14:02:18 +00:00
if ( out = = NULL ) return ( 0 ) ;
2001-02-23 17:55:21 +00:00
/*
* We are supposed to handle UTF8 , check it ' s valid
* From rfc2044 : encoding of the Unicode values on UTF - 8 :
*
* UCS - 4 range ( hex . ) UTF - 8 octet sequence ( binary )
* 0000 0000 - 0000 007F 0 xxxxxxx
* 0000 00 80 - 0000 07FF 110 xxxxx 10 xxxxxx
2012-09-11 13:26:36 +08:00
* 0000 0800 - 0000 FFFF 1110 xxxx 10 xxxxxx 10 xxxxxx
2001-02-23 17:55:21 +00:00
*/
2001-03-24 17:00:36 +00:00
if ( val > = 0x80 ) {
xmlChar * savedout = out ;
int bits ;
if ( val < 0x800 ) { * out + + = ( val > > 6 ) | 0xC0 ; bits = 0 ; }
else if ( val < 0x10000 ) { * out + + = ( val > > 12 ) | 0xE0 ; bits = 6 ; }
else if ( val < 0x110000 ) { * out + + = ( val > > 18 ) | 0xF0 ; bits = 12 ; }
else {
2003-10-05 21:33:18 +00:00
xmlErrEncodingInt ( NULL , XML_ERR_INVALID_CHAR ,
2001-12-31 16:16:02 +00:00
" Internal error, xmlCopyCharMultiByte 0x%X out of bound \n " ,
2003-10-05 21:33:18 +00:00
val ) ;
2001-02-23 17:55:21 +00:00
return ( 0 ) ;
}
2001-03-24 17:00:36 +00:00
for ( ; bits > = 0 ; bits - = 6 )
* out + + = ( ( val > > bits ) & 0x3F ) | 0x80 ;
return ( out - savedout ) ;
2001-02-23 17:55:21 +00:00
}
2001-03-24 17:00:36 +00:00
* out = ( xmlChar ) val ;
return 1 ;
}
2001-02-23 17:55:21 +00:00
2001-03-24 17:00:36 +00:00
/**
* xmlCopyChar :
* @ len : Ignored , compatibility
2001-12-31 16:16:02 +00:00
* @ out : pointer to an array of xmlChar
2001-03-24 17:00:36 +00:00
* @ val : the char value
*
2012-09-11 13:26:36 +08:00
* append the char value in the array
2001-03-24 17:00:36 +00:00
*
* Returns the number of xmlChar written
*/
2001-02-23 17:55:21 +00:00
2001-03-24 17:00:36 +00:00
int
2001-03-26 16:28:29 +00:00
xmlCopyChar ( int len ATTRIBUTE_UNUSED , xmlChar * out , int val ) {
2004-11-08 14:02:18 +00:00
if ( out = = NULL ) return ( 0 ) ;
2001-03-24 17:00:36 +00:00
/* the len parameter is ignored */
if ( val > = 0x80 ) {
return ( xmlCopyCharMultiByte ( out , val ) ) ;
2001-02-23 17:55:21 +00:00
}
* out = ( xmlChar ) val ;
2001-03-24 17:00:36 +00:00
return 1 ;
2001-02-23 17:55:21 +00:00
}
/************************************************************************
* *
* Commodity functions to switch encodings *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2009-08-26 11:38:49 +02:00
/*
 * Forward declarations of the static helpers defined further down in
 * this file. Both take a trailing @len argument: a length to convert,
 * or -1.
 */
static int
xmlSwitchInputEncodingInt(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
                          xmlCharEncodingHandlerPtr handler, int len);
static int
xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
                       xmlCharEncodingHandlerPtr handler, int len);
2001-02-23 17:55:21 +00:00
/*
 * xmlSwitchEncodingSkipUTF8BOM:
 * @ctxt:  the parser context (must not be NULL)
 *
 * Advance the current input past a leading UTF-8 Byte Order Mark
 * (EF BB BF) if one is present. Does nothing when the context has no
 * current input or that input has no current pointer.
 */
static void
xmlSwitchEncodingSkipUTF8BOM(xmlParserCtxtPtr ctxt)
{
    xmlParserInputPtr in = ctxt->input;

    if ((in == NULL) || (in->cur == NULL))
        return;
    if ((in->cur[0] == 0xEF) &&
        (in->cur[1] == 0xBB) &&
        (in->cur[2] == 0xBF)) {
        in->cur += 3;
    }
}

/**
 * xmlSwitchEncoding:
 * @ctxt:  the parser context
 * @enc:  the encoding value (number)
 *
 * change the input functions when discovering the character encoding
 * of a given entity.
 *
 * XML_CHAR_ENCODING_NONE and XML_CHAR_ENCODING_UTF8 only set the context
 * charset to UTF-8 (skipping a UTF-8 BOM in the latter case). For the
 * other encodings a handler is looked up with
 * xmlGetCharEncodingHandler() and installed through
 * xmlSwitchToEncodingInt(). When no handler is available the parser is
 * stopped, except for ASCII and ISO-8859-1..9 which are accepted as is.
 *
 * Returns 0 in case of success, -1 otherwise
 */
int
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
{
    xmlCharEncodingHandlerPtr handler;
    int len = -1;
    int ret;

    if (ctxt == NULL) return(-1);

    /*
     * First pass: handle the encodings needing no conversion and pick
     * the length passed down to xmlSwitchToEncodingInt() for the others.
     */
    switch (enc) {
        case XML_CHAR_ENCODING_ERROR:
            __xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
                             "encoding unknown\n", NULL, NULL);
            return(-1);
        case XML_CHAR_ENCODING_NONE:
            /* let's assume it's UTF-8 without the XML decl */
            ctxt->charset = XML_CHAR_ENCODING_UTF8;
            return(0);
        case XML_CHAR_ENCODING_UTF8:
            /* default encoding, no conversion should be needed */
            ctxt->charset = XML_CHAR_ENCODING_UTF8;

            /*
             * Errata on XML-1.0 June 20 2001
             * Specific handling of the Byte Order Mark for
             * UTF-8
             */
            xmlSwitchEncodingSkipUTF8BOM(ctxt);
            return(0);
        case XML_CHAR_ENCODING_UTF16LE:
        case XML_CHAR_ENCODING_UTF16BE:
            /*
             * The raw input characters are encoded in UTF-16. As we
             * expect this function to be called after xmlCharEncInFunc,
             * we expect ctxt->input->cur to contain UTF-8 encoded
             * characters. So the raw UTF16 Byte Order Mark has also been
             * converted into an UTF-8 BOM. Let's skip that BOM.
             */
            xmlSwitchEncodingSkipUTF8BOM(ctxt);
            len = 90;
            break;
        case XML_CHAR_ENCODING_UCS2:
            len = 90;
            break;
        case XML_CHAR_ENCODING_UCS4BE:
        case XML_CHAR_ENCODING_UCS4LE:
        case XML_CHAR_ENCODING_UCS4_2143:
        case XML_CHAR_ENCODING_UCS4_3412:
            len = 180;
            break;
        case XML_CHAR_ENCODING_EBCDIC:
        case XML_CHAR_ENCODING_8859_1:
        case XML_CHAR_ENCODING_8859_2:
        case XML_CHAR_ENCODING_8859_3:
        case XML_CHAR_ENCODING_8859_4:
        case XML_CHAR_ENCODING_8859_5:
        case XML_CHAR_ENCODING_8859_6:
        case XML_CHAR_ENCODING_8859_7:
        case XML_CHAR_ENCODING_8859_8:
        case XML_CHAR_ENCODING_8859_9:
        case XML_CHAR_ENCODING_ASCII:
        case XML_CHAR_ENCODING_2022_JP:
        case XML_CHAR_ENCODING_SHIFT_JIS:
        case XML_CHAR_ENCODING_EUC_JP:
            len = 45;
            break;
    }

    handler = xmlGetCharEncodingHandler(enc);
    if (handler == NULL) {
        /* display name of the encoding to report, NULL for no report */
        const char *unsupported = NULL;

        /*
         * Default handlers.
         */
        switch (enc) {
            case XML_CHAR_ENCODING_ASCII:
                /* default encoding, no conversion should be needed */
                ctxt->charset = XML_CHAR_ENCODING_UTF8;
                return(0);
            case XML_CHAR_ENCODING_8859_1:
            case XML_CHAR_ENCODING_8859_2:
            case XML_CHAR_ENCODING_8859_3:
            case XML_CHAR_ENCODING_8859_4:
            case XML_CHAR_ENCODING_8859_5:
            case XML_CHAR_ENCODING_8859_6:
            case XML_CHAR_ENCODING_8859_7:
            case XML_CHAR_ENCODING_8859_8:
            case XML_CHAR_ENCODING_8859_9:
                /*
                 * We used to keep the internal content in the
                 * document encoding however this turns being
                 * unmaintainable. So xmlGetCharEncodingHandler() will
                 * return non-null values for this now.
                 */
                if ((ctxt->inputNr == 1) &&
                    (ctxt->encoding == NULL) &&
                    (ctxt->input != NULL) &&
                    (ctxt->input->encoding != NULL)) {
                    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
                }
                ctxt->charset = enc;
                return(0);
            case XML_CHAR_ENCODING_UCS4LE:
                unsupported = "UCS4 little endian";
                break;
            case XML_CHAR_ENCODING_UCS4BE:
                unsupported = "UCS4 big endian";
                break;
            case XML_CHAR_ENCODING_EBCDIC:
                unsupported = "EBCDIC";
                break;
            case XML_CHAR_ENCODING_UCS4_2143:
                unsupported = "UCS4 2143";
                break;
            case XML_CHAR_ENCODING_UCS4_3412:
                unsupported = "UCS4 3412";
                break;
            case XML_CHAR_ENCODING_UCS2:
                unsupported = "UCS2";
                break;
            case XML_CHAR_ENCODING_2022_JP:
                unsupported = "ISO-2022-JP";
                break;
            case XML_CHAR_ENCODING_SHIFT_JIS:
                unsupported = "Shift_JIS";
                break;
            case XML_CHAR_ENCODING_EUC_JP:
                unsupported = "EUC-JP";
                break;
            default:
                /* UTF-16 variants and anything else: no specific message */
                break;
        }
        if (unsupported != NULL) {
            __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
                             "encoding not supported %s\n",
                             (const xmlChar *) unsupported, NULL);
        }

        /*
         * TODO: We could recover from errors in external entities if we
         * didn't stop the parser. But most callers of this function don't
         * check the return value.
         */
        xmlStopParser(ctxt);
        return(-1);
    }

    ctxt->charset = XML_CHAR_ENCODING_UTF8;
    ret = xmlSwitchToEncodingInt(ctxt, handler, len);
    if ((ret < 0) || (ctxt->errNo == XML_I18N_CONV_FAILED)) {
        /*
         * on encoding conversion errors, stop the parser
         */
        xmlStopParser(ctxt);
        ctxt->errNo = XML_I18N_CONV_FAILED;
    }
    return(ret);
}
/**
2003-10-19 13:35:37 +00:00
* xmlSwitchInputEncoding :
2001-02-23 17:55:21 +00:00
* @ ctxt : the parser context
2003-10-19 13:35:37 +00:00
* @ input : the input stream
2001-02-23 17:55:21 +00:00
* @ handler : the encoding handler
2009-08-26 11:38:49 +02:00
* @ len : the number of bytes to convert for the first line or - 1
2001-02-23 17:55:21 +00:00
*
* change the input functions when discovering the character encoding
* of a given entity .
*
* Returns 0 in case of success , - 1 otherwise
*/
2009-08-26 11:38:49 +02:00
static int
xmlSwitchInputEncodingInt ( xmlParserCtxtPtr ctxt , xmlParserInputPtr input ,
xmlCharEncodingHandlerPtr handler , int len )
2001-02-23 17:55:21 +00:00
{
int nbchars ;
2003-10-19 13:35:37 +00:00
if ( handler = = NULL )
return ( - 1 ) ;
if ( input = = NULL )
return ( - 1 ) ;
if ( input - > buf ! = NULL ) {
if ( input - > buf - > encoder ! = NULL ) {
/*
2019-09-30 17:04:54 +02:00
* Check in case the auto encoding detection triggered
2003-10-19 13:35:37 +00:00
* in already .
*/
if ( input - > buf - > encoder = = handler )
return ( 0 ) ;
2001-02-23 17:55:21 +00:00
2003-10-19 13:35:37 +00:00
/*
* " UTF-16 " can be used for both LE and BE
if ( ( ! xmlStrncmp ( BAD_CAST input - > buf - > encoder - > name ,
BAD_CAST " UTF-16 " , 6 ) ) & &
( ! xmlStrncmp ( BAD_CAST handler - > name ,
BAD_CAST " UTF-16 " , 6 ) ) ) {
return ( 0 ) ;
}
*/
2001-02-23 17:55:21 +00:00
2003-10-19 13:35:37 +00:00
/*
* Note : this is a bit dangerous , but that ' s what it
* takes to use nearly compatible signature for different
* encodings .
*/
xmlCharEncCloseFunc ( input - > buf - > encoder ) ;
input - > buf - > encoder = handler ;
return ( 0 ) ;
}
input - > buf - > encoder = handler ;
/*
* Is there already some content down the pipe to convert ?
*/
2012-07-16 14:19:49 +08:00
if ( xmlBufIsEmpty ( input - > buf - > buffer ) = = 0 ) {
2003-10-19 13:35:37 +00:00
int processed ;
2004-02-11 13:25:26 +00:00
unsigned int use ;
2003-10-19 13:35:37 +00:00
/*
2012-09-11 13:26:36 +08:00
* Specific handling of the Byte Order Mark for
2003-10-19 13:35:37 +00:00
* UTF - 16
*/
if ( ( handler - > name ! = NULL ) & &
2004-12-25 10:14:57 +00:00
( ! strcmp ( handler - > name , " UTF-16LE " ) | |
! strcmp ( handler - > name , " UTF-16 " ) ) & &
2003-10-19 13:35:37 +00:00
( input - > cur [ 0 ] = = 0xFF ) & & ( input - > cur [ 1 ] = = 0xFE ) ) {
input - > cur + = 2 ;
}
if ( ( handler - > name ! = NULL ) & &
( ! strcmp ( handler - > name , " UTF-16BE " ) ) & &
( input - > cur [ 0 ] = = 0xFE ) & & ( input - > cur [ 1 ] = = 0xFF ) ) {
input - > cur + = 2 ;
}
/*
* Errata on XML - 1.0 June 20 2001
* Specific handling of the Byte Order Mark for
* UTF - 8
*/
if ( ( handler - > name ! = NULL ) & &
( ! strcmp ( handler - > name , " UTF-8 " ) ) & &
( input - > cur [ 0 ] = = 0xEF ) & &
( input - > cur [ 1 ] = = 0xBB ) & & ( input - > cur [ 2 ] = = 0xBF ) ) {
input - > cur + = 3 ;
}
/*
* Shrink the current input buffer .
* Move it as the raw buffer and create a new input buffer
*/
processed = input - > cur - input - > base ;
2012-07-16 14:19:49 +08:00
xmlBufShrink ( input - > buf - > buffer , processed ) ;
2003-10-19 13:35:37 +00:00
input - > buf - > raw = input - > buf - > buffer ;
2012-07-16 14:19:49 +08:00
input - > buf - > buffer = xmlBufCreate ( ) ;
2004-02-11 13:25:26 +00:00
input - > buf - > rawconsumed = processed ;
2012-07-16 14:19:49 +08:00
use = xmlBufUse ( input - > buf - > raw ) ;
2003-10-19 13:35:37 +00:00
if ( ctxt - > html ) {
/*
* convert as much as possible of the buffer
*/
2018-03-17 00:03:24 +01:00
nbchars = xmlCharEncInput ( input - > buf , 1 ) ;
2003-10-19 13:35:37 +00:00
} else {
/*
* convert just enough to get
* ' < ? xml version = " 1.0 " encoding = " xxx " ? > '
* parsed with the autodetected encoding
* into the parser reading buffer .
*/
2012-07-16 14:19:49 +08:00
nbchars = xmlCharEncFirstLineInput ( input - > buf , len ) ;
2003-10-19 13:35:37 +00:00
}
2017-06-10 00:33:07 +02:00
xmlBufResetInput ( input - > buf - > buffer , input ) ;
2003-10-19 13:35:37 +00:00
if ( nbchars < 0 ) {
xmlErrInternal ( ctxt ,
" switching encoding: encoder error \n " ,
NULL ) ;
return ( - 1 ) ;
}
2012-07-16 14:19:49 +08:00
input - > buf - > rawconsumed + = use - xmlBufUse ( input - > buf - > raw ) ;
2003-10-19 13:35:37 +00:00
}
return ( 0 ) ;
2017-08-21 13:06:29 +02:00
} else if ( input - > length = = 0 ) {
/*
* When parsing a static memory array one must know the
* size to be able to convert the buffer .
*/
2005-11-17 13:12:16 +00:00
xmlErrInternal ( ctxt , " switching encoding : no input \n " , NULL ) ;
2018-11-22 15:27:28 +01:00
/*
* Callers assume that the input buffer takes ownership of the
* encoding handler . xmlCharEncCloseFunc frees unregistered
* handlers and avoids a memory leak .
*/
xmlCharEncCloseFunc ( handler ) ;
2005-11-17 13:12:16 +00:00
return ( - 1 ) ;
2003-10-19 13:35:37 +00:00
}
2018-11-22 15:27:28 +01:00
/*
* We should actually raise an error here , see issue # 34.
*/
xmlCharEncCloseFunc ( handler ) ;
2003-10-19 13:35:37 +00:00
return ( 0 ) ;
}
/**
2009-08-26 11:38:49 +02:00
* xmlSwitchInputEncoding :
2003-10-19 13:35:37 +00:00
* @ ctxt : the parser context
2009-08-26 11:38:49 +02:00
* @ input : the input stream
2003-10-19 13:35:37 +00:00
* @ handler : the encoding handler
*
* change the input functions when discovering the character encoding
* of a given entity .
*
* Returns 0 in case of success , - 1 otherwise
*/
int
2009-08-26 11:38:49 +02:00
xmlSwitchInputEncoding ( xmlParserCtxtPtr ctxt , xmlParserInputPtr input ,
xmlCharEncodingHandlerPtr handler ) {
return ( xmlSwitchInputEncodingInt ( ctxt , input , handler , - 1 ) ) ;
}
/**
 * xmlSwitchToEncodingInt:
 * @ctxt:  the parser context
 * @handler:  the encoding handler
 * @len:  the length to convert or -1
 *
 * change the input functions when discovering the character encoding
 * of a given entity, and convert only @len bytes of the output, this
 * is needed on auto detect to allows any declared encoding later to
 * convert the actual content after the xmlDecl
 *
 * A NULL @ctxt or a NULL @handler is rejected with -1 before anything
 * is touched. A context without a current input reports an internal
 * error and also returns -1.
 *
 * Returns 0 in case of success, -1 otherwise
 */
static int
xmlSwitchToEncodingInt(xmlParserCtxtPtr ctxt,
                       xmlCharEncodingHandlerPtr handler, int len) {
    int ret;

    /* the public wrapper passes @ctxt through unchecked */
    if (ctxt == NULL)
        return(-1);
    if (handler == NULL)
        return(-1);

    if (ctxt->input == NULL) {
        xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
                       NULL);
        return(-1);
    }

    ret = xmlSwitchInputEncodingInt(ctxt, ctxt->input, handler, len);

    /*
     * The parsing is now done in UTF8 natively
     */
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
    return(ret);
}
2009-08-26 11:38:49 +02:00
/**
* xmlSwitchToEncoding :
* @ ctxt : the parser context
* @ handler : the encoding handler
*
* change the input functions when discovering the character encoding
* of a given entity .
*
* Returns 0 in case of success , - 1 otherwise
*/
int
2012-09-11 13:26:36 +08:00
xmlSwitchToEncoding ( xmlParserCtxtPtr ctxt , xmlCharEncodingHandlerPtr handler )
2009-08-26 11:38:49 +02:00
{
return ( xmlSwitchToEncodingInt ( ctxt , handler , - 1 ) ) ;
}
2001-02-23 17:55:21 +00:00
/************************************************************************
* *
* Commodity functions to handle entities processing *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
 * xmlFreeInputStream:
 * @input:  an xmlParserInputPtr
 *
 * Free up an input stream: its filename, directory, encoding and
 * version strings, the base content (through the stream's own free
 * callback when one is set), the attached I/O buffer and finally the
 * structure itself. A NULL @input is ignored.
 */
void
xmlFreeInputStream(xmlParserInputPtr input) {
    if (input == NULL)
        return;

    if (input->filename != NULL) {
        xmlFree((char *) input->filename);
    }
    if (input->directory != NULL) {
        xmlFree((char *) input->directory);
    }
    if (input->encoding != NULL) {
        xmlFree((char *) input->encoding);
    }
    if (input->version != NULL) {
        xmlFree((char *) input->version);
    }
    /* the content is only released when a free callback was provided */
    if ((input->free != NULL) && (input->base != NULL)) {
        input->free((xmlChar *) input->base);
    }
    if (input->buf != NULL) {
        xmlFreeParserInputBuffer(input->buf);
    }
    xmlFree(input);
}
/**
 * xmlNewInputStream:
 * @ctxt:  an XML parser context
 *
 * Create a new input stream structure. The structure is zeroed, then
 * line and column are set to 1 and standalone to -1.
 *
 * Returns the new input stream or NULL
 */
xmlParserInputPtr
xmlNewInputStream(xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr stream;

    stream = (xmlParserInputPtr) xmlMalloc(sizeof(*stream));
    if (stream == NULL) {
        xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
        return(NULL);
    }

    memset(stream, 0, sizeof(*stream));
    stream->standalone = -1;
    stream->line = 1;
    stream->col = 1;

    /*
     * If the context is NULL the id cannot be initialized, but that
     * should not happen while parsing which is the situation where
     * the id is actually needed.
     */
    if (ctxt != NULL) {
        stream->id = ctxt->input_id++;
    }

    return(stream);
}
/**
 * xmlNewIOInputStream:
 * @ctxt:  an XML parser context
 * @input:  an I/O Input
 * @enc:  the charset encoding if known
 *
 * Create a new input stream structure encapsulating the @input into
 * a stream suitable for the parser. When @enc is not
 * XML_CHAR_ENCODING_NONE, xmlSwitchEncoding() is called on @ctxt with
 * it; its result is not checked.
 *
 * Returns the new input stream or NULL
 */
xmlParserInputPtr
xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
                    xmlCharEncoding enc) {
    xmlParserInputPtr stream;

    if (input == NULL)
        return(NULL);

    if (xmlParserDebugEntities) {
        xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
    }

    stream = xmlNewInputStream(ctxt);
    if (stream == NULL)
        return(NULL);

    stream->filename = NULL;
    stream->buf = input;
    /* point the stream's base/cur/end at the I/O buffer content */
    xmlBufResetInput(stream->buf->buffer, stream);

    if (enc != XML_CHAR_ENCODING_NONE) {
        xmlSwitchEncoding(ctxt, enc);
    }

    return(stream);
}
/**
 * xmlNewEntityInputStream:
 * @ctxt:  an XML parser context
 * @entity:  an Entity pointer
 *
 * Create a new input stream based on an xmlEntityPtr.
 *
 * An entity without content is only usable when it is an external
 * parsed or external parameter entity, in which case the stream is
 * obtained from xmlLoadExternalEntity(); other entity types without
 * content raise an internal error. Otherwise the stream points
 * directly at the entity content, and a zero entity length is first
 * replaced by the string length of that content.
 *
 * Returns the new input stream or NULL
 */
xmlParserInputPtr
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
    xmlParserInputPtr stream;

    if (entity == NULL) {
        xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
                       NULL);
        return(NULL);
    }

    if (xmlParserDebugEntities) {
        xmlGenericError(xmlGenericErrorContext,
                        "new input from entity: %s\n", entity->name);
    }

    if (entity->content == NULL) {
        switch (entity->etype) {
            case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
            case XML_EXTERNAL_PARAMETER_ENTITY:
                return(xmlLoadExternalEntity((char *) entity->URI,
                                             (char *) entity->ExternalID,
                                             ctxt));
            case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
                xmlErrInternal(ctxt, "Cannot parse entity %s\n",
                               entity->name);
                break;
            case XML_INTERNAL_GENERAL_ENTITY:
                xmlErrInternal(ctxt,
                               "Internal entity %s without content !\n",
                               entity->name);
                break;
            case XML_INTERNAL_PARAMETER_ENTITY:
                xmlErrInternal(ctxt,
                       "Internal parameter entity %s without content !\n",
                               entity->name);
                break;
            case XML_INTERNAL_PREDEFINED_ENTITY:
                xmlErrInternal(ctxt,
                               "Predefined entity %s without content !\n",
                               entity->name);
                break;
        }
        return(NULL);
    }

    stream = xmlNewInputStream(ctxt);
    if (stream == NULL)
        return(NULL);

    if (entity->URI != NULL) {
        stream->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
    }

    /* a zero length is recomputed from the content and stored back */
    if (entity->length == 0) {
        entity->length = xmlStrlen(entity->content);
    }

    stream->base = entity->content;
    stream->cur = entity->content;
    stream->length = entity->length;
    stream->end = &entity->content[stream->length];
    return(stream);
}
/**
 * xmlNewStringInputStream:
 * @ctxt:  an XML parser context
 * @buffer:  an memory buffer
 *
 * Create a new input stream based on a memory buffer. The stream
 * points directly at @buffer; its length is computed with xmlStrlen().
 *
 * Returns the new input stream, or NULL if @buffer is NULL or the
 * stream could not be allocated
 */
xmlParserInputPtr
xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
    xmlParserInputPtr stream;

    if (buffer == NULL) {
        xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
                       NULL);
        return(NULL);
    }

    if (xmlParserDebugEntities) {
        xmlGenericError(xmlGenericErrorContext,
                        "new fixed input: %.30s\n", buffer);
    }

    stream = xmlNewInputStream(ctxt);
    if (stream == NULL) {
        xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
        return(NULL);
    }

    stream->length = xmlStrlen(buffer);
    stream->base = buffer;
    stream->cur = buffer;
    stream->end = &buffer[stream->length];
    return(stream);
}
/**
 * xmlNewInputFromFile:
 * @ctxt:  an XML parser context
 * @filename:  the filename to use as entity
 *
 * Create a new input stream based on a file or an URL.
 *
 * The I/O buffer is created with xmlParserInputBufferCreateFilename()
 * and attached to a new input stream, which is then passed through
 * xmlCheckHTTPInput(). The stream filename is set to the canonic path
 * of the resulting URI and its directory is derived from that URI; the
 * context directory is set as well when it was not set yet.
 *
 * Returns the new input stream or NULL in case of error
 */
xmlParserInputPtr
xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
    xmlParserInputBufferPtr buf;
    xmlParserInputPtr inputStream;
    char *directory = NULL;
    xmlChar *URI = NULL;

    if (xmlParserDebugEntities)
        xmlGenericError(xmlGenericErrorContext,
                        "new input from file: %s\n", filename);
    if (ctxt == NULL) return(NULL);

    buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
    if (buf == NULL) {
        if (filename == NULL)
            __xmlLoaderErr(ctxt,
                           "failed to load external entity: NULL filename\n",
                           NULL);
        else
            __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
                           (const char *) filename);
        return(NULL);
    }

    inputStream = xmlNewInputStream(ctxt);
    if (inputStream == NULL) {
        /* the buffer has no owner yet: release it to avoid a leak */
        xmlFreeParserInputBuffer(buf);
        return(NULL);
    }

    /* from here on the stream owns the buffer */
    inputStream->buf = buf;
    inputStream = xmlCheckHTTPInput(ctxt, inputStream);
    if (inputStream == NULL)
        return(NULL);

    if (inputStream->filename == NULL)
        URI = xmlStrdup((xmlChar *) filename);
    else
        URI = xmlStrdup((xmlChar *) inputStream->filename);
    directory = xmlParserGetDirectory((const char *) URI);
    if (inputStream->filename != NULL) xmlFree((char *) inputStream->filename);
    inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
    if (URI != NULL) xmlFree((char *) URI);
    inputStream->directory = directory;

    xmlBufResetInput(inputStream->buf->buffer, inputStream);
    if ((ctxt->directory == NULL) && (directory != NULL))
        ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
    return(inputStream);
}
/************************************************************************
* *
* Commodity functions to handle parser contexts *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
* xmlInitParserCtxt :
* @ ctxt : an XML parser context
*
* Initialize a parser context
2003-04-24 16:06:47 +00:00
*
* Returns 0 in case of success and - 1 in case of error
2001-02-23 17:55:21 +00:00
*/
2003-04-24 16:06:47 +00:00
int
2001-02-23 17:55:21 +00:00
xmlInitParserCtxt ( xmlParserCtxtPtr ctxt )
{
2004-11-09 14:59:59 +00:00
xmlParserInputPtr input ;
2001-08-31 14:55:30 +00:00
if ( ctxt = = NULL ) {
2003-10-05 21:33:18 +00:00
xmlErrInternal ( NULL , " Got NULL parser context \n " , NULL ) ;
2003-04-24 16:06:47 +00:00
return ( - 1 ) ;
2001-08-31 14:55:30 +00:00
}
2001-02-23 17:55:21 +00:00
xmlDefaultSAXHandlerInit ( ) ;
2004-11-02 14:52:23 +00:00
if ( ctxt - > dict = = NULL )
ctxt - > dict = xmlDictCreate ( ) ;
2003-08-18 12:15:38 +00:00
if ( ctxt - > dict = = NULL ) {
2003-10-05 21:33:18 +00:00
xmlErrMemory ( NULL , " cannot initialize parser context \n " ) ;
2003-08-18 12:15:38 +00:00
return ( - 1 ) ;
}
2012-07-30 10:08:45 +08:00
xmlDictSetLimit ( ctxt - > dict , XML_MAX_DICTIONARY_LIMIT ) ;
2004-11-02 14:52:23 +00:00
if ( ctxt - > sax = = NULL )
ctxt - > sax = ( xmlSAXHandler * ) xmlMalloc ( sizeof ( xmlSAXHandler ) ) ;
2002-11-22 05:07:29 +00:00
if ( ctxt - > sax = = NULL ) {
2003-10-05 21:33:18 +00:00
xmlErrMemory ( NULL , " cannot initialize parser context \n " ) ;
2003-04-24 16:06:47 +00:00
return ( - 1 ) ;
2001-02-23 17:55:21 +00:00
}
else
2003-09-25 14:29:29 +00:00
xmlSAXVersion ( ctxt - > sax , 2 ) ;
2001-02-23 17:55:21 +00:00
2003-08-19 15:01:28 +00:00
ctxt - > maxatts = 0 ;
ctxt - > atts = NULL ;
2001-02-23 17:55:21 +00:00
/* Allocate the Input stack */
2004-11-02 14:52:23 +00:00
if ( ctxt - > inputTab = = NULL ) {
ctxt - > inputTab = ( xmlParserInputPtr * )
xmlMalloc ( 5 * sizeof ( xmlParserInputPtr ) ) ;
ctxt - > inputMax = 5 ;
}
2001-02-23 17:55:21 +00:00
if ( ctxt - > inputTab = = NULL ) {
2003-10-05 21:33:18 +00:00
xmlErrMemory ( NULL , " cannot initialize parser context \n " ) ;
2001-02-23 17:55:21 +00:00
ctxt - > inputNr = 0 ;
ctxt - > inputMax = 0 ;
ctxt - > input = NULL ;
2003-04-24 16:06:47 +00:00
return ( - 1 ) ;
2001-02-23 17:55:21 +00:00
}
2004-11-09 14:59:59 +00:00
while ( ( input = inputPop ( ctxt ) ) ! = NULL ) { /* Non consuming */
xmlFreeInputStream ( input ) ;
}
2001-02-23 17:55:21 +00:00
ctxt - > inputNr = 0 ;
ctxt - > input = NULL ;
ctxt - > version = NULL ;
ctxt - > encoding = NULL ;
ctxt - > standalone = - 1 ;
ctxt - > hasExternalSubset = 0 ;
ctxt - > hasPErefs = 0 ;
ctxt - > html = 0 ;
ctxt - > external = 0 ;
ctxt - > instate = XML_PARSER_START ;
ctxt - > token = 0 ;
ctxt - > directory = NULL ;
/* Allocate the Node stack */
2004-11-02 14:52:23 +00:00
if ( ctxt - > nodeTab = = NULL ) {
ctxt - > nodeTab = ( xmlNodePtr * ) xmlMalloc ( 10 * sizeof ( xmlNodePtr ) ) ;
ctxt - > nodeMax = 10 ;
}
2001-02-23 17:55:21 +00:00
if ( ctxt - > nodeTab = = NULL ) {
2003-10-05 21:33:18 +00:00
xmlErrMemory ( NULL , " cannot initialize parser context \n " ) ;
2001-02-23 17:55:21 +00:00
ctxt - > nodeNr = 0 ;
ctxt - > nodeMax = 0 ;
ctxt - > node = NULL ;
ctxt - > inputNr = 0 ;
ctxt - > inputMax = 0 ;
ctxt - > input = NULL ;
2003-04-24 16:06:47 +00:00
return ( - 1 ) ;
2001-02-23 17:55:21 +00:00
}
ctxt - > nodeNr = 0 ;
ctxt - > node = NULL ;
/* Allocate the Name stack */
2004-11-02 14:52:23 +00:00
if ( ctxt - > nameTab = = NULL ) {
ctxt - > nameTab = ( const xmlChar * * ) xmlMalloc ( 10 * sizeof ( xmlChar * ) ) ;
ctxt - > nameMax = 10 ;
}
2001-02-23 17:55:21 +00:00
if ( ctxt - > nameTab = = NULL ) {
2003-10-05 21:33:18 +00:00
xmlErrMemory ( NULL , " cannot initialize parser context \n " ) ;
2001-02-23 17:55:21 +00:00
ctxt - > nodeNr = 0 ;
ctxt - > nodeMax = 0 ;
ctxt - > node = NULL ;
ctxt - > inputNr = 0 ;
ctxt - > inputMax = 0 ;
ctxt - > input = NULL ;
ctxt - > nameNr = 0 ;
ctxt - > nameMax = 0 ;
ctxt - > name = NULL ;
2003-04-24 16:06:47 +00:00
return ( - 1 ) ;
2001-02-23 17:55:21 +00:00
}
ctxt - > nameNr = 0 ;
ctxt - > name = NULL ;
/* Allocate the space stack */
2004-11-02 14:52:23 +00:00
if ( ctxt - > spaceTab = = NULL ) {
ctxt - > spaceTab = ( int * ) xmlMalloc ( 10 * sizeof ( int ) ) ;
ctxt - > spaceMax = 10 ;
}
2001-02-23 17:55:21 +00:00
if ( ctxt - > spaceTab = = NULL ) {
2003-10-05 21:33:18 +00:00
xmlErrMemory ( NULL , " cannot initialize parser context \n " ) ;
2001-02-23 17:55:21 +00:00
ctxt - > nodeNr = 0 ;
ctxt - > nodeMax = 0 ;
ctxt - > node = NULL ;
ctxt - > inputNr = 0 ;
ctxt - > inputMax = 0 ;
ctxt - > input = NULL ;
ctxt - > nameNr = 0 ;
ctxt - > nameMax = 0 ;
ctxt - > name = NULL ;
ctxt - > spaceNr = 0 ;
ctxt - > spaceMax = 0 ;
ctxt - > space = NULL ;
2003-04-24 16:06:47 +00:00
return ( - 1 ) ;
2001-02-23 17:55:21 +00:00
}
ctxt - > spaceNr = 1 ;
ctxt - > spaceMax = 10 ;
ctxt - > spaceTab [ 0 ] = - 1 ;
ctxt - > space = & ctxt - > spaceTab [ 0 ] ;
ctxt - > userData = ctxt ;
ctxt - > myDoc = NULL ;
ctxt - > wellFormed = 1 ;
2003-09-11 23:42:01 +00:00
ctxt - > nsWellFormed = 1 ;
2001-02-23 17:55:21 +00:00
ctxt - > valid = 1 ;
ctxt - > loadsubset = xmlLoadExtDtdDefaultValue ;
2014-06-11 16:59:16 +08:00
if ( ctxt - > loadsubset ) {
ctxt - > options | = XML_PARSE_DTDLOAD ;
}
2001-02-23 17:55:21 +00:00
ctxt - > validate = xmlDoValidityCheckingDefaultValue ;
ctxt - > pedantic = xmlPedanticParserDefaultValue ;
2014-06-11 16:59:16 +08:00
if ( ctxt - > pedantic ) {
ctxt - > options | = XML_PARSE_PEDANTIC ;
}
2001-07-25 17:18:57 +00:00
ctxt - > linenumbers = xmlLineNumbersDefaultValue ;
2001-02-23 17:55:21 +00:00
ctxt - > keepBlanks = xmlKeepBlanksDefaultValue ;
2014-06-11 16:59:16 +08:00
if ( ctxt - > keepBlanks = = 0 ) {
2003-09-26 14:51:39 +00:00
ctxt - > sax - > ignorableWhitespace = xmlSAX2IgnorableWhitespace ;
2014-06-11 16:59:16 +08:00
ctxt - > options | = XML_PARSE_NOBLANKS ;
}
2001-09-14 10:29:27 +00:00
2004-10-29 12:10:55 +00:00
ctxt - > vctxt . finishDtd = XML_CTXT_FINISH_DTD_0 ;
2001-02-23 17:55:21 +00:00
ctxt - > vctxt . userData = ctxt ;
2002-02-03 20:13:06 +00:00
ctxt - > vctxt . error = xmlParserValidityError ;
ctxt - > vctxt . warning = xmlParserValidityWarning ;
2001-02-23 17:55:21 +00:00
if ( ctxt - > validate ) {
if ( xmlGetWarningsDefaultValue = = 0 )
ctxt - > vctxt . warning = NULL ;
else
ctxt - > vctxt . warning = xmlParserValidityWarning ;
2001-04-21 14:16:10 +00:00
ctxt - > vctxt . nodeMax = 0 ;
2014-06-11 16:59:16 +08:00
ctxt - > options | = XML_PARSE_DTDVALID ;
2001-02-23 17:55:21 +00:00
}
ctxt - > replaceEntities = xmlSubstituteEntitiesDefaultValue ;
2014-06-11 16:59:16 +08:00
if ( ctxt - > replaceEntities ) {
ctxt - > options | = XML_PARSE_NOENT ;
}
2001-02-23 17:55:21 +00:00
ctxt - > record_info = 0 ;
ctxt - > nbChars = 0 ;
ctxt - > checkIndex = 0 ;
ctxt - > inSubset = 0 ;
ctxt - > errNo = XML_ERR_OK ;
ctxt - > depth = 0 ;
ctxt - > charset = XML_CHAR_ENCODING_UTF8 ;
2001-08-22 14:29:45 +00:00
ctxt - > catalogs = NULL ;
2008-08-20 17:04:30 +00:00
ctxt - > nbentities = 0 ;
2013-02-19 10:21:49 +08:00
ctxt - > sizeentities = 0 ;
ctxt - > sizeentcopy = 0 ;
2012-05-15 11:18:40 +08:00
ctxt - > input_id = 1 ;
2001-02-23 17:55:21 +00:00
xmlInitNodeInfoSeq ( & ctxt - > node_seq ) ;
2003-04-24 16:06:47 +00:00
return ( 0 ) ;
2001-02-23 17:55:21 +00:00
}
/**
 * xmlFreeParserCtxt:
 * @ctxt:  an XML parser context, may be NULL
 *
 * Release a parser context and the resources hanging off it: the
 * remaining input streams, the parser stacks and tables, the strings
 * recorded from the XML declaration and external subset, the SAX
 * handler, the dictionary, the attribute hash tables, the recycled
 * node/attribute lists, the last error strings and the local catalogs.
 * The document in ctxt->myDoc is not touched by this function.
 */
void
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
{
    xmlParserInputPtr stream;
    xmlNodePtr elem, nextElem;
    xmlAttrPtr attr, nextAttr;

    if (ctxt == NULL)
        return;

    /* Drain the input stack, releasing every stream still on it */
    for (stream = inputPop(ctxt); stream != NULL; stream = inputPop(ctxt))
        xmlFreeInputStream(stream);

    /* Parser stacks and tables */
    if (ctxt->spaceTab != NULL)
        xmlFree(ctxt->spaceTab);
    if (ctxt->nameTab != NULL)
        xmlFree((xmlChar * *) ctxt->nameTab);
    if (ctxt->nodeTab != NULL)
        xmlFree(ctxt->nodeTab);
    if (ctxt->nodeInfoTab != NULL)
        xmlFree(ctxt->nodeInfoTab);
    if (ctxt->inputTab != NULL)
        xmlFree(ctxt->inputTab);

    /* Strings owned by the context */
    if (ctxt->version != NULL)
        xmlFree((char *) ctxt->version);
    if (ctxt->encoding != NULL)
        xmlFree((char *) ctxt->encoding);
    if (ctxt->extSubURI != NULL)
        xmlFree((char *) ctxt->extSubURI);
    if (ctxt->extSubSystem != NULL)
        xmlFree((char *) ctxt->extSubSystem);

    /*
     * With SAX1 support compiled in, skip the free when ctxt->sax points
     * at the global xmlDefaultSAXHandler.
     */
    if (ctxt->sax != NULL) {
#ifdef LIBXML_SAX1_ENABLED
        if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
#endif /* LIBXML_SAX1_ENABLED */
            xmlFree(ctxt->sax);
    }

    if (ctxt->directory != NULL)
        xmlFree((char *) ctxt->directory);
    if (ctxt->vctxt.nodeTab != NULL)
        xmlFree(ctxt->vctxt.nodeTab);
    if (ctxt->atts != NULL)
        xmlFree((xmlChar * *) ctxt->atts);
    if (ctxt->dict != NULL)
        xmlDictFree(ctxt->dict);
    if (ctxt->nsTab != NULL)
        xmlFree((char *) ctxt->nsTab);
    if (ctxt->pushTab != NULL)
        xmlFree(ctxt->pushTab);
    if (ctxt->attallocs != NULL)
        xmlFree(ctxt->attallocs);

    /* attsDefault entries are released too, attsSpecial only the table */
    if (ctxt->attsDefault != NULL)
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
    if (ctxt->attsSpecial != NULL)
        xmlHashFree(ctxt->attsSpecial, NULL);

    /* Walk the singly linked lists of recycled elements and attributes */
    for (elem = ctxt->freeElems; elem != NULL; elem = nextElem) {
        nextElem = elem->next;
        xmlFree(elem);
    }
    for (attr = ctxt->freeAttrs; attr != NULL; attr = nextAttr) {
        nextAttr = attr->next;
        xmlFree(attr);
    }

    /*
     * cleanup the error strings
     */
    if (ctxt->lastError.message != NULL)
        xmlFree(ctxt->lastError.message);
    if (ctxt->lastError.file != NULL)
        xmlFree(ctxt->lastError.file);
    if (ctxt->lastError.str1 != NULL)
        xmlFree(ctxt->lastError.str1);
    if (ctxt->lastError.str2 != NULL)
        xmlFree(ctxt->lastError.str2);
    if (ctxt->lastError.str3 != NULL)
        xmlFree(ctxt->lastError.str3);

#ifdef LIBXML_CATALOG_ENABLED
    if (ctxt->catalogs != NULL)
        xmlCatalogFreeLocal(ctxt->catalogs);
#endif

    /* Finally the context structure itself */
    xmlFree(ctxt);
}
/**
* xmlNewParserCtxt :
*
* Allocate and initialize a new parser context .
*
* Returns the xmlParserCtxtPtr or NULL
*/
xmlParserCtxtPtr
2005-07-29 22:02:24 +00:00
xmlNewParserCtxt ( void )
2001-02-23 17:55:21 +00:00
{
xmlParserCtxtPtr ctxt ;
ctxt = ( xmlParserCtxtPtr ) xmlMalloc ( sizeof ( xmlParserCtxt ) ) ;
if ( ctxt = = NULL ) {
2003-10-05 21:33:18 +00:00
xmlErrMemory ( NULL , " cannot allocate parser context \n " ) ;
2001-02-23 17:55:21 +00:00
return ( NULL ) ;
}
memset ( ctxt , 0 , sizeof ( xmlParserCtxt ) ) ;
2003-04-24 16:06:47 +00:00
if ( xmlInitParserCtxt ( ctxt ) < 0 ) {
xmlFreeParserCtxt ( ctxt ) ;
return ( NULL ) ;
}
2001-02-23 17:55:21 +00:00
return ( ctxt ) ;
}
/************************************************************************
* *
2020-03-08 17:19:42 +01:00
* Handling of node information *
2001-02-23 17:55:21 +00:00
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
 * xmlClearParserCtxt:
 * @ctxt:  an XML parser context, may be NULL
 *
 * Clear (release owned resources) and reinitialize a parser context:
 * the node info sequence is cleared first, then the context is reset
 * through xmlCtxtReset(). A NULL @ctxt is ignored.
 */
void
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
{
    if (ctxt != NULL) {
        xmlClearNodeInfoSeq(&ctxt->node_seq);
        xmlCtxtReset(ctxt);
    }
}
2004-11-05 17:22:25 +00:00
2001-02-23 17:55:21 +00:00
/**
* xmlParserFindNodeInfo :
2002-12-10 15:19:08 +00:00
* @ ctx : an XML parser context
2001-02-23 17:55:21 +00:00
* @ node : an XML node within the tree
*
* Find the parser node info struct for a given node
2012-09-11 13:26:36 +08:00
*
2001-02-23 17:55:21 +00:00
* Returns an xmlParserNodeInfo block pointer or NULL
*/
2004-11-05 17:22:25 +00:00
const xmlParserNodeInfo *
xmlParserFindNodeInfo ( const xmlParserCtxtPtr ctx , const xmlNodePtr node )
2001-02-23 17:55:21 +00:00
{
2004-11-05 17:22:25 +00:00
unsigned long pos ;
if ( ( ctx = = NULL ) | | ( node = = NULL ) )
return ( NULL ) ;
/* Find position where node should be at */
pos = xmlParserFindNodeInfoIndex ( & ctx - > node_seq , node ) ;
if ( pos < ctx - > node_seq . length
& & ctx - > node_seq . buffer [ pos ] . node = = node )
return & ctx - > node_seq . buffer [ pos ] ;
else
return NULL ;
2001-02-23 17:55:21 +00:00
}
/**
 * xmlInitNodeInfoSeq:
 * @seq:  a node info sequence pointer, may be NULL
 *
 * Put a node info sequence into its initial, empty state: no buffer,
 * zero length and zero capacity. Any buffer previously referenced by
 * @seq is not released here.
 */
void
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
{
    if (seq != NULL) {
        seq->buffer = NULL;
        seq->maximum = 0;
        seq->length = 0;
    }
}
/**
 * xmlClearNodeInfoSeq:
 * @seq:  a node info sequence pointer, may be NULL
 *
 * Release the buffer held by a node info sequence, if any, and put the
 * sequence back into its initial state with xmlInitNodeInfoSeq().
 */
void
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
{
    if (seq != NULL) {
        if (seq->buffer != NULL)
            xmlFree(seq->buffer);
        xmlInitNodeInfoSeq(seq);
    }
}
/**
* xmlParserFindNodeInfoIndex :
* @ seq : a node info sequence pointer
* @ node : an XML node pointer
*
2012-09-11 13:26:36 +08:00
*
2001-02-23 17:55:21 +00:00
* xmlParserFindNodeInfoIndex : Find the index that the info record for
* the given node is or should be at in a sorted sequence
*
* Returns a long indicating the position of the record
*/
2004-11-05 17:22:25 +00:00
unsigned long
xmlParserFindNodeInfoIndex ( const xmlParserNodeInfoSeqPtr seq ,
const xmlNodePtr node )
2001-02-23 17:55:21 +00:00
{
2004-11-05 17:22:25 +00:00
unsigned long upper , lower , middle ;
int found = 0 ;
if ( ( seq = = NULL ) | | ( node = = NULL ) )
2005-12-10 11:11:12 +00:00
return ( ( unsigned long ) - 1 ) ;
2004-11-05 17:22:25 +00:00
/* Do a binary search for the key */
lower = 1 ;
upper = seq - > length ;
middle = 0 ;
while ( lower < = upper & & ! found ) {
middle = lower + ( upper - lower ) / 2 ;
if ( node = = seq - > buffer [ middle - 1 ] . node )
found = 1 ;
else if ( node < seq - > buffer [ middle - 1 ] . node )
upper = middle - 1 ;
else
lower = middle + 1 ;
}
/* Return position */
if ( middle = = 0 | | seq - > buffer [ middle - 1 ] . node < node )
return middle ;
2001-02-23 17:55:21 +00:00
else
2004-11-05 17:22:25 +00:00
return middle - 1 ;
2001-02-23 17:55:21 +00:00
}
/**
* xmlParserAddNodeInfo :
* @ ctxt : an XML parser context
* @ info : a node info sequence pointer
*
* Insert node info record into the sorted sequence
*/
void
2002-01-23 17:53:44 +00:00
xmlParserAddNodeInfo ( xmlParserCtxtPtr ctxt ,
2002-01-20 22:08:18 +00:00
const xmlParserNodeInfoPtr info )
2001-02-23 17:55:21 +00:00
{
2002-01-23 17:53:44 +00:00
unsigned long pos ;
2004-11-05 17:22:25 +00:00
if ( ( ctxt = = NULL ) | | ( info = = NULL ) ) return ;
2002-01-23 17:53:44 +00:00
/* Find pos and check to see if node is already in the sequence */
2003-07-31 14:47:38 +00:00
pos = xmlParserFindNodeInfoIndex ( & ctxt - > node_seq , ( xmlNodePtr )
2002-01-23 17:53:44 +00:00
info - > node ) ;
2006-03-09 14:13:55 +00:00
2012-09-11 13:26:36 +08:00
if ( ( pos < ctxt - > node_seq . length ) & &
2006-03-09 14:13:55 +00:00
( ctxt - > node_seq . buffer ! = NULL ) & &
( ctxt - > node_seq . buffer [ pos ] . node = = info - > node ) ) {
2002-01-23 17:53:44 +00:00
ctxt - > node_seq . buffer [ pos ] = * info ;
}
2001-02-23 17:55:21 +00:00
2002-01-23 17:53:44 +00:00
/* Otherwise, we need to add new node to buffer */
else {
2013-08-03 22:25:13 +08:00
if ( ( ctxt - > node_seq . length + 1 > ctxt - > node_seq . maximum ) | |
( ctxt - > node_seq . buffer = = NULL ) ) {
2002-01-23 17:53:44 +00:00
xmlParserNodeInfo * tmp_buffer ;
unsigned int byte_size ;
if ( ctxt - > node_seq . maximum = = 0 )
ctxt - > node_seq . maximum = 2 ;
byte_size = ( sizeof ( * ctxt - > node_seq . buffer ) *
( 2 * ctxt - > node_seq . maximum ) ) ;
if ( ctxt - > node_seq . buffer = = NULL )
2003-04-21 23:07:45 +00:00
tmp_buffer = ( xmlParserNodeInfo * ) xmlMalloc ( byte_size ) ;
2002-01-23 17:53:44 +00:00
else
tmp_buffer =
( xmlParserNodeInfo * ) xmlRealloc ( ctxt - > node_seq . buffer ,
byte_size ) ;
if ( tmp_buffer = = NULL ) {
2003-10-05 21:33:18 +00:00
xmlErrMemory ( ctxt , " failed to allocate buffer \n " ) ;
2002-01-23 17:53:44 +00:00
return ;
}
ctxt - > node_seq . buffer = tmp_buffer ;
ctxt - > node_seq . maximum * = 2 ;
}
2001-02-23 17:55:21 +00:00
2002-01-23 17:53:44 +00:00
/* If position is not at end, move elements out of the way */
if ( pos ! = ctxt - > node_seq . length ) {
unsigned long i ;
2001-02-23 17:55:21 +00:00
2002-01-23 17:53:44 +00:00
for ( i = ctxt - > node_seq . length ; i > pos ; i - - )
ctxt - > node_seq . buffer [ i ] = ctxt - > node_seq . buffer [ i - 1 ] ;
}
2001-02-23 17:55:21 +00:00
2002-01-23 17:53:44 +00:00
/* Copy element and increase length */
ctxt - > node_seq . buffer [ pos ] = * info ;
ctxt - > node_seq . length + + ;
2001-02-23 17:55:21 +00:00
}
}
2001-07-25 17:18:57 +00:00
/************************************************************************
* *
* Defaults settings *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
 * xmlPedanticParserDefault:
 * @val:  int 0 or 1
 *
 * Set the default value for enabling pedantic warnings and return the
 * value that was in effect before the call.
 *
 * Returns the previous value of the setting.
 */
int
xmlPedanticParserDefault(int val)
{
    const int previous = xmlPedanticParserDefaultValue;

    xmlPedanticParserDefaultValue = val;
    return(previous);
}
/**
 * xmlLineNumbersDefault:
 * @val:  int 0 or 1
 *
 * Set the default value for enabling line numbers in elements contents
 * and return the value that was in effect before the call.
 * This may break on old application and is turned off by default.
 *
 * Returns the previous value of the setting.
 */
int
xmlLineNumbersDefault(int val)
{
    const int previous = xmlLineNumbersDefaultValue;

    xmlLineNumbersDefaultValue = val;
    return(previous);
}
/**
 * xmlSubstituteEntitiesDefault:
 * @val:  int 0 or 1
 *
 * Set the default value for entity substitution and return the value
 * that was in effect before the call.
 * Initially the parser always keeps entity references instead of
 * substituting entity values in the output. This function has to be used
 * to change the default parser behavior;
 * SAX::substituteEntities() has to be used for changing that on a file by
 * file basis.
 *
 * Returns the previous value: 0 for no substitution, 1 for substitution.
 */
int
xmlSubstituteEntitiesDefault(int val)
{
    const int previous = xmlSubstituteEntitiesDefaultValue;

    xmlSubstituteEntitiesDefaultValue = val;
    return(previous);
}
/**
* xmlKeepBlanksDefault :
2012-09-11 13:26:36 +08:00
* @ val : int 0 or 1
2001-07-25 17:18:57 +00:00
*
* Set and return the previous value for default blanks text nodes support .
* The 1. x version of the parser used an heuristic to try to detect
* ignorable white spaces . As a result the SAX callback was generating
2003-09-26 14:51:39 +00:00
* xmlSAX2IgnorableWhitespace ( ) callbacks instead of characters ( ) one , and when
2001-07-25 17:18:57 +00:00
* using the DOM output text nodes containing those blanks were not generated .
* The 2. x and later version will switch to the XML standard way and
* ignorableWhitespace ( ) are only generated when running the parser in
* validating mode and when the current element doesn ' t allow CDATA or
* mixed content .
2012-09-11 13:26:36 +08:00
* This function is provided as a way to force the standard behavior
2001-07-25 17:18:57 +00:00
* on 1. X libs and to switch back to the old mode for compatibility when
* running 1. X client code on 2. X . Upgrade of 1. X code should be done
* by using xmlIsBlankNode ( ) commodity function to detect the " empty "
* nodes generated .
* This value also affect autogeneration of indentation when saving code
* if blanks sections are kept , indentation is not generated .
*
* Returns the last value for 0 for no substitution , 1 for substitution .
*/
int
xmlKeepBlanksDefault ( int val ) {
int old = xmlKeepBlanksDefaultValue ;
xmlKeepBlanksDefaultValue = val ;
2009-08-20 12:11:17 +02:00
if ( ! val ) xmlIndentTreeOutput = 1 ;
2001-07-25 17:18:57 +00:00
return ( old ) ;
}
2005-04-01 13:11:58 +00:00
# define bottom_parserInternals
# include "elfgcchack.h"