2001-02-23 17:55:21 +00:00
/**
2012-07-24 11:44:23 +08:00
* uri . c : set of generic URI related routines
2001-02-23 17:55:21 +00:00
*
2008-08-04 15:29:44 +00:00
* Reference : RFCs 3986 , 2732 and 2373
2001-02-23 17:55:21 +00:00
*
* See Copyright for the status of this software .
*
2001-06-24 12:13:24 +00:00
* daniel @ veillard . com
2001-02-23 17:55:21 +00:00
*/
2002-03-18 19:37:11 +00:00
# define IN_LIBXML
2001-04-21 16:57:29 +00:00
# include "libxml.h"
2001-02-23 17:55:21 +00:00
# include <string.h>
# include <libxml/xmlmemory.h>
# include <libxml/uri.h>
2001-10-13 09:15:48 +00:00
# include <libxml/globals.h>
2001-02-23 17:55:21 +00:00
# include <libxml/xmlerror.h>
2012-07-24 11:44:23 +08:00
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice they are usually relatively short except for the
 * data URI scheme as defined in RFC 2397. Even for data URI the usual
 * maximum size before hitting random practical limits is around 64 KB
 * and 4 KB is usually a maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * value whatever operators surround it.
 */
#define MAX_URI_LENGTH (1024 * 1024)
/**
 * xmlURIErrMemory:
 * @extra:  optional text describing what was being allocated, or NULL
 *
 * Raise an XML_ERR_NO_MEMORY error at XML_ERR_FATAL level in the
 * XML_FROM_URI domain. When @extra is given it is appended to the message.
 */
static void
xmlURIErrMemory(const char *extra)
{
    if (extra == NULL) {
        __xmlRaiseError(NULL, NULL, NULL,
                        NULL, NULL, XML_FROM_URI,
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
                        NULL, NULL, NULL, 0, 0,
                        "Memory allocation failed\n");
        return;
    }

    __xmlRaiseError(NULL, NULL, NULL,
                    NULL, NULL, XML_FROM_URI,
                    XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
                    extra, NULL, NULL, 0, 0,
                    "Memory allocation failed : %s\n", extra);
}
2008-08-04 15:29:44 +00:00
static void xmlCleanURI ( xmlURIPtr uri ) ;
2001-02-23 17:55:21 +00:00
/*
 * Old rules from RFC 2396, used in the legacy handling code.
 * All IS_xxx(x) macros below take a character value, except
 * IS_UNWISE(p) which takes a pointer to the character to test.
 *
 * alpha    = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/* Drop any IS_DIGIT inherited from an earlier header before redefining it. */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 */
#define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
       ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
/*
 * Skip to next pointer char, handle escaped sequences.
 *
 * @p must be a modifiable pointer lvalue. If it points at '%' it is
 * advanced by 3 (the '%' and two following characters), otherwise by 1.
 * No validation of the escape is done here; callers check it beforehand.
 * The argument is parenthesized so that expressions such as *pp work.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/*
 * Duplicate the first @n bytes of @s through xmlStrndup() and return the
 * result as a char pointer.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
/************************************************************************
 *                                                                      *
 *                         RFC 3986 parser                              *
 *                                                                      *
 ************************************************************************/

/*
 * All ISA_xxx(p) macros take a pointer to the character to classify and
 * evaluate to non-zero when the character (or, for ISA_PCT_ENCODED, the
 * three character sequence) starting at @p belongs to the class.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||               \
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)                                                   \
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||             \
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||         \
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||         \
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||         \
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                               \
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||           \
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two hex digits are only inspected when the first character is '%'
 * (short-circuit evaluation), so a NUL terminated string is never over-read.
 */
#define ISA_PCT_ENCODED(p)                                              \
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                    \
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||    \
      ((*(p) == ':')) || ((*(p) == '@')))
2001-02-23 17:55:21 +00:00
2008-08-04 15:29:44 +00:00
/**
 * xmlParse3986Scheme:
 * @uri:  pointer to an URI structure, may be NULL
 * @str:  pointer to the string to analyze
 *
 * Parse an URI scheme
 *
 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 *
 * On success *@str is moved past the scheme and, if @uri is not NULL,
 * uri->scheme is replaced by a copy of the scheme text.
 *
 * Returns 0 on success, -1 if @str is NULL and 2 if the string does not
 * start with a letter
 */
static int
xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
    const char *start;
    const char *end;

    if (str == NULL)
        return(-1);

    start = *str;
    if (!ISA_ALPHA(start))
        return(2);

    /* the first letter is already validated, scan the remaining ones */
    for (end = start + 1;
         ISA_ALPHA(end) || ISA_DIGIT(end) ||
         (*end == '+') || (*end == '-') || (*end == '.');
         end++)
        ;

    if (uri != NULL) {
        if (uri->scheme != NULL)
            xmlFree(uri->scheme);
        uri->scheme = STRNDUP(start, end - start);
    }
    *str = end;
    return(0);
}
2001-10-29 23:59:27 +00:00
2008-08-04 15:29:44 +00:00
/**
* xmlParse3986Fragment :
* @ uri : pointer to an URI structure
* @ str : pointer to the string to analyze
*
* Parse the query part of an URI
*
2008-08-06 10:26:06 +00:00
* fragment = * ( pchar / " / " / " ? " )
* NOTE : the strict syntax as defined by 3986 does not allow ' [ ' and ' ] '
* in the fragment identifier but this is used very broadly for
* xpointer scheme selection , so we are allowing it here to not break
* for example all the DocBook processing chains .
2008-08-04 15:29:44 +00:00
*
* Returns 0 or the error code
2001-02-23 17:55:21 +00:00
*/
2008-08-04 15:29:44 +00:00
static int
xmlParse3986Fragment ( xmlURIPtr uri , const char * * str )
{
const char * cur ;
2001-02-23 17:55:21 +00:00
2008-08-04 15:29:44 +00:00
if ( str = = NULL )
return ( - 1 ) ;
2001-02-23 17:55:21 +00:00
2008-08-04 15:29:44 +00:00
cur = * str ;
while ( ( ISA_PCHAR ( cur ) ) | | ( * cur = = ' / ' ) | | ( * cur = = ' ? ' ) | |
2008-08-06 10:26:06 +00:00
( * cur = = ' [ ' ) | | ( * cur = = ' ] ' ) | |
2008-08-04 15:29:44 +00:00
( ( uri ! = NULL ) & & ( uri - > cleanup & 1 ) & & ( IS_UNWISE ( cur ) ) ) )
NEXT ( cur ) ;
if ( uri ! = NULL ) {
if ( uri - > fragment ! = NULL )
xmlFree ( uri - > fragment ) ;
if ( uri - > cleanup & 2 )
uri - > fragment = STRNDUP ( * str , cur - * str ) ;
else
uri - > fragment = xmlURIUnescapeString ( * str , cur - * str , NULL ) ;
}
* str = cur ;
return ( 0 ) ;
}
/**
* xmlParse3986Query :
* @ uri : pointer to an URI structure
* @ str : pointer to the string to analyze
2001-02-23 17:55:21 +00:00
*
2008-08-04 15:29:44 +00:00
* Parse the query part of an URI
2001-02-23 17:55:21 +00:00
*
2008-08-04 15:29:44 +00:00
* query = * uric
*
* Returns 0 or the error code
2001-02-23 17:55:21 +00:00
*/
2008-08-04 15:29:44 +00:00
static int
xmlParse3986Query ( xmlURIPtr uri , const char * * str )
{
const char * cur ;
2001-02-23 17:55:21 +00:00
2008-08-04 15:29:44 +00:00
if ( str = = NULL )
return ( - 1 ) ;
2005-08-07 10:46:19 +00:00
2008-08-04 15:29:44 +00:00
cur = * str ;
while ( ( ISA_PCHAR ( cur ) ) | | ( * cur = = ' / ' ) | | ( * cur = = ' ? ' ) | |
( ( uri ! = NULL ) & & ( uri - > cleanup & 1 ) & & ( IS_UNWISE ( cur ) ) ) )
NEXT ( cur ) ;
if ( uri ! = NULL ) {
if ( uri - > query ! = NULL )
xmlFree ( uri - > query ) ;
if ( uri - > cleanup & 2 )
uri - > query = STRNDUP ( * str , cur - * str ) ;
else
uri - > query = xmlURIUnescapeString ( * str , cur - * str , NULL ) ;
/* Save the raw bytes of the query as well.
* See : http : //mail.gnome.org/archives/xml/2007-April/thread.html#00114
*/
if ( uri - > query_raw ! = NULL )
xmlFree ( uri - > query_raw ) ;
uri - > query_raw = STRNDUP ( * str , cur - * str ) ;
}
* str = cur ;
return ( 0 ) ;
}
2001-02-23 17:55:21 +00:00
/**
2008-08-04 15:29:44 +00:00
* xmlParse3986Port :
* @ uri : pointer to an URI structure
* @ str : the string to analyze
2001-02-23 17:55:21 +00:00
*
2016-05-21 17:16:05 +08:00
* Parse a port part and fills in the appropriate fields
2008-08-04 15:29:44 +00:00
* of the @ uri structure
2001-02-23 17:55:21 +00:00
*
2008-08-04 15:29:44 +00:00
* port = * DIGIT
*
* Returns 0 or the error code
2001-02-23 17:55:21 +00:00
*/
2008-08-04 15:29:44 +00:00
static int
xmlParse3986Port ( xmlURIPtr uri , const char * * str )
{
const char * cur = * str ;
2016-05-21 17:16:05 +08:00
unsigned port = 0 ; /* unsigned for defined overflow behavior */
2001-02-23 17:55:21 +00:00
2008-08-04 15:29:44 +00:00
if ( ISA_DIGIT ( cur ) ) {
while ( ISA_DIGIT ( cur ) ) {
2016-05-21 17:16:05 +08:00
port = port * 10 + ( * cur - ' 0 ' ) ;
2008-08-04 15:29:44 +00:00
cur + + ;
}
2016-05-21 17:16:05 +08:00
if ( uri ! = NULL )
uri - > port = port & INT_MAX ; /* port value modulo INT_MAX+1 */
2008-08-04 15:29:44 +00:00
* str = cur ;
return ( 0 ) ;
2001-02-23 17:55:21 +00:00
}
2008-08-04 15:29:44 +00:00
return ( 1 ) ;
2001-02-23 17:55:21 +00:00
}
/**
2008-08-04 15:29:44 +00:00
* xmlParse3986Userinfo :
* @ uri : pointer to an URI structure
* @ str : the string to analyze
2001-02-23 17:55:21 +00:00
*
2008-08-04 15:29:44 +00:00
* Parse an user informations part and fills in the appropriate fields
* of the @ uri structure
2001-02-23 17:55:21 +00:00
*
2008-08-04 15:29:44 +00:00
* userinfo = * ( unreserved / pct - encoded / sub - delims / " : " )
*
* Returns 0 or the error code
2001-02-23 17:55:21 +00:00
*/
2008-08-04 15:29:44 +00:00
static int
xmlParse3986Userinfo ( xmlURIPtr uri , const char * * str )
{
const char * cur ;
2001-02-23 17:55:21 +00:00
2008-08-04 15:29:44 +00:00
cur = * str ;
while ( ISA_UNRESERVED ( cur ) | | ISA_PCT_ENCODED ( cur ) | |
ISA_SUB_DELIM ( cur ) | | ( * cur = = ' : ' ) )
NEXT ( cur ) ;
if ( * cur = = ' @ ' ) {
if ( uri ! = NULL ) {
if ( uri - > user ! = NULL ) xmlFree ( uri - > user ) ;
if ( uri - > cleanup & 2 )
uri - > user = STRNDUP ( * str , cur - * str ) ;
else
uri - > user = xmlURIUnescapeString ( * str , cur - * str , NULL ) ;
}
* str = cur ;
return ( 0 ) ;
}
return ( 1 ) ;
}
2001-02-23 17:55:21 +00:00
2008-08-04 15:29:44 +00:00
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are consumed; a one or two digit
 * form is only taken when it is not followed by a further digit. Values
 * above 255 and multi-digit forms with a leading zero are rejected.
 *
 * Returns 0 if found and skipped (*@str is advanced), 1 otherwise
 * (*@str is left untouched)
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;

    /*
     * cur[1] is only read once cur[0] is known to be a digit, and cur[2]
     * only once cur[1] is a digit, so the terminating 0 is never passed.
     */
    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    if ((cur[1] < '0') || (cur[1] > '9')) {
        /* single digit: 0-9 */
        cur += 1;
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        /* two digits: 10-99, no leading zero */
        if (cur[0] == '0')
            return(1);
        cur += 2;
    } else if (cur[0] == '1') {
        /* 100-199 */
        cur += 3;
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        /* 200-249 */
        cur += 3;
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        /* 250-255: the upper bound applies to the third digit */
        cur += 3;
    } else {
        return(1);
    }

    *str = cur;
    return(0);
}
/**
 * xmlParse3986Host:
 * @uri:  pointer to an URI structure, may be NULL
 * @str:  the string to analyze
 *
 * Parse an host part and fills in the appropriate fields
 * of the @uri structure
 *
 * host          = IP-literal / IPv4address / reg-name
 * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
 * reg-name      = *( unreserved / pct-encoded / sub-delims )
 *
 * The content of a bracketed IP-literal is not validated, only the closing
 * bracket is looked for. An empty host is accepted and stored as NULL.
 * uri->authority is always reset to NULL.
 *
 * Returns 0 on success with *@str moved past the host, 1 if a bracketed
 * literal is not closed
 */
static int
xmlParse3986Host(xmlURIPtr uri, const char **str)
{
    const char *cur = *str;
    const char *host;

    host = cur;
    /*
     * IPv6 and future addressing scheme are enclosed between brackets
     */
    if (*cur == '[') {
        cur++;
        while ((*cur != ']') && (*cur != 0))
            cur++;
        if (*cur != ']')
            return(1);
        cur++;
        goto found;
    }
    /*
     * try to parse an IPv4
     */
    if (ISA_DIGIT(cur)) {
        if (xmlParse3986DecOctet(&cur) != 0)
            goto not_ipv4;
        if (*cur != '.')
            goto not_ipv4;
        cur++;
        if (xmlParse3986DecOctet(&cur) != 0)
            goto not_ipv4;
        if (*cur != '.')
            goto not_ipv4;
        cur++;
        if (xmlParse3986DecOctet(&cur) != 0)
            goto not_ipv4;
        if (*cur != '.')
            goto not_ipv4;
        cur++;
        if (xmlParse3986DecOctet(&cur) != 0)
            goto not_ipv4;
        /*
         * A dotted quad that is only the prefix of a longer registered
         * name (e.g. "1.2.3.4.example.com") is not an IPv4 address,
         * parse the whole thing as a reg-name instead of truncating it.
         */
        if (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
            ISA_SUB_DELIM(cur))
            goto not_ipv4;
        goto found;
not_ipv4:
        cur = *str;
    }
    /*
     * then this should be a hostname which can be empty
     */
    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
        NEXT(cur);
found:
    if (uri != NULL) {
        if (uri->authority != NULL)
            xmlFree(uri->authority);
        uri->authority = NULL;
        if (uri->server != NULL)
            xmlFree(uri->server);
        if (cur != host) {
            if (uri->cleanup & 2)
                uri->server = STRNDUP(host, cur - host);
            else
                uri->server = xmlURIUnescapeString(host, cur - host, NULL);
        } else
            uri->server = NULL;
    }
    *str = cur;
    return(0);
}
/**
 * xmlParse3986Authority:
 * @uri:  pointer to an URI structure
 * @str:  the string to analyze
 *
 * Parse an authority part and fills in the appropriate fields
 * of the @uri structure
 *
 * authority     = [ userinfo "@" ] host [ ":" port ]
 *
 * A ':' after the host must be followed by at least one digit, otherwise
 * the error from the port parser is returned.
 *
 * Returns 0 with *@str moved past the authority, or the error code of the
 * host or port parser
 */
static int
xmlParse3986Authority(xmlURIPtr uri, const char **str)
{
    const char *cur = *str;
    int ret;

    /*
     * the userinfo is optional: keep it only if it parsed and is
     * terminated by '@', otherwise restart from the beginning
     */
    ret = xmlParse3986Userinfo(uri, &cur);
    if ((ret == 0) && (*cur == '@'))
        cur++;
    else
        cur = *str;

    ret = xmlParse3986Host(uri, &cur);
    if (ret != 0)
        return(ret);

    if (*cur == ':') {
        cur++;
        ret = xmlParse3986Port(uri, &cur);
        if (ret != 0)
            return(ret);
    }

    *str = cur;
    return(0);
}
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character (0 for none)
 * @empty: allow an empty segment
 *
 * Skip over one path segment, stopping at the first character which is
 * not a pchar or which is equal to @forbid.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Returns 0 with *@str moved past the segment, or 1 if *@str does not
 * start with a pchar and @empty is 0. When *@str does not start with a
 * pchar and @empty is set, 0 is returned and *@str is unchanged.
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *end = *str;

    /* nothing that can belong to a segment at this position */
    if (!ISA_PCHAR(end))
        return(empty ? 0 : 1);

    for (;;) {
        if (!ISA_PCHAR(end))
            break;
        if (*end == forbid)
            break;
        NEXT(end);
    }
    *str = end;
    return(0);
}
/**
 * xmlParse3986PathAbEmpty:
 * @uri:  pointer to an URI structure, may be NULL
 * @str:  the string to analyze
 *
 * Parse a path which is either absolute or empty and fills in the
 * appropriate fields of the @uri structure
 *
 * path-abempty  = *( "/" segment )
 *
 * uri->path is set to NULL when the path is empty, to the verbatim text
 * when bit 2 of uri->cleanup is set and to the unescaped text otherwise.
 *
 * Returns 0 or the error code
 */
static int
xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
{
    const char *start = *str;
    const char *end = start;
    int ret;

    while (*end == '/') {
        end++;
        ret = xmlParse3986Segment(&end, 0, 1);
        if (ret != 0)
            return(ret);
    }

    if (uri != NULL) {
        char *path = NULL;

        if (uri->path != NULL)
            xmlFree(uri->path);
        if (start != end) {
            if (uri->cleanup & 2)
                path = STRNDUP(start, end - start);
            else
                path = xmlURIUnescapeString(start, end - start, NULL);
        }
        uri->path = path;
    }
    *str = end;
    return(0);
}
/**
 * xmlParse3986PathAbsolute:
 * @uri:  pointer to an URI structure, may be NULL
 * @str:  the string to analyze
 *
 * Parse an absolute path and fills in the appropriate fields
 * of the @uri structure
 *
 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
 *
 * A lone "/" not followed by a non-empty segment is accepted and yields
 * the path "/".
 *
 * Returns 0 on success, 1 if *@str does not start with '/', or the error
 * code of the segment parser
 */
static int
xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
{
    const char *start = *str;
    const char *end;
    int ret;

    if (*start != '/')
        return(1);
    end = start + 1;

    /* the further segments are only looked at after a non-empty first one */
    if (xmlParse3986Segment(&end, 0, 0) == 0) {
        while (*end == '/') {
            end++;
            ret = xmlParse3986Segment(&end, 0, 1);
            if (ret != 0)
                return(ret);
        }
    }

    if (uri != NULL) {
        char *path = NULL;

        if (uri->path != NULL)
            xmlFree(uri->path);
        if (end != start) {
            if (uri->cleanup & 2)
                path = STRNDUP(start, end - start);
            else
                path = xmlURIUnescapeString(start, end - start, NULL);
        }
        uri->path = path;
    }
    *str = end;
    return(0);
}
/**
2008-08-04 15:29:44 +00:00
* xmlParse3986PathRootless :
* @ uri : pointer to an URI structure
* @ str : the string to analyze
2001-02-23 17:55:21 +00:00
*
2008-08-04 15:29:44 +00:00
* Parse an path without root and fills in the appropriate fields
* of the @ uri structure
*
* path - rootless = segment - nz * ( " / " segment )
*
* Returns 0 or the error code
2001-02-23 17:55:21 +00:00
*/
2008-08-04 15:29:44 +00:00
static int
xmlParse3986PathRootless ( xmlURIPtr uri , const char * * str )
{
const char * cur ;
int ret ;
2001-02-23 17:55:21 +00:00
2008-08-04 15:29:44 +00:00
cur = * str ;
ret = xmlParse3986Segment ( & cur , 0 , 0 ) ;
if ( ret ! = 0 ) return ( ret ) ;
while ( * cur = = ' / ' ) {
cur + + ;
ret = xmlParse3986Segment ( & cur , 0 , 1 ) ;
if ( ret ! = 0 ) return ( ret ) ;
}
if ( uri ! = NULL ) {
if ( uri - > path ! = NULL ) xmlFree ( uri - > path ) ;
2009-10-02 17:29:48 +02:00
if ( cur ! = * str ) {
if ( uri - > cleanup & 2 )
uri - > path = STRNDUP ( * str , cur - * str ) ;
else
uri - > path = xmlURIUnescapeString ( * str , cur - * str , NULL ) ;
} else {
uri - > path = NULL ;
}
2001-02-23 17:55:21 +00:00
}
2008-08-04 15:29:44 +00:00
* str = cur ;
return ( 0 ) ;
2001-02-23 17:55:21 +00:00
}
/**
2008-08-04 15:29:44 +00:00
* xmlParse3986PathNoScheme :
* @ uri : pointer to an URI structure
* @ str : the string to analyze
2001-02-23 17:55:21 +00:00
*
2008-08-04 15:29:44 +00:00
* Parse an path which is not a scheme and fills in the appropriate fields
* of the @ uri structure
*
* path - noscheme = segment - nz - nc * ( " / " segment )
*
* Returns 0 or the error code
2001-02-23 17:55:21 +00:00
*/
2008-08-04 15:29:44 +00:00
static int
xmlParse3986PathNoScheme ( xmlURIPtr uri , const char * * str )
{
const char * cur ;
int ret ;
2001-02-23 17:55:21 +00:00
2008-08-04 15:29:44 +00:00
cur = * str ;
ret = xmlParse3986Segment ( & cur , ' : ' , 0 ) ;
if ( ret ! = 0 ) return ( ret ) ;
while ( * cur = = ' / ' ) {
cur + + ;
ret = xmlParse3986Segment ( & cur , 0 , 1 ) ;
if ( ret ! = 0 ) return ( ret ) ;
}
if ( uri ! = NULL ) {
if ( uri - > path ! = NULL ) xmlFree ( uri - > path ) ;
2009-10-02 17:29:48 +02:00
if ( cur ! = * str ) {
if ( uri - > cleanup & 2 )
uri - > path = STRNDUP ( * str , cur - * str ) ;
else
uri - > path = xmlURIUnescapeString ( * str , cur - * str , NULL ) ;
} else {
uri - > path = NULL ;
}
2008-08-04 15:29:44 +00:00
}
* str = cur ;
return ( 0 ) ;
}
/**
 * xmlParse3986HierPart:
 * @uri:  pointer to an URI structure, may be NULL
 * @str:  the string to analyze
 *
 * Parse an hierarchical part and fills in the appropriate fields
 * of the @uri structure
 *
 * hier-part     = "//" authority path-abempty
 *                / path-absolute
 *                / path-rootless
 *                / path-empty
 *
 * Returns 0 with *@str moved past the hierarchical part, or the error
 * code of the failing sub-parser
 */
static int
xmlParse3986HierPart(xmlURIPtr uri, const char **str)
{
    const char *cur;
    int ret;

    cur = *str;

    if ((*cur == '/') && (*(cur + 1) == '/')) {
        cur += 2;
        ret = xmlParse3986Authority(uri, &cur);
        if (ret != 0) return(ret);
        /*
         * An authority was present but its server is empty: record it
         * with the special port value -1. As for every other field
         * access in this parser, @uri may be NULL.
         */
        if ((uri != NULL) && (uri->server == NULL))
            uri->port = -1;
        ret = xmlParse3986PathAbEmpty(uri, &cur);
        if (ret != 0) return(ret);
        *str = cur;
        return(0);
    } else if (*cur == '/') {
        ret = xmlParse3986PathAbsolute(uri, &cur);
        if (ret != 0) return(ret);
    } else if (ISA_PCHAR(cur)) {
        ret = xmlParse3986PathRootless(uri, &cur);
        if (ret != 0) return(ret);
    } else {
        /* path-empty is effectively empty */
        if (uri != NULL) {
            if (uri->path != NULL) xmlFree(uri->path);
            uri->path = NULL;
        }
    }
    *str = cur;
    return (0);
}
/**
 * xmlParse3986RelativeRef:
 * @uri:  pointer to an URI structure
 * @str:  the string to analyze, dereferenced without a NULL check
 *
 * Parse a relative reference and fills in the appropriate fields
 * of the @uri structure
 *
 * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
 * relative-part = "//" authority path-abempty
 *               / path-absolute
 *               / path-noscheme
 *               / path-empty
 *
 * The whole string must be consumed: if anything is left after the
 * optional fragment, @uri is cleaned and an error is returned.
 *
 * Returns 0 or the error code
 */
static int
xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
    int ret;

    if (str[0] != '/') {
        if (ISA_PCHAR(str)) {
            ret = xmlParse3986PathNoScheme(uri, &str);
            if (ret != 0)
                return(ret);
        } else if (uri != NULL) {
            /* path-empty is effectively empty */
            if (uri->path != NULL)
                xmlFree(uri->path);
            uri->path = NULL;
        }
    } else if (str[1] == '/') {
        str += 2;
        ret = xmlParse3986Authority(uri, &str);
        if (ret != 0)
            return(ret);
        ret = xmlParse3986PathAbEmpty(uri, &str);
        if (ret != 0)
            return(ret);
    } else {
        ret = xmlParse3986PathAbsolute(uri, &str);
        if (ret != 0)
            return(ret);
    }

    if (*str == '?') {
        str++;
        ret = xmlParse3986Query(uri, &str);
        if (ret != 0)
            return(ret);
    }
    if (*str == '#') {
        str++;
        ret = xmlParse3986Fragment(uri, &str);
        if (ret != 0)
            return(ret);
    }

    if (*str == 0)
        return(0);

    /* trailing characters which do not belong to the reference */
    xmlCleanURI(uri);
    return(1);
}
2008-08-04 15:29:44 +00:00
2001-02-23 17:55:21 +00:00
/**
2008-08-04 15:29:44 +00:00
* xmlParse3986URI :
* @ uri : pointer to an URI structure
* @ str : the string to analyze
2001-02-23 17:55:21 +00:00
*
2008-08-04 15:29:44 +00:00
* Parse an URI string and fills in the appropriate fields
* of the @ uri structure
*
* scheme " : " hier - part [ " ? " query ] [ " # " fragment ]
*
* Returns 0 or the error code
2001-02-23 17:55:21 +00:00
*/
2008-08-04 15:29:44 +00:00
static int
xmlParse3986URI ( xmlURIPtr uri , const char * str ) {
int ret ;
2001-02-23 17:55:21 +00:00
2008-08-04 15:29:44 +00:00
ret = xmlParse3986Scheme ( uri , & str ) ;
if ( ret ! = 0 ) return ( ret ) ;
if ( * str ! = ' : ' ) {
return ( 1 ) ;
}
str + + ;
ret = xmlParse3986HierPart ( uri , & str ) ;
if ( ret ! = 0 ) return ( ret ) ;
if ( * str = = ' ? ' ) {
str + + ;
ret = xmlParse3986Query ( uri , & str ) ;
if ( ret ! = 0 ) return ( ret ) ;
}
if ( * str = = ' # ' ) {
str + + ;
ret = xmlParse3986Fragment ( uri , & str ) ;
if ( ret ! = 0 ) return ( ret ) ;
}
if ( * str ! = 0 ) {
xmlCleanURI ( uri ) ;
return ( 1 ) ;
}
return ( 0 ) ;
2001-02-23 17:55:21 +00:00
}
/**
2008-08-04 15:29:44 +00:00
* xmlParse3986URIReference :
* @ uri : pointer to an URI structure
* @ str : the string to analyze
2001-02-23 17:55:21 +00:00
*
2008-08-04 15:29:44 +00:00
* Parse an URI reference string and fills in the appropriate fields
* of the @ uri structure
2001-02-23 17:55:21 +00:00
*
2008-08-04 15:29:44 +00:00
* URI - reference = URI / relative - ref
2001-02-23 17:55:21 +00:00
*
2008-08-04 15:29:44 +00:00
* Returns 0 or the error code
2001-02-23 17:55:21 +00:00
*/
2008-08-04 15:29:44 +00:00
static int
xmlParse3986URIReference ( xmlURIPtr uri , const char * str ) {
int ret ;
2001-02-23 17:55:21 +00:00
2008-08-04 15:29:44 +00:00
if ( str = = NULL )
2001-02-23 17:55:21 +00:00
return ( - 1 ) ;
2008-08-04 15:29:44 +00:00
xmlCleanURI ( uri ) ;
2001-02-23 17:55:21 +00:00
/*
2008-08-04 15:29:44 +00:00
* Try first to parse absolute refs , then fallback to relative if
* it fails .
2001-02-23 17:55:21 +00:00
*/
2008-08-04 15:29:44 +00:00
ret = xmlParse3986URI ( uri , str ) ;
if ( ret ! = 0 ) {
xmlCleanURI ( uri ) ;
ret = xmlParse3986RelativeRef ( uri , str ) ;
if ( ret ! = 0 ) {
xmlCleanURI ( uri ) ;
return ( ret ) ;
2001-02-23 17:55:21 +00:00
}
}
2008-08-04 15:29:44 +00:00
return ( 0 ) ;
}
2001-02-23 17:55:21 +00:00
2008-08-04 15:29:44 +00:00
/**
 * xmlParseURI:
 * @str:  the URI string to analyze
 *
 * Parse an URI based on RFC 3986
 *
 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 *
 * Returns a newly built xmlURIPtr or NULL in case of error (@str is NULL,
 * the structure could not be allocated, or the string does not parse)
 */
xmlURIPtr
xmlParseURI(const char *str) {
    xmlURIPtr uri;

    if (str == NULL)
        return(NULL);

    uri = xmlCreateURI();
    if (uri == NULL)
        return(NULL);

    if (xmlParse3986URIReference(uri, str) != 0) {
        xmlFreeURI(uri);
        return(NULL);
    }
    return(uri);
}
2001-02-23 17:55:21 +00:00
2008-08-04 15:29:44 +00:00
/**
 * xmlParseURIReference:
 * @uri:  pointer to an URI structure
 * @str:  the string to analyze
 *
 * Parse an URI reference string based on RFC 3986 and fills in the
 * appropriate fields of the @uri structure. This is the public entry
 * point for xmlParse3986URIReference().
 *
 * URI-reference = URI / relative-ref
 *
 * Returns 0 or the error code
 */
int
xmlParseURIReference(xmlURIPtr uri, const char *str) {
    int ret;

    ret = xmlParse3986URIReference(uri, str);
    return(ret);
}
2001-02-23 17:55:21 +00:00
2008-08-04 15:29:44 +00:00
/**
* xmlParseURIRaw :
* @ str : the URI string to analyze
* @ raw : if 1 unescaping of URI pieces are disabled
*
* Parse an URI but allows to keep intact the original fragments .
*
* URI - reference = URI / relative - ref
*
* Returns a newly built xmlURIPtr or NULL in case of error
*/
xmlURIPtr
xmlParseURIRaw ( const char * str , int raw ) {
xmlURIPtr uri ;
int ret ;
2001-02-23 17:55:21 +00:00
2008-08-04 15:29:44 +00:00
if ( str = = NULL )
return ( NULL ) ;
uri = xmlCreateURI ( ) ;
if ( uri ! = NULL ) {
if ( raw ) {
uri - > cleanup | = 2 ;
}
ret = xmlParseURIReference ( uri , str ) ;
if ( ret ) {
xmlFreeURI ( uri ) ;
return ( NULL ) ;
}
}
return ( uri ) ;
2001-02-23 17:55:21 +00:00
}
2008-08-04 15:29:44 +00:00
/************************************************************************
* *
* Generic URI structure functions *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
* xmlCreateURI :
*
* Simply creates an empty xmlURI
*
* Returns the new structure or NULL in case of error
*/
xmlURIPtr
xmlCreateURI ( void ) {
xmlURIPtr ret ;
ret = ( xmlURIPtr ) xmlMalloc ( sizeof ( xmlURI ) ) ;
if ( ret = = NULL ) {
2012-07-24 11:44:23 +08:00
xmlURIErrMemory ( " creating URI structure \n " ) ;
2008-08-04 15:29:44 +00:00
return ( NULL ) ;
}
memset ( ret , 0 , sizeof ( xmlURI ) ) ;
return ( ret ) ;
2004-05-09 02:58:44 +00:00
}
2012-07-24 11:44:23 +08:00
/**
 * xmlSaveUriRealloc:
 * @ret:  the buffer to grow
 * @max:  pointer to the current capacity, updated on success
 *
 * Function to handle properly a reallocation when saving an URI.
 * Also imposes some limit on the length of an URI string output: growing
 * is refused once *@max exceeds MAX_URI_LENGTH. Otherwise the capacity is
 * doubled (one more byte is actually requested from xmlRealloc()).
 *
 * Returns the reallocated buffer, or NULL after raising a memory error.
 * This function does not free @ret and leaves *@max unchanged on failure.
 */
static xmlChar *
xmlSaveUriRealloc(xmlChar *ret, int *max) {
    xmlChar *grown;
    int capacity;

    if (*max > MAX_URI_LENGTH) {
        xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
        return(NULL);
    }

    capacity = *max * 2;
    grown = (xmlChar *) xmlRealloc(ret, (capacity + 1));
    if (grown != NULL) {
        *max = capacity;
        return(grown);
    }

    xmlURIErrMemory("saving URI\n");
    return(NULL);
}
2001-02-23 17:55:21 +00:00
/**
2008-08-04 15:29:44 +00:00
* xmlSaveUri :
* @ uri : pointer to an xmlURI
2001-02-23 17:55:21 +00:00
*
2008-08-04 15:29:44 +00:00
* Save the URI as an escaped string
2001-02-23 17:55:21 +00:00
*
2008-08-04 15:29:44 +00:00
* Returns a new string ( to be deallocated by caller )
2001-02-23 17:55:21 +00:00
*/
2008-08-04 15:29:44 +00:00
xmlChar *
xmlSaveUri ( xmlURIPtr uri ) {
xmlChar * ret = NULL ;
xmlChar * temp ;
const char * p ;
int len ;
int max ;
2001-02-23 17:55:21 +00:00
2008-08-04 15:29:44 +00:00
if ( uri = = NULL ) return ( NULL ) ;
max = 80 ;
ret = ( xmlChar * ) xmlMallocAtomic ( ( max + 1 ) * sizeof ( xmlChar ) ) ;
if ( ret = = NULL ) {
2012-07-24 11:44:23 +08:00
xmlURIErrMemory ( " saving URI \n " ) ;
2001-02-23 17:55:21 +00:00
return ( NULL ) ;
2008-08-04 15:29:44 +00:00
}
len = 0 ;
2001-02-23 17:55:21 +00:00
2008-08-04 15:29:44 +00:00
if ( uri - > scheme ! = NULL ) {
p = uri - > scheme ;
while ( * p ! = 0 ) {
if ( len > = max ) {
2012-07-24 11:44:23 +08:00
temp = xmlSaveUriRealloc ( ret , & max ) ;
if ( temp = = NULL ) goto mem_error ;
2008-08-04 15:29:44 +00:00
ret = temp ;
}
ret [ len + + ] = * p + + ;
2001-02-23 17:55:21 +00:00
}
2008-08-04 15:29:44 +00:00
if ( len > = max ) {
2012-07-24 11:44:23 +08:00
temp = xmlSaveUriRealloc ( ret , & max ) ;
if ( temp = = NULL ) goto mem_error ;
ret = temp ;
2008-08-04 15:29:44 +00:00
}
ret [ len + + ] = ' : ' ;
}
if ( uri - > opaque ! = NULL ) {
p = uri - > opaque ;
while ( * p ! = 0 ) {
if ( len + 3 > = max ) {
2012-07-24 11:44:23 +08:00
temp = xmlSaveUriRealloc ( ret , & max ) ;
if ( temp = = NULL ) goto mem_error ;
ret = temp ;
2008-08-04 15:29:44 +00:00
}
if ( IS_RESERVED ( * ( p ) ) | | IS_UNRESERVED ( * ( p ) ) )
ret [ len + + ] = * p + + ;
else {
int val = * ( unsigned char * ) p + + ;
int hi = val / 0x10 , lo = val % 0x10 ;
ret [ len + + ] = ' % ' ;
ret [ len + + ] = hi + ( hi > 9 ? ' A ' - 10 : ' 0 ' ) ;
ret [ len + + ] = lo + ( lo > 9 ? ' A ' - 10 : ' 0 ' ) ;
}
}
} else {
2014-10-03 19:22:39 +08:00
if ( ( uri - > server ! = NULL ) | | ( uri - > port = = - 1 ) ) {
2008-08-04 15:29:44 +00:00
if ( len + 3 > = max ) {
2012-07-24 11:44:23 +08:00
temp = xmlSaveUriRealloc ( ret , & max ) ;
if ( temp = = NULL ) goto mem_error ;
ret = temp ;
2008-08-04 15:29:44 +00:00
}
ret [ len + + ] = ' / ' ;
ret [ len + + ] = ' / ' ;
if ( uri - > user ! = NULL ) {
p = uri - > user ;
while ( * p ! = 0 ) {
if ( len + 3 > = max ) {
2012-07-24 11:44:23 +08:00
temp = xmlSaveUriRealloc ( ret , & max ) ;
if ( temp = = NULL ) goto mem_error ;
ret = temp ;
2008-08-04 15:29:44 +00:00
}
if ( ( IS_UNRESERVED ( * ( p ) ) ) | |
( ( * ( p ) = = ' ; ' ) ) | | ( ( * ( p ) = = ' : ' ) ) | |
( ( * ( p ) = = ' & ' ) ) | | ( ( * ( p ) = = ' = ' ) ) | |
( ( * ( p ) = = ' + ' ) ) | | ( ( * ( p ) = = ' $ ' ) ) | |
( ( * ( p ) = = ' , ' ) ) )
ret [ len + + ] = * p + + ;
else {
int val = * ( unsigned char * ) p + + ;
int hi = val / 0x10 , lo = val % 0x10 ;
ret [ len + + ] = ' % ' ;
ret [ len + + ] = hi + ( hi > 9 ? ' A ' - 10 : ' 0 ' ) ;
ret [ len + + ] = lo + ( lo > 9 ? ' A ' - 10 : ' 0 ' ) ;
}
}
if ( len + 3 > = max ) {
2012-07-24 11:44:23 +08:00
temp = xmlSaveUriRealloc ( ret , & max ) ;
if ( temp = = NULL ) goto mem_error ;
ret = temp ;
2008-08-04 15:29:44 +00:00
}
ret [ len + + ] = ' @ ' ;
}
2014-10-03 19:22:39 +08:00
if ( uri - > server ! = NULL ) {
p = uri - > server ;
while ( * p ! = 0 ) {
if ( len > = max ) {
temp = xmlSaveUriRealloc ( ret , & max ) ;
if ( temp = = NULL ) goto mem_error ;
ret = temp ;
}
ret [ len + + ] = * p + + ;
2008-08-04 15:29:44 +00:00
}
2014-10-03 19:22:39 +08:00
if ( uri - > port > 0 ) {
if ( len + 10 > = max ) {
temp = xmlSaveUriRealloc ( ret , & max ) ;
if ( temp = = NULL ) goto mem_error ;
ret = temp ;
}
len + = snprintf ( ( char * ) & ret [ len ] , max - len , " :%d " , uri - > port ) ;
2008-08-04 15:29:44 +00:00
}
}
} else if ( uri - > authority ! = NULL ) {
if ( len + 3 > = max ) {
2012-07-24 11:44:23 +08:00
temp = xmlSaveUriRealloc ( ret , & max ) ;
if ( temp = = NULL ) goto mem_error ;
ret = temp ;
2008-08-04 15:29:44 +00:00
}
ret [ len + + ] = ' / ' ;
ret [ len + + ] = ' / ' ;
p = uri - > authority ;
while ( * p ! = 0 ) {
if ( len + 3 > = max ) {
2012-07-24 11:44:23 +08:00
temp = xmlSaveUriRealloc ( ret , & max ) ;
if ( temp = = NULL ) goto mem_error ;
ret = temp ;
2008-08-04 15:29:44 +00:00
}
if ( ( IS_UNRESERVED ( * ( p ) ) ) | |
( ( * ( p ) = = ' $ ' ) ) | | ( ( * ( p ) = = ' , ' ) ) | | ( ( * ( p ) = = ' ; ' ) ) | |
( ( * ( p ) = = ' : ' ) ) | | ( ( * ( p ) = = ' @ ' ) ) | | ( ( * ( p ) = = ' & ' ) ) | |
( ( * ( p ) = = ' = ' ) ) | | ( ( * ( p ) = = ' + ' ) ) )
ret [ len + + ] = * p + + ;
else {
int val = * ( unsigned char * ) p + + ;
int hi = val / 0x10 , lo = val % 0x10 ;
ret [ len + + ] = ' % ' ;
ret [ len + + ] = hi + ( hi > 9 ? ' A ' - 10 : ' 0 ' ) ;
ret [ len + + ] = lo + ( lo > 9 ? ' A ' - 10 : ' 0 ' ) ;
}
}
} else if ( uri - > scheme ! = NULL ) {
if ( len + 3 > = max ) {
2012-07-24 11:44:23 +08:00
temp = xmlSaveUriRealloc ( ret , & max ) ;
if ( temp = = NULL ) goto mem_error ;
ret = temp ;
2008-08-04 15:29:44 +00:00
}
2001-02-23 17:55:21 +00:00
}
2008-08-04 15:29:44 +00:00
if ( uri - > path ! = NULL ) {
p = uri - > path ;
/*
* the colon in file : ///d: should not be escaped or
* Windows accesses fail later .
*/
if ( ( uri - > scheme ! = NULL ) & &
( p [ 0 ] = = ' / ' ) & &
( ( ( p [ 1 ] > = ' a ' ) & & ( p [ 1 ] < = ' z ' ) ) | |
( ( p [ 1 ] > = ' A ' ) & & ( p [ 1 ] < = ' Z ' ) ) ) & &
( p [ 2 ] = = ' : ' ) & &
( xmlStrEqual ( BAD_CAST uri - > scheme , BAD_CAST " file " ) ) ) {
if ( len + 3 > = max ) {
2012-07-24 11:44:23 +08:00
temp = xmlSaveUriRealloc ( ret , & max ) ;
if ( temp = = NULL ) goto mem_error ;
ret = temp ;
2008-08-04 15:29:44 +00:00
}
ret [ len + + ] = * p + + ;
ret [ len + + ] = * p + + ;
ret [ len + + ] = * p + + ;
}
while ( * p ! = 0 ) {
if ( len + 3 > = max ) {
2012-07-24 11:44:23 +08:00
temp = xmlSaveUriRealloc ( ret , & max ) ;
if ( temp = = NULL ) goto mem_error ;
ret = temp ;
2008-08-04 15:29:44 +00:00
}
if ( ( IS_UNRESERVED ( * ( p ) ) ) | | ( ( * ( p ) = = ' / ' ) ) | |
( ( * ( p ) = = ' ; ' ) ) | | ( ( * ( p ) = = ' @ ' ) ) | | ( ( * ( p ) = = ' & ' ) ) | |
( ( * ( p ) = = ' = ' ) ) | | ( ( * ( p ) = = ' + ' ) ) | | ( ( * ( p ) = = ' $ ' ) ) | |
( ( * ( p ) = = ' , ' ) ) )
ret [ len + + ] = * p + + ;
else {
int val = * ( unsigned char * ) p + + ;
int hi = val / 0x10 , lo = val % 0x10 ;
ret [ len + + ] = ' % ' ;
ret [ len + + ] = hi + ( hi > 9 ? ' A ' - 10 : ' 0 ' ) ;
ret [ len + + ] = lo + ( lo > 9 ? ' A ' - 10 : ' 0 ' ) ;
}
2001-02-23 17:55:21 +00:00
}
}
2008-08-04 15:29:44 +00:00
if ( uri - > query_raw ! = NULL ) {
if ( len + 1 > = max ) {
2012-07-24 11:44:23 +08:00
temp = xmlSaveUriRealloc ( ret , & max ) ;
if ( temp = = NULL ) goto mem_error ;
ret = temp ;
2008-08-04 15:29:44 +00:00
}
ret [ len + + ] = ' ? ' ;
p = uri - > query_raw ;
while ( * p ! = 0 ) {
if ( len + 1 > = max ) {
2012-07-24 11:44:23 +08:00
temp = xmlSaveUriRealloc ( ret , & max ) ;
if ( temp = = NULL ) goto mem_error ;
ret = temp ;
2008-08-04 15:29:44 +00:00
}
ret [ len + + ] = * p + + ;
}
} else if ( uri - > query ! = NULL ) {
if ( len + 3 > = max ) {
2012-07-24 11:44:23 +08:00
temp = xmlSaveUriRealloc ( ret , & max ) ;
if ( temp = = NULL ) goto mem_error ;
ret = temp ;
2008-08-04 15:29:44 +00:00
}
ret [ len + + ] = ' ? ' ;
p = uri - > query ;
while ( * p ! = 0 ) {
if ( len + 3 > = max ) {
2012-07-24 11:44:23 +08:00
temp = xmlSaveUriRealloc ( ret , & max ) ;
if ( temp = = NULL ) goto mem_error ;
ret = temp ;
2008-08-04 15:29:44 +00:00
}
2012-07-24 11:44:23 +08:00
if ( ( IS_UNRESERVED ( * ( p ) ) ) | | ( IS_RESERVED ( * ( p ) ) ) )
2008-08-04 15:29:44 +00:00
ret [ len + + ] = * p + + ;
else {
int val = * ( unsigned char * ) p + + ;
int hi = val / 0x10 , lo = val % 0x10 ;
ret [ len + + ] = ' % ' ;
ret [ len + + ] = hi + ( hi > 9 ? ' A ' - 10 : ' 0 ' ) ;
ret [ len + + ] = lo + ( lo > 9 ? ' A ' - 10 : ' 0 ' ) ;
}
}
2001-02-23 17:55:21 +00:00
}
2001-10-30 09:47:47 +00:00
}
2008-08-04 15:29:44 +00:00
if ( uri - > fragment ! = NULL ) {
if ( len + 3 > = max ) {
2012-07-24 11:44:23 +08:00
temp = xmlSaveUriRealloc ( ret , & max ) ;
if ( temp = = NULL ) goto mem_error ;
ret = temp ;
2008-08-04 15:29:44 +00:00
}
ret [ len + + ] = ' # ' ;
p = uri - > fragment ;
while ( * p ! = 0 ) {
if ( len + 3 > = max ) {
2012-07-24 11:44:23 +08:00
temp = xmlSaveUriRealloc ( ret , & max ) ;
if ( temp = = NULL ) goto mem_error ;
ret = temp ;
2008-08-04 15:29:44 +00:00
}
2012-07-24 11:44:23 +08:00
if ( ( IS_UNRESERVED ( * ( p ) ) ) | | ( IS_RESERVED ( * ( p ) ) ) )
2008-08-04 15:29:44 +00:00
ret [ len + + ] = * p + + ;
else {
int val = * ( unsigned char * ) p + + ;
int hi = val / 0x10 , lo = val % 0x10 ;
ret [ len + + ] = ' % ' ;
ret [ len + + ] = hi + ( hi > 9 ? ' A ' - 10 : ' 0 ' ) ;
ret [ len + + ] = lo + ( lo > 9 ? ' A ' - 10 : ' 0 ' ) ;
}
}
2001-02-23 17:55:21 +00:00
}
2008-08-04 15:29:44 +00:00
if ( len > = max ) {
2012-07-24 11:44:23 +08:00
temp = xmlSaveUriRealloc ( ret , & max ) ;
if ( temp = = NULL ) goto mem_error ;
ret = temp ;
2001-02-23 17:55:21 +00:00
}
2009-09-05 14:52:55 +02:00
ret [ len ] = 0 ;
2008-08-04 15:29:44 +00:00
return ( ret ) ;
2012-07-24 11:44:23 +08:00
mem_error :
xmlFree ( ret ) ;
return ( NULL ) ;
2001-02-23 17:55:21 +00:00
}
/**
2008-08-04 15:29:44 +00:00
* xmlPrintURI :
* @ stream : a FILE * for the output
* @ uri : pointer to an xmlURI
2001-02-23 17:55:21 +00:00
*
2008-08-04 15:29:44 +00:00
* Prints the URI in the stream @ stream .
2001-02-23 17:55:21 +00:00
*/
2008-08-04 15:29:44 +00:00
void
xmlPrintURI ( FILE * stream , xmlURIPtr uri ) {
xmlChar * out ;
2001-02-23 17:55:21 +00:00
2008-08-04 15:29:44 +00:00
out = xmlSaveUri ( uri ) ;
if ( out ! = NULL ) {
fprintf ( stream , " %s " , ( char * ) out ) ;
xmlFree ( out ) ;
2001-02-23 17:55:21 +00:00
}
}
/**
2008-08-04 15:29:44 +00:00
* xmlCleanURI :
* @ uri : pointer to an xmlURI
2001-02-23 17:55:21 +00:00
*
2008-08-04 15:29:44 +00:00
* Make sure the xmlURI struct is free of content
2001-02-23 17:55:21 +00:00
*/
2008-08-04 15:29:44 +00:00
static void
xmlCleanURI ( xmlURIPtr uri ) {
if ( uri = = NULL ) return ;
2001-02-23 17:55:21 +00:00
2008-08-04 15:29:44 +00:00
if ( uri - > scheme ! = NULL ) xmlFree ( uri - > scheme ) ;
uri - > scheme = NULL ;
if ( uri - > server ! = NULL ) xmlFree ( uri - > server ) ;
uri - > server = NULL ;
if ( uri - > user ! = NULL ) xmlFree ( uri - > user ) ;
uri - > user = NULL ;
if ( uri - > path ! = NULL ) xmlFree ( uri - > path ) ;
uri - > path = NULL ;
if ( uri - > fragment ! = NULL ) xmlFree ( uri - > fragment ) ;
uri - > fragment = NULL ;
if ( uri - > opaque ! = NULL ) xmlFree ( uri - > opaque ) ;
uri - > opaque = NULL ;
if ( uri - > authority ! = NULL ) xmlFree ( uri - > authority ) ;
uri - > authority = NULL ;
if ( uri - > query ! = NULL ) xmlFree ( uri - > query ) ;
uri - > query = NULL ;
if ( uri - > query_raw ! = NULL ) xmlFree ( uri - > query_raw ) ;
uri - > query_raw = NULL ;
}
2001-10-30 09:47:47 +00:00
2008-08-04 15:29:44 +00:00
/**
 * xmlFreeURI:
 * @uri:  pointer to an xmlURI
 *
 * Free up the xmlURI struct: all the string members are released first,
 * then the structure itself. A NULL @uri is ignored.
 */
void
xmlFreeURI(xmlURIPtr uri) {
    if (uri == NULL)
        return;

    /* releases scheme, server, user, path, fragment, opaque, authority,
     * query and query_raw, in that order */
    xmlCleanURI(uri);
    xmlFree(uri);
}
2008-08-04 15:29:44 +00:00
/************************************************************************
* *
* Helper functions *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2001-02-23 17:55:21 +00:00
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of '/' between segments are
 * collapsed to a single '/' along the way; leading '/' characters are
 * left alone.
 *
 * Normalization occurs directly on the string, no new allocation is done
 *
 * Returns 0, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *src, *dst;

    if (path == NULL)
        return(-1);

    /*
     * Step over the leading '/' characters to reach the first non-empty
     * segment. A path made only of slashes needs no work.
     */
    for (src = path; *src == '/'; src++)
        ;
    if (*src == '\0')
        return(0);

    /* Everything before the first segment is kept untouched. */
    dst = src;

    /*
     * First pass, cases (c) and (d): walk the segments, copying the ones
     * to keep down to dst.
     */
    while (*src != '\0') {
        if (*src == '.') {
            /*
             * c) All occurrences of "./", where "." is a complete path
             *    segment, are removed from the buffer string.
             */
            if (src[1] == '/') {
                src += 2;
                /* "//" normalization is done at this point too */
                while (*src == '/')
                    src++;
                continue;
            }

            /*
             * d) If the buffer string ends with "." as a complete path
             *    segment, that "." is removed.
             */
            if (src[1] == '\0')
                break;
        }

        /* Any other segment is kept. */
        while (*src != '/') {
            if (*src == '\0')
                goto done_cd;
            *dst++ = *src++;
        }

        /* normalize "//": keep only the last slash of a run */
        while ((src[0] == '/') && (src[1] == '/'))
            src++;

        *dst++ = *src++;
    }
done_cd:
    *dst = '\0';

    /* Restart from the first segment for the second pass. */
    for (src = path; *src == '/'; src++)
        ;
    if (*src == '\0')
        return(0);

    /*
     * Second pass, cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * The "iterative" clause of (e) is honoured by compacting the string
     * as soon as something is removed, so a single position pointer is
     * enough here.
     */
    for (;;) {
        char *next, *from, *to;

        /* src is the first character of the segment under examination;
         * look for the end of that segment. */
        next = src;
        while ((*next != '/') && (*next != '\0'))
            next++;

        /* The last segment cannot match (e) or (f): two segments are
         * needed. */
        if (*next == '\0')
            break;

        /* next now designates the following segment */
        next++;

        /* Keep this segment if it is ".." itself, or if the following
         * segment is not "..", and move on to the next one. */
        if (((src[0] == '.') && (src[1] == '.') && (next == src + 3)) ||
            !((next[0] == '.') && (next[1] == '.') &&
              ((next[2] == '/') || (next[2] == '\0')))) {
            src = next;
            continue;
        }

        /* "<segment>/.." sits at the very end of the buffer: cut here. */
        if (next[2] == '\0') {
            *src = '\0';
            break;
        }

        /*
         * Drop "<segment>/../" by moving the tail of the string down.
         * Source and destination overlap, so strcpy() must not be used.
         */
        to = src;
        from = next + 3;
        do {
            *to = *from++;
        } while (*to++ != '\0');

        /* With no previous segment, simply go on from the same place. */
        next = src;
        while ((next > path) && ((--next)[0] == '/'))
            ;
        if (next == path)
            continue;

        /*
         * next is on the last character of the previous segment; back up
         * to its start and examine it again. This is what makes inputs
         * like "foo/bar/../.." collapse completely: once "bar/.." is
         * gone, "foo/.." has to be reconsidered.
         */
        src = next;
        while ((src > path) && (src[-1] != '/'))
            src--;
    }
    *dst = '\0';

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * They are discarded from the final path here, for absolute paths
     * only.
     */
    if (path[0] == '/') {
        src = path;
        while ((src[0] == '/') && (src[1] == '.') && (src[2] == '.')
               && ((src[3] == '/') || (src[3] == '\0')))
            src += 3;

        if (src != path) {
            dst = path;
            while (*src != '\0')
                *dst++ = *src++;
            *dst = 0;
        }
    }

    return(0);
}
2008-08-04 15:29:44 +00:00
/*
 * is_hex:
 * Tells whether c is one of 0-9, a-f or A-F.
 * Returns 1 if it is, 0 otherwise.
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. The
 * output is a direct unsigned char translation of %XX values (no encoding).
 * A '%' that is not followed by two hexadecimal digits within the
 * remaining @len bytes is copied as is.
 * Note that the length of the result can only be smaller or same size as
 * the input string; when @target is provided the result is written there
 * and up to @len + 1 bytes are stored.
 *
 * Returns a copy of the string, but unescaped (@target itself when it was
 * provided, a new allocation otherwise), will return NULL only in case
 * of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    const char *src;
    char *result, *dst;

    if (str == NULL)
        return(NULL);
    if (len <= 0) len = strlen(str);
    if (len < 0) return(NULL);

    result = target;
    if (result == NULL) {
        result = (char *) xmlMallocAtomic(len + 1);
        if (result == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    src = str;
    dst = result;
    while (len > 0) {
        if ((len > 2) && (src[0] == '%') &&
            (is_hex(src[1])) && (is_hex(src[2]))) {
            int value = 0;
            int i;

            /* both digits were validated by is_hex() above */
            for (i = 1; i <= 2; i++) {
                char digit = src[i];

                value *= 16;
                if ((digit >= '0') && (digit <= '9'))
                    value += digit - '0';
                else if ((digit >= 'a') && (digit <= 'f'))
                    value += (digit - 'a') + 10;
                else
                    value += (digit - 'A') + 10;
            }
            *dst++ = value;
            src += 3;
            len -= 3;
        } else {
            *dst++ = *src++;
            len--;
        }
    }
    *dst = 0;
    return(result);
}
/**
2008-08-04 15:29:44 +00:00
* xmlURIEscapeStr :
* @ str : string to escape
* @ list : exception list string of chars not to escape
2001-02-23 17:55:21 +00:00
*
2008-08-04 15:29:44 +00:00
* This routine escapes a string to hex , ignoring reserved characters ( a - z )
* and the characters in the exception list .
2001-02-23 17:55:21 +00:00
*
2008-08-04 15:29:44 +00:00
* Returns a new escaped string or NULL in case of error .
2001-02-23 17:55:21 +00:00
*/
2008-08-04 15:29:44 +00:00
xmlChar *
xmlURIEscapeStr ( const xmlChar * str , const xmlChar * list ) {
xmlChar * ret , ch ;
xmlChar * temp ;
const xmlChar * in ;
2012-07-24 11:44:23 +08:00
int len , out ;
2001-02-23 17:55:21 +00:00
if ( str = = NULL )
2008-08-04 15:29:44 +00:00
return ( NULL ) ;
if ( str [ 0 ] = = 0 )
return ( xmlStrdup ( str ) ) ;
len = xmlStrlen ( str ) ;
if ( ! ( len > 0 ) ) return ( NULL ) ;
2001-02-23 17:55:21 +00:00
2008-08-04 15:29:44 +00:00
len + = 20 ;
ret = ( xmlChar * ) xmlMallocAtomic ( len ) ;
if ( ret = = NULL ) {
2012-07-24 11:44:23 +08:00
xmlURIErrMemory ( " escaping URI value \n " ) ;
2008-08-04 15:29:44 +00:00
return ( NULL ) ;
2001-02-23 17:55:21 +00:00
}
2008-08-04 15:29:44 +00:00
in = ( const xmlChar * ) str ;
out = 0 ;
while ( * in ! = 0 ) {
if ( len - out < = 3 ) {
2012-07-24 11:44:23 +08:00
temp = xmlSaveUriRealloc ( ret , & len ) ;
2008-08-04 15:29:44 +00:00
if ( temp = = NULL ) {
2012-07-24 11:44:23 +08:00
xmlURIErrMemory ( " escaping URI value \n " ) ;
2008-08-04 15:29:44 +00:00
xmlFree ( ret ) ;
return ( NULL ) ;
}
ret = temp ;
}
ch = * in ;
if ( ( ch ! = ' @ ' ) & & ( ! IS_UNRESERVED ( ch ) ) & & ( ! xmlStrchr ( list , ch ) ) ) {
unsigned char val ;
ret [ out + + ] = ' % ' ;
val = ch > > 4 ;
if ( val < = 9 )
ret [ out + + ] = ' 0 ' + val ;
else
ret [ out + + ] = ' A ' + val - 0xA ;
val = ch & 0xF ;
if ( val < = 9 )
ret [ out + + ] = ' 0 ' + val ;
else
ret [ out + + ] = ' A ' + val - 0xA ;
in + + ;
} else {
ret [ out + + ] = * in + + ;
}
2001-02-23 17:55:21 +00:00
}
2008-08-04 15:29:44 +00:00
ret [ out ] = 0 ;
return ( ret ) ;
2001-02-23 17:55:21 +00:00
}
/**
2008-08-04 15:29:44 +00:00
* xmlURIEscape :
* @ str : the string of the URI to escape
2001-02-23 17:55:21 +00:00
*
2008-08-04 15:29:44 +00:00
* Escaping routine , does not do validity checks !
* It will try to escape the chars needing this , but this is heuristic
* based it ' s impossible to be sure .
2001-02-23 17:55:21 +00:00
*
2008-08-04 15:29:44 +00:00
* Returns an copy of the string , but escaped
*
* 25 May 2001
* Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
* according to RFC2396 .
* - Carl Douglas
2001-02-23 17:55:21 +00:00
*/
2008-08-04 15:29:44 +00:00
xmlChar *
xmlURIEscape ( const xmlChar * str )
{
xmlChar * ret , * segment = NULL ;
2001-02-23 17:55:21 +00:00
xmlURIPtr uri ;
2008-08-04 15:29:44 +00:00
int ret2 ;
# define NULLCHK(p) if(!p) { \
2012-07-24 11:44:23 +08:00
xmlURIErrMemory ( " escaping URI value \n " ) ; \
xmlFreeURI ( uri ) ; \
return NULL ; } \
2001-02-23 17:55:21 +00:00
if ( str = = NULL )
2008-08-04 15:29:44 +00:00
return ( NULL ) ;
2001-02-23 17:55:21 +00:00
uri = xmlCreateURI ( ) ;
if ( uri ! = NULL ) {
2008-08-04 15:29:44 +00:00
/*
* Allow escaping errors in the unescaped form
*/
uri - > cleanup = 1 ;
ret2 = xmlParseURIReference ( uri , ( const char * ) str ) ;
if ( ret2 ) {
xmlFreeURI ( uri ) ;
return ( NULL ) ;
}
2001-02-23 17:55:21 +00:00
}
2008-08-04 15:29:44 +00:00
if ( ! uri )
return NULL ;
2005-08-07 10:46:19 +00:00
2008-08-04 15:29:44 +00:00
ret = NULL ;
if ( uri - > scheme ) {
segment = xmlURIEscapeStr ( BAD_CAST uri - > scheme , BAD_CAST " +-. " ) ;
NULLCHK ( segment )
ret = xmlStrcat ( ret , segment ) ;
ret = xmlStrcat ( ret , BAD_CAST " : " ) ;
xmlFree ( segment ) ;
2005-08-07 10:46:19 +00:00
}
2008-08-04 15:29:44 +00:00
if ( uri - > authority ) {
segment =
xmlURIEscapeStr ( BAD_CAST uri - > authority , BAD_CAST " /?;:@ " ) ;
NULLCHK ( segment )
ret = xmlStrcat ( ret , BAD_CAST " // " ) ;
ret = xmlStrcat ( ret , segment ) ;
xmlFree ( segment ) ;
}
if ( uri - > user ) {
segment = xmlURIEscapeStr ( BAD_CAST uri - > user , BAD_CAST " ;:&=+$, " ) ;
NULLCHK ( segment )
2012-07-24 11:44:23 +08:00
ret = xmlStrcat ( ret , BAD_CAST " // " ) ;
2008-08-04 15:29:44 +00:00
ret = xmlStrcat ( ret , segment ) ;
ret = xmlStrcat ( ret , BAD_CAST " @ " ) ;
xmlFree ( segment ) ;
}
if ( uri - > server ) {
segment = xmlURIEscapeStr ( BAD_CAST uri - > server , BAD_CAST " /?;:@ " ) ;
NULLCHK ( segment )
if ( uri - > user = = NULL )
ret = xmlStrcat ( ret , BAD_CAST " // " ) ;
ret = xmlStrcat ( ret , segment ) ;
xmlFree ( segment ) ;
}
if ( uri - > port ) {
xmlChar port [ 10 ] ;
snprintf ( ( char * ) port , 10 , " %d " , uri - > port ) ;
ret = xmlStrcat ( ret , BAD_CAST " : " ) ;
ret = xmlStrcat ( ret , port ) ;
}
if ( uri - > path ) {
segment =
xmlURIEscapeStr ( BAD_CAST uri - > path , BAD_CAST " :@&=+$,/?; " ) ;
NULLCHK ( segment )
ret = xmlStrcat ( ret , segment ) ;
xmlFree ( segment ) ;
}
if ( uri - > query_raw ) {
ret = xmlStrcat ( ret , BAD_CAST " ? " ) ;
ret = xmlStrcat ( ret , BAD_CAST uri - > query_raw ) ;
}
else if ( uri - > query ) {
segment =
xmlURIEscapeStr ( BAD_CAST uri - > query , BAD_CAST " ;/?:@&=+,$ " ) ;
NULLCHK ( segment )
ret = xmlStrcat ( ret , BAD_CAST " ? " ) ;
ret = xmlStrcat ( ret , segment ) ;
xmlFree ( segment ) ;
}
if ( uri - > opaque ) {
segment = xmlURIEscapeStr ( BAD_CAST uri - > opaque , BAD_CAST " " ) ;
NULLCHK ( segment )
ret = xmlStrcat ( ret , segment ) ;
xmlFree ( segment ) ;
}
if ( uri - > fragment ) {
segment = xmlURIEscapeStr ( BAD_CAST uri - > fragment , BAD_CAST " # " ) ;
NULLCHK ( segment )
ret = xmlStrcat ( ret , BAD_CAST " # " ) ;
ret = xmlStrcat ( ret , segment ) ;
xmlFree ( segment ) ;
}
xmlFreeURI ( uri ) ;
# undef NULLCHK
return ( ret ) ;
2005-08-07 10:46:19 +00:00
}
2001-02-23 17:55:21 +00:00
/************************************************************************
* *
* Public functions *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/**
* xmlBuildURI :
* @ URI : the URI instance found in the document
* @ base : the base value
*
 * Computes the final URI of the reference done by checking that
* the given URI is valid , and building the final URI using the
2012-07-24 11:44:23 +08:00
* base URI . This is processed according to section 5.2 of the
2001-02-23 17:55:21 +00:00
* RFC 2396
*
* 5.2 . Resolving Relative References to Absolute Form
*
* Returns a new URI string ( to be freed by the caller ) or NULL in case
* of error .
*/
xmlChar *
xmlBuildURI ( const xmlChar * URI , const xmlChar * base ) {
xmlChar * val = NULL ;
2001-03-24 17:00:36 +00:00
int ret , len , indx , cur , out ;
2001-02-23 17:55:21 +00:00
xmlURIPtr ref = NULL ;
xmlURIPtr bas = NULL ;
xmlURIPtr res = NULL ;
/*
* 1 ) The URI reference is parsed into the potential four components and
* fragment identifier , as described in Section 4.3 .
*
* NOTE that a completely empty URI is treated by modern browsers
* as a reference to " . " rather than as a synonym for the current
* URI . Should we do that here ?
*/
2012-07-24 11:44:23 +08:00
if ( URI = = NULL )
2001-02-23 17:55:21 +00:00
ret = - 1 ;
else {
if ( * URI ) {
ref = xmlCreateURI ( ) ;
if ( ref = = NULL )
goto done ;
ret = xmlParseURIReference ( ref , ( const char * ) URI ) ;
}
else
ret = 0 ;
}
if ( ret ! = 0 )
goto done ;
2003-01-06 13:11:20 +00:00
if ( ( ref ! = NULL ) & & ( ref - > scheme ! = NULL ) ) {
/*
* The URI is absolute don ' t modify .
*/
val = xmlStrdup ( URI ) ;
goto done ;
}
2001-02-23 17:55:21 +00:00
if ( base = = NULL )
ret = - 1 ;
else {
bas = xmlCreateURI ( ) ;
if ( bas = = NULL )
goto done ;
ret = xmlParseURIReference ( bas , ( const char * ) base ) ;
}
if ( ret ! = 0 ) {
if ( ref )
val = xmlSaveUri ( ref ) ;
goto done ;
}
if ( ref = = NULL ) {
/*
* the base fragment must be ignored
*/
if ( bas - > fragment ! = NULL ) {
xmlFree ( bas - > fragment ) ;
bas - > fragment = NULL ;
}
val = xmlSaveUri ( bas ) ;
goto done ;
}
/*
* 2 ) If the path component is empty and the scheme , authority , and
* query components are undefined , then it is a reference to the
* current document and we are done . Otherwise , the reference URI ' s
* query and fragment components are defined as found ( or not found )
* within the URI reference and not inherited from the base URI .
*
* NOTE that in modern browsers , the parsing differs from the above
* in the following aspect : the query component is allowed to be
* defined while still treating this as a reference to the current
* document .
*/
res = xmlCreateURI ( ) ;
if ( res = = NULL )
goto done ;
if ( ( ref - > scheme = = NULL ) & & ( ref - > path = = NULL ) & &
( ( ref - > authority = = NULL ) & & ( ref - > server = = NULL ) ) ) {
if ( bas - > scheme ! = NULL )
res - > scheme = xmlMemStrdup ( bas - > scheme ) ;
if ( bas - > authority ! = NULL )
res - > authority = xmlMemStrdup ( bas - > authority ) ;
2017-08-28 21:12:14 +02:00
else if ( ( bas - > server ! = NULL ) | | ( bas - > port = = - 1 ) ) {
if ( bas - > server ! = NULL )
res - > server = xmlMemStrdup ( bas - > server ) ;
2001-02-23 17:55:21 +00:00
if ( bas - > user ! = NULL )
res - > user = xmlMemStrdup ( bas - > user ) ;
2012-07-24 11:44:23 +08:00
res - > port = bas - > port ;
2001-02-23 17:55:21 +00:00
}
if ( bas - > path ! = NULL )
res - > path = xmlMemStrdup ( bas - > path ) ;
2007-04-26 08:33:28 +00:00
if ( ref - > query_raw ! = NULL )
res - > query_raw = xmlMemStrdup ( ref - > query_raw ) ;
else if ( ref - > query ! = NULL )
2001-02-23 17:55:21 +00:00
res - > query = xmlMemStrdup ( ref - > query ) ;
2007-04-26 08:33:28 +00:00
else if ( bas - > query_raw ! = NULL )
res - > query_raw = xmlMemStrdup ( bas - > query_raw ) ;
2001-02-23 17:55:21 +00:00
else if ( bas - > query ! = NULL )
res - > query = xmlMemStrdup ( bas - > query ) ;
if ( ref - > fragment ! = NULL )
res - > fragment = xmlMemStrdup ( ref - > fragment ) ;
goto step_7 ;
}
/*
* 3 ) If the scheme component is defined , indicating that the reference
* starts with a scheme name , then the reference is interpreted as an
* absolute URI and we are done . Otherwise , the reference URI ' s
* scheme is inherited from the base URI ' s scheme component .
*/
if ( ref - > scheme ! = NULL ) {
val = xmlSaveUri ( ref ) ;
goto done ;
}
if ( bas - > scheme ! = NULL )
res - > scheme = xmlMemStrdup ( bas - > scheme ) ;
2012-07-24 11:44:23 +08:00
2007-04-26 08:33:28 +00:00
if ( ref - > query_raw ! = NULL )
res - > query_raw = xmlMemStrdup ( ref - > query_raw ) ;
else if ( ref - > query ! = NULL )
2003-03-23 22:00:51 +00:00
res - > query = xmlMemStrdup ( ref - > query ) ;
if ( ref - > fragment ! = NULL )
res - > fragment = xmlMemStrdup ( ref - > fragment ) ;
2001-02-23 17:55:21 +00:00
/*
* 4 ) If the authority component is defined , then the reference is a
* network - path and we skip to step 7. Otherwise , the reference
* URI ' s authority is inherited from the base URI ' s authority
* component , which will also be undefined if the URI scheme does not
* use an authority component .
*/
if ( ( ref - > authority ! = NULL ) | | ( ref - > server ! = NULL ) ) {
if ( ref - > authority ! = NULL )
res - > authority = xmlMemStrdup ( ref - > authority ) ;
else {
res - > server = xmlMemStrdup ( ref - > server ) ;
if ( ref - > user ! = NULL )
res - > user = xmlMemStrdup ( ref - > user ) ;
2012-07-24 11:44:23 +08:00
res - > port = ref - > port ;
2001-02-23 17:55:21 +00:00
}
if ( ref - > path ! = NULL )
res - > path = xmlMemStrdup ( ref - > path ) ;
goto step_7 ;
}
if ( bas - > authority ! = NULL )
res - > authority = xmlMemStrdup ( bas - > authority ) ;
2017-08-28 21:12:14 +02:00
else if ( ( bas - > server ! = NULL ) | | ( bas - > port = = - 1 ) ) {
if ( bas - > server ! = NULL )
res - > server = xmlMemStrdup ( bas - > server ) ;
2001-02-23 17:55:21 +00:00
if ( bas - > user ! = NULL )
res - > user = xmlMemStrdup ( bas - > user ) ;
2012-07-24 11:44:23 +08:00
res - > port = bas - > port ;
2001-02-23 17:55:21 +00:00
}
/*
* 5 ) If the path component begins with a slash character ( " / " ) , then
* the reference is an absolute - path and we skip to step 7.
*/
if ( ( ref - > path ! = NULL ) & & ( ref - > path [ 0 ] = = ' / ' ) ) {
res - > path = xmlMemStrdup ( ref - > path ) ;
goto step_7 ;
}
/*
* 6 ) If this step is reached , then we are resolving a relative - path
* reference . The relative path needs to be merged with the base
* URI ' s path . Although there are many ways to do this , we will
* describe a simple method using a separate string buffer .
*
* Allocate a buffer large enough for the result string .
*/
len = 2 ; /* extra / and 0 */
if ( ref - > path ! = NULL )
len + = strlen ( ref - > path ) ;
if ( bas - > path ! = NULL )
len + = strlen ( bas - > path ) ;
2003-04-19 00:07:51 +00:00
res - > path = ( char * ) xmlMallocAtomic ( len ) ;
2001-02-23 17:55:21 +00:00
if ( res - > path = = NULL ) {
2012-07-24 11:44:23 +08:00
xmlURIErrMemory ( " resolving URI against base \n " ) ;
2001-02-23 17:55:21 +00:00
goto done ;
}
res - > path [ 0 ] = 0 ;
/*
* a ) All but the last segment of the base URI ' s path component is
* copied to the buffer . In other words , any characters after the
* last ( right - most ) slash character , if any , are excluded .
*/
cur = 0 ;
out = 0 ;
if ( bas - > path ! = NULL ) {
while ( bas - > path [ cur ] ! = 0 ) {
while ( ( bas - > path [ cur ] ! = 0 ) & & ( bas - > path [ cur ] ! = ' / ' ) )
cur + + ;
if ( bas - > path [ cur ] = = 0 )
break ;
cur + + ;
while ( out < cur ) {
res - > path [ out ] = bas - > path [ out ] ;
out + + ;
}
}
}
res - > path [ out ] = 0 ;
/*
* b ) The reference ' s path component is appended to the buffer
* string .
*/
if ( ref - > path ! = NULL & & ref - > path [ 0 ] ! = 0 ) {
2001-03-24 17:00:36 +00:00
indx = 0 ;
2001-02-23 17:55:21 +00:00
/*
* Ensure the path includes a ' / '
*/
if ( ( out = = 0 ) & & ( bas - > server ! = NULL ) )
res - > path [ out + + ] = ' / ' ;
2001-03-24 17:00:36 +00:00
while ( ref - > path [ indx ] ! = 0 ) {
res - > path [ out + + ] = ref - > path [ indx + + ] ;
2001-02-23 17:55:21 +00:00
}
}
res - > path [ out ] = 0 ;
/*
* Steps c ) to h ) are really path normalization steps
*/
xmlNormalizeURIPath ( res - > path ) ;
step_7 :
/*
* 7 ) The resulting URI components , including any inherited from the
* base URI , are recombined to give the absolute form of the URI
* reference .
*/
val = xmlSaveUri ( res ) ;
done :
if ( ref ! = NULL )
xmlFreeURI ( ref ) ;
if ( bas ! = NULL )
xmlFreeURI ( bas ) ;
if ( res ! = NULL )
xmlFreeURI ( res ) ;
return ( val ) ;
}
2004-06-07 08:57:27 +00:00
/**
* xmlBuildRelativeURI :
* @ URI : the URI reference under consideration
* @ base : the base value
*
* Expresses the URI of the reference in terms relative to the
* base . Some examples of this operation include :
* base = " http://site1.com/docs/book1.html "
* URI input URI returned
* docs / pic1 . gif pic1 . gif
* docs / img / pic1 . gif img / pic1 . gif
* img / pic1 . gif . . / img / pic1 . gif
* http : //site1.com/docs/pic1.gif pic1.gif
* http : //site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
*
* base = " docs/book1.html "
* URI input URI returned
* docs / pic1 . gif pic1 . gif
* docs / img / pic1 . gif img / pic1 . gif
* img / pic1 . gif . . / img / pic1 . gif
* http : //site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
*
*
* Note : if the URI reference is really wierd or complicated , it may be
* worthwhile to first convert it into a " nice " one by calling
* xmlBuildURI ( using ' base ' ) before calling this routine ,
* since this routine ( for reasonable efficiency ) assumes URI has
* already been through some validation .
*
* Returns a new URI string ( to be freed by the caller ) or NULL in case
* error .
*/
xmlChar *
xmlBuildRelativeURI ( const xmlChar * URI , const xmlChar * base )
{
xmlChar * val = NULL ;
int ret ;
int ix ;
int nbslash = 0 ;
2005-09-14 05:24:27 +00:00
int len ;
2004-06-07 08:57:27 +00:00
xmlURIPtr ref = NULL ;
xmlURIPtr bas = NULL ;
xmlChar * bptr , * uptr , * vptr ;
2005-09-15 14:15:20 +00:00
int remove_path = 0 ;
2004-06-07 08:57:27 +00:00
if ( ( URI = = NULL ) | | ( * URI = = 0 ) )
return NULL ;
/*
* First parse URI into a standard form
*/
ref = xmlCreateURI ( ) ;
if ( ref = = NULL )
return NULL ;
2005-07-25 18:39:34 +00:00
/* If URI not already in "relative" form */
if ( URI [ 0 ] ! = ' . ' ) {
ret = xmlParseURIReference ( ref , ( const char * ) URI ) ;
if ( ret ! = 0 )
goto done ; /* Error in URI, return NULL */
} else
ref - > path = ( char * ) xmlStrdup ( URI ) ;
2004-06-07 08:57:27 +00:00
/*
* Next parse base into the same standard form
*/
if ( ( base = = NULL ) | | ( * base = = 0 ) ) {
val = xmlStrdup ( URI ) ;
goto done ;
}
bas = xmlCreateURI ( ) ;
if ( bas = = NULL )
goto done ;
2005-07-25 18:39:34 +00:00
if ( base [ 0 ] ! = ' . ' ) {
ret = xmlParseURIReference ( bas , ( const char * ) base ) ;
if ( ret ! = 0 )
goto done ; /* Error in base, return NULL */
} else
bas - > path = ( char * ) xmlStrdup ( base ) ;
2004-06-07 08:57:27 +00:00
/*
* If the scheme / server on the URI differs from the base ,
* just return the URI
*/
if ( ( ref - > scheme ! = NULL ) & &
2005-09-15 14:15:20 +00:00
( ( bas - > scheme = = NULL ) | |
( xmlStrcmp ( ( xmlChar * ) bas - > scheme , ( xmlChar * ) ref - > scheme ) ) | |
( xmlStrcmp ( ( xmlChar * ) bas - > server , ( xmlChar * ) ref - > server ) ) ) ) {
2004-06-07 08:57:27 +00:00
val = xmlStrdup ( URI ) ;
goto done ;
}
2005-09-15 14:15:20 +00:00
if ( xmlStrEqual ( ( xmlChar * ) bas - > path , ( xmlChar * ) ref - > path ) ) {
val = xmlStrdup ( BAD_CAST " " ) ;
goto done ;
}
if ( bas - > path = = NULL ) {
val = xmlStrdup ( ( xmlChar * ) ref - > path ) ;
goto done ;
}
if ( ref - > path = = NULL ) {
ref - > path = ( char * ) " / " ;
remove_path = 1 ;
}
2004-06-07 08:57:27 +00:00
/*
* At this point ( at last ! ) we can compare the two paths
*
2005-09-14 05:24:27 +00:00
* First we take care of the special case where either of the
* two path components may be missing ( bug 316224 )
2004-06-07 08:57:27 +00:00
*/
2005-09-14 05:24:27 +00:00
bptr = ( xmlChar * ) bas - > path ;
2018-11-05 14:26:26 +01:00
{
2017-06-08 18:25:30 +02:00
xmlChar * rptr = ( xmlChar * ) ref - > path ;
int pos = 0 ;
/*
* Next we compare the two strings and find where they first differ
*/
if ( ( * rptr = = ' . ' ) & & ( rptr [ 1 ] = = ' / ' ) )
rptr + = 2 ;
2005-09-14 05:24:27 +00:00
if ( ( * bptr = = ' . ' ) & & ( bptr [ 1 ] = = ' / ' ) )
bptr + = 2 ;
2017-06-08 18:25:30 +02:00
else if ( ( * bptr = = ' / ' ) & & ( * rptr ! = ' / ' ) )
2005-09-14 05:24:27 +00:00
bptr + + ;
2017-06-08 18:25:30 +02:00
while ( ( bptr [ pos ] = = rptr [ pos ] ) & & ( bptr [ pos ] ! = 0 ) )
2005-09-14 05:24:27 +00:00
pos + + ;
2017-06-08 18:25:30 +02:00
if ( bptr [ pos ] = = rptr [ pos ] ) {
2005-09-15 14:15:20 +00:00
val = xmlStrdup ( BAD_CAST " " ) ;
2005-09-14 05:24:27 +00:00
goto done ; /* (I can't imagine why anyone would do this) */
}
/*
* In URI , " back up " to the last ' / ' encountered . This will be the
* beginning of the " unique " suffix of URI
*/
ix = pos ;
for ( ; ix > 0 ; ix - - ) {
2018-11-05 14:20:16 +01:00
if ( rptr [ ix - 1 ] = = ' / ' )
2005-09-14 05:24:27 +00:00
break ;
}
2018-11-05 14:20:16 +01:00
uptr = ( xmlChar * ) & rptr [ ix ] ;
2004-06-07 08:57:27 +00:00
2005-09-14 05:24:27 +00:00
/*
* In base , count the number of ' / ' from the differing point
*/
2018-11-05 14:26:26 +01:00
for ( ; bptr [ ix ] ! = 0 ; ix + + ) {
if ( bptr [ ix ] = = ' / ' )
nbslash + + ;
2005-09-14 05:24:27 +00:00
}
2018-11-05 14:20:16 +01:00
/*
* e . g : URI = " foo/ " base = " foo/bar " - > " ./ "
*/
if ( nbslash = = 0 & & ! uptr [ 0 ] ) {
val = xmlStrdup ( BAD_CAST " ./ " ) ;
goto done ;
}
2005-09-14 05:24:27 +00:00
len = xmlStrlen ( uptr ) + 1 ;
}
2012-07-24 11:44:23 +08:00
2004-06-07 08:57:27 +00:00
if ( nbslash = = 0 ) {
2005-09-14 05:24:27 +00:00
if ( uptr ! = NULL )
2007-07-20 01:09:08 +00:00
/* exception characters from xmlSaveUri */
val = xmlURIEscapeStr ( uptr , BAD_CAST " /;&=+$, " ) ;
2004-06-07 08:57:27 +00:00
goto done ;
}
/*
* Allocate just enough space for the returned string -
* length of the remainder of the URI , plus enough space
* for the " ../ " groups , plus one for the terminator
*/
2005-09-14 05:24:27 +00:00
val = ( xmlChar * ) xmlMalloc ( len + 3 * nbslash ) ;
2004-06-07 08:57:27 +00:00
if ( val = = NULL ) {
2012-07-24 11:44:23 +08:00
xmlURIErrMemory ( " building relative URI \n " ) ;
2004-06-07 08:57:27 +00:00
goto done ;
}
vptr = val ;
/*
* Put in as many " ../ " as needed
*/
for ( ; nbslash > 0 ; nbslash - - ) {
* vptr + + = ' . ' ;
* vptr + + = ' . ' ;
* vptr + + = ' / ' ;
}
/*
* Finish up with the end of the URI
*/
2005-09-15 14:15:20 +00:00
if ( uptr ! = NULL ) {
if ( ( vptr > val ) & & ( len > 0 ) & &
( uptr [ 0 ] = = ' / ' ) & & ( vptr [ - 1 ] = = ' / ' ) ) {
memcpy ( vptr , uptr + 1 , len - 1 ) ;
vptr [ len - 2 ] = 0 ;
} else {
memcpy ( vptr , uptr , len ) ;
vptr [ len - 1 ] = 0 ;
}
} else {
2005-09-14 05:24:27 +00:00
vptr [ len - 1 ] = 0 ;
2005-09-15 14:15:20 +00:00
}
2004-06-07 08:57:27 +00:00
2007-07-20 01:09:08 +00:00
/* escape the freshly-built path */
vptr = val ;
/* exception characters from xmlSaveUri */
val = xmlURIEscapeStr ( vptr , BAD_CAST " /;&=+$, " ) ;
xmlFree ( vptr ) ;
2005-09-15 14:15:20 +00:00
done :
2004-06-07 08:57:27 +00:00
/*
* Free the working variables
*/
2005-09-15 14:15:20 +00:00
if ( remove_path ! = 0 )
ref - > path = NULL ;
2004-06-07 08:57:27 +00:00
if ( ref ! = NULL )
xmlFreeURI ( ref ) ;
if ( bas ! = NULL )
xmlFreeURI ( bas ) ;
return val ;
}
2003-02-19 14:50:35 +00:00
/**
* xmlCanonicPath :
* @ path : the resource locator in a filesystem notation
*
2012-07-24 11:44:23 +08:00
* Constructs a canonic path from the specified path .
2003-02-19 14:50:35 +00:00
*
2012-07-24 11:44:23 +08:00
* Returns a new canonic path , or a duplicate of the path parameter if the
2003-02-19 14:50:35 +00:00
* construction fails . The caller is responsible for freeing the memory occupied
2012-07-24 11:44:23 +08:00
* by the returned string . If there is insufficient memory available , or the
2003-02-19 14:50:35 +00:00
* argument is NULL , the function returns NULL .
*/
2012-07-24 11:44:23 +08:00
# define IS_WINDOWS_PATH(p) \
2003-02-19 14:50:35 +00:00
( ( p ! = NULL ) & & \
( ( ( p [ 0 ] > = ' a ' ) & & ( p [ 0 ] < = ' z ' ) ) | | \
( ( p [ 0 ] > = ' A ' ) & & ( p [ 0 ] < = ' Z ' ) ) ) & & \
( p [ 1 ] = = ' : ' ) & & ( ( p [ 2 ] = = ' / ' ) | | ( p [ 2 ] = = ' \\ ' ) ) )
2006-10-10 12:37:14 +00:00
xmlChar *
2003-02-19 14:50:35 +00:00
xmlCanonicPath ( const xmlChar * path )
{
2007-01-27 07:59:37 +00:00
/*
* For Windows implementations , additional work needs to be done to
* replace backslashes in pathnames with " forward slashes "
*/
2012-07-24 11:44:23 +08:00
# if defined(_WIN32) && !defined(__CYGWIN__)
2003-02-23 13:39:39 +00:00
int len = 0 ;
2017-10-09 13:32:20 +02:00
char * p = NULL ;
2003-02-24 11:47:13 +00:00
# endif
2003-02-19 14:50:35 +00:00
xmlURIPtr uri ;
2005-08-07 10:46:19 +00:00
xmlChar * ret ;
const xmlChar * absuri ;
2003-02-19 14:50:35 +00:00
if ( path = = NULL )
return ( NULL ) ;
2008-02-05 08:37:56 +00:00
2012-09-07 12:14:00 +08:00
# if defined(_WIN32)
/*
* We must not change the backslashes to slashes if the the path
* starts with \ \ ? \
* Those paths can be up to 32 k characters long .
* Was added specifically for OpenOffice , those paths can ' t be converted
* to URIs anyway .
*/
if ( ( path [ 0 ] = = ' \\ ' ) & & ( path [ 1 ] = = ' \\ ' ) & & ( path [ 2 ] = = ' ? ' ) & &
( path [ 3 ] = = ' \\ ' ) )
return xmlStrdup ( ( const xmlChar * ) path ) ;
# endif
/* sanitize filename starting with // so it can be used as URI */
2008-02-05 08:37:56 +00:00
if ( ( path [ 0 ] = = ' / ' ) & & ( path [ 1 ] = = ' / ' ) & & ( path [ 2 ] ! = ' / ' ) )
path + + ;
2003-02-24 11:47:13 +00:00
if ( ( uri = xmlParseURI ( ( const char * ) path ) ) ! = NULL ) {
2003-02-19 14:50:35 +00:00
xmlFreeURI ( uri ) ;
return xmlStrdup ( path ) ;
}
2007-01-27 07:59:37 +00:00
/* Check if this is an "absolute uri" */
2005-08-07 10:46:19 +00:00
absuri = xmlStrstr ( path , BAD_CAST " :// " ) ;
if ( absuri ! = NULL ) {
int l , j ;
unsigned char c ;
xmlChar * escURI ;
/*
* this looks like an URI where some parts have not been
2007-01-27 07:59:37 +00:00
* escaped leading to a parsing problem . Check that the first
2005-08-07 10:46:19 +00:00
* part matches a protocol .
*/
l = absuri - path ;
2007-01-27 07:59:37 +00:00
/* Bypass if first part (part before the '://') is > 20 chars */
2005-08-07 10:46:19 +00:00
if ( ( l < = 0 ) | | ( l > 20 ) )
goto path_processing ;
2007-01-27 07:59:37 +00:00
/* Bypass if any non-alpha characters are present in first part */
2005-08-07 10:46:19 +00:00
for ( j = 0 ; j < l ; j + + ) {
c = path [ j ] ;
if ( ! ( ( ( c > = ' a ' ) & & ( c < = ' z ' ) ) | | ( ( c > = ' A ' ) & & ( c < = ' Z ' ) ) ) )
goto path_processing ;
}
2007-01-27 07:59:37 +00:00
/* Escape all except the characters specified in the supplied path */
2005-08-07 10:46:19 +00:00
escURI = xmlURIEscapeStr ( path , BAD_CAST " :/?_.#&;= " ) ;
if ( escURI ! = NULL ) {
2007-01-27 07:59:37 +00:00
/* Try parsing the escaped path */
2005-08-07 10:46:19 +00:00
uri = xmlParseURI ( ( const char * ) escURI ) ;
2007-01-27 07:59:37 +00:00
/* If successful, return the escaped string */
2005-08-07 10:46:19 +00:00
if ( uri ! = NULL ) {
xmlFreeURI ( uri ) ;
return escURI ;
}
2017-05-27 14:44:36 +02:00
xmlFree ( escURI ) ;
2005-08-07 10:46:19 +00:00
}
}
path_processing :
2007-01-27 07:59:37 +00:00
/* For Windows implementations, replace backslashes with 'forward slashes' */
2012-07-24 11:44:23 +08:00
# if defined(_WIN32) && !defined(__CYGWIN__)
2005-08-07 10:46:19 +00:00
/*
2007-01-27 07:59:37 +00:00
* Create a URI structure
2005-08-07 10:46:19 +00:00
*/
2003-02-19 14:50:35 +00:00
uri = xmlCreateURI ( ) ;
2007-01-27 07:59:37 +00:00
if ( uri = = NULL ) { /* Guard against 'out of memory' */
2003-04-24 16:06:47 +00:00
return ( NULL ) ;
}
2003-02-19 14:50:35 +00:00
len = xmlStrlen ( path ) ;
if ( ( len > 2 ) & & IS_WINDOWS_PATH ( path ) ) {
2007-01-27 07:59:37 +00:00
/* make the scheme 'file' */
2017-10-09 13:32:20 +02:00
uri - > scheme = ( char * ) xmlStrdup ( BAD_CAST " file " ) ;
2007-01-27 07:59:37 +00:00
/* allocate space for leading '/' + path + string terminator */
2006-10-10 12:37:14 +00:00
uri - > path = xmlMallocAtomic ( len + 2 ) ;
if ( uri - > path = = NULL ) {
2007-01-27 07:59:37 +00:00
xmlFreeURI ( uri ) ; /* Guard agains 'out of memory' */
2006-10-10 12:37:14 +00:00
return ( NULL ) ;
}
2007-01-27 07:59:37 +00:00
/* Put in leading '/' plus path */
2003-02-19 14:50:35 +00:00
uri - > path [ 0 ] = ' / ' ;
2003-02-23 13:39:39 +00:00
p = uri - > path + 1 ;
2017-10-09 13:32:20 +02:00
strncpy ( p , ( char * ) path , len + 1 ) ;
2003-02-23 13:39:39 +00:00
} else {
2017-10-09 13:32:20 +02:00
uri - > path = ( char * ) xmlStrdup ( path ) ;
2006-10-10 12:37:14 +00:00
if ( uri - > path = = NULL ) {
xmlFreeURI ( uri ) ;
return ( NULL ) ;
}
2003-02-23 13:39:39 +00:00
p = uri - > path ;
}
2007-01-27 07:59:37 +00:00
/* Now change all occurences of '\' to '/' */
2003-02-23 13:39:39 +00:00
while ( * p ! = ' \0 ' ) {
if ( * p = = ' \\ ' )
* p = ' / ' ;
p + + ;
}
2006-02-03 09:45:10 +00:00
2006-10-10 12:37:14 +00:00
if ( uri - > scheme = = NULL ) {
2007-01-27 07:59:37 +00:00
ret = xmlStrdup ( ( const xmlChar * ) uri - > path ) ;
2006-10-10 12:37:14 +00:00
} else {
ret = xmlSaveUri ( uri ) ;
}
2006-02-03 09:45:10 +00:00
2003-02-19 14:50:35 +00:00
xmlFreeURI ( uri ) ;
2005-08-07 10:46:19 +00:00
# else
ret = xmlStrdup ( ( const xmlChar * ) path ) ;
# endif
2003-02-19 14:50:35 +00:00
return ( ret ) ;
}
2001-02-23 17:55:21 +00:00
2006-10-10 12:37:14 +00:00
/**
 * xmlPathToURI:
 * @path:  the resource locator in a filesystem notation
 *
 * Builds a URI out of a filesystem path. A path that already parses as
 * a URI is duplicated as is; anything else is first canonicalized with
 * xmlCanonicPath() and then serialized as the path of an otherwise
 * empty URI.
 *
 * Returns a new URI, or a duplicate of the path parameter if the
 * construction fails. The caller is responsible for freeing the memory
 * occupied by the returned string. If there is insufficient memory available,
 * or the argument is NULL, the function returns NULL.
 */
xmlChar *
xmlPathToURI(const xmlChar *path)
{
    xmlURIPtr parsed;
    xmlURI holder;
    xmlChar *canonic;
    xmlChar *result;

    if (path == NULL)
        return(NULL);

    /* Nothing to convert when the input is a valid URI already */
    parsed = xmlParseURI((const char *) path);
    if (parsed != NULL) {
        xmlFreeURI(parsed);
        return xmlStrdup(path);
    }

    canonic = xmlCanonicPath(path);
    if (canonic == NULL)
        return(NULL);

#if defined(_WIN32) && !defined(__CYGWIN__)
    /*
     * xmlCanonicPath can hand back a URI on Windows. If 'canonic' parses
     * as one, return it directly: processing it further would make it
     * invalid.
     */
    parsed = xmlParseURI((const char *) canonic);
    if (parsed != NULL) {
        xmlFreeURI(parsed);
        return canonic;
    }
    /*
     * 'canonic' may hold a relative path with backslashes. xmlSaveUri
     * would escape them and the external entity loader machinery would
     * fail, so turn them into slashes. 'result' only serves as a cursor
     * here.
     */
    for (result = canonic; *result != '\0'; result++) {
        if (*result == '\\')
            *result = '/';
    }
#endif

    /* Serialize through a stack URI whose only component is the path */
    memset(&holder, 0, sizeof(holder));
    holder.path = (char *) canonic;
    result = xmlSaveUri(&holder);
    xmlFree(canonic);
    return(result);
}
2005-04-01 13:11:58 +00:00
# define bottom_uri
# include "elfgcchack.h"