2009-10-30 08:58:34 +11:00
/*
* NOTE :
*
* This file imported from the Squid project . The licence below is
* reproduced intact , but refers to files in Squid ' s repository , not
* in Samba . See COPYING for the GPLv3 notice ( being the later
* version mentioned below ) .
*
* This file has also been modified , in particular to use talloc to
* allocate in rfc1738_escape ( )
*
* - Andrew Bartlett Oct - 2009
*
*/
2009-10-29 17:09:49 +11:00
/*
* $ Id $
*
* DEBUG :
* AUTHOR : Harvest Derived
*
* SQUID Web Proxy Cache http : //www.squid-cache.org/
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*
* Squid is the result of efforts by numerous individuals from
* the Internet community ; see the CONTRIBUTORS file for full
* details . Many organizations have provided support for Squid ' s
* development ; see the SPONSORS file for full details . Squid is
* Copyrighted ( C ) 2001 by the Regents of the University of
* California ; see the COPYRIGHT file for full details . Squid
* incorporates software developed and / or copyrighted by other
* sources ; see the CREDITS file for full details .
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation ; either version 2 of the License , or
* ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write to the Free Software
* Foundation , Inc . , 59 Temple Place , Suite 330 , Boston , MA 02111 , USA .
*
*/
2009-10-30 08:58:34 +11:00
# include "includes.h"
2009-10-29 17:09:49 +11:00
# include "util.h"
/*
* RFC 1738 defines that these characters should be escaped , as well
* any non - US - ASCII character or anything between 0x00 - 0x1F .
*/
static char rfc1738_unsafe_chars [ ] = {
( char ) 0x3C , /* < */
( char ) 0x3E , /* > */
( char ) 0x22 , /* " */
( char ) 0x23 , /* # */
#if 0 /* done in code */
( char ) 0x25 , /* % */
# endif
( char ) 0x7B , /* { */
( char ) 0x7D , /* } */
( char ) 0x7C , /* | */
( char ) 0x5C , /* \ */
( char ) 0x5E , /* ^ */
( char ) 0x7E , /* ~ */
( char ) 0x5B , /* [ */
( char ) 0x5D , /* ] */
( char ) 0x60 , /* ` */
( char ) 0x27 , /* ' */
( char ) 0x20 /* space */
} ;
static char rfc1738_reserved_chars [ ] = {
( char ) 0x3b , /* ; */
( char ) 0x2f , /* / */
( char ) 0x3f , /* ? */
( char ) 0x3a , /* : */
( char ) 0x40 , /* @ */
( char ) 0x3d , /* = */
( char ) 0x26 /* & */
} ;
/*
* rfc1738_escape - Returns a static buffer contains the RFC 1738
* compliant , escaped version of the given url .
2009-10-30 08:58:34 +11:00
*
2009-10-29 17:09:49 +11:00
*/
static char *
2009-10-30 08:58:34 +11:00
rfc1738_do_escape ( TALLOC_CTX * mem_ctx , const char * url , int encode_reserved )
2009-10-29 17:09:49 +11:00
{
2009-10-30 08:58:34 +11:00
size_t bufsize = 0 ;
2009-10-29 17:09:49 +11:00
const char * p ;
2009-10-30 08:58:34 +11:00
char * buf ;
2009-10-29 17:09:49 +11:00
char * q ;
unsigned int i , do_escape ;
2009-10-30 08:58:34 +11:00
bufsize = strlen ( url ) * 3 + 1 ;
buf = talloc_array ( mem_ctx , char , bufsize ) ;
if ( ! buf ) {
return NULL ;
2009-10-29 17:09:49 +11:00
}
2009-10-30 08:58:34 +11:00
talloc_set_name_const ( buf , buf ) ;
buf [ 0 ] = ' \0 ' ;
2009-10-29 17:09:49 +11:00
for ( p = url , q = buf ; * p ! = ' \0 ' & & q < ( buf + bufsize - 1 ) ; p + + , q + + ) {
do_escape = 0 ;
/* RFC 1738 defines these chars as unsafe */
for ( i = 0 ; i < sizeof ( rfc1738_unsafe_chars ) ; i + + ) {
if ( * p = = rfc1738_unsafe_chars [ i ] ) {
do_escape = 1 ;
break ;
}
}
/* Handle % separately */
if ( encode_reserved > = 0 & & * p = = ' % ' )
do_escape = 1 ;
/* RFC 1738 defines these chars as reserved */
for ( i = 0 ; i < sizeof ( rfc1738_reserved_chars ) & & encode_reserved > 0 ; i + + ) {
if ( * p = = rfc1738_reserved_chars [ i ] ) {
do_escape = 1 ;
break ;
}
}
/* RFC 1738 says any control chars (0x00-0x1F) are encoded */
if ( ( unsigned char ) * p < = ( unsigned char ) 0x1F ) {
do_escape = 1 ;
}
/* RFC 1738 says 0x7f is encoded */
if ( * p = = ( char ) 0x7F ) {
do_escape = 1 ;
}
/* RFC 1738 says any non-US-ASCII are encoded */
if ( ( ( unsigned char ) * p > = ( unsigned char ) 0x80 ) ) {
do_escape = 1 ;
}
/* Do the triplet encoding, or just copy the char */
2009-10-30 08:58:34 +11:00
/* note: while we do not need snprintf here as q is appropriately
* allocated , Samba does to avoid our macro banning it - - abartlet */
2009-10-29 17:09:49 +11:00
if ( do_escape = = 1 ) {
2009-10-30 08:58:34 +11:00
( void ) snprintf ( q , 4 , " %%%02X " , ( unsigned char ) * p ) ;
2009-10-29 17:09:49 +11:00
q + = sizeof ( char ) * 2 ;
} else {
* q = * p ;
}
}
* q = ' \0 ' ;
return ( buf ) ;
}
/*
2009-11-02 16:39:31 +11:00
* rfc1738_escape - Returns a buffer that contains the RFC
2009-10-30 08:58:34 +11:00
* 1738 compliant , escaped version of the given url . ( escapes unsafe and % characters )
2009-10-29 17:09:49 +11:00
*/
char *
2009-10-30 08:58:34 +11:00
rfc1738_escape ( TALLOC_CTX * mem_ctx , const char * url )
2009-10-29 17:09:49 +11:00
{
2009-10-30 08:58:34 +11:00
return rfc1738_do_escape ( mem_ctx , url , 0 ) ;
2009-10-29 17:09:49 +11:00
}
/*
2009-11-02 16:39:31 +11:00
* rfc1738_escape_unescaped - Returns a buffer that contains
2009-10-30 08:58:34 +11:00
* the RFC 1738 compliant , escaped version of the given url ( escapes unsafe chars only )
2009-10-29 17:09:49 +11:00
*/
char *
2009-10-30 08:58:34 +11:00
rfc1738_escape_unescaped ( TALLOC_CTX * mem_ctx , const char * url )
2009-10-29 17:09:49 +11:00
{
2009-10-30 08:58:34 +11:00
return rfc1738_do_escape ( mem_ctx , url , - 1 ) ;
2009-10-29 17:09:49 +11:00
}
/*
2009-11-02 16:39:31 +11:00
* rfc1738_escape_part - Returns a buffer that contains the RFC
2009-10-30 08:58:34 +11:00
* 1738 compliant , escaped version of the given url segment . ( escapes
* unsafe , reserved and % chars ) It would mangle the : // in http://,
* and mangle paths ( because of / ) .
2009-10-29 17:09:49 +11:00
*/
char *
2009-10-30 08:58:34 +11:00
rfc1738_escape_part ( TALLOC_CTX * mem_ctx , const char * url )
2009-10-29 17:09:49 +11:00
{
2009-10-30 08:58:34 +11:00
return rfc1738_do_escape ( mem_ctx , url , 1 ) ;
2009-10-29 17:09:49 +11:00
}
/*
* rfc1738_unescape ( ) - Converts escaped characters ( % xy numbers ) in
* given the string . % % is a % . % ab is the 8 - bit hexadecimal number " ab "
*/
2009-10-30 08:58:34 +11:00
_PUBLIC_ void
2009-10-29 17:09:49 +11:00
rfc1738_unescape ( char * s )
{
char hexnum [ 3 ] ;
int i , j ; /* i is write, j is read */
unsigned int x ;
for ( i = j = 0 ; s [ j ] ; i + + , j + + ) {
s [ i ] = s [ j ] ;
if ( s [ i ] ! = ' % ' )
continue ;
if ( s [ j + 1 ] = = ' % ' ) { /* %% case */
j + + ;
continue ;
}
if ( s [ j + 1 ] & & s [ j + 2 ] ) {
if ( s [ j + 1 ] = = ' 0 ' & & s [ j + 2 ] = = ' 0 ' ) { /* %00 case */
j + = 2 ;
continue ;
}
hexnum [ 0 ] = s [ j + 1 ] ;
hexnum [ 1 ] = s [ j + 2 ] ;
hexnum [ 2 ] = ' \0 ' ;
if ( 1 = = sscanf ( hexnum , " %x " , & x ) ) {
s [ i ] = ( char ) ( 0x0ff & x ) ;
j + = 2 ;
}
}
}
s [ i ] = ' \0 ' ;
}