2009-10-30 00:58:34 +03:00
/*
 * NOTE:
 *
 * This file was imported from the Squid project.  The licence below is
 * reproduced intact, but refers to files in Squid's repository, not
 * in Samba.  See COPYING for the GPLv3 notice (being the later
 * version mentioned below).
 *
 * This file has also been modified, in particular to use talloc to
 * allocate in rfc1738_escape()
 *
 * - Andrew Bartlett Oct-2009
 *
 */
2009-10-29 09:09:49 +03:00
/*
* $ Id $
*
* DEBUG :
* AUTHOR : Harvest Derived
*
* SQUID Web Proxy Cache http : //www.squid-cache.org/
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*
* Squid is the result of efforts by numerous individuals from
* the Internet community ; see the CONTRIBUTORS file for full
* details . Many organizations have provided support for Squid ' s
* development ; see the SPONSORS file for full details . Squid is
* Copyrighted ( C ) 2001 by the Regents of the University of
* California ; see the COPYRIGHT file for full details . Squid
* incorporates software developed and / or copyrighted by other
* sources ; see the CREDITS file for full details .
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation ; either version 2 of the License , or
* ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write to the Free Software
* Foundation , Inc . , 59 Temple Place , Suite 330 , Boston , MA 02111 , USA .
*
*/
2017-01-08 22:52:47 +03:00
# include "replace.h"
# include <talloc.h>
2011-09-21 01:26:36 +04:00
# include "lib/util/samba_util.h"
2018-02-19 04:12:03 +03:00
# include "lib/util/util_str_hex.h"
2009-10-29 09:09:49 +03:00
/*
 * RFC 1738 defines that these characters should be escaped, as well
 * as any non-US-ASCII character or anything between 0x00 - 0x1F.
 *
 * '%' (0x25) is deliberately absent from this table: it is handled
 * separately in rfc1738_do_escape().
 *
 * The table is only ever read, so it is const.
 */
static const char rfc1738_unsafe_chars[] = {
	(char) 0x3C,		/* < */
	(char) 0x3E,		/* > */
	(char) 0x22,		/* " */
	(char) 0x23,		/* # */
	(char) 0x7B,		/* { */
	(char) 0x7D,		/* } */
	(char) 0x7C,		/* | */
	(char) 0x5C,		/* \ */
	(char) 0x5E,		/* ^ */
	(char) 0x7E,		/* ~ */
	(char) 0x5B,		/* [ */
	(char) 0x5D,		/* ] */
	(char) 0x60,		/* ` */
	(char) 0x27,		/* ' */
	(char) 0x20		/* space */
};
/*
 * Characters that RFC 1738 treats as reserved.  rfc1738_do_escape()
 * only escapes these when asked to (encode_reserved > 0).
 *
 * The table is only ever read, so it is const.
 */
static const char rfc1738_reserved_chars[] = {
	(char) 0x3b,		/* ; */
	(char) 0x2f,		/* / */
	(char) 0x3f,		/* ? */
	(char) 0x3a,		/* : */
	(char) 0x40,		/* @ */
	(char) 0x3d,		/* = */
	(char) 0x26		/* & */
};
/*
* rfc1738_escape - Returns a static buffer contains the RFC 1738
* compliant , escaped version of the given url .
2009-10-30 00:58:34 +03:00
*
2009-10-29 09:09:49 +03:00
*/
static char *
2009-10-30 00:58:34 +03:00
rfc1738_do_escape ( TALLOC_CTX * mem_ctx , const char * url , int encode_reserved )
2009-10-29 09:09:49 +03:00
{
2009-10-30 00:58:34 +03:00
size_t bufsize = 0 ;
2009-10-29 09:09:49 +03:00
const char * p ;
2009-10-30 00:58:34 +03:00
char * buf ;
2009-10-29 09:09:49 +03:00
char * q ;
unsigned int i , do_escape ;
2009-10-30 00:58:34 +03:00
bufsize = strlen ( url ) * 3 + 1 ;
buf = talloc_array ( mem_ctx , char , bufsize ) ;
if ( ! buf ) {
return NULL ;
2009-10-29 09:09:49 +03:00
}
2009-10-30 00:58:34 +03:00
talloc_set_name_const ( buf , buf ) ;
buf [ 0 ] = ' \0 ' ;
2009-10-29 09:09:49 +03:00
for ( p = url , q = buf ; * p ! = ' \0 ' & & q < ( buf + bufsize - 1 ) ; p + + , q + + ) {
do_escape = 0 ;
/* RFC 1738 defines these chars as unsafe */
for ( i = 0 ; i < sizeof ( rfc1738_unsafe_chars ) ; i + + ) {
if ( * p = = rfc1738_unsafe_chars [ i ] ) {
do_escape = 1 ;
break ;
}
}
/* Handle % separately */
if ( encode_reserved > = 0 & & * p = = ' % ' )
do_escape = 1 ;
/* RFC 1738 defines these chars as reserved */
for ( i = 0 ; i < sizeof ( rfc1738_reserved_chars ) & & encode_reserved > 0 ; i + + ) {
if ( * p = = rfc1738_reserved_chars [ i ] ) {
do_escape = 1 ;
break ;
}
}
/* RFC 1738 says any control chars (0x00-0x1F) are encoded */
if ( ( unsigned char ) * p < = ( unsigned char ) 0x1F ) {
do_escape = 1 ;
}
/* RFC 1738 says 0x7f is encoded */
if ( * p = = ( char ) 0x7F ) {
do_escape = 1 ;
}
/* RFC 1738 says any non-US-ASCII are encoded */
if ( ( ( unsigned char ) * p > = ( unsigned char ) 0x80 ) ) {
do_escape = 1 ;
}
/* Do the triplet encoding, or just copy the char */
2009-10-30 00:58:34 +03:00
/* note: while we do not need snprintf here as q is appropriately
* allocated , Samba does to avoid our macro banning it - - abartlet */
2009-10-29 09:09:49 +03:00
if ( do_escape = = 1 ) {
2009-10-30 00:58:34 +03:00
( void ) snprintf ( q , 4 , " %%%02X " , ( unsigned char ) * p ) ;
2009-10-29 09:09:49 +03:00
q + = sizeof ( char ) * 2 ;
} else {
* q = * p ;
}
}
* q = ' \0 ' ;
return ( buf ) ;
}
/*
2009-11-02 08:39:31 +03:00
* rfc1738_escape_part - Returns a buffer that contains the RFC
2009-10-30 00:58:34 +03:00
* 1738 compliant , escaped version of the given url segment . ( escapes
* unsafe , reserved and % chars ) It would mangle the : // in http://,
* and mangle paths ( because of / ) .
2009-10-29 09:09:49 +03:00
*/
char *
2009-10-30 00:58:34 +03:00
rfc1738_escape_part ( TALLOC_CTX * mem_ctx , const char * url )
2009-10-29 09:09:49 +03:00
{
2009-10-30 00:58:34 +03:00
return rfc1738_do_escape ( mem_ctx , url , 1 ) ;
2009-10-29 09:09:49 +03:00
}
/*
2018-02-19 04:12:03 +03:00
* rfc1738_unescape ( ) - Converts url - escaped characters in the string .
*
* The two characters following a ' % ' in a string should be hex digits that
* describe an encoded byte . For example , " %25 " is hex 0x25 or ' % ' in ASCII ;
* this is the only way to include a % in the unescaped string . Any character
* can be escaped , including plain letters ( e . g . " %61 " for " a " ) . Anything
* other than 2 hex characters following the % is an error .
*
* The conversion is done in - place , which is always safe as unescapes can only
* shorten the string .
*
* Returns a pointer to the end of the string ( that is , the ' \0 ' byte ) , or
* NULL on error , at which point s is in an undefined state .
*
* Note that after ` char * e = rfc_unescape ( s ) ` , ` strlen ( s ) ` will not equal
* ` e - s ` if s originally contained " %00 " . You might want to check for this .
2009-10-29 09:09:49 +03:00
*/
2018-02-17 00:46:44 +03:00
_PUBLIC_ char * rfc1738_unescape ( char * s )
2009-10-29 09:09:49 +03:00
{
2018-02-19 04:12:03 +03:00
size_t i , j ; /* i is write, j is read */
uint64_t x ;
NTSTATUS status ;
for ( i = 0 , j = 0 ; s [ j ] ! = ' \0 ' ; i + + , j + + ) {
if ( s [ j ] = = ' % ' ) {
status = read_hex_bytes ( & s [ j + 1 ] , 2 , & x ) ;
if ( ! NT_STATUS_IS_OK ( status ) ) {
return NULL ;
}
j + = 2 ; /* OK; read_hex_bytes() has checked ahead */
s [ i ] = ( unsigned char ) x ;
} else {
s [ i ] = s [ j ] ;
}
}
s [ i ] = ' \0 ' ;
2018-02-17 00:46:44 +03:00
return s + i ;
2009-10-29 09:09:49 +03:00
}