1
0
mirror of https://github.com/samba-team/samba.git synced 2024-12-23 17:34:34 +03:00
samba-mirror/lib/util/rfc1738.c
Volker Lendecke 2ba7fe1095 lib: Use hex_byte() in rfc1738_unescape()
Signed-off-by: Volker Lendecke <vl@samba.org>
Reviewed-by: Ralph Boehme <slow@samba.org>
Reviewed-by: Jeremy Allison <jra@samba.org>
2021-01-08 20:31:33 +00:00

199 lines
5.6 KiB
C

/*
* Functions for RFC 3986 percent-encoding.
*
* NOTE:
*
* This file was originally imported from the Squid project but has been
* significantly altered. The licence below is reproduced intact, but refers
* to files in Squid's repository, not in Samba. See COPYING for the GPLv3
* notice (being the later version mentioned below).
*/
/*
* $Id$
*
* DEBUG:
* AUTHOR: Harvest Derived
*
* SQUID Web Proxy Cache http://www.squid-cache.org/
* ----------------------------------------------------------
*
* Squid is the result of efforts by numerous individuals from
* the Internet community; see the CONTRIBUTORS file for full
* details. Many organizations have provided support for Squid's
* development; see the SPONSORS file for full details. Squid is
* Copyrighted (C) 2001 by the Regents of the University of
* California; see the COPYRIGHT file for full details. Squid
* incorporates software developed and/or copyrighted by other
* sources; see the CREDITS file for full details.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
*
*/
#include "replace.h"
#include <talloc.h>
#include "lib/util/samba_util.h"
#define RFC1738_ENCODE 1
#define RFC1738_RESERVED 2

/*
 * Escaping class of each printable ASCII character.
 *
 * According to RFC 1738, "$-_.+!*'()," are not reserved or unsafe, but as
 * that has been obsolete since 2004, we aim instead for RFC 3986, where:
 *
 *  reserved   = : / ? # [ ] @ ! $ & ' ( ) * + , ; =
 *  unreserved = ALPHA DIGIT - . _ ~
 *
 * Whatever is in neither of those sets is what RFC 1738 called "unsafe",
 * meaning that it is canonically but not mandatorily escaped. Those
 * characters are tagged RFC1738_ENCODE below, the reserved ones are tagged
 * RFC1738_RESERVED, and unreserved characters are left as zero.
 *
 * Characters below 0x20 or above 0x7E are always encoded, so the table only
 * has to cover indices 0 to 0x7E. Entries are listed in ASCII order.
 */
static const unsigned char escapees[127] = {
	/* 0x20 - 0x2F */
	[' ']  = RFC1738_ENCODE,
	['!']  = RFC1738_RESERVED,
	['"']  = RFC1738_ENCODE,
	['#']  = RFC1738_RESERVED,
	['$']  = RFC1738_RESERVED,
	['%']  = RFC1738_ENCODE,
	['&']  = RFC1738_RESERVED,
	['\''] = RFC1738_RESERVED,
	['(']  = RFC1738_RESERVED,
	[')']  = RFC1738_RESERVED,
	['*']  = RFC1738_RESERVED,
	['+']  = RFC1738_RESERVED,
	[',']  = RFC1738_RESERVED,
	['/']  = RFC1738_RESERVED,
	/* 0x3A - 0x40 */
	[':']  = RFC1738_RESERVED,
	[';']  = RFC1738_RESERVED,
	['<']  = RFC1738_ENCODE,
	['=']  = RFC1738_RESERVED,
	['>']  = RFC1738_ENCODE,
	['?']  = RFC1738_RESERVED,
	['@']  = RFC1738_RESERVED,
	/* 0x5B - 0x60 */
	['[']  = RFC1738_RESERVED,
	['\\'] = RFC1738_ENCODE,
	[']']  = RFC1738_RESERVED,
	['^']  = RFC1738_ENCODE,
	['`']  = RFC1738_ENCODE,
	/* 0x7B - 0x7D */
	['{']  = RFC1738_ENCODE,
	['|']  = RFC1738_ENCODE,
	['}']  = RFC1738_ENCODE,
};
/*
 * rfc1738_do_escape - fills a preallocated buffer with an escaped version of
 * the given string.
 *
 * buf/bufsize describe the output buffer; url/len describe the input bytes
 * (len bytes are read, no terminator is required). Every input byte that is
 * below 0x20, above 0x7E, or whose escapees[] class intersects mask is
 * written as "%XX" (upper-case hex); every other byte is copied unchanged.
 * The output is always NUL-terminated on success.
 *
 * For canonical escaping, mask should be RFC1738_ENCODE | RFC1738_RESERVED.
 * For mandatory escaping, mask should be RFC1738_RESERVED.
 *
 * Returns buf on success, or NULL if the escaped string plus its terminator
 * does not fit in bufsize bytes. On failure the contents of buf are
 * unspecified.
 */
static char *
rfc1738_do_escape(char *buf, size_t bufsize,
		  const char *url, size_t len, unsigned char mask)
{
	size_t i;
	size_t j = 0;

	/*
	 * Even an empty input needs one byte for the terminator. Without
	 * this check a zero-sized buffer combined with len == 0 would skip
	 * the loop and write the final NUL out of bounds.
	 */
	if (bufsize == 0) {
		return NULL;
	}

	for (i = 0; i < len; i++) {
		/* Go via unsigned char so high bytes never index negatively. */
		unsigned int c = (unsigned char) url[i];

		if (c > 126 || c < 32 || (escapees[c] & mask)) {
			/*
			 * snprintf() writes three characters and a NUL, so
			 * buf[j] to buf[j + 3] must all be inside the buffer.
			 */
			if (j + 3 >= bufsize) {
				return NULL;
			}
			(void) snprintf(&buf[j], 4, "%%%02X", c);
			j += 3;
		} else {
			/* One literal byte, keeping room for the final NUL. */
			if (j + 1 >= bufsize) {
				return NULL;
			}
			buf[j] = c;
			j++;
		}
	}
	buf[j] = '\0';
	return buf;
}
/*
 * rfc1738_escape_part - percent-encode a single url segment.
 *
 * Allocates a buffer on mem_ctx and fills it with the RFC 3986 escaped
 * form of url, escaping both the RFC1738_ENCODE and RFC1738_RESERVED
 * character classes (canonical escaping).
 *
 * Returns the talloced string, or NULL if url is too long for the output
 * size to be computed, if the allocation fails, or if rfc1738_do_escape()
 * reports failure.
 */
char *
rfc1738_escape_part(TALLOC_CTX *mem_ctx, const char *url)
{
	const unsigned char mask = RFC1738_ENCODE | RFC1738_RESERVED;
	size_t url_len = strlen(url);
	size_t out_size;
	char *out;

	/* Worst case is three output bytes per input byte, plus the NUL. */
	if (url_len >= SIZE_MAX / 3) {
		return NULL;
	}
	out_size = 3 * url_len + 1;

	out = talloc_array(mem_ctx, char, out_size);
	if (out == NULL) {
		return NULL;
	}
	/* The buffer itself is handed to talloc as the chunk's name. */
	talloc_set_name_const(out, out);

	return rfc1738_do_escape(out, out_size, url, url_len, mask);
}
/*
 * rfc1738_unescape() - decodes url-escaped (percent-encoded) bytes in place.
 *
 * Every '%' in s must be followed by exactly two hex digits, which together
 * give the value of the decoded byte: "%25" is 0x25, i.e. '%' in ASCII, and
 * is the only way to get a literal % into the result. Any byte may be
 * written this way, including ordinary letters ("%61" is "a"). A '%' that
 * is not followed by two hex digits is an error.
 *
 * Decoding happens inside the caller's buffer. This is always safe because
 * a three byte escape becomes a single byte, so the write position can
 * never overtake the read position.
 *
 * Returns a pointer to the terminating '\0' of the decoded string, or NULL
 * on error, in which case s is left in an undefined state.
 *
 * Note that after `char *e = rfc1738_unescape(s)`, `strlen(s)` will not
 * equal `e - s` if s originally contained "%00". You might want to check
 * for this.
 */
_PUBLIC_ char *rfc1738_unescape(char *s)
{
	const char *src = s;	/* next byte to read */
	char *dst = s;		/* next byte to write */

	while (*src != '\0') {
		uint8_t byte;

		if (*src != '%') {
			*dst++ = *src++;
			continue;
		}

		/* hex_byte() validates both digits that follow the '%'. */
		if (!hex_byte(src + 1, &byte)) {
			return NULL;
		}
		*dst++ = (unsigned char)byte;
		src += 3;	/* the '%' and its two hex digits */
	}

	*dst = '\0';
	return dst;
}