/*
 * Functions for RFC 3986 percent-encoding.
 *
 * NOTE:
 *
 * This file was originally imported from the Squid project but has been
 * significantly altered. The licence below is reproduced intact, but refers
 * to files in Squid's repository, not in Samba. See COPYING for the GPLv3
 * notice (being the later version mentioned below).
 */

/*
 * $Id$
 *
 * DEBUG:
 * AUTHOR: Harvest Derived
 *
 * SQUID Web Proxy Cache          http://www.squid-cache.org/
 * ----------------------------------------------------------
 *
 *  Squid is the result of efforts by numerous individuals from
 *  the Internet community; see the CONTRIBUTORS file for full
 *  details.   Many organizations have provided support for Squid's
 *  development; see the SPONSORS file for full details.  Squid is
 *  Copyrighted (C) 2001 by the Regents of the University of
 *  California; see the COPYRIGHT file for full details.  Squid
 *  incorporates software developed and/or copyrighted by other
 *  sources; see the CREDITS file for full details.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
 *
 */

#include "replace.h"
#include <talloc.h>
#include "lib/util/samba_util.h"

#define RFC1738_ENCODE 1
#define RFC1738_RESERVED 2

/*
 * According to RFC 1738, "$-_.+!*'()," are not reserved or unsafe, but as
 * that has been obsolete since 2004, we sm instead for RFC 3986, where:
 *
 *  reserved =    : / ? # [ ] @ ! $ & ' ( ) * + , ; =
 *  unreserved = ALPHA DIGIT - . _ ~
 *
 * and whatever is not in either of those are what RFC 1738 called "unsafe",
 * meaning that they should are canonically but not mandatorily escaped.
 *
 * Characters below 0x20 or above 0x7E are always enocded.
 */

static const unsigned char escapees[127] = {
	[' '] = RFC1738_ENCODE,
	['"'] = RFC1738_ENCODE,
	['%'] = RFC1738_ENCODE,
	['<'] = RFC1738_ENCODE,
	['>'] = RFC1738_ENCODE,
	['\\'] = RFC1738_ENCODE,
	['^'] = RFC1738_ENCODE,
	['`'] = RFC1738_ENCODE,
	['{'] = RFC1738_ENCODE,
	['|'] = RFC1738_ENCODE,
	['}'] = RFC1738_ENCODE,
	/* reserved : / ? # [ ] @ ! $ & ' ( ) * + , ; = */
	[':'] = RFC1738_RESERVED,
	['/'] = RFC1738_RESERVED,
	['?'] = RFC1738_RESERVED,
	['#'] = RFC1738_RESERVED,
	['['] = RFC1738_RESERVED,
	[']'] = RFC1738_RESERVED,
	['@'] = RFC1738_RESERVED,
	['!'] = RFC1738_RESERVED,
	['$'] = RFC1738_RESERVED,
	['&'] = RFC1738_RESERVED,
	['\''] = RFC1738_RESERVED,
	['('] = RFC1738_RESERVED,
	[')'] = RFC1738_RESERVED,
	['*'] = RFC1738_RESERVED,
	['+'] = RFC1738_RESERVED,
	[','] = RFC1738_RESERVED,
	[';'] = RFC1738_RESERVED,
	['='] = RFC1738_RESERVED,
};

/*
 *  rfc1738_do_escape - fills a preallocated buffer with an escaped version of
 *  the given string.
 *
 *  For canonical escaping, mask should be RFC1738_ENCODE | RFC1738_RESERVED.
 *  For mandatory escaping, mask should be RFC1738_RESERVED.
 */
static char *
rfc1738_do_escape(char *buf, size_t bufsize,
		  const char *url, size_t len, unsigned char mask)
{
	size_t i;
	size_t j = 0;
	for (i = 0; i < len; i++) {
		unsigned int c = (unsigned char) url[i];
		if (c > 126 || c < 32 || (escapees[c] & mask)) {
			if (j + 3 >= bufsize) {
				return NULL;
			}
			(void) snprintf(&buf[j], 4, "%%%02X", c);
			j += 3;
		} else {
			if (j + 1 >= bufsize) {
				return NULL;
			}
			buf[j] = c;
			j++;
		}
	}
	buf[j] = '\0';
	return buf;
}

/*
 * rfc1738_escape_part - Returns a talloced buffer that contains the RFC 3986
 * compliant, escaped version of the given url segment.
 */
char *
rfc1738_escape_part(TALLOC_CTX *mem_ctx, const char *url)
{
	size_t bufsize = 0;
	char *buf = NULL;

	size_t len = strlen(url);
	if (len >= SIZE_MAX / 3) {
		return NULL;
	}

	bufsize = len * 3 + 1;
	buf = talloc_array(mem_ctx, char, bufsize);
	if (buf == NULL) {
		return NULL;
	}

	talloc_set_name_const(buf, buf);

	return rfc1738_do_escape(buf, bufsize, url, len,
				 RFC1738_ENCODE | RFC1738_RESERVED);
}

/*
 * rfc1738_unescape() - Converts url-escaped characters in the string.
 *
 * The two characters following a '%' in a string should be hex digits that
 * describe an encoded byte. For example, "%25" is hex 0x25 or '%' in ASCII;
 * this is the only way to include a % in the unescaped string. Any character
 * can be escaped, including plain letters (e.g. "%61" for "a"). Anything
 * other than 2 hex characters following the % is an error.
 *
 * The conversion is done in-place, which is always safe as unescapes can only
 * shorten the string.
 *
 * Returns a pointer to the end of the string (that is, the '\0' byte), or
 * NULL on error, at which point s is in an undefined state.
 *
 * Note that after `char *e = rfc_unescape(s)`, `strlen(s)` will not equal
 * `e - s` if s originally contained "%00". You might want to check for this.
 */

_PUBLIC_ char *rfc1738_unescape(char *s)
{
	size_t i, j;	    /* i is write, j is read */
	for (i = 0, j = 0; s[j] != '\0'; i++, j++) {
		if (s[j] == '%') {
			uint8_t v;
			bool ok;

			ok = hex_byte(&s[j+1], &v);
			if (!ok) {
				return NULL;
			}
			j += 2; /* OK; hex_byte() has checked ahead */
			s[i] = (unsigned char)v;
		} else {
			s[i] = s[j];
		}
	}
	s[i] = '\0';
	return s + i;
}