1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-01-11 05:17:37 +03:00
libxml2/uri.c

2706 lines
67 KiB
C
Raw Normal View History

2001-02-23 20:55:21 +03:00
/**
 * uri.c: set of generic URI related routines
 *
 * Reference: RFCs 3986, 2732 and 2373
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
#define IN_LIBXML
2001-04-21 20:57:29 +04:00
#include "libxml.h"
#include <limits.h>
2001-02-23 20:55:21 +03:00
#include <string.h>
#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/xmlerror.h>
#include "private/error.h"
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice they are usually relatively short except for the
 * data URI scheme as defined in RFC 2397. Even for data URI the usual
 * maximum size before hitting random practical limits is around 64 KB
 * and 4KB is usually a maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * value inside any surrounding expression.
 */
#define MAX_URI_LENGTH (1024 * 1024)

/* Port value of an xmlURI for which no port was recorded. */
#define PORT_EMPTY           0
/* Port value marking an authority that is present but has an empty server. */
#define PORT_EMPTY_SERVER  (-1)
static void xmlCleanURI(xmlURIPtr uri);
2001-02-23 20:55:21 +03:00
/*
 * Old rules from RFC 2396 used in legacy handling code.
 * These macros take a character value, except IS_UNWISE which takes a
 * pointer to the character. Every macro evaluates its argument several
 * times, so the argument must be free of side effects.
 */

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/* Drop any IS_DIGIT inherited from an earlier header before redefining it. */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif

/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') ||                    \
     ((x) == '!') || ((x) == '~') || ((x) == '*') ||                    \
     ((x) == '\'') || ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "`"
 *
 * This macro additionally accepts "[" and "]".
 * Unlike its neighbours it takes a pointer to the character.
 */
#define IS_UNWISE(p)                                                    \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||           \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||          \
     ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||    \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||    \
     ((x) == '$') || ((x) == ',') || ((x) == '[') || ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
/*
 * Skip to next pointer char, handle escaped sequences.
 *
 * When the current character is '%' the pointer is advanced by three
 * bytes without looking at what follows the '%'; otherwise it is
 * advanced by one. The argument is a modifiable pointer lvalue and is
 * evaluated more than once.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/* Duplicate the first @n bytes of @s through xmlStrndup, typed as char *. */
#define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
/************************************************************************
* *
* RFC 3986 parser *
* *
************************************************************************/
/*
 * Character class tests for the RFC 3986 parser. Every macro below takes
 * a pointer to the character to test and may evaluate it several times.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))

#define ISA_ALPHA(p)                                                    \
    (((*(p) >= 'a') && (*(p) <= 'z')) ||                                \
     ((*(p) >= 'A') && (*(p) <= 'Z')))

#define ISA_HEXDIG(p)                                                   \
    (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||                \
     ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
    (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||           \
     ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||           \
     ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||           \
     ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
    (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||           \
     ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||           \
     ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_STRICTLY_UNRESERVED(p)                                      \
    ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||             \
     ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two following bytes are only read when the previous test
 * succeeded, so a NUL right after the '%' stops the evaluation.
 */
#define ISA_PCT_ENCODED(p)                                              \
    ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(u, p)                                                 \
    (ISA_UNRESERVED(u, p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||  \
     ((*(p) == ':')) || ((*(p) == '@')))

/*
 * From https://www.w3.org/TR/leiri/
 *
 * " " / "<" / ">" / '"' / "{" / "}" / "|"
 * / "\" / "^" / "`" / %x0-1F / %x7F-D7FF
 * / %xE000-FFFD / %x10000-10FFFF
 *
 * The test is done byte by byte. Note that `*(p) <= 0x20` is also true
 * for the terminating NUL byte.
 */
#define ISA_UCSCHAR(p)                                                  \
    ((*(p) <= 0x20) || (*(p) >= 0x7F) || (*(p) == '<') ||               \
     (*(p) == '>') || (*(p) == '"') || (*(p) == '{') ||                 \
     (*(p) == '}') || (*(p) == '|') || (*(p) == '\\') ||                \
     (*(p) == '^') || (*(p) == '`'))

/* "unreserved" as seen by the parser: depends on the URI cleanup flags. */
#define ISA_UNRESERVED(u, p) (xmlIsUnreserved(u, p))

/* Bit flags stored in the cleanup field of an xmlURI. */
#define XML_URI_ALLOW_UNWISE    1
#define XML_URI_NO_UNESCAPE     2
#define XML_URI_ALLOW_UCSCHAR   4
/**
 * xmlIsUnreserved:
 * @uri:  URI being parsed, its cleanup flags select the accepted set
 * @cur:  pointer to the character to test
 *
 * Check whether the character at @cur counts as "unreserved". The strict
 * RFC 3986 set is always accepted. In addition the unwise characters are
 * accepted when XML_URI_ALLOW_UNWISE is set in uri->cleanup; otherwise
 * the LEIRI ucschar set is accepted when XML_URI_ALLOW_UCSCHAR is set.
 *
 * Returns 1 if the character is accepted, 0 otherwise. 0 is also
 * returned when @uri is NULL or when @cur points at the terminating NUL.
 */
static int
xmlIsUnreserved(xmlURIPtr uri, const char *cur) {
    if (uri == NULL)
        return(0);

    /*
     * The string terminator is never part of a URI component. Without
     * this guard ISA_UCSCHAR would accept it (0 <= 0x20) and the callers'
     * scanning loops would walk past the end of the string.
     */
    if (*cur == 0)
        return(0);

    if (ISA_STRICTLY_UNRESERVED(cur))
        return(1);

    if (uri->cleanup & XML_URI_ALLOW_UNWISE) {
        if (IS_UNWISE(cur))
            return(1);
    } else if (uri->cleanup & XML_URI_ALLOW_UCSCHAR) {
        if (ISA_UCSCHAR(cur))
            return(1);
    }

    return(0);
}
/**
 * xmlParse3986Scheme:
 * @uri:  pointer to an URI structure, may be NULL to only validate
 * @str:  pointer to the string to analyze, advanced past the scheme
 *
 * Parse an URI scheme
 *
 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 *
 * Returns 0 on success, 1 if the input does not start with a scheme,
 * -1 if the scheme could not be duplicated.
 */
static int
xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
    const char *start = *str;
    const char *end = start;

    /* a scheme must begin with a letter */
    if (!ISA_ALPHA(end))
        return(1);

    for (end++; ; end++) {
        if (ISA_ALPHA(end) || ISA_DIGIT(end))
            continue;
        if ((*end == '+') || (*end == '-') || (*end == '.'))
            continue;
        break;
    }

    if (uri != NULL) {
        if (uri->scheme != NULL)
            xmlFree(uri->scheme);
        uri->scheme = STRNDUP(start, end - start);
        if (uri->scheme == NULL)
            return(-1);
    }

    *str = end;
    return(0);
}
/**
 * xmlParse3986Fragment:
 * @uri:  pointer to an URI structure, may be NULL to only validate
 * @str:  pointer to the string to analyze, advanced past the fragment
 *
 * Parse the fragment part of an URI
 *
 * fragment      = *( pchar / "/" / "?" )
 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
 *       in the fragment identifier but this is used very broadly for
 *       xpointer scheme selection, so we are allowing it here to not break
 *       for example all the DocBook processing chains.
 *
 * Returns 0 on success, -1 if the fragment could not be stored.
 */
static int
xmlParse3986Fragment(xmlURIPtr uri, const char **str)
{
    const char *start = *str;
    const char *end = start;

    for (;;) {
        if (!(ISA_PCHAR(uri, end)) && (*end != '/') && (*end != '?') &&
            (*end != '[') && (*end != ']'))
            break;
        NEXT(end);
    }

    if (uri != NULL) {
        if (uri->fragment != NULL)
            xmlFree(uri->fragment);
        /* keep the text verbatim in raw mode, otherwise unescape it */
        if (uri->cleanup & XML_URI_NO_UNESCAPE)
            uri->fragment = STRNDUP(start, end - start);
        else
            uri->fragment = xmlURIUnescapeString(start, end - start, NULL);
        if (uri->fragment == NULL)
            return (-1);
    }

    *str = end;
    return (0);
}
/**
 * xmlParse3986Query:
 * @uri:  pointer to an URI structure, may be NULL to only validate
 * @str:  pointer to the string to analyze, advanced past the query
 *
 * Parse the query part of an URI
 *
 * query = *uric
 *
 * Both the (possibly unescaped) query and a verbatim copy of it
 * (query_raw) are stored in @uri.
 *
 * Returns 0 on success, -1 if one of the copies could not be stored.
 */
static int
xmlParse3986Query(xmlURIPtr uri, const char **str)
{
    const char *start = *str;
    const char *end = start;

    for (;;) {
        if (!(ISA_PCHAR(uri, end)) && (*end != '/') && (*end != '?'))
            break;
        NEXT(end);
    }

    if (uri != NULL) {
        if (uri->query != NULL)
            xmlFree(uri->query);
        if (uri->cleanup & XML_URI_NO_UNESCAPE)
            uri->query = STRNDUP(start, end - start);
        else
            uri->query = xmlURIUnescapeString(start, end - start, NULL);
        if (uri->query == NULL)
            return (-1);

        /* Save the raw bytes of the query as well.
         * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
         */
        if (uri->query_raw != NULL)
            xmlFree (uri->query_raw);
        uri->query_raw = STRNDUP (start, end - start);
        if (uri->query_raw == NULL)
            return (-1);
    }

    *str = end;
    return (0);
}
2001-02-23 20:55:21 +03:00
/**
 * xmlParse3986Port:
 * @uri:  pointer to an URI structure, may be NULL to only validate
 * @str:  the string to analyze, advanced past the port digits
 *
 * Parse a port part and fills in the appropriate fields
 * of the @uri structure
 *
 * port          = *DIGIT
 *
 * At least one digit is required by this implementation, and the value
 * must fit in an int.
 *
 * Returns 0 on success, 1 if there is no digit or the value overflows.
 */
static int
xmlParse3986Port(xmlURIPtr uri, const char **str)
{
    const char *cur = *str;
    int value = 0;

    if (!ISA_DIGIT(cur))
        return(1);

    do {
        int digit = *cur - '0';

        /* check before each step so that value never overflows */
        if (value > INT_MAX / 10)
            return(1);
        value *= 10;
        if (value > INT_MAX - digit)
            return(1);
        value += digit;
        cur++;
    } while (ISA_DIGIT(cur));

    if (uri != NULL)
        uri->port = value;
    *str = cur;
    return(0);
}
/**
 * xmlParse3986Userinfo:
 * @uri:  pointer to an URI structure, may be NULL to only validate
 * @str:  the string to analyze
 *
 * Parse an user information part and fills in the appropriate fields
 * of the @uri structure
 *
 * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
 *
 * The userinfo is only accepted when it is followed by '@'; on success
 * @str is left pointing at that '@'.
 *
 * Returns 0 on success, 1 if no '@' follows, -1 if the user string
 * could not be stored.
 */
static int
xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
{
    const char *start = *str;
    const char *end = start;

    while (ISA_UNRESERVED(uri, end) || ISA_PCT_ENCODED(end) ||
           ISA_SUB_DELIM(end) || (*end == ':'))
        NEXT(end);

    /* without the trailing '@' this was not a userinfo */
    if (*end != '@')
        return(1);

    if (uri != NULL) {
        if (uri->user != NULL)
            xmlFree(uri->user);
        if (uri->cleanup & XML_URI_NO_UNESCAPE)
            uri->user = STRNDUP(start, end - start);
        else
            uri->user = xmlURIUnescapeString(start, end - start, NULL);
        if (uri->user == NULL)
            return(-1);
    }

    *str = end;
    return(0);
}
2001-02-23 20:55:21 +03:00
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze, advanced past the octet on success
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are consumed; @str is left
 * untouched on failure.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int len;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    /*
     * cur[1] is only inspected after cur[0] was seen to be a digit, and
     * cur[2] only after cur[1] was, so the terminating NUL is never
     * skipped over.
     */
    if ((cur[1] < '0') || (cur[1] > '9')) {
        /* DIGIT */
        len = 1;
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        /* two digits: no leading zero allowed */
        if (cur[0] == '0')
            return(1);
        len = 2;
    } else if (cur[0] == '1') {
        /* 100-199 */
        len = 3;
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        /* 200-249 */
        len = 3;
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        /* 250-255: the bound applies to the third digit */
        len = 3;
    } else {
        return(1);
    }

    *str = cur + len;
    return(0);
}
/**
 * xmlParse3986Host:
 * @uri:  pointer to an URI structure, may be NULL to only validate
 * @str:  the string to analyze, advanced past the host
 *
 * Parse an host part and fills in the appropriate fields
 * of the @uri structure
 *
 * host          = IP-literal / IPv4address / reg-name
 * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
 * reg-name      = *( unreserved / pct-encoded / sub-delims )
 *
 * The content of an IP-literal is not validated, only the closing
 * bracket is looked for. On success uri->authority is reset and
 * uri->server holds the host, or NULL when the host is empty.
 *
 * Returns 0 on success, 1 on an unterminated IP-literal, -1 if the
 * server string could not be stored.
 */
static int
xmlParse3986Host(xmlURIPtr uri, const char **str)
{
    const char *cur = *str;
    const char *host = cur;
    int octet;

    /*
     * IPv6 and future addressing scheme are enclosed between brackets
     */
    if (*cur == '[') {
        cur++;
        while ((*cur != ']') && (*cur != 0))
            cur++;
        if (*cur != ']')
            return(1);
        cur++;
        goto found;
    }

    /*
     * try to parse an IPv4
     */
    if (ISA_DIGIT(cur)) {
        for (octet = 0; octet < 4; octet++) {
            if (octet > 0) {
                /* every octet but the first is preceded by a dot */
                if (*cur != '.')
                    goto not_ipv4;
                cur++;
            }
            if (xmlParse3986DecOctet(&cur) != 0)
                goto not_ipv4;
        }
        /*
         * Four octets followed by more host characters (for example
         * "1.2.3.4.example.org") form a reg-name, not an IPv4 address.
         */
        if (!(ISA_UNRESERVED(uri, cur) ||
              ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur)))
            goto found;
not_ipv4:
        cur = *str;
    }

    /*
     * then this should be a hostname which can be empty
     */
    while (ISA_UNRESERVED(uri, cur) ||
           ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
        NEXT(cur);

found:
    if (uri != NULL) {
        if (uri->authority != NULL)
            xmlFree(uri->authority);
        uri->authority = NULL;
        if (uri->server != NULL)
            xmlFree(uri->server);
        if (cur != host) {
            if (uri->cleanup & XML_URI_NO_UNESCAPE)
                uri->server = STRNDUP(host, cur - host);
            else
                uri->server = xmlURIUnescapeString(host, cur - host, NULL);
            if (uri->server == NULL)
                return(-1);
        } else {
            uri->server = NULL;
        }
    }
    *str = cur;
    return(0);
}
/**
 * xmlParse3986Authority:
 * @uri:  pointer to an URI structure, may be NULL to only validate
 * @str:  the string to analyze, advanced past the authority
 *
 * Parse an authority part and fills in the appropriate fields
 * of the @uri structure
 *
 * authority     = [ userinfo "@" ] host [ ":" port ]
 *
 * Returns 0 or the error code
 */
static int
xmlParse3986Authority(xmlURIPtr uri, const char **str)
{
    const char *cur = *str;
    int ret;

    /*
     * try to parse an userinfo and check for the trailing @
     */
    ret = xmlParse3986Userinfo(uri, &cur);
    if (ret < 0)
        return(ret);
    if ((ret == 0) && (*cur == '@'))
        cur++;
    else
        cur = *str;     /* no userinfo: the host starts at the beginning */

    ret = xmlParse3986Host(uri, &cur);
    if (ret != 0)
        return(ret);

    if (*cur == ':') {
        cur++;
        ret = xmlParse3986Port(uri, &cur);
        if (ret != 0)
            return(ret);
    }

    *str = cur;
    return(0);
}
/**
 * xmlParse3986Segment:
 * @uri:  pointer to an URI structure, used to select the accepted
 *        character set
 * @str:  the string to analyze, advanced past the segment on success
 * @forbid: an optional forbidden character (0 for none)
 * @empty: allow an empty segment
 *
 * Skip a path segment. Nothing is stored in @uri.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Returns 0 on success, 1 if the segment is empty while @empty is 0
 */
static int
xmlParse3986Segment(xmlURIPtr uri, const char **str, char forbid, int empty)
{
    const char *cur = *str;

    /*
     * A segment starting with the forbidden character is an empty
     * segment too: it must not be reported as a successful match when
     * a non-empty segment is required.
     */
    if (!ISA_PCHAR(uri, cur) || (*cur == forbid)) {
        if (empty)
            return(0);
        return(1);
    }

    while (ISA_PCHAR(uri, cur) && (*cur != forbid))
        NEXT(cur);

    *str = cur;
    return (0);
}
/**
 * xmlParse3986PathAbEmpty:
 * @uri:  pointer to an URI structure, may be NULL to only validate
 * @str:  the string to analyze, advanced past the path
 *
 * Parse an path absolute or empty and fills in the appropriate fields
 * of the @uri structure
 *
 * path-abempty  = *( "/" segment )
 *
 * Returns 0 or the error code
 */
static int
xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
{
    const char *start = *str;
    const char *end = start;
    int ret;

    while (*end == '/') {
        end++;
        ret = xmlParse3986Segment(uri, &end, 0, 1);
        if (ret != 0)
            return(ret);
    }

    if (uri != NULL) {
        if (uri->path != NULL)
            xmlFree(uri->path);
        uri->path = NULL;
        /* an empty path is recorded as NULL */
        if (end != start) {
            if (uri->cleanup & XML_URI_NO_UNESCAPE)
                uri->path = STRNDUP(start, end - start);
            else
                uri->path = xmlURIUnescapeString(start, end - start, NULL);
            if (uri->path == NULL)
                return (-1);
        }
    }

    *str = end;
    return (0);
}
/**
 * xmlParse3986PathAbsolute:
 * @uri:  pointer to an URI structure, may be NULL to only validate
 * @str:  the string to analyze, advanced past the path
 *
 * Parse an path absolute and fills in the appropriate fields
 * of the @uri structure
 *
 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
 *
 * Returns 0 or the error code
 */
static int
xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
{
    const char *start = *str;
    const char *end = start;
    int ret;

    if (*end != '/')
        return(1);
    end++;

    /* the first segment is optional: a lone "/" is a valid path */
    if (xmlParse3986Segment(uri, &end, 0, 0) == 0) {
        while (*end == '/') {
            end++;
            ret = xmlParse3986Segment(uri, &end, 0, 1);
            if (ret != 0)
                return(ret);
        }
    }

    if (uri != NULL) {
        if (uri->path != NULL)
            xmlFree(uri->path);
        uri->path = NULL;
        if (end != start) {
            if (uri->cleanup & XML_URI_NO_UNESCAPE)
                uri->path = STRNDUP(start, end - start);
            else
                uri->path = xmlURIUnescapeString(start, end - start, NULL);
            if (uri->path == NULL)
                return (-1);
        }
    }

    *str = end;
    return (0);
}
/**
 * xmlParse3986PathRootless:
 * @uri:  pointer to an URI structure, may be NULL to only validate
 * @str:  the string to analyze, advanced past the path
 *
 * Parse an path without root and fills in the appropriate fields
 * of the @uri structure
 *
 * path-rootless = segment-nz *( "/" segment )
 *
 * Returns 0 or the error code
 */
static int
xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
{
    const char *start = *str;
    const char *end = start;
    int ret;

    /* the leading segment must not be empty */
    ret = xmlParse3986Segment(uri, &end, 0, 0);
    if (ret != 0)
        return(ret);

    while (*end == '/') {
        end++;
        ret = xmlParse3986Segment(uri, &end, 0, 1);
        if (ret != 0)
            return(ret);
    }

    if (uri != NULL) {
        if (uri->path != NULL)
            xmlFree(uri->path);
        uri->path = NULL;
        if (end != start) {
            if (uri->cleanup & XML_URI_NO_UNESCAPE)
                uri->path = STRNDUP(start, end - start);
            else
                uri->path = xmlURIUnescapeString(start, end - start, NULL);
            if (uri->path == NULL)
                return (-1);
        }
    }

    *str = end;
    return (0);
}
/**
 * xmlParse3986PathNoScheme:
 * @uri:  pointer to an URI structure, may be NULL to only validate
 * @str:  the string to analyze, advanced past the path
 *
 * Parse an path which is not a scheme and fills in the appropriate fields
 * of the @uri structure
 *
 * path-noscheme = segment-nz-nc *( "/" segment )
 *
 * Returns 0 or the error code
 */
static int
xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
{
    const char *start = *str;
    const char *end = start;
    int ret;

    /* no colon in the first segment, it would read as a scheme */
    ret = xmlParse3986Segment(uri, &end, ':', 0);
    if (ret != 0)
        return(ret);

    while (*end == '/') {
        end++;
        ret = xmlParse3986Segment(uri, &end, 0, 1);
        if (ret != 0)
            return(ret);
    }

    if (uri != NULL) {
        if (uri->path != NULL)
            xmlFree(uri->path);
        uri->path = NULL;
        if (end != start) {
            if (uri->cleanup & XML_URI_NO_UNESCAPE)
                uri->path = STRNDUP(start, end - start);
            else
                uri->path = xmlURIUnescapeString(start, end - start, NULL);
            if (uri->path == NULL)
                return (-1);
        }
    }

    *str = end;
    return (0);
}
/**
 * xmlParse3986HierPart:
 * @uri:  pointer to an URI structure, may be NULL to only validate
 * @str:  the string to analyze, advanced past the hierarchical part
 *
 * Parse an hierarchical part and fills in the appropriate fields
 * of the @uri structure
 *
 * hier-part     = "//" authority path-abempty
 *                / path-absolute
 *                / path-rootless
 *                / path-empty
 *
 * Returns 0 or the error code
 */
static int
xmlParse3986HierPart(xmlURIPtr uri, const char **str)
{
    const char *cur = *str;
    int ret;

    if ((cur[0] == '/') && (cur[1] == '/')) {
        cur += 2;
        ret = xmlParse3986Authority(uri, &cur);
        if (ret != 0)
            return(ret);
        /*
         * An empty server is marked with a special URI value.
         * Like every other access in the parser, this one is skipped
         * when no URI structure was supplied.
         */
        if ((uri != NULL) &&
            (uri->server == NULL) && (uri->port == PORT_EMPTY))
            uri->port = PORT_EMPTY_SERVER;
        ret = xmlParse3986PathAbEmpty(uri, &cur);
        if (ret != 0)
            return(ret);
    } else if (*cur == '/') {
        ret = xmlParse3986PathAbsolute(uri, &cur);
        if (ret != 0)
            return(ret);
    } else if (ISA_PCHAR(uri, cur)) {
        ret = xmlParse3986PathRootless(uri, &cur);
        if (ret != 0)
            return(ret);
    } else {
        /* path-empty is effectively empty */
        if (uri != NULL) {
            if (uri->path != NULL)
                xmlFree(uri->path);
            uri->path = NULL;
        }
    }

    *str = cur;
    return (0);
}
/**
 * xmlParse3986RelativeRef:
 * @uri:  pointer to an URI structure
 * @str:  the string to analyze
 *
 * Parse an URI string and fills in the appropriate fields
 * of the @uri structure
 *
 * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
 * relative-part = "//" authority path-abempty
 *               / path-absolute
 *               / path-noscheme
 *               / path-empty
 *
 * The whole string must be consumed; if characters remain after the
 * optional fragment, @uri is cleaned and 1 is returned.
 *
 * Returns 0 or the error code
 */
static int
xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
    int ret = 0;

    /* relative-part */
    if (str[0] == '/') {
        if (str[1] == '/') {
            str += 2;
            ret = xmlParse3986Authority(uri, &str);
            if (ret == 0)
                ret = xmlParse3986PathAbEmpty(uri, &str);
        } else {
            ret = xmlParse3986PathAbsolute(uri, &str);
        }
    } else if (ISA_PCHAR(uri, str)) {
        ret = xmlParse3986PathNoScheme(uri, &str);
    } else if (uri != NULL) {
        /* path-empty is effectively empty */
        if (uri->path != NULL)
            xmlFree(uri->path);
        uri->path = NULL;
    }
    if (ret != 0)
        return(ret);

    if (*str == '?') {
        str++;
        ret = xmlParse3986Query(uri, &str);
        if (ret != 0)
            return(ret);
    }

    if (*str == '#') {
        str++;
        ret = xmlParse3986Fragment(uri, &str);
        if (ret != 0)
            return(ret);
    }

    if (*str == 0)
        return(0);

    /* trailing garbage */
    xmlCleanURI(uri);
    return(1);
}
2001-02-23 20:55:21 +03:00
/**
 * xmlParse3986URI:
 * @uri:  pointer to an URI structure
 * @str:  the string to analyze
 *
 * Parse an URI string and fills in the appropriate fields
 * of the @uri structure
 *
 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 *
 * The whole string must be consumed; if characters remain after the
 * optional fragment, @uri is cleaned and 1 is returned.
 *
 * Returns 0 or the error code
 */
static int
xmlParse3986URI(xmlURIPtr uri, const char *str) {
    int ret;

    ret = xmlParse3986Scheme(uri, &str);
    if (ret != 0)
        return(ret);

    /* the scheme must be terminated by a colon */
    if (*str != ':')
        return(1);
    str++;

    ret = xmlParse3986HierPart(uri, &str);
    if (ret != 0)
        return(ret);

    if (*str == '?') {
        str++;
        ret = xmlParse3986Query(uri, &str);
        if (ret != 0)
            return(ret);
    }

    if (*str == '#') {
        str++;
        ret = xmlParse3986Fragment(uri, &str);
        if (ret != 0)
            return(ret);
    }

    if (*str == 0)
        return(0);

    /* trailing garbage */
    xmlCleanURI(uri);
    return(1);
}
/**
 * xmlParse3986URIReference:
 * @uri:  pointer to an URI structure
 * @str:  the string to analyze
 *
 * Parse an URI reference string and fills in the appropriate fields
 * of the @uri structure
 *
 * URI-reference = URI / relative-ref
 *
 * Returns 0 or the error code; -1 is returned when @str is NULL
 */
static int
xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
    int ret;

    if (str == NULL)
        return(-1);
    xmlCleanURI(uri);

    /*
     * Try first to parse absolute refs, then fallback to relative if
     * it fails.
     */
    ret = xmlParse3986URI(uri, str);
    if (ret <= 0)
        return(ret);    /* success, or a negative error: no fallback */

    xmlCleanURI(uri);
    ret = xmlParse3986RelativeRef(uri, str);
    if (ret != 0)
        xmlCleanURI(uri);
    return(ret);
}
2001-02-23 20:55:21 +03:00
/**
 * xmlParseURISafe:
 * @str:  the URI string to analyze
 * @uriOut:  optional pointer to parsed URI
 *
 * Parse an URI based on RFC 3986
 *
 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 *
 * Available since 2.13.0.
 *
 * *@uriOut is set to NULL unless parsing succeeds.
 *
 * Returns 0 on success, an error code (typically 1) if the URI is invalid
 * or -1 if a memory allocation failed.
 */
int
xmlParseURISafe(const char *str, xmlURIPtr *uriOut) {
    xmlURIPtr parsed;
    int status;

    if (uriOut == NULL)
        return(1);
    *uriOut = NULL;
    if (str == NULL)
        return(1);

    parsed = xmlCreateURI();
    if (parsed == NULL)
        return(-1);

    status = xmlParse3986URIReference(parsed, str);
    if (status == 0) {
        *uriOut = parsed;
        return(0);
    }

    xmlFreeURI(parsed);
    return(status);
}
/**
* xmlParseURI:
* @str: the URI string to analyze
*
* Parse an URI based on RFC 3986
*
* URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
*
* Returns a newly built xmlURIPtr or NULL in case of error
*/
xmlURIPtr
xmlParseURI(const char *str) {
xmlURIPtr uri;
xmlParseURISafe(str, &uri);
return(uri);
}
2001-02-23 20:55:21 +03:00
/**
 * xmlParseURIReference:
 * @uri:  pointer to an URI structure
 * @str:  the string to analyze
 *
 * Parse an URI reference string based on RFC 3986 and fills in the
 * appropriate fields of the @uri structure
 *
 * URI-reference = URI / relative-ref
 *
 * Returns 0 or the error code
 */
int
xmlParseURIReference(xmlURIPtr uri, const char *str) {
    int status;

    /* public entry point over the static RFC 3986 parser */
    status = xmlParse3986URIReference(uri, str);
    return(status);
}
2001-02-23 20:55:21 +03:00
/**
 * xmlParseURIRaw:
 * @str:  the URI string to analyze
 * @raw:  if 1 unescaping of URI pieces are disabled
 *
 * Parse an URI but allows to keep intact the original fragments.
 *
 * URI-reference = URI / relative-ref
 *
 * Returns a newly built xmlURIPtr or NULL in case of error
 */
xmlURIPtr
xmlParseURIRaw(const char *str, int raw) {
    xmlURIPtr parsed;

    if (str == NULL)
        return(NULL);

    parsed = xmlCreateURI();
    if (parsed == NULL)
        return(NULL);

    /* any non-zero @raw turns unescaping off */
    if (raw)
        parsed->cleanup |= XML_URI_NO_UNESCAPE;

    if (xmlParseURIReference(parsed, str) != 0) {
        xmlFreeURI(parsed);
        return(NULL);
    }
    return(parsed);
}
/************************************************************************
* *
* Generic URI structure functions *
* *
************************************************************************/
/**
* xmlCreateURI:
*
* Simply creates an empty xmlURI
*
* Returns the new structure or NULL in case of error
*/
xmlURIPtr
xmlCreateURI(void) {
xmlURIPtr ret;
ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
if (ret == NULL)
return(NULL);
memset(ret, 0, sizeof(xmlURI));
ret->port = PORT_EMPTY;
return(ret);
}
/**
 * xmlSaveUriRealloc:
 * @ret:  the buffer to grow
 * @max:  in/out capacity of the buffer, not counting the extra byte
 *        kept for the terminator
 *
 * Function to handle properly a reallocation when saving an URI
 * Also imposes some limit on the length of an URI string output:
 * the capacity is doubled unless it already exceeds MAX_URI_LENGTH.
 *
 * Returns the reallocated buffer, or NULL if the limit was reached or
 * the reallocation failed; @ret and *@max are left untouched then.
 */
static xmlChar *
xmlSaveUriRealloc(xmlChar *ret, int *max) {
    xmlChar *grown;
    int newMax;

    if (*max > MAX_URI_LENGTH)
        return(NULL);

    newMax = *max * 2;
    grown = (xmlChar *) xmlRealloc(ret, (newMax + 1));
    if (grown != NULL)
        *max = newMax;
    return(grown);
}
2001-02-23 20:55:21 +03:00
/* The RFC 2396 "reserved" set, kept as-is in opaque, query and fragment. */
static const char xmlSaveUriReservedChars[] = ";/?:@&=+$,[]";

/*
 * Grow the output buffer (once) when @len + @need reaches *@max.
 * Returns 0 on success, -1 on failure; *@buf stays valid in both cases.
 */
static int
xmlSaveUriReserve(xmlChar **buf, int len, int *max, int need) {
    xmlChar *temp;

    if (len + need < *max)
        return(0);
    temp = xmlSaveUriRealloc(*buf, max);
    if (temp == NULL)
        return(-1);
    *buf = temp;
    return(0);
}

/*
 * Append @str verbatim to the output buffer, growing it whenever
 * *@len + @need reaches *@max. Returns 0 on success, -1 on failure.
 */
static int
xmlSaveUriAppendRaw(xmlChar **buf, int *len, int *max,
                    const char *str, int need) {
    const char *p;

    for (p = str; *p != 0; p++) {
        if (xmlSaveUriReserve(buf, *len, max, need) < 0)
            return(-1);
        (*buf)[(*len)++] = (xmlChar) *p;
    }
    return(0);
}

/*
 * Append @str to the output buffer. Characters that are unreserved or
 * listed in @extra are copied, every other byte is written as "%XX"
 * with upper case hexadecimal digits.
 * Returns 0 on success, -1 on failure.
 */
static int
xmlSaveUriAppendEscaped(xmlChar **buf, int *len, int *max,
                        const char *str, const char *extra) {
    const char *p;

    for (p = str; *p != 0; p++) {
        /* room for a full "%XX" triplet */
        if (xmlSaveUriReserve(buf, *len, max, 3) < 0)
            return(-1);
        if (IS_UNRESERVED(*p) || (strchr(extra, *p) != NULL)) {
            (*buf)[(*len)++] = *p;
        } else {
            int val = *(const unsigned char *) p;
            int hi = val / 0x10, lo = val % 0x10;

            (*buf)[(*len)++] = '%';
            (*buf)[(*len)++] = hi + (hi > 9 ? 'A' - 10 : '0');
            (*buf)[(*len)++] = lo + (lo > 9 ? 'A' - 10 : '0');
        }
    }
    return(0);
}

/**
 * xmlSaveUri:
 * @uri:  pointer to an xmlURI
 *
 * Save the URI as an escaped string
 *
 * The scheme, server and raw query are written verbatim, the other
 * components are percent-escaped. An opaque part, when present,
 * replaces authority, path and query. "//" is emitted when a server is
 * set or when the port differs from PORT_EMPTY, so that an empty
 * server is written back as such.
 *
 * Returns a new string (to be deallocated by caller), or NULL if @uri
 * is NULL, an allocation failed or the output size limit was reached.
 */
xmlChar *
xmlSaveUri(xmlURIPtr uri) {
    xmlChar *ret;
    const char *p;
    int len = 0;
    int max = 80;
    int n;

    if (uri == NULL)
        return(NULL);

    /* one extra byte is always kept for the terminator */
    ret = (xmlChar *) xmlMallocAtomic(max + 1);
    if (ret == NULL)
        return(NULL);

    if (uri->scheme != NULL) {
        if (xmlSaveUriAppendRaw(&ret, &len, &max, uri->scheme, 0) < 0)
            goto mem_error;
        if (xmlSaveUriReserve(&ret, len, &max, 0) < 0)
            goto mem_error;
        ret[len++] = ':';
    }

    if (uri->opaque != NULL) {
        if (xmlSaveUriAppendEscaped(&ret, &len, &max, uri->opaque,
                                    xmlSaveUriReservedChars) < 0)
            goto mem_error;
    } else {
        if ((uri->server != NULL) || (uri->port != PORT_EMPTY)) {
            if (xmlSaveUriReserve(&ret, len, &max, 3) < 0)
                goto mem_error;
            ret[len++] = '/';
            ret[len++] = '/';
            if (uri->user != NULL) {
                if (xmlSaveUriAppendEscaped(&ret, &len, &max, uri->user,
                                            ";:&=+$,") < 0)
                    goto mem_error;
                if (xmlSaveUriReserve(&ret, len, &max, 3) < 0)
                    goto mem_error;
                ret[len++] = '@';
            }
            if (uri->server != NULL) {
                /* TODO: escaping? */
                if (xmlSaveUriAppendRaw(&ret, &len, &max,
                                        uri->server, 0) < 0)
                    goto mem_error;
            }
            if (uri->port > 0) {
                if (xmlSaveUriReserve(&ret, len, &max, 10) < 0)
                    goto mem_error;
                /*
                 * The buffer holds max + 1 bytes, which after the check
                 * above leaves at least 12 bytes: enough for ':', the
                 * ten digits of INT_MAX and the terminator.
                 */
                n = snprintf((char *) &ret[len], max + 1 - len,
                             ":%d", uri->port);
                if (n > 0)
                    len += n;
            }
        } else if (uri->authority != NULL) {
            if (xmlSaveUriReserve(&ret, len, &max, 3) < 0)
                goto mem_error;
            ret[len++] = '/';
            ret[len++] = '/';
            if (xmlSaveUriAppendEscaped(&ret, &len, &max, uri->authority,
                                        "$,;:@&=+") < 0)
                goto mem_error;
        }

        if (uri->path != NULL) {
            p = uri->path;
            /*
             * the colon in file:///d: should not be escaped or
             * Windows accesses fail later.
             */
            if ((uri->scheme != NULL) &&
                (p[0] == '/') &&
                (((p[1] >= 'a') && (p[1] <= 'z')) ||
                 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
                (p[2] == ':') &&
                (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
                if (xmlSaveUriReserve(&ret, len, &max, 3) < 0)
                    goto mem_error;
                ret[len++] = *p++;
                ret[len++] = *p++;
                ret[len++] = *p++;
            }
            if (xmlSaveUriAppendEscaped(&ret, &len, &max, p,
                                        "/;@&=+$,") < 0)
                goto mem_error;
        }

        /* the raw query, when available, takes precedence */
        if (uri->query_raw != NULL) {
            if (xmlSaveUriReserve(&ret, len, &max, 1) < 0)
                goto mem_error;
            ret[len++] = '?';
            if (xmlSaveUriAppendRaw(&ret, &len, &max,
                                    uri->query_raw, 1) < 0)
                goto mem_error;
        } else if (uri->query != NULL) {
            if (xmlSaveUriReserve(&ret, len, &max, 3) < 0)
                goto mem_error;
            ret[len++] = '?';
            if (xmlSaveUriAppendEscaped(&ret, &len, &max, uri->query,
                                        xmlSaveUriReservedChars) < 0)
                goto mem_error;
        }
    }

    if (uri->fragment != NULL) {
        if (xmlSaveUriReserve(&ret, len, &max, 3) < 0)
            goto mem_error;
        ret[len++] = '#';
        if (xmlSaveUriAppendEscaped(&ret, &len, &max, uri->fragment,
                                    xmlSaveUriReservedChars) < 0)
            goto mem_error;
    }

    if (xmlSaveUriReserve(&ret, len, &max, 0) < 0)
        goto mem_error;
    ret[len] = 0;
    return(ret);

mem_error:
    xmlFree(ret);
    return(NULL);
}
/**
 * xmlPrintURI:
 * @stream:  a FILE* for the output
 * @uri:  pointer to an xmlURI
 *
 * Prints the URI in the stream @stream.
 *
 * The URI is serialized with xmlSaveUri(). Nothing is written if @stream
 * is NULL or if the serialization fails.
 */
void
xmlPrintURI(FILE *stream, xmlURIPtr uri) {
    xmlChar *out;

    /* Passing a NULL stream to fprintf is undefined behavior */
    if (stream == NULL)
        return;

    out = xmlSaveUri(uri);
    if (out != NULL) {
        fprintf(stream, "%s", (char *) out);
        xmlFree(out);
    }
}
/**
 * xmlCleanURI:
 * @uri:  pointer to an xmlURI
 *
 * Make sure the xmlURI struct is free of content: every string component
 * is released and its pointer reset to NULL so the struct can be reused.
 * Only the string components listed below are touched; the struct itself
 * is not freed.
 */
static void
xmlCleanURI(xmlURIPtr uri) {
    if (uri == NULL)
        return;

    if (uri->scheme != NULL)
        xmlFree(uri->scheme);
    uri->scheme = NULL;

    if (uri->server != NULL)
        xmlFree(uri->server);
    uri->server = NULL;

    if (uri->user != NULL)
        xmlFree(uri->user);
    uri->user = NULL;

    if (uri->path != NULL)
        xmlFree(uri->path);
    uri->path = NULL;

    if (uri->fragment != NULL)
        xmlFree(uri->fragment);
    uri->fragment = NULL;

    if (uri->opaque != NULL)
        xmlFree(uri->opaque);
    uri->opaque = NULL;

    if (uri->authority != NULL)
        xmlFree(uri->authority);
    uri->authority = NULL;

    if (uri->query != NULL)
        xmlFree(uri->query);
    uri->query = NULL;

    if (uri->query_raw != NULL)
        xmlFree(uri->query_raw);
    uri->query_raw = NULL;
}
/**
 * xmlFreeURI:
 * @uri:  pointer to an xmlURI
 *
 * Free up the xmlURI struct, including all of its string components.
 * Passing NULL is allowed and does nothing.
 */
void
xmlFreeURI(xmlURIPtr uri) {
    if (uri == NULL)
        return;

    /*
     * xmlCleanURI releases exactly the string components owned by the
     * struct (scheme, server, user, path, fragment, opaque, authority,
     * query, query_raw), so reuse it instead of repeating the list.
     */
    xmlCleanURI(uri);
    xmlFree(uri);
}
/************************************************************************
* *
* Helper functions *
* *
************************************************************************/
/*
 * Tell whether @c separates path segments. A forward slash always does;
 * on Windows builds a backslash also does when @isFile is non-zero.
 * Returns 1 for a separator, 0 otherwise.
 */
static int
xmlIsPathSeparator(int c, int isFile) {
#ifdef _WIN32
    if ((c == '\\') && (isFile != 0))
        return(1);
#else
    /* only consulted on Windows */
    (void) isFile;
#endif

    return((c == '/') ? 1 : 0);
}
/**
 * xmlNormalizePath:
 * @path:  pointer to the path string
 * @isFile:  true for filesystem paths, false for URIs
 *
 * Normalize a filesystem path or URI path in place. Runs of separators
 * are collapsed into a single '/', "." segments are dropped and ".."
 * segments remove the preceding segment. Leading ".." segments are kept
 * for relative paths and discarded for absolute paths. The result is
 * never longer than the input, so no allocation is done.
 *
 * Returns 0 on success or -1 if @path is NULL
 */
static int
xmlNormalizePath(char *path, int isFile) {
    char *cur, *out;
    /* Number of removable (non-"..") segments currently in the output */
    int numSeg = 0;

    if (path == NULL)
        return(-1);

    cur = path;
    out = path;

    if (*cur == 0)
        return(0);

    /* A leading separator marks an absolute path; always emit it as '/' */
    if (xmlIsPathSeparator(*cur, isFile)) {
        cur++;
        *out++ = '/';
    }

    while (*cur != 0) {
        /*
         * At this point, out is either empty or ends with a separator.
         * Collapse multiple separators first.
         */
        while (xmlIsPathSeparator(*cur, isFile)) {
#ifdef _WIN32
            /* Allow two separators at start of path */
            if ((isFile) && (out == path + 1))
                *out++ = '/';
#endif
            cur++;
        }

        if (*cur == '.') {
            if (cur[1] == 0) {
                /* Ignore "." at end of path */
                break;
            } else if (xmlIsPathSeparator(cur[1], isFile)) {
                /* Skip "./" */
                cur += 2;
                continue;
            } else if ((cur[1] == '.') &&
                       ((cur[2] == 0) ||
                        xmlIsPathSeparator(cur[2], isFile))) {
                if (numSeg > 0) {
                    /* Handle ".." by removing last segment */
                    do {
                        out--;
                    } while ((out > path) &&
                             !xmlIsPathSeparator(out[-1], isFile));
                    numSeg--;

                    if (cur[2] == 0)
                        break;
                    cur += 3;
                    continue;
                } else if (path[0] == '/') {
                    /*
                     * Ignore extraneous ".." in absolute paths.
                     *
                     * The test must look at the first output byte: it is
                     * '/' exactly when the input started with a
                     * separator. The byte at the write cursor (out[0])
                     * is leftover input that was not produced by the
                     * normalization and says nothing about the path
                     * being absolute.
                     */
                    if (cur[2] == 0)
                        break;
                    cur += 3;
                    continue;
                } else {
                    /*
                     * Keep "../" at start of relative path. The segment
                     * is copied below; pre-decrement so that the
                     * increment at the end of the loop leaves numSeg
                     * unchanged and a later ".." cannot remove it.
                     */
                    numSeg--;
                }
            }
        }

        /* Copy segment */
        while ((*cur != 0) && !xmlIsPathSeparator(*cur, isFile)) {
            *out++ = *cur++;
        }

        /* Copy separator */
        if (*cur != 0) {
            cur++;
            *out++ = '/';
        }

        numSeg++;
    }

    /* Keep "." if output is empty and it's a file */
    if ((isFile) && (out <= path))
        *out++ = '.';
    *out = 0;

    return(0);
}
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * The string is rewritten in place by xmlNormalizePath() in URI mode
 * (not filesystem mode); no new allocation is done.
 *
 * Returns 0 or an error code
 */
int
xmlNormalizeURIPath(char *path) {
    /* URI paths are not filesystem paths */
    const int isFile = 0;

    return(xmlNormalizePath(path, isFile));
}
/*
 * Returns 1 if @c is an ASCII hexadecimal digit (0-9, a-f, A-F),
 * 0 otherwise.
 */
static int is_hex(char c) {
    if (((c >= '0') && (c <= '9')) ||
        ((c >= 'a') && (c <= 'f')) ||
        ((c >= 'A') && (c <= 'F')))
        return(1);
    return(0);
}
/*
 * Numeric value of the hexadecimal digit @c. Returns 0 for any other
 * character; callers validate the digit first.
 */
static int
xmlURIHexDigitValue(char c) {
    if ((c >= '0') && (c <= '9'))
        return(c - '0');
    if ((c >= 'a') && (c <= 'f'))
        return((c - 'a') + 10);
    if ((c >= 'A') && (c <= 'F'))
        return((c - 'A') + 10);
    return(0);
}

/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. The
 * output is a direct unsigned char translation of %XX values (no encoding)
 * Note that the length of the result can only be smaller or same size as
 * the input string.
 *
 * A '%' that is not followed by two hexadecimal digits within the given
 * length is copied unchanged. When @target is non-NULL the result is
 * written there, so it must have room for the input length plus the
 * terminating 0; otherwise a new buffer is allocated.
 *
 * Returns a copy of the string, but unescaped, will return NULL only in case
 * of error (NULL @str, allocation failure, or a full string too long for
 * its length to be represented as an int)
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);

    if (len <= 0) {
        size_t fullLen = strlen(str);

        /* The length must fit an int and leave room for the final 0 */
        if (fullLen >= (size_t) INT_MAX)
            return(NULL);
        len = (int) fullLen;
    }

    if (target == NULL) {
        /* Compute the size as size_t so that len + 1 cannot overflow */
        ret = (char *) xmlMallocAtomic((size_t) len + 1);
        if (ret == NULL)
            return(NULL);
    } else {
        ret = target;
    }

    in = str;
    out = ret;
    while (len > 0) {
        /* len > 2 guarantees that in[1] and in[2] are within the input */
        if ((len > 2) && (*in == '%') &&
            (is_hex(in[1])) && (is_hex(in[2]))) {
            int c = xmlURIHexDigitValue(in[1]) * 16 +
                    xmlURIHexDigitValue(in[2]);

            in += 3;
            len -= 3;
            /* Explicit sign change */
            *out++ = (char) c;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;

    return(ret);
}
/**
* xmlURIEscapeStr:
* @str: string to escape
* @list: exception list string of chars not to escape
2001-02-23 20:55:21 +03:00
*
* This routine escapes a string to hex, ignoring unreserved characters
* a-z, A-Z, 0-9, "-._~", a few sub-delims "!*'()", the gen-delim "@"
* (why?) and the characters in the exception list.
2001-02-23 20:55:21 +03:00
*
* Returns a new escaped string or NULL in case of error.
2001-02-23 20:55:21 +03:00
*/
xmlChar *
xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
xmlChar *ret, ch;
xmlChar *temp;
const xmlChar *in;
int len, out;
2001-02-23 20:55:21 +03:00
if (str == NULL)
return(NULL);
if (str[0] == 0)
return(xmlStrdup(str));
len = xmlStrlen(str);
2001-02-23 20:55:21 +03:00
len += 20;
ret = (xmlChar *) xmlMallocAtomic(len);
if (ret == NULL)
return(NULL);
in = (const xmlChar *) str;
out = 0;
while(*in != 0) {
if (len - out <= 3) {
if (len > INT_MAX / 2)
return(NULL);
temp = xmlRealloc(ret, len * 2);
if (temp == NULL) {
xmlFree(ret);
return(NULL);
}
ret = temp;
len *= 2;
}
ch = *in;
if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
unsigned char val;
ret[out++] = '%';
val = ch >> 4;
if (val <= 9)
ret[out++] = '0' + val;
else
ret[out++] = 'A' + val - 0xA;
val = ch & 0xF;
if (val <= 9)
ret[out++] = '0' + val;
else
ret[out++] = 'A' + val - 0xA;
in++;
} else {
ret[out++] = *in++;
}
2001-02-23 20:55:21 +03:00
}
ret[out] = 0;
return(ret);
2001-02-23 20:55:21 +03:00
}
/**
* xmlURIEscape:
* @str: the string of the URI to escape
2001-02-23 20:55:21 +03:00
*
* Escaping routine, does not do validity checks !
* It will try to escape the chars needing this, but this is heuristic
* based it's impossible to be sure.
2001-02-23 20:55:21 +03:00
*
* Returns an copy of the string, but escaped
*
* 25 May 2001
* Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
* according to RFC2396.
* - Carl Douglas
2001-02-23 20:55:21 +03:00
*/
xmlChar *
xmlURIEscape(const xmlChar * str)
{
xmlChar *ret, *segment = NULL;
2001-02-23 20:55:21 +03:00
xmlURIPtr uri;
int ret2;
2001-02-23 20:55:21 +03:00
if (str == NULL)
return (NULL);
2001-02-23 20:55:21 +03:00
uri = xmlCreateURI();
if (uri != NULL) {
/*
* Allow escaping errors in the unescaped form
*/
uri->cleanup = XML_URI_ALLOW_UNWISE;
ret2 = xmlParseURIReference(uri, (const char *)str);
if (ret2) {
xmlFreeURI(uri);
return (NULL);
}
2001-02-23 20:55:21 +03:00
}
if (!uri)
return NULL;
ret = NULL;
#define NULLCHK(p) if(!p) { \
xmlFreeURI(uri); \
xmlFree(ret); \
return NULL; } \
if (uri->scheme) {
segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
NULLCHK(segment)
ret = xmlStrcat(ret, segment);
ret = xmlStrcat(ret, BAD_CAST ":");
xmlFree(segment);
}
if (uri->authority) {
segment =
xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
NULLCHK(segment)
ret = xmlStrcat(ret, BAD_CAST "//");
ret = xmlStrcat(ret, segment);
xmlFree(segment);
}
if (uri->user) {
segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
NULLCHK(segment)
ret = xmlStrcat(ret,BAD_CAST "//");
ret = xmlStrcat(ret, segment);
ret = xmlStrcat(ret, BAD_CAST "@");
xmlFree(segment);
}
if (uri->server) {
segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
NULLCHK(segment)
if (uri->user == NULL)
ret = xmlStrcat(ret, BAD_CAST "//");
ret = xmlStrcat(ret, segment);
xmlFree(segment);
}
if (uri->port > 0) {
xmlChar port[11];
snprintf((char *) port, 11, "%d", uri->port);
ret = xmlStrcat(ret, BAD_CAST ":");
ret = xmlStrcat(ret, port);
}
if (uri->path) {
segment =
xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
NULLCHK(segment)
ret = xmlStrcat(ret, segment);
xmlFree(segment);
}
if (uri->query_raw) {
ret = xmlStrcat(ret, BAD_CAST "?");
ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
}
else if (uri->query) {
segment =
xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
NULLCHK(segment)
ret = xmlStrcat(ret, BAD_CAST "?");
ret = xmlStrcat(ret, segment);
xmlFree(segment);
}
if (uri->opaque) {
segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
NULLCHK(segment)
ret = xmlStrcat(ret, segment);
xmlFree(segment);
}
if (uri->fragment) {
segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
NULLCHK(segment)
ret = xmlStrcat(ret, BAD_CAST "#");
ret = xmlStrcat(ret, segment);
xmlFree(segment);
}
xmlFreeURI(uri);
#undef NULLCHK
return (ret);
}
/************************************************************************
* *
* Public functions *
* *
************************************************************************/
/*
 * Tell whether the filesystem path @path is absolute: it starts with a
 * path separator or, on Windows builds, with an ASCII letter followed
 * by ':'. @path must not be NULL. Returns 1 if absolute, 0 otherwise.
 */
static int
xmlIsAbsolutePath(const xmlChar *path) {
    const int first = path[0];

    if (xmlIsPathSeparator(first, 1))
        return(1);

#ifdef _WIN32
    /*
     * The letter test comes first so that path[1] is never read for
     * an empty string.
     */
    if ((((first >= 'A') && (first <= 'Z')) ||
         ((first >= 'a') && (first <= 'z'))) &&
        (path[1] == ':'))
        return(1);
#endif

    return(0);
}
/**
 * xmlResolvePath:
 * @escRef:  the (possibly escaped) filesystem path, may carry a fragment
 * @base:  the base value
 * @out:  pointer to result URI
 *
 * Resolves a filesystem path from a base path.
 *
 * An empty @escRef yields a copy of @base. Otherwise the part of @escRef
 * before '#' is unescaped and, unless it is absolute or @base is empty,
 * appended to the directory part of @base and normalized. The fragment,
 * if any, is appended unchanged.
 *
 * Returns 0 on success, -1 if a memory allocation failed or an error
 * code if URI or base are invalid.
 */
static int
xmlResolvePath(const xmlChar *escRef, const xmlChar *base, xmlChar **out) {
    const xmlChar *fragment;
    xmlChar *prefix = NULL;     /* copy of escRef without the fragment */
    xmlChar *ref = NULL;        /* unescaped reference path */
    xmlChar *result = NULL;
    int hasBase;
    int ret = -1;

    if (out == NULL)
        return(1);
    *out = NULL;

    hasBase = ((base != NULL) && (base[0] != 0));

    /* Empty reference: the result is the base itself */
    if ((escRef == NULL) || (escRef[0] == 0)) {
        if (!hasBase)
            return(1);
        *out = xmlStrdup(base);
        return((*out == NULL) ? -1 : 0);
    }

    /*
     * If a URI is resolved, we can assume it is a valid URI and not
     * a filesystem path. This means we have to unescape the part
     * before the fragment.
     */
    fragment = xmlStrchr(escRef, '#');
    if (fragment != NULL) {
        prefix = xmlStrndup(escRef, fragment - escRef);
        if (prefix == NULL)
            goto cleanup;
        escRef = prefix;
    }

    ref = (xmlChar *) xmlURIUnescapeString((char *) escRef, -1, NULL);
    if (ref == NULL)
        goto cleanup;

    if (hasBase && !xmlIsAbsolutePath(ref)) {
        /* Length of base up to and including its last separator */
        int dirLen = xmlStrlen(base);

        while ((dirLen > 0) && !xmlIsPathSeparator(base[dirLen - 1], 1))
            dirLen--;

        if (dirLen > 0) {
            int refLen = xmlStrlen(ref);

            /* Concatenate the base directory and ref, then normalize */
            result = xmlMalloc(dirLen + refLen + 1);
            if (result == NULL)
                goto cleanup;
            memcpy(result, base, dirLen);
            memcpy(result + dirLen, ref, refLen + 1);
            xmlNormalizePath((char *) result, 1);
        }
    }

    /* Nothing was merged: hand over the unescaped reference as is */
    if (result == NULL) {
        result = ref;
        ref = NULL;
    }

    if (fragment != NULL) {
        result = xmlStrcat(result, fragment);
        if (result == NULL)
            goto cleanup;
    }

    *out = result;
    ret = 0;

cleanup:
    xmlFree(prefix);
    xmlFree(ref);
    return(ret);
}
/**
2024-05-20 14:58:22 +03:00
* xmlBuildURISafe:
2001-02-23 20:55:21 +03:00
* @URI: the URI instance found in the document
* @base: the base value
* @valPtr: pointer to result URI
2001-02-23 20:55:21 +03:00
*
* Computes he final URI of the reference done by checking that
* the given URI is valid, and building the final URI using the
* base URI. This is processed according to section 5.2 of the
2001-02-23 20:55:21 +03:00
* RFC 2396
*
* 5.2. Resolving Relative References to Absolute Form
*
2024-05-20 14:58:22 +03:00
* Available since 2.13.0.
*
* Returns 0 on success, -1 if a memory allocation failed or an error
* code if URI or base are invalid.
2001-02-23 20:55:21 +03:00
*/
int
xmlBuildURISafe(const xmlChar *URI, const xmlChar *base, xmlChar **valPtr) {
2001-02-23 20:55:21 +03:00
xmlChar *val = NULL;
int ret, len, indx, cur, out;
2001-02-23 20:55:21 +03:00
xmlURIPtr ref = NULL;
xmlURIPtr bas = NULL;
xmlURIPtr res = NULL;
if (valPtr == NULL)
return(1);
2001-02-23 20:55:21 +03:00
/*
* 1) The URI reference is parsed into the potential four components and
* fragment identifier, as described in Section 4.3.
*
* NOTE that a completely empty URI is treated by modern browsers
* as a reference to "." rather than as a synonym for the current
* URI. Should we do that here?
*/
if (URI == NULL)
ret = 1;
else if (URI[0] != 0)
ret = xmlParseURISafe((const char *) URI, &ref);
else
ret = 0;
2001-02-23 20:55:21 +03:00
if (ret != 0)
goto done;
if ((ref != NULL) && (ref->scheme != NULL)) {
/*
* The URI is absolute don't modify.
*/
val = xmlStrdup(URI);
if (val == NULL)
ret = -1;
goto done;
}
/*
* If base has no scheme or authority, it is assumed to be a
* filesystem path.
*/
if (xmlStrstr(base, BAD_CAST "://") == NULL) {
xmlFreeURI(ref);
return(xmlResolvePath(URI, base, valPtr));
}
ret = xmlParseURISafe((const char *) base, &bas);
if (ret < 0)
goto done;
2001-02-23 20:55:21 +03:00
if (ret != 0) {
if (ref) {
ret = 0;
2001-02-23 20:55:21 +03:00
val = xmlSaveUri(ref);
if (val == NULL)
ret = -1;
}
2001-02-23 20:55:21 +03:00
goto done;
}
if (ref == NULL) {
/*
* the base fragment must be ignored
*/
if (bas->fragment != NULL) {
xmlFree(bas->fragment);
bas->fragment = NULL;
}
val = xmlSaveUri(bas);
if (val == NULL)
ret = -1;
2001-02-23 20:55:21 +03:00
goto done;
}
/*
* 2) If the path component is empty and the scheme, authority, and
* query components are undefined, then it is a reference to the
* current document and we are done. Otherwise, the reference URI's
* query and fragment components are defined as found (or not found)
* within the URI reference and not inherited from the base URI.
*
* NOTE that in modern browsers, the parsing differs from the above
* in the following aspect: the query component is allowed to be
* defined while still treating this as a reference to the current
* document.
*/
ret = -1;
2001-02-23 20:55:21 +03:00
res = xmlCreateURI();
if (res == NULL)
goto done;
if ((ref->scheme == NULL) && (ref->path == NULL) &&
((ref->authority == NULL) && (ref->server == NULL) &&
(ref->port == PORT_EMPTY))) {
if (bas->scheme != NULL) {
2001-02-23 20:55:21 +03:00
res->scheme = xmlMemStrdup(bas->scheme);
if (res->scheme == NULL)
goto done;
}
if (bas->authority != NULL) {
2001-02-23 20:55:21 +03:00
res->authority = xmlMemStrdup(bas->authority);
if (res->authority == NULL)
goto done;
} else {
if (bas->server != NULL) {
res->server = xmlMemStrdup(bas->server);
if (res->server == NULL)
goto done;
}
if (bas->user != NULL) {
2001-02-23 20:55:21 +03:00
res->user = xmlMemStrdup(bas->user);
if (res->user == NULL)
goto done;
}
res->port = bas->port;
2001-02-23 20:55:21 +03:00
}
if (bas->path != NULL) {
2001-02-23 20:55:21 +03:00
res->path = xmlMemStrdup(bas->path);
if (res->path == NULL)
goto done;
}
if (ref->query_raw != NULL) {
res->query_raw = xmlMemStrdup (ref->query_raw);
if (res->query_raw == NULL)
goto done;
} else if (ref->query != NULL) {
2001-02-23 20:55:21 +03:00
res->query = xmlMemStrdup(ref->query);
if (res->query == NULL)
goto done;
} else if (bas->query_raw != NULL) {
res->query_raw = xmlMemStrdup(bas->query_raw);
if (res->query_raw == NULL)
goto done;
} else if (bas->query != NULL) {
2001-02-23 20:55:21 +03:00
res->query = xmlMemStrdup(bas->query);
if (res->query == NULL)
goto done;
}
if (ref->fragment != NULL) {
2001-02-23 20:55:21 +03:00
res->fragment = xmlMemStrdup(ref->fragment);
if (res->fragment == NULL)
goto done;
}
2001-02-23 20:55:21 +03:00
goto step_7;
}
/*
* 3) If the scheme component is defined, indicating that the reference
* starts with a scheme name, then the reference is interpreted as an
* absolute URI and we are done. Otherwise, the reference URI's
* scheme is inherited from the base URI's scheme component.
*/
if (ref->scheme != NULL) {
val = xmlSaveUri(ref);
if (val != NULL)
ret = 0;
2001-02-23 20:55:21 +03:00
goto done;
}
if (bas->scheme != NULL) {
2001-02-23 20:55:21 +03:00
res->scheme = xmlMemStrdup(bas->scheme);
if (res->scheme == NULL)
goto done;
}
if (ref->query_raw != NULL) {
res->query_raw = xmlMemStrdup(ref->query_raw);
if (res->query_raw == NULL)
goto done;
} else if (ref->query != NULL) {
res->query = xmlMemStrdup(ref->query);
if (res->query == NULL)
goto done;
}
if (ref->fragment != NULL) {
res->fragment = xmlMemStrdup(ref->fragment);
if (res->fragment == NULL)
goto done;
}
2001-02-23 20:55:21 +03:00
/*
* 4) If the authority component is defined, then the reference is a
* network-path and we skip to step 7. Otherwise, the reference
* URI's authority is inherited from the base URI's authority
* component, which will also be undefined if the URI scheme does not
* use an authority component.
*/
if ((ref->authority != NULL) || (ref->server != NULL) ||
(ref->port != PORT_EMPTY)) {
if (ref->authority != NULL) {
2001-02-23 20:55:21 +03:00
res->authority = xmlMemStrdup(ref->authority);
if (res->authority == NULL)
goto done;
} else {
if (ref->server != NULL) {
res->server = xmlMemStrdup(ref->server);
if (res->server == NULL)
goto done;
}
if (ref->user != NULL) {
2001-02-23 20:55:21 +03:00
res->user = xmlMemStrdup(ref->user);
if (res->user == NULL)
goto done;
}
res->port = ref->port;
2001-02-23 20:55:21 +03:00
}
if (ref->path != NULL) {
2001-02-23 20:55:21 +03:00
res->path = xmlMemStrdup(ref->path);
if (res->path == NULL)
goto done;
}
2001-02-23 20:55:21 +03:00
goto step_7;
}
if (bas->authority != NULL) {
2001-02-23 20:55:21 +03:00
res->authority = xmlMemStrdup(bas->authority);
if (res->authority == NULL)
goto done;
} else if ((bas->server != NULL) || (bas->port != PORT_EMPTY)) {
if (bas->server != NULL) {
res->server = xmlMemStrdup(bas->server);
if (res->server == NULL)
goto done;
}
if (bas->user != NULL) {
2001-02-23 20:55:21 +03:00
res->user = xmlMemStrdup(bas->user);
if (res->user == NULL)
goto done;
}
res->port = bas->port;
2001-02-23 20:55:21 +03:00
}
/*
* 5) If the path component begins with a slash character ("/"), then
* the reference is an absolute-path and we skip to step 7.
*/
if ((ref->path != NULL) && (ref->path[0] == '/')) {
res->path = xmlMemStrdup(ref->path);
if (res->path == NULL)
goto done;
2001-02-23 20:55:21 +03:00
goto step_7;
}
/*
* 6) If this step is reached, then we are resolving a relative-path
* reference. The relative path needs to be merged with the base
* URI's path. Although there are many ways to do this, we will
* describe a simple method using a separate string buffer.
*
* Allocate a buffer large enough for the result string.
*/
len = 2; /* extra / and 0 */
if (ref->path != NULL)
len += strlen(ref->path);
if (bas->path != NULL)
len += strlen(bas->path);
res->path = (char *) xmlMallocAtomic(len);
if (res->path == NULL)
2001-02-23 20:55:21 +03:00
goto done;
res->path[0] = 0;
/*
* a) All but the last segment of the base URI's path component is
* copied to the buffer. In other words, any characters after the
* last (right-most) slash character, if any, are excluded.
*/
cur = 0;
out = 0;
if (bas->path != NULL) {
while (bas->path[cur] != 0) {
while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
cur++;
if (bas->path[cur] == 0)
break;
cur++;
while (out < cur) {
res->path[out] = bas->path[out];
out++;
}
}
}
res->path[out] = 0;
/*
* b) The reference's path component is appended to the buffer
* string.
*/
if (ref->path != NULL && ref->path[0] != 0) {
indx = 0;
2001-02-23 20:55:21 +03:00
/*
* Ensure the path includes a '/'
*/
if ((out == 0) && ((bas->server != NULL) || bas->port != PORT_EMPTY))
2001-02-23 20:55:21 +03:00
res->path[out++] = '/';
while (ref->path[indx] != 0) {
res->path[out++] = ref->path[indx++];
2001-02-23 20:55:21 +03:00
}
}
res->path[out] = 0;
/*
* Steps c) to h) are really path normalization steps
*/
xmlNormalizeURIPath(res->path);
step_7:
/*
* 7) The resulting URI components, including any inherited from the
* base URI, are recombined to give the absolute form of the URI
* reference.
*/
val = xmlSaveUri(res);
if (val != NULL)
ret = 0;
2001-02-23 20:55:21 +03:00
done:
if (ref != NULL)
xmlFreeURI(ref);
if (bas != NULL)
xmlFreeURI(bas);
if (res != NULL)
xmlFreeURI(res);
*valPtr = val;
return(ret);
2001-02-23 20:55:21 +03:00
}
/**
 * xmlBuildURI:
 * @URI:  the URI instance found in the document
 * @base:  the base value
 *
 * Computes the final URI of the reference done by checking that
 * the given URI is valid, and building the final URI using the
 * base URI. This is processed according to section 5.2 of the
 * RFC 2396
 *
 * 5.2. Resolving Relative References to Absolute Form
 *
 * This is xmlBuildURISafe() without the status code: only the
 * resulting string is reported.
 *
 * Returns a new URI string (to be freed by the caller) or NULL in case
 * of error.
 */
xmlChar *
xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
    xmlChar *resolved;

    /* The status code is dropped */
    xmlBuildURISafe(URI, base, &resolved);
    return(resolved);
}
/**
* xmlBuildRelativeURISafe:
* @URI: the URI reference under consideration
* @base: the base value
* @valPtr: pointer to result URI
*
* Expresses the URI of the reference in terms relative to the
* base. Some examples of this operation include:
* base = "http://site1.com/docs/book1.html"
* URI input URI returned
* docs/pic1.gif pic1.gif
* docs/img/pic1.gif img/pic1.gif
* img/pic1.gif ../img/pic1.gif
* http://site1.com/docs/pic1.gif pic1.gif
* http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
*
* base = "docs/book1.html"
* URI input URI returned
* docs/pic1.gif pic1.gif
* docs/img/pic1.gif img/pic1.gif
* img/pic1.gif ../img/pic1.gif
* http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
*
*
2019-09-30 18:04:54 +03:00
* Note: if the URI reference is really weird or complicated, it may be
* worthwhile to first convert it into a "nice" one by calling
* xmlBuildURI (using 'base') before calling this routine,
* since this routine (for reasonable efficiency) assumes URI has
* already been through some validation.
*
2024-05-20 14:58:22 +03:00
* Available since 2.13.0.
*
* Returns 0 on success, -1 if a memory allocation failed or an error
* code if URI or base are invalid.
*/
int
xmlBuildRelativeURISafe(const xmlChar * URI, const xmlChar * base,
xmlChar **valPtr)
{
xmlChar *val = NULL;
int ret = 0;
int ix;
int nbslash = 0;
int len;
xmlURIPtr ref = NULL;
xmlURIPtr bas = NULL;
xmlChar *bptr, *uptr, *vptr;
int remove_path = 0;
if (valPtr == NULL)
return(1);
*valPtr = NULL;
if ((URI == NULL) || (*URI == 0))
return(1);
/*
* First parse URI into a standard form
*/
ref = xmlCreateURI ();
if (ref == NULL) {
ret = -1;
goto done;
}
/* If URI not already in "relative" form */
if (URI[0] != '.') {
ret = xmlParseURIReference (ref, (const char *) URI);
if (ret != 0)
goto done; /* Error in URI, return NULL */
} else {
ref->path = (char *)xmlStrdup(URI);
if (ref->path == NULL) {
ret = -1;
goto done;
}
}
/*
* Next parse base into the same standard form
*/
if ((base == NULL) || (*base == 0)) {
val = xmlStrdup (URI);
if (val == NULL)
ret = -1;
goto done;
}
bas = xmlCreateURI ();
if (bas == NULL) {
ret = -1;
goto done;
}
if (base[0] != '.') {
ret = xmlParseURIReference (bas, (const char *) base);
if (ret != 0)
goto done; /* Error in base, return NULL */
} else {
bas->path = (char *)xmlStrdup(base);
if (bas->path == NULL) {
ret = -1;
goto done;
}
}
/*
* If the scheme / server on the URI differs from the base,
* just return the URI
*/
if ((ref->scheme != NULL) &&
((bas->scheme == NULL) ||
(xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
(xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)) ||
(bas->port != ref->port))) {
val = xmlStrdup (URI);
if (val == NULL)
ret = -1;
goto done;
}
if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
val = xmlStrdup(BAD_CAST "");
if (val == NULL)
ret = -1;
goto done;
}
if (bas->path == NULL) {
val = xmlStrdup((xmlChar *)ref->path);
if (val == NULL)
ret = -1;
goto done;
}
if (ref->path == NULL) {
ref->path = (char *) "/";
remove_path = 1;
}
/*
* At this point (at last!) we can compare the two paths
*
* First we take care of the special case where either of the
* two path components may be missing (bug 316224)
*/
bptr = (xmlChar *)bas->path;
2018-11-05 16:26:26 +03:00
{
xmlChar *rptr = (xmlChar *) ref->path;
int pos = 0;
/*
* Next we compare the two strings and find where they first differ
*/
if ((*rptr == '.') && (rptr[1] == '/'))
rptr += 2;
if ((*bptr == '.') && (bptr[1] == '/'))
bptr += 2;
else if ((*bptr == '/') && (*rptr != '/'))
bptr++;
while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
pos++;
if (bptr[pos] == rptr[pos]) {
val = xmlStrdup(BAD_CAST "");
if (val == NULL)
ret = -1;
goto done; /* (I can't imagine why anyone would do this) */
}
/*
* In URI, "back up" to the last '/' encountered. This will be the
* beginning of the "unique" suffix of URI
*/
ix = pos;
for (; ix > 0; ix--) {
if (rptr[ix - 1] == '/')
break;
}
uptr = (xmlChar *)&rptr[ix];
/*
* In base, count the number of '/' from the differing point
*/
2018-11-05 16:26:26 +03:00
for (; bptr[ix] != 0; ix++) {
if (bptr[ix] == '/')
nbslash++;
}
/*
* e.g: URI="foo/" base="foo/bar" -> "./"
*/
if (nbslash == 0 && !uptr[0]) {
val = xmlStrdup(BAD_CAST "./");
if (val == NULL)
ret = -1;
goto done;
}
len = xmlStrlen (uptr) + 1;
}
if (nbslash == 0) {
if (uptr != NULL) {
/* exception characters from xmlSaveUri */
val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
if (val == NULL)
ret = -1;
}
goto done;
}
/*
* Allocate just enough space for the returned string -
* length of the remainder of the URI, plus enough space
* for the "../" groups, plus one for the terminator
*/
val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
if (val == NULL) {
ret = -1;
goto done;
}
vptr = val;
/*
* Put in as many "../" as needed
*/
for (; nbslash>0; nbslash--) {
*vptr++ = '.';
*vptr++ = '.';
*vptr++ = '/';
}
/*
* Finish up with the end of the URI
*/
if (uptr != NULL) {
if ((vptr > val) && (len > 0) &&
(uptr[0] == '/') && (vptr[-1] == '/')) {
memcpy (vptr, uptr + 1, len - 1);
vptr[len - 2] = 0;
} else {
memcpy (vptr, uptr, len);
vptr[len - 1] = 0;
}
} else {
vptr[len - 1] = 0;
}
/* escape the freshly-built path */
vptr = val;
/* exception characters from xmlSaveUri */
val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
if (val == NULL)
ret = -1;
else
ret = 0;
xmlFree(vptr);
done:
/*
* Free the working variables
*/
if (remove_path != 0)
ref->path = NULL;
if (ref != NULL)
xmlFreeURI (ref);
if (bas != NULL)
xmlFreeURI (bas);
if (ret != 0) {
xmlFree(val);
val = NULL;
}
*valPtr = val;
return(ret);
}
/**
 * xmlBuildRelativeURI:
 * @URI:  the URI reference under consideration
 * @base:  the base value
 *
 * Convenience front end for xmlBuildRelativeURISafe. The status code
 * reported by that function is discarded; only the resulting string is
 * handed back to the caller.
 *
 * Returns a new URI string (to be freed by the caller) or NULL in case
 * of error.
 */
xmlChar *
xmlBuildRelativeURI(const xmlChar * URI, const xmlChar * base)
{
    xmlChar *relative;

    /* The callee always stores a value (possibly NULL) through &relative. */
    xmlBuildRelativeURISafe(URI, base, &relative);

    return(relative);
}
2003-02-19 17:50:35 +03:00
/**
* xmlCanonicPath:
* @path: the resource locator in a filesystem notation
*
* Prepares a path.
*
* If the path contains the substring "://", it is considered a
* Legacy Extended IRI. Characters which aren't allowed in URIs are
* escaped.
*
* Otherwise, the path is considered a filesystem path which is
* copied without modification.
2003-02-19 17:50:35 +03:00
*
* The caller is responsible for freeing the memory occupied
* by the returned string. If there is insufficient memory available, or the
2003-02-19 17:50:35 +03:00
* argument is NULL, the function returns NULL.
2024-05-20 14:58:22 +03:00
*
* Returns the escaped path.
2003-02-19 17:50:35 +03:00
*/
xmlChar *
2003-02-19 17:50:35 +03:00
xmlCanonicPath(const xmlChar *path)
{
xmlChar *ret;
2003-02-19 17:50:35 +03:00
if (path == NULL)
return(NULL);
/* Check if this is an "absolute uri" */
if (xmlStrstr(path, BAD_CAST "://") != NULL) {
/*
* Escape all characters except reserved, unreserved and the
* percent sign.
*
* xmlURIEscapeStr already keeps unreserved characters, so we
* pass gen-delims, sub-delims and "%" to ignore.
*/
ret = xmlURIEscapeStr(path, BAD_CAST ":/?#[]@!$&()*+,;='%");
} else {
ret = xmlStrdup((const xmlChar *) path);
}
2003-02-19 17:50:35 +03:00
return(ret);
}
2001-02-23 20:55:21 +03:00
/**
 * xmlPathToURI:
 * @path:  the resource locator in a filesystem notation
 *
 * Constructs a URI expressing the existing path. The work is delegated
 * entirely to xmlCanonicPath; this function adds no processing of its own.
 *
 * Returns the string produced by xmlCanonicPath for @path. The caller is
 * responsible for freeing the memory occupied by the returned string.
 * If there is insufficient memory available, or the argument is NULL,
 * the function returns NULL.
 */
xmlChar *
xmlPathToURI(const xmlChar *path)
{
    xmlChar *uri = xmlCanonicPath(path);

    return(uri);
}