1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-01-13 13:17:36 +03:00

Rewrite/cleanup/tests of URI normalization:

- uri.c: rewrite of xmlNormalizeURIPath from Paul D. Smith
- test/URI/smith.uri result/URI/smith.uri Makefile.am:
  added the new tests for URI normalization
- testURI.c: fixed stoopid bugs
- result/VC/OneID3 result/VC/UniqueElementTypeDeclaration:
  the URIs in the error messages are now properly normalized
Daniel
This commit is contained in:
Daniel Veillard 2001-02-02 17:07:32 +00:00
parent ea28ce621c
commit b6e7fdbac6
9 changed files with 259 additions and 26 deletions

View File

@ -1,3 +1,12 @@
Fri Feb 2 18:04:35 CET 2001 Daniel Veillard <Daniel.Veillard@imag.fr>
* uri.c: rewrite of xmlNormalizeURIPath from Paul D. Smith
* test/URI/smith.uri result/URI/smith.uri Makefile.am:
added the new tests for URI normalization
* testURI.c: fixed stoopid bugs
* result/VC/OneID3 result/VC/UniqueElementTypeDeclaration:
the URIs in the error messages are now properly normalized
Fri Feb 2 09:18:53 CET 2001 Daniel Veillard <Daniel.Veillard@imag.fr>
* uri.c: applied Marc Sanfacon's patch for xmlNormalizeURIPath

View File

@ -237,6 +237,19 @@ URItests : testURI
diff $(srcdir)/result/URI/$$name result.$$name ; \
rm result.$$name ; \
fi ; fi ; done)
@(for i in $(srcdir)/test/URI/*.uri ; do \
name=`basename $$i`; \
if [ ! -d $$i ] ; then \
if [ ! -f $(srcdir)/result/URI/$$name ] ; then \
echo New test file $$name ; \
$(top_builddir)/testURI < $$i > $(srcdir)/result/URI/$$name ; \
else \
echo Testing $$name ; \
$(top_builddir)/testURI < $$i > result.$$name ; \
grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0";\
diff $(srcdir)/result/URI/$$name result.$$name ; \
rm result.$$name ; \
fi ; fi ; done)
XPathtests : testXPath
@echo "##"

40
aclocal.m4 vendored
View File

@ -620,31 +620,35 @@ esac
])
# AC_LIBLTDL_CONVENIENCE[(dir)] - sets LIBLTDL to the link flags for
# the libltdl convenience library, adds --enable-ltdl-convenience to
# the configure arguments. Note that LIBLTDL is not AC_SUBSTed, nor
# is AC_CONFIG_SUBDIRS called. If DIR is not provided, it is assumed
# to be `${top_builddir}/libltdl'. Make sure you start DIR with
# '${top_builddir}/' (note the single quotes!) if your package is not
# flat, and, if you're not using automake, define top_builddir as
# appropriate in the Makefiles.
# the libltdl convenience library and INCLTDL to the include flags for
# the libltdl header and adds --enable-ltdl-convenience to the
# configure arguments. Note that LIBLTDL and INCLTDL are not
# AC_SUBSTed, nor is AC_CONFIG_SUBDIRS called. If DIR is not
# provided, it is assumed to be `libltdl'. LIBLTDL will be prefixed
# with '${top_builddir}/' and INCLTDL will be prefixed with
# '${top_srcdir}/' (note the single quotes!). If your package is not
# flat and you're not using automake, define top_builddir and
# top_srcdir appropriately in the Makefiles.
AC_DEFUN(AC_LIBLTDL_CONVENIENCE, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
case "$enable_ltdl_convenience" in
no) AC_MSG_ERROR([this package needs a convenience libltdl]) ;;
"") enable_ltdl_convenience=yes
ac_configure_args="$ac_configure_args --enable-ltdl-convenience" ;;
esac
LIBLTDL=ifelse($#,1,$1,['${top_builddir}/libltdl'])/libltdlc.la
INCLTDL=ifelse($#,1,-I$1,['-I${top_builddir}/libltdl'])
LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdlc.la
INCLTDL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl'])
])
# AC_LIBLTDL_INSTALLABLE[(dir)] - sets LIBLTDL to the link flags for
# the libltdl installable library, and adds --enable-ltdl-install to
# the configure arguments. Note that LIBLTDL is not AC_SUBSTed, nor
# is AC_CONFIG_SUBDIRS called. If DIR is not provided, it is assumed
# to be `${top_builddir}/libltdl'. Make sure you start DIR with
# '${top_builddir}/' (note the single quotes!) if your package is not
# flat, and, if you're not using automake, define top_builddir as
# appropriate in the Makefiles.
# the libltdl installable library and INCLTDL to the include flags for
# the libltdl header and adds --enable-ltdl-install to the configure
# arguments. Note that LIBLTDL and INCLTDL are not AC_SUBSTed, nor is
# AC_CONFIG_SUBDIRS called. If DIR is not provided and an installed
# libltdl is not found, it is assumed to be `libltdl'. LIBLTDL will
# be prefixed with '${top_builddir}/' and INCLTDL will be prefixed
# with '${top_srcdir}/' (note the single quotes!). If your package is
# not flat and you're not using automake, define top_builddir and
# top_srcdir appropriately in the Makefiles.
# In the future, this macro may have to be called after AC_PROG_LIBTOOL.
AC_DEFUN(AC_LIBLTDL_INSTALLABLE, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
AC_CHECK_LIB(ltdl, main,
@ -657,8 +661,8 @@ AC_DEFUN(AC_LIBLTDL_INSTALLABLE, [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
])
if test x"$enable_ltdl_install" = x"yes"; then
ac_configure_args="$ac_configure_args --enable-ltdl-install"
LIBLTDL=ifelse($#,1,$1,['${top_builddir}/libltdl'])/libltdl.la
INCLTDL=ifelse($#,1,-I$1,['-I${top_builddir}/libltdl'])
LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdl.la
INCLTDL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl'])
else
ac_configure_args="$ac_configure_args --enable-ltdl-install=no"
LIBLTDL="-lltdl"

15
result/URI/smith.uri Normal file
View File

@ -0,0 +1,15 @@
/bar
bar
bar
bar
baz
foo/
foo
foo
../foo./
../foo/
/foo
../foo
../../foo
../../../foo

View File

@ -1,3 +1,3 @@
./test/VC/dtds/doc.dtd:2: validity error: Element doc has ID attributes defined in the internal and external subset : val
test/VC/dtds/doc.dtd:2: validity error: Element doc has ID attributes defined in the internal and external subset : val
<!ATTLIST doc val ID #IMPLIED>
^

View File

@ -1,3 +1,3 @@
./test/VC/dtds/a.dtd:1: validity error: Redefinition of element a
test/VC/dtds/a.dtd:1: validity error: Redefinition of element a
<!ELEMENT a (#PCDATA | b | c)*>
^

15
test/URI/smith.uri Normal file
View File

@ -0,0 +1,15 @@
/foo/../bar
foo/../bar
./foo/../bar
foo/./../bar
foo/bar/.././../baz
foo/..
foo/bar/..
./foo
././foo
.././foo./
.././foo/.
/foo
../foo
../../foo
../../../foo

View File

@ -27,11 +27,8 @@ int main(int argc, char **argv) {
const char *base = NULL;
xmlChar *composite;
if (argv[arg] == NULL) {
printf("Usage: %s [-base URI] URI ...\n", argv[0]);
exit(0);
}
if ((!strcmp(argv[arg], "-base")) || (!strcmp(argv[arg], "--base"))) {
if ((argv[arg] != NULL) &&
((!strcmp(argv[arg], "-base")) || (!strcmp(argv[arg], "--base")))) {
arg++;
base = argv[arg];
if (base != NULL)
@ -64,6 +61,7 @@ int main(int argc, char **argv) {
if (ret != 0)
printf("%s : error %d\n", str, ret);
else {
xmlNormalizeURIPath(uri->path);
xmlPrintURI(stdout, uri);
printf("\n");
}
@ -99,5 +97,5 @@ int main(int argc, char **argv) {
}
xmlFreeURI(uri);
xmlMemoryDump();
exit(0);
return(0);
}

179
uri.c
View File

@ -606,6 +606,7 @@ xmlFreeURI(xmlURIPtr uri) {
* *
************************************************************************/
#if 0
/**
* xmlNormalizeURIPath:
* @path: pointer to the path string
@ -739,6 +740,184 @@ xmlNormalizeURIPath(char *path) {
}
return(0);
}
#else
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" always sits on the first character of a segment here, so a
     * leading '.' followed by '/' or end-of-string is a complete "."
     * segment and not merely a name that ends in a dot.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* Copy the '/' that terminates the segment.  */
        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *prev;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the current segment is "..", or if the next segment _isn't_
         * "..", keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }

        /* Source and destination overlap inside the same buffer, so
         * strcpy() would be undefined behaviour; memmove() is required.
         * The +1 carries the terminating NUL along.
         */
        memmove(cur, segp + 3, strlen(segp + 3) + 1);

        /* Step back over the '/' separator(s) in front of "cur".  If that
         * takes us to the start of the buffer there is no previous segment
         * (the path is relative, or only leading slashes precede us), so
         * keep going from here.
         */
        prev = cur;
        while ((prev > path) && (prev[-1] == '/'))
            --prev;
        if (prev == path)
            continue;

        /* "prev - 1" is the last character of the previous segment; find
         * its start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".  The previous segment may legitimately begin
         * at "path" itself (e.g. the "a" in "a/b/../.."), so reaching
         * "path" here means "found it", not "nothing to back up to".
         */
        cur = prev - 1;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.  Only absolute paths are
     * touched; a relative path keeps its leading ".." segments.
     */
    if (path[0] == '/') {
        cur = path;
        /* The cur[0] test stops the scan once "cur" has advanced onto the
         * terminating NUL (e.g. for "/.."); without it cur[1] would be read
         * past the end of the string.
         */
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            /* Slide the remainder down over the discarded "/.." prefixes. */
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
#endif
/**
* xmlURIUnescapeString: