1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-03-27 18:50:07 +03:00

xmllint: Support compressed input from stdin

Another regression related to reading from stdin.

Making a "-" filename read from stdin was deeply baked into the core
IO code but is inherently insecure. I really want to reenable this
dangerous feature as sparingly as possible.

This now enables compressed input when using the "Fd" API functions
which wasn't supported before. But XML_PARSE_NO_UNZIP will be
inverted later.

Allow compressed stdin in xmlReadFile to support xmlstarlet and older
versions of xsltproc. So far, these are the only known command-line
tools that rely on "-" meaning stdin.
This commit is contained in:
Nick Wellnhofer 2025-01-28 20:13:58 +01:00
parent a8d8a70c51
commit a78843be5e
5 changed files with 127 additions and 85 deletions

View File

@ -31,6 +31,9 @@ XML_HIDDEN xmlParserInputBufferPtr
xmlNewInputBufferMemory(const void *mem, size_t size, int flags,
xmlCharEncoding enc);
XML_HIDDEN int
xmlInputFromFd(xmlParserInputBufferPtr buf, int fd, int unzip);
#ifdef LIBXML_OUTPUT_ENABLED
XML_HIDDEN void
xmlOutputBufferWriteQuotedString(xmlOutputBufferPtr buf,

View File

@ -14021,7 +14021,7 @@ xmlReadFile(const char *filename, const char *encoding, int options)
*/
if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
encoding, 0);
encoding, XML_INPUT_UNZIP);
else
input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
@ -14283,6 +14283,7 @@ xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
const char *URL, const char *encoding, int options)
{
xmlParserInputPtr input;
int inputFlags;
if (ctxt == NULL)
return(NULL);
@ -14290,7 +14291,10 @@ xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
xmlCtxtReset(ctxt);
xmlCtxtUseOptions(ctxt, options);
input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0);
inputFlags = 0;
if ((options & XML_PARSE_NO_UNZIP) == 0)
inputFlags |= XML_INPUT_UNZIP;
input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, inputFlags);
if (input == NULL)
return(NULL);

View File

@ -1914,7 +1914,7 @@ xmlCtxtNewInputFromString(xmlParserCtxtPtr ctxt, const char *url,
* xmlNewInputFromFd:
* @url: base URL (optional)
* @fd: file descriptor
* @flags: unused, pass 0
* @flags: input flags
*
* Creates a new parser input to read from a file descriptor.
*
@ -1923,21 +1923,30 @@ xmlCtxtNewInputFromString(xmlParserCtxtPtr ctxt, const char *url,
*
* @fd is closed after parsing has finished.
*
* Supported @flags are XML_INPUT_UNZIP to decompress data
* automatically. This feature is deprecated and will be removed
* in a future release.
*
* Available since 2.14.0.
*
* Returns a new parser input or NULL if a memory allocation failed.
*/
xmlParserInputPtr
xmlNewInputFromFd(const char *url, int fd, int flags ATTRIBUTE_UNUSED) {
xmlNewInputFromFd(const char *url, int fd, int flags) {
xmlParserInputBufferPtr buf;
if (fd < 0)
return(NULL);
buf = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
if (buf == NULL)
return(NULL);
if (xmlInputFromFd(buf, fd, flags) < 0) {
xmlFreeParserInputBuffer(buf);
return(NULL);
}
return(xmlNewInputInternal(buf, url));
}

183
xmlIO.c
View File

@ -1037,6 +1037,105 @@ xmlIODefaultMatch(const char *filename ATTRIBUTE_UNUSED) {
return(1);
}
/*
 * xmlInputFromFd:
 * @buf:  input buffer to set up
 * @fd:  file descriptor to read from
 * @flags:  input flags
 *
 * Installs read and close callbacks on @buf so that it reads from a
 * duplicate of @fd. @fd itself is not closed by this function.
 *
 * If XML_INPUT_UNZIP is set in @flags and the corresponding support is
 * compiled in, the data is probed with xzlib and then zlib. A decoder
 * is kept when it detects compressed data, or when the descriptor
 * can't be rewound to the position it had before probing.
 *
 * Returns XML_ERR_OK on success, XML_ERR_NO_MEMORY if the context
 * allocation fails, or the result of xmlIOErr if dup() fails.
 */
int
xmlInputFromFd(xmlParserInputBufferPtr buf, int fd, int flags) {
    xmlFdIOCtxt *ioCtxt;
    int dupFd;

    /* Avoid an unused parameter warning without compression support */
    (void) flags;

#ifdef LIBXML_LZMA_ENABLED
    if ((flags & XML_INPUT_UNZIP) != 0) {
        xzFile xz;
        off_t start;
        int keep;

        /* Remember where we are so the probe can be undone */
        start = lseek(fd, 0, SEEK_CUR);

        dupFd = dup(fd);
        if (dupFd == -1)
            return(xmlIOErr(errno));

        xz = __libxml2_xzdopen("?", dupFd, "rb");
        if (xz != NULL) {
            keep = 1;

            if (__libxml2_xzcompressed(xz) <= 0) {
                /*
                 * Not xz compressed: try to rewind. If the file
                 * isn't seekable, uncompressed input is piped
                 * through xzlib instead.
                 */
                if ((start >= 0) && (lseek(fd, start, SEEK_SET) >= 0))
                    keep = 0;
            }

            if (keep) {
                buf->context = xz;
                buf->readcallback = xmlXzfileRead;
                buf->closecallback = xmlXzfileClose;
                buf->compressed = 1;
                return(XML_ERR_OK);
            }

            xmlXzfileClose(xz);
        } else {
            close(dupFd);
        }
    }
#endif /* LIBXML_LZMA_ENABLED */

#ifdef LIBXML_ZLIB_ENABLED
    if ((flags & XML_INPUT_UNZIP) != 0) {
        gzFile gz;
        off_t start;
        int keep;

        /* Remember where we are so the probe can be undone */
        start = lseek(fd, 0, SEEK_CUR);

        dupFd = dup(fd);
        if (dupFd == -1)
            return(xmlIOErr(errno));

        gz = gzdopen(dupFd, "rb");
        if (gz != NULL) {
            keep = 1;

            if (gzdirect(gz) != 0) {
                /*
                 * Not gzip compressed: try to rewind. If the file
                 * isn't seekable, uncompressed input is piped
                 * through zlib instead.
                 */
                if ((start >= 0) && (lseek(fd, start, SEEK_SET) >= 0))
                    keep = 0;
            }

            if (keep) {
                buf->context = gz;
                buf->readcallback = xmlGzfileRead;
                buf->closecallback = xmlGzfileClose;
                buf->compressed = 1;
                return(XML_ERR_OK);
            }

            xmlGzfileClose(gz);
        } else {
            close(dupFd);
        }
    }
#endif /* LIBXML_ZLIB_ENABLED */

    /* Plain input: read directly from a duplicate of the descriptor */
    dupFd = dup(fd);
    if (dupFd == -1)
        return(xmlIOErr(errno));

    ioCtxt = xmlMalloc(sizeof(*ioCtxt));
    if (ioCtxt == NULL) {
        close(dupFd);
        return(XML_ERR_NO_MEMORY);
    }
    ioCtxt->fd = dupFd;

    buf->context = ioCtxt;
    buf->readcallback = xmlFdRead;
    buf->closecallback = xmlFdClose;

    return(XML_ERR_OK);
}
/**
* xmlInputDefaultOpen:
* @buf: input buffer to be filled
@ -1048,13 +1147,9 @@ xmlIODefaultMatch(const char *filename ATTRIBUTE_UNUSED) {
static int
xmlInputDefaultOpen(xmlParserInputBufferPtr buf, const char *filename,
int flags) {
xmlFdIOCtxt *fdctxt;
int ret;
int fd;
/* Avoid unused variable warning */
(void) flags;
#ifdef LIBXML_HTTP_ENABLED
if (xmlIOHTTPMatch(filename)) {
if ((flags & XML_INPUT_NETWORK) == 0)
@ -1073,87 +1168,15 @@ xmlInputDefaultOpen(xmlParserInputBufferPtr buf, const char *filename,
if (!xmlFileMatch(filename))
return(XML_IO_ENOENT);
#ifdef LIBXML_LZMA_ENABLED
if (flags & XML_INPUT_UNZIP) {
xzFile xzStream;
ret = xmlFdOpen(filename, 0, &fd);
if (ret != XML_ERR_OK)
return(ret);
xzStream = __libxml2_xzdopen(filename, fd, "rb");
if (xzStream == NULL) {
close(fd);
} else {
/*
* Non-regular files like pipes can't be reopened.
* If a file isn't seekable, we pipe uncompressed
* input through xzlib.
*/
if ((lseek(fd, 0, SEEK_CUR) < 0) ||
(__libxml2_xzcompressed(xzStream) > 0)) {
buf->context = xzStream;
buf->readcallback = xmlXzfileRead;
buf->closecallback = xmlXzfileClose;
buf->compressed = 1;
return(XML_ERR_OK);
}
xmlXzfileClose(xzStream);
}
}
#endif /* LIBXML_LZMA_ENABLED */
#ifdef LIBXML_ZLIB_ENABLED
if (flags & XML_INPUT_UNZIP) {
gzFile gzStream;
ret = xmlFdOpen(filename, 0, &fd);
if (ret != XML_ERR_OK)
return(ret);
gzStream = gzdopen(fd, "rb");
if (gzStream == NULL) {
close(fd);
} else {
/*
* Non-regular files like pipes can't be reopened.
* If a file isn't seekable, we pipe uncompressed
* input through zlib.
*/
if ((lseek(fd, 0, SEEK_CUR) < 0) ||
(gzdirect(gzStream) == 0)) {
buf->context = gzStream;
buf->readcallback = xmlGzfileRead;
buf->closecallback = xmlGzfileClose;
buf->compressed = 1;
return(XML_ERR_OK);
}
xmlGzfileClose(gzStream);
}
}
#endif /* LIBXML_ZLIB_ENABLED */
ret = xmlFdOpen(filename, 0, &fd);
if (ret != XML_ERR_OK)
return(ret);
fdctxt = xmlMalloc(sizeof(*fdctxt));
if (fdctxt == NULL) {
close(fd);
return(XML_ERR_NO_MEMORY);
}
fdctxt->fd = fd;
ret = xmlInputFromFd(buf, fd, flags);
buf->context = fdctxt;
buf->readcallback = xmlFdRead;
buf->closecallback = xmlFdClose;
return(XML_ERR_OK);
close(fd);
return(ret);
}
#ifdef LIBXML_OUTPUT_ENABLED

View File

@ -86,6 +86,9 @@
#define HTML_BUF_SIZE 50000
/* Internal parser option */
#define XML_PARSE_UNZIP (1 << 24)
typedef enum {
XMLLINT_RETURN_OK = 0, /* No error */
XMLLINT_ERR_UNCLASS = 1, /* Unclassified */