xmllint: Support compressed input from stdin

Another regression related to reading from stdin.

Making a "-" filename read from stdin was deeply baked into the core
IO code but is inherently insecure. I really want to reenable this
dangerous feature as sparingly as possible.

This now enables compressed input when using the "Fd" API functions
which wasn't supported before. But XML_PARSE_NO_UNZIP will be
inverted later.

Allow compressed stdin in xmlReadFile to support xmlstarlet and older
versions of xsltproc. So far, these are the only known command-line
tools that rely on "-" meaning stdin.
This commit is contained in:
Nick Wellnhofer 2025-01-28 20:13:58 +01:00
parent a8d8a70c51
commit a78843be5e
5 changed files with 127 additions and 85 deletions

View File

@ -31,6 +31,9 @@ XML_HIDDEN xmlParserInputBufferPtr
xmlNewInputBufferMemory(const void *mem, size_t size, int flags, xmlNewInputBufferMemory(const void *mem, size_t size, int flags,
xmlCharEncoding enc); xmlCharEncoding enc);
XML_HIDDEN int
xmlInputFromFd(xmlParserInputBufferPtr buf, int fd, int unzip);
#ifdef LIBXML_OUTPUT_ENABLED #ifdef LIBXML_OUTPUT_ENABLED
XML_HIDDEN void XML_HIDDEN void
xmlOutputBufferWriteQuotedString(xmlOutputBufferPtr buf, xmlOutputBufferWriteQuotedString(xmlOutputBufferPtr buf,

View File

@ -14021,7 +14021,7 @@ xmlReadFile(const char *filename, const char *encoding, int options)
*/ */
if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0)) if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0))
input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO,
encoding, 0); encoding, XML_INPUT_UNZIP);
else else
input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0); input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0);
@ -14283,6 +14283,7 @@ xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
const char *URL, const char *encoding, int options) const char *URL, const char *encoding, int options)
{ {
xmlParserInputPtr input; xmlParserInputPtr input;
int inputFlags;
if (ctxt == NULL) if (ctxt == NULL)
return(NULL); return(NULL);
@ -14290,7 +14291,10 @@ xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
xmlCtxtReset(ctxt); xmlCtxtReset(ctxt);
xmlCtxtUseOptions(ctxt, options); xmlCtxtUseOptions(ctxt, options);
input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0); inputFlags = 0;
if ((options & XML_PARSE_NO_UNZIP) == 0)
inputFlags |= XML_INPUT_UNZIP;
input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, inputFlags);
if (input == NULL) if (input == NULL)
return(NULL); return(NULL);

View File

@ -1914,7 +1914,7 @@ xmlCtxtNewInputFromString(xmlParserCtxtPtr ctxt, const char *url,
* xmlNewInputFromFd: * xmlNewInputFromFd:
* @url: base URL (optional) * @url: base URL (optional)
* @fd: file descriptor * @fd: file descriptor
* @flags: unused, pass 0 * @flags: input flags
* *
* Creates a new parser input to read from a file descriptor. * Creates a new parser input to read from a file descriptor.
* *
@ -1923,21 +1923,30 @@ xmlCtxtNewInputFromString(xmlParserCtxtPtr ctxt, const char *url,
* *
* @fd is closed after parsing has finished. * @fd is closed after parsing has finished.
* *
* Supported @flags are XML_INPUT_UNZIP to decompress data
* automatically. This feature is deprecated and will be removed
* in a future release.
*
* Available since 2.14.0. * Available since 2.14.0.
* *
* Returns a new parser input or NULL if a memory allocation failed. * Returns a new parser input or NULL if a memory allocation failed.
*/ */
xmlParserInputPtr xmlParserInputPtr
xmlNewInputFromFd(const char *url, int fd, int flags ATTRIBUTE_UNUSED) { xmlNewInputFromFd(const char *url, int fd, int flags) {
xmlParserInputBufferPtr buf; xmlParserInputBufferPtr buf;
if (fd < 0) if (fd < 0)
return(NULL); return(NULL);
buf = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
if (buf == NULL) if (buf == NULL)
return(NULL); return(NULL);
if (xmlInputFromFd(buf, fd, flags) < 0) {
xmlFreeParserInputBuffer(buf);
return(NULL);
}
return(xmlNewInputInternal(buf, url)); return(xmlNewInputInternal(buf, url));
} }

183
xmlIO.c
View File

@ -1037,6 +1037,105 @@ xmlIODefaultMatch(const char *filename ATTRIBUTE_UNUSED) {
return(1); return(1);
} }
/**
 * xmlInputFromFd:
 * @buf: parser input buffer whose context and callbacks get filled in
 * @fd: file descriptor to read from
 * @flags: input flags; only XML_INPUT_UNZIP is examined here
 *
 * Sets up @buf to read from a duplicate of @fd. This function never
 * calls close() on @fd itself; every stream or context it creates is
 * built on a descriptor obtained with dup().
 *
 * If XML_INPUT_UNZIP is set, the LZMA reader is tried first (when
 * compiled in), then the zlib reader (when compiled in). A reader is
 * kept if it reports compressed data, or if the read position of @fd
 * cannot be saved and restored. Otherwise the position is restored and
 * the next reader, or finally the plain fd reader, is used.
 *
 * Returns XML_ERR_OK on success, the result of xmlIOErr(errno) if
 * dup() fails, or XML_ERR_NO_MEMORY if the fd context cannot be
 * allocated.
 *
 * NOTE(review): the error results are presumably non-negative enum
 * values, so callers should compare against XML_ERR_OK rather than
 * test for a negative result - confirm against the enum definition.
 */
int
xmlInputFromFd(xmlParserInputBufferPtr buf, int fd, int flags) {
xmlFdIOCtxt *fdctxt;
int copy;
/* @flags is only read inside the #ifdef blocks below */
(void) flags;
#ifdef LIBXML_LZMA_ENABLED
if (flags & XML_INPUT_UNZIP) {
xzFile xzStream;
off_t pos;
/* Remember the current offset; a negative result means not seekable */
pos = lseek(fd, 0, SEEK_CUR);
/* The stream works on a duplicate so that @fd stays open for the caller */
copy = dup(fd);
if (copy == -1)
return(xmlIOErr(errno));
xzStream = __libxml2_xzdopen("?", copy, "rb");
if (xzStream == NULL) {
close(copy);
} else {
/*
 * Evaluation order matters: the rewinding lseek() only runs if
 * the stream is not compressed and the offset was obtained.
 */
if ((__libxml2_xzcompressed(xzStream) > 0) ||
/* Try to rewind if not compressed */
(pos < 0) ||
(lseek(fd, pos, SEEK_SET) < 0)) {
/*
 * If a file isn't seekable, we pipe uncompressed
 * input through xzlib.
 */
buf->context = xzStream;
buf->readcallback = xmlXzfileRead;
buf->closecallback = xmlXzfileClose;
buf->compressed = 1;
return(XML_ERR_OK);
}
/* Not compressed and rewound: drop this stream and try the next reader */
xmlXzfileClose(xzStream);
}
}
#endif /* LIBXML_LZMA_ENABLED */
#ifdef LIBXML_ZLIB_ENABLED
if (flags & XML_INPUT_UNZIP) {
gzFile gzStream;
off_t pos;
/* Remember the current offset; a negative result means not seekable */
pos = lseek(fd, 0, SEEK_CUR);
/* The stream works on a duplicate so that @fd stays open for the caller */
copy = dup(fd);
if (copy == -1)
return(xmlIOErr(errno));
gzStream = gzdopen(copy, "rb");
if (gzStream == NULL) {
close(copy);
} else {
/*
 * gzdirect() == 0 is treated as "compressed". As above, the
 * rewinding lseek() only runs when the earlier tests are false.
 */
if ((gzdirect(gzStream) == 0) ||
/* Try to rewind if not gzip compressed */
(pos < 0) ||
(lseek(fd, pos, SEEK_SET) < 0)) {
/*
 * If a file isn't seekable, we pipe uncompressed
 * input through zlib.
 */
buf->context = gzStream;
buf->readcallback = xmlGzfileRead;
buf->closecallback = xmlGzfileClose;
buf->compressed = 1;
return(XML_ERR_OK);
}
/* Not compressed and rewound: drop this stream and read the fd directly */
xmlGzfileClose(gzStream);
}
}
#endif /* LIBXML_ZLIB_ENABLED */
/* Plain, uncompressed path: read from a fresh duplicate of @fd */
copy = dup(fd);
if (copy == -1)
return(xmlIOErr(errno));
fdctxt = xmlMalloc(sizeof(*fdctxt));
if (fdctxt == NULL) {
/* Do not leak the duplicate when the context allocation fails */
close(copy);
return(XML_ERR_NO_MEMORY);
}
fdctxt->fd = copy;
buf->context = fdctxt;
buf->readcallback = xmlFdRead;
buf->closecallback = xmlFdClose;
return(XML_ERR_OK);
}
/** /**
* xmlInputDefaultOpen: * xmlInputDefaultOpen:
* @buf: input buffer to be filled * @buf: input buffer to be filled
@ -1048,13 +1147,9 @@ xmlIODefaultMatch(const char *filename ATTRIBUTE_UNUSED) {
static int static int
xmlInputDefaultOpen(xmlParserInputBufferPtr buf, const char *filename, xmlInputDefaultOpen(xmlParserInputBufferPtr buf, const char *filename,
int flags) { int flags) {
xmlFdIOCtxt *fdctxt;
int ret; int ret;
int fd; int fd;
/* Avoid unused variable warning */
(void) flags;
#ifdef LIBXML_HTTP_ENABLED #ifdef LIBXML_HTTP_ENABLED
if (xmlIOHTTPMatch(filename)) { if (xmlIOHTTPMatch(filename)) {
if ((flags & XML_INPUT_NETWORK) == 0) if ((flags & XML_INPUT_NETWORK) == 0)
@ -1073,87 +1168,15 @@ xmlInputDefaultOpen(xmlParserInputBufferPtr buf, const char *filename,
if (!xmlFileMatch(filename)) if (!xmlFileMatch(filename))
return(XML_IO_ENOENT); return(XML_IO_ENOENT);
#ifdef LIBXML_LZMA_ENABLED
if (flags & XML_INPUT_UNZIP) {
xzFile xzStream;
ret = xmlFdOpen(filename, 0, &fd);
if (ret != XML_ERR_OK)
return(ret);
xzStream = __libxml2_xzdopen(filename, fd, "rb");
if (xzStream == NULL) {
close(fd);
} else {
/*
* Non-regular files like pipes can't be reopened.
* If a file isn't seekable, we pipe uncompressed
* input through xzlib.
*/
if ((lseek(fd, 0, SEEK_CUR) < 0) ||
(__libxml2_xzcompressed(xzStream) > 0)) {
buf->context = xzStream;
buf->readcallback = xmlXzfileRead;
buf->closecallback = xmlXzfileClose;
buf->compressed = 1;
return(XML_ERR_OK);
}
xmlXzfileClose(xzStream);
}
}
#endif /* LIBXML_LZMA_ENABLED */
#ifdef LIBXML_ZLIB_ENABLED
if (flags & XML_INPUT_UNZIP) {
gzFile gzStream;
ret = xmlFdOpen(filename, 0, &fd);
if (ret != XML_ERR_OK)
return(ret);
gzStream = gzdopen(fd, "rb");
if (gzStream == NULL) {
close(fd);
} else {
/*
* Non-regular files like pipes can't be reopened.
* If a file isn't seekable, we pipe uncompressed
* input through zlib.
*/
if ((lseek(fd, 0, SEEK_CUR) < 0) ||
(gzdirect(gzStream) == 0)) {
buf->context = gzStream;
buf->readcallback = xmlGzfileRead;
buf->closecallback = xmlGzfileClose;
buf->compressed = 1;
return(XML_ERR_OK);
}
xmlGzfileClose(gzStream);
}
}
#endif /* LIBXML_ZLIB_ENABLED */
ret = xmlFdOpen(filename, 0, &fd); ret = xmlFdOpen(filename, 0, &fd);
if (ret != XML_ERR_OK) if (ret != XML_ERR_OK)
return(ret); return(ret);
fdctxt = xmlMalloc(sizeof(*fdctxt)); ret = xmlInputFromFd(buf, fd, flags);
if (fdctxt == NULL) {
close(fd);
return(XML_ERR_NO_MEMORY);
}
fdctxt->fd = fd;
buf->context = fdctxt; close(fd);
buf->readcallback = xmlFdRead;
buf->closecallback = xmlFdClose; return(ret);
return(XML_ERR_OK);
} }
#ifdef LIBXML_OUTPUT_ENABLED #ifdef LIBXML_OUTPUT_ENABLED

View File

@ -86,6 +86,9 @@
#define HTML_BUF_SIZE 50000 #define HTML_BUF_SIZE 50000
/* Internal parser option */
#define XML_PARSE_UNZIP (1 << 24)
typedef enum { typedef enum {
XMLLINT_RETURN_OK = 0, /* No error */ XMLLINT_RETURN_OK = 0, /* No error */
XMLLINT_ERR_UNCLASS = 1, /* Unclassified */ XMLLINT_ERR_UNCLASS = 1, /* Unclassified */