From 6208f86edd59e31a51a8d9b300d428504adb25a7 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Tue, 28 Jan 2025 20:13:58 +0100 Subject: [PATCH] xmllint: Support compressed input from stdin Another regression related to reading from stdin. Making a "-" filename read from stdin was deeply baked into the core IO code but is inherently insecure. I really want to reenable this dangerous feature as sparingly as possible. Add a new hidden parser option to make xmllint work. This will likely turn into a public option that must be opted in later. Allow compressed stdin in xmlReadFile to support xmlstarlet and older versions of xsltproc. So far, these are the only known command-line tools that rely on "-" meaning stdin. --- include/private/io.h | 3 + include/private/parser.h | 4 + parser.c | 9 ++- parserInternals.c | 9 ++- xmlIO.c | 166 ++++++++++++++++++++++----------------- xmllint.c | 9 ++- 6 files changed, 124 insertions(+), 76 deletions(-) diff --git a/include/private/io.h b/include/private/io.h index a2535ae1..d116fada 100644 --- a/include/private/io.h +++ b/include/private/io.h @@ -24,6 +24,9 @@ XML_HIDDEN xmlParserInputBufferPtr xmlNewInputBufferMemory(const void *mem, size_t size, int flags, xmlCharEncoding enc); +XML_HIDDEN int +xmlInputFromFd(xmlParserInputBufferPtr buf, int fd, int unzip); + #ifdef LIBXML_OUTPUT_ENABLED XML_HIDDEN xmlOutputBufferPtr xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder); diff --git a/include/private/parser.h b/include/private/parser.h index b14bebf9..4a800420 100644 --- a/include/private/parser.h +++ b/include/private/parser.h @@ -90,6 +90,10 @@ xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix); #define XML_INPUT_BUF_STATIC (1u << 1) #define XML_INPUT_BUF_ZERO_TERMINATED (1u << 2) +#define XML_INPUT_UNZIP (1u << 3) + +/* Internal parser option */ +#define XML_PARSE_UNZIP (1 << 24) XML_HIDDEN xmlParserInputPtr xmlNewInputURL(xmlParserCtxtPtr ctxt, const char *url, const char *publicId, diff --git 
a/parser.c b/parser.c index 52ca3565..44467355 100644 --- a/parser.c +++ b/parser.c @@ -13890,7 +13890,8 @@ xmlReadFile(const char *filename, const char *encoding, int options) * should be removed at some point. */ if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0)) - input = xmlNewInputFd(ctxt, filename, STDIN_FILENO, encoding, 0); + input = xmlNewInputFd(ctxt, filename, STDIN_FILENO, encoding, + XML_INPUT_UNZIP); else input = xmlNewInputURL(ctxt, filename, NULL, encoding, 0); @@ -14141,6 +14142,7 @@ xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, const char *URL, const char *encoding, int options) { xmlParserInputPtr input; + int inputFlags; if (ctxt == NULL) return(NULL); @@ -14148,7 +14150,10 @@ xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, xmlCtxtReset(ctxt); xmlCtxtUseOptions(ctxt, options); - input = xmlNewInputFd(ctxt, URL, fd, encoding, 0); + inputFlags = 0; + if (options & XML_PARSE_UNZIP) + inputFlags |= XML_INPUT_UNZIP; + input = xmlNewInputFd(ctxt, URL, fd, encoding, inputFlags); return(xmlCtxtParseDocument(ctxt, input)); } diff --git a/parserInternals.c b/parserInternals.c index 6ddd28e7..c9afe21d 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -1715,18 +1715,23 @@ xmlNewInputString(xmlParserCtxtPtr ctxt, const char *url, */ xmlParserInputPtr xmlNewInputFd(xmlParserCtxtPtr ctxt, const char *url, - int fd, const char *encoding, int flags ATTRIBUTE_UNUSED) { + int fd, const char *encoding, int flags) { xmlParserInputBufferPtr buf; if ((ctxt == NULL) || (fd < 0)) return(NULL); - buf = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); + buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE); if (buf == NULL) { xmlCtxtErrMemory(ctxt); return(NULL); } + if (xmlInputFromFd(buf, fd, (flags & XML_INPUT_UNZIP) != 0) < 0) { + xmlFreeParserInputBuffer(buf); + return(NULL); + } + return(xmlNewInputInternal(ctxt, buf, url, encoding)); } diff --git a/xmlIO.c b/xmlIO.c index 746cb3e2..a758caa3 100644 --- a/xmlIO.c +++ b/xmlIO.c @@ 
-1158,6 +1158,97 @@ xmlIODefaultMatch(const char *filename ATTRIBUTE_UNUSED) { return(1); } +int +xmlInputFromFd(xmlParserInputBufferPtr buf, int fd, int unzip) { + int copy; + + (void) unzip; + +#ifdef LIBXML_LZMA_ENABLED + if (unzip) { + xzFile xzStream; + off_t pos; + + pos = lseek(fd, 0, SEEK_CUR); + + copy = dup(fd); + if (copy == -1) + return(xmlIOErr(0, "dup()")); + + xzStream = __libxml2_xzdopen("?", copy, "rb"); + + if (xzStream == NULL) { + close(copy); + } else { + if ((__libxml2_xzcompressed(xzStream) > 0) || + /* Try to rewind if not compressed */ + (pos < 0) || + (lseek(fd, pos, SEEK_SET) < 0)) { + /* + * If a file isn't seekable, we pipe uncompressed + * input through xzlib. + */ + buf->context = xzStream; + buf->readcallback = xmlXzfileRead; + buf->closecallback = xmlXzfileClose; + buf->compressed = 1; + + return(XML_ERR_OK); + } + + xmlXzfileClose(xzStream); + } + } +#endif /* LIBXML_LZMA_ENABLED */ + +#ifdef LIBXML_ZLIB_ENABLED + if (unzip) { + gzFile gzStream; + off_t pos; + + pos = lseek(fd, 0, SEEK_CUR); + + copy = dup(fd); + if (copy == -1) + return(xmlIOErr(0, "dup()")); + + gzStream = gzdopen(copy, "rb"); + + if (gzStream == NULL) { + close(copy); + } else { + if ((gzdirect(gzStream) == 0) || + /* Try to rewind if not gzip compressed */ + (pos < 0) || + (lseek(fd, pos, SEEK_SET) < 0)) { + /* + * If a file isn't seekable, we pipe uncompressed + * input through zlib. 
+ */ + buf->context = gzStream; + buf->readcallback = xmlGzfileRead; + buf->closecallback = xmlGzfileClose; + buf->compressed = 1; + + return(XML_ERR_OK); + } + + xmlGzfileClose(gzStream); + } + } +#endif /* LIBXML_ZLIB_ENABLED */ + + copy = dup(fd); + if (copy == -1) + return(xmlIOErr(0, "dup()")); + + buf->context = (void *) (ptrdiff_t) copy; + buf->readcallback = xmlFdRead; + buf->closecallback = xmlFdClose; + + return(XML_ERR_OK); +} + /** * xmlInputDefaultOpen: * @buf: input buffer to be filled @@ -1197,80 +1288,15 @@ xmlInputDefaultOpen(xmlParserInputBufferPtr buf, const char *filename) { if (!xmlFileMatch(filename)) return(XML_IO_ENOENT); -#ifdef LIBXML_LZMA_ENABLED - { - xzFile xzStream; - - ret = xmlFdOpen(filename, 0, &fd); - if (ret != XML_ERR_OK) - return(ret); - - xzStream = __libxml2_xzdopen(filename, fd, "rb"); - - if (xzStream == NULL) { - close(fd); - } else { - /* - * Non-regular files like pipes can't be reopened. - * If a file isn't seekable, we pipe uncompressed - * input through xzlib. - */ - if ((lseek(fd, 0, SEEK_CUR) < 0) || - (__libxml2_xzcompressed(xzStream) > 0)) { - buf->context = xzStream; - buf->readcallback = xmlXzfileRead; - buf->closecallback = xmlXzfileClose; - buf->compressed = 1; - - return(XML_ERR_OK); - } - - xmlXzfileClose(xzStream); - } - } -#endif /* LIBXML_LZMA_ENABLED */ - -#ifdef LIBXML_ZLIB_ENABLED - { - gzFile gzStream; - - ret = xmlFdOpen(filename, 0, &fd); - if (ret != XML_ERR_OK) - return(ret); - - gzStream = gzdopen(fd, "rb"); - - if (gzStream == NULL) { - close(fd); - } else { - /* - * Non-regular files like pipes can't be reopened. - * If a file isn't seekable, we pipe uncompressed - * input through zlib. 
- */ - if ((lseek(fd, 0, SEEK_CUR) < 0) || - (gzdirect(gzStream) == 0)) { - buf->context = gzStream; - buf->readcallback = xmlGzfileRead; - buf->closecallback = xmlGzfileClose; - buf->compressed = 1; - - return(XML_ERR_OK); - } - - xmlGzfileClose(gzStream); - } - } -#endif /* LIBXML_ZLIB_ENABLED */ - ret = xmlFdOpen(filename, 0, &fd); if (ret != XML_ERR_OK) return(ret); - buf->context = (void *) (ptrdiff_t) fd; - buf->readcallback = xmlFdRead; - buf->closecallback = xmlFdClose; - return(XML_ERR_OK); + ret = xmlInputFromFd(buf, fd, /* unzip */ 1); + + close(fd); + + return(ret); } #ifdef LIBXML_OUTPUT_ENABLED diff --git a/xmllint.c b/xmllint.c index 3a7a8a00..c6273477 100644 --- a/xmllint.c +++ b/xmllint.c @@ -95,6 +95,9 @@ #define STDIN_FILENO 0 #endif +/* Internal parser option */ +#define XML_PARSE_UNZIP (1 << 24) + typedef enum { XMLLINT_RETURN_OK = 0, /* No error */ XMLLINT_ERR_UNCLASS = 1, /* Unclassified */ @@ -1648,7 +1651,8 @@ testSAX(const char *filename) { xmlCtxtSetMaxAmplification(ctxt, maxAmpl); if (strcmp(filename, "-") == 0) - xmlCtxtReadFd(ctxt, STDIN_FILENO, "-", NULL, options); + xmlCtxtReadFd(ctxt, STDIN_FILENO, "-", NULL, + options | XML_PARSE_UNZIP); else xmlCtxtReadFile(ctxt, filename, NULL, options); @@ -2333,7 +2337,8 @@ parseFile(const char *filename, xmlParserCtxtPtr rectxt) { #endif } else { if (strcmp(filename, "-") == 0) - doc = xmlCtxtReadFd(ctxt, STDIN_FILENO, "-", NULL, options); + doc = xmlCtxtReadFd(ctxt, STDIN_FILENO, "-", NULL, + options | XML_PARSE_UNZIP); else doc = xmlCtxtReadFile(ctxt, filename, NULL, options); }