diff --git a/include/private/io.h b/include/private/io.h index 8748c663..da2004fd 100644 --- a/include/private/io.h +++ b/include/private/io.h @@ -31,6 +31,9 @@ XML_HIDDEN xmlParserInputBufferPtr xmlNewInputBufferMemory(const void *mem, size_t size, int flags, xmlCharEncoding enc); +XML_HIDDEN int +xmlInputFromFd(xmlParserInputBufferPtr buf, int fd, int flags); + #ifdef LIBXML_OUTPUT_ENABLED XML_HIDDEN void xmlOutputBufferWriteQuotedString(xmlOutputBufferPtr buf, diff --git a/parser.c b/parser.c index 2d624de5..18dfaa9d 100644 --- a/parser.c +++ b/parser.c @@ -14021,7 +14021,7 @@ xmlReadFile(const char *filename, const char *encoding, int options) */ if ((filename != NULL) && (filename[0] == '-') && (filename[1] == 0)) input = xmlCtxtNewInputFromFd(ctxt, filename, STDIN_FILENO, - encoding, 0); + encoding, XML_INPUT_UNZIP); else input = xmlCtxtNewInputFromUrl(ctxt, filename, NULL, encoding, 0); @@ -14283,6 +14283,7 @@ xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, const char *URL, const char *encoding, int options) { xmlParserInputPtr input; + int inputFlags; if (ctxt == NULL) return(NULL); @@ -14290,7 +14291,10 @@ xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd, xmlCtxtReset(ctxt); xmlCtxtUseOptions(ctxt, options); - input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, 0); + inputFlags = 0; + if ((options & XML_PARSE_NO_UNZIP) == 0) + inputFlags |= XML_INPUT_UNZIP; + input = xmlCtxtNewInputFromFd(ctxt, URL, fd, encoding, inputFlags); if (input == NULL) return(NULL); diff --git a/parserInternals.c b/parserInternals.c index 2aee44f1..f507060c 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -1914,7 +1914,7 @@ xmlCtxtNewInputFromString(xmlParserCtxtPtr ctxt, const char *url, * xmlNewInputFromFd: * @url: base URL (optional) * @fd: file descriptor - * @flags: unused, pass 0 + * @flags: input flags * - * Creates a new parser input to read from a zero-terminated string. + * Creates a new parser input to read from a file descriptor. 
* @@ -1923,21 +1923,30 @@ xmlCtxtNewInputFromString(xmlParserCtxtPtr ctxt, const char *url, * * @fd is closed after parsing has finished. * + * Supported @flags are XML_INPUT_UNZIP to decompress data + * automatically. This feature is deprecated and will be removed + * in a future release. + * * Available since 2.14.0. * * Returns a new parser input or NULL if a memory allocation failed. */ xmlParserInputPtr -xmlNewInputFromFd(const char *url, int fd, int flags ATTRIBUTE_UNUSED) { +xmlNewInputFromFd(const char *url, int fd, int flags) { xmlParserInputBufferPtr buf; if (fd < 0) return(NULL); - buf = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE); + buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE); if (buf == NULL) return(NULL); + if (xmlInputFromFd(buf, fd, flags) != XML_ERR_OK) { + xmlFreeParserInputBuffer(buf); + return(NULL); + } + return(xmlNewInputInternal(buf, url)); } diff --git a/xmlIO.c b/xmlIO.c index 25a3c325..cc861402 100644 --- a/xmlIO.c +++ b/xmlIO.c @@ -1037,6 +1037,116 @@ xmlIODefaultMatch(const char *filename ATTRIBUTE_UNUSED) { return(1); } +/** + * xmlInputFromFd: + * @buf: input buffer to be filled + * @fd: file descriptor + * @flags: XML_INPUT flags + * + * Set up @buf to read from a duplicate of @fd. The caller keeps + * ownership of @fd and remains responsible for closing it. + * + * Returns XML_ERR_OK, XML_ERR_NO_MEMORY or the code from xmlIOErr(). + */ +int +xmlInputFromFd(xmlParserInputBufferPtr buf, int fd, int flags) { + xmlFdIOCtxt *fdctxt; + int copy; + + (void) flags; + +#ifdef LIBXML_LZMA_ENABLED + if (flags & XML_INPUT_UNZIP) { + xzFile xzStream; + off_t pos; + + pos = lseek(fd, 0, SEEK_CUR); + + copy = dup(fd); + if (copy == -1) + return(xmlIOErr(errno)); + + xzStream = __libxml2_xzdopen("?", copy, "rb"); + + if (xzStream == NULL) { + close(copy); + } else { + if ((__libxml2_xzcompressed(xzStream) > 0) || + /* Try to rewind if not xz compressed */ + (pos < 0) || + (lseek(fd, pos, SEEK_SET) < 0)) { + /* + * If a file isn't seekable, we pipe uncompressed + * input through xzlib. 
+ */ + buf->context = xzStream; + buf->readcallback = xmlXzfileRead; + buf->closecallback = xmlXzfileClose; + buf->compressed = 1; + + return(XML_ERR_OK); + } + + xmlXzfileClose(xzStream); + } + } +#endif /* LIBXML_LZMA_ENABLED */ + +#ifdef LIBXML_ZLIB_ENABLED + if (flags & XML_INPUT_UNZIP) { + gzFile gzStream; + off_t pos; + + pos = lseek(fd, 0, SEEK_CUR); + + copy = dup(fd); + if (copy == -1) + return(xmlIOErr(errno)); + + gzStream = gzdopen(copy, "rb"); + + if (gzStream == NULL) { + close(copy); + } else { + if ((gzdirect(gzStream) == 0) || + /* Try to rewind if not gzip compressed */ + (pos < 0) || + (lseek(fd, pos, SEEK_SET) < 0)) { + /* + * If a file isn't seekable, we pipe uncompressed + * input through zlib. + */ + buf->context = gzStream; + buf->readcallback = xmlGzfileRead; + buf->closecallback = xmlGzfileClose; + buf->compressed = 1; + + return(XML_ERR_OK); + } + + xmlGzfileClose(gzStream); + } + } +#endif /* LIBXML_ZLIB_ENABLED */ + + copy = dup(fd); + if (copy == -1) + return(xmlIOErr(errno)); + + fdctxt = xmlMalloc(sizeof(*fdctxt)); + if (fdctxt == NULL) { + close(copy); + return(XML_ERR_NO_MEMORY); + } + fdctxt->fd = copy; + + buf->context = fdctxt; + buf->readcallback = xmlFdRead; + buf->closecallback = xmlFdClose; + + return(XML_ERR_OK); +} + /** * xmlInputDefaultOpen: * @buf: input buffer to be filled @@ -1048,13 +1158,9 @@ xmlIODefaultMatch(const char *filename ATTRIBUTE_UNUSED) { static int xmlInputDefaultOpen(xmlParserInputBufferPtr buf, const char *filename, int flags) { - xmlFdIOCtxt *fdctxt; int ret; int fd; - /* Avoid unused variable warning */ - (void) flags; - #ifdef LIBXML_HTTP_ENABLED if (xmlIOHTTPMatch(filename)) { if ((flags & XML_INPUT_NETWORK) == 0) @@ -1073,87 +1179,15 @@ xmlInputDefaultOpen(xmlParserInputBufferPtr buf, const char *filename, if (!xmlFileMatch(filename)) return(XML_IO_ENOENT); -#ifdef LIBXML_LZMA_ENABLED - if (flags & XML_INPUT_UNZIP) { - xzFile xzStream; - - ret = xmlFdOpen(filename, 0, &fd); - if (ret != 
XML_ERR_OK) - return(ret); - - xzStream = __libxml2_xzdopen(filename, fd, "rb"); - - if (xzStream == NULL) { - close(fd); - } else { - /* - * Non-regular files like pipes can't be reopened. - * If a file isn't seekable, we pipe uncompressed - * input through xzlib. - */ - if ((lseek(fd, 0, SEEK_CUR) < 0) || - (__libxml2_xzcompressed(xzStream) > 0)) { - buf->context = xzStream; - buf->readcallback = xmlXzfileRead; - buf->closecallback = xmlXzfileClose; - buf->compressed = 1; - - return(XML_ERR_OK); - } - - xmlXzfileClose(xzStream); - } - } -#endif /* LIBXML_LZMA_ENABLED */ - -#ifdef LIBXML_ZLIB_ENABLED - if (flags & XML_INPUT_UNZIP) { - gzFile gzStream; - - ret = xmlFdOpen(filename, 0, &fd); - if (ret != XML_ERR_OK) - return(ret); - - gzStream = gzdopen(fd, "rb"); - - if (gzStream == NULL) { - close(fd); - } else { - /* - * Non-regular files like pipes can't be reopened. - * If a file isn't seekable, we pipe uncompressed - * input through zlib. - */ - if ((lseek(fd, 0, SEEK_CUR) < 0) || - (gzdirect(gzStream) == 0)) { - buf->context = gzStream; - buf->readcallback = xmlGzfileRead; - buf->closecallback = xmlGzfileClose; - buf->compressed = 1; - - return(XML_ERR_OK); - } - - xmlGzfileClose(gzStream); - } - } -#endif /* LIBXML_ZLIB_ENABLED */ - ret = xmlFdOpen(filename, 0, &fd); if (ret != XML_ERR_OK) return(ret); - fdctxt = xmlMalloc(sizeof(*fdctxt)); - if (fdctxt == NULL) { - close(fd); - return(XML_ERR_NO_MEMORY); - } - fdctxt->fd = fd; + ret = xmlInputFromFd(buf, fd, flags); - buf->context = fdctxt; - buf->readcallback = xmlFdRead; - buf->closecallback = xmlFdClose; - return(XML_ERR_OK); + close(fd); + + return(ret); } #ifdef LIBXML_OUTPUT_ENABLED diff --git a/xmllint.c b/xmllint.c index 84ca7eba..bedd0526 100644 --- a/xmllint.c +++ b/xmllint.c @@ -86,6 +86,9 @@ #define HTML_BUF_SIZE 50000 +/* Internal parser option */ +#define XML_PARSE_UNZIP (1 << 24) + typedef enum { XMLLINT_RETURN_OK = 0, /* No error */ XMLLINT_ERR_UNCLASS = 1, /* Unclassified */