diff --git a/encoding.c b/encoding.c index cbe057cc..3d336fb1 100644 --- a/encoding.c +++ b/encoding.c @@ -1537,75 +1537,104 @@ xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out, /** * xmlCharEncInput: * @input: a parser input buffer + * @sizeOut: pointer to output size + * + * @sizeOut should be set to the maximum output size (or SIZE_MAX). + * After return, it is set to the number of bytes written. * * Generic front-end for the encoding handler on parser input * - * Returns the number of bytes written or an XML_ENC_ERR code. + * Returns an XML_ENC_ERR code. */ int -xmlCharEncInput(xmlParserInputBufferPtr input) +xmlCharEncInput(xmlParserInputBufferPtr input, size_t *sizeOut) { + xmlBufPtr out, in; + const xmlChar *dataIn; + size_t availIn; + size_t maxOut; + size_t totalIn, totalOut; int ret; - size_t avail; - size_t toconv; - int c_in; - int c_out; - xmlBufPtr in; - xmlBufPtr out; - const xmlChar *inData; - size_t inTotal = 0; - if ((input == NULL) || (input->encoder == NULL) || - (input->buffer == NULL) || (input->raw == NULL)) - return(XML_ENC_ERR_INTERNAL); out = input->buffer; in = input->raw; - toconv = xmlBufUse(in); - if (toconv == 0) - return (0); - inData = xmlBufContent(in); - inTotal = 0; + maxOut = *sizeOut; + totalOut = 0; - do { - c_in = toconv > INT_MAX / 2 ? INT_MAX / 2 : toconv; + *sizeOut = 0; - avail = xmlBufAvail(out); - if (avail > INT_MAX) - avail = INT_MAX; - if (avail < 4096) { + availIn = xmlBufUse(in); + if (availIn == 0) + return(0); + dataIn = xmlBufContent(in); + totalIn = 0; + + while (1) { + size_t availOut; + int completeOut, completeIn; + int c_out, c_in; + + availOut = xmlBufAvail(out); + if (availOut > INT_MAX / 2) + availOut = INT_MAX / 2; + + if (availOut < maxOut) { + c_out = availOut; + completeOut = 0; + } else { + c_out = maxOut; + completeOut = 1; + } + + if (availIn > INT_MAX / 2) { + c_in = INT_MAX / 2; + completeIn = 0; + } else { + c_in = availIn; + completeIn = 1; + } + + ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out, + dataIn, &c_in); + + totalIn += c_in; + dataIn += c_in; + availIn -= c_in; + + totalOut += c_out; + maxOut -= c_out; + xmlBufAddLen(out, c_out); + + if ((ret != XML_ENC_ERR_SUCCESS) && (ret != XML_ENC_ERR_SPACE)) { + input->error = xmlEncConvertError(ret); + return(ret); + } + + if ((completeOut) && (completeIn)) + break; + if ((completeOut) && (ret == XML_ENC_ERR_SPACE)) + break; + if ((completeIn) && (ret == XML_ENC_ERR_SUCCESS)) + break; + + if (ret == XML_ENC_ERR_SPACE) { if (xmlBufGrow(out, 4096) < 0) { input->error = XML_ERR_NO_MEMORY; return(XML_ENC_ERR_MEMORY); } - avail = xmlBufAvail(out); } - - c_in = toconv; - c_out = avail; - ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out, - inData, &c_in); - inTotal += c_in; - inData += c_in; - toconv -= c_in; - xmlBufAddLen(out, c_out); - } while (ret == XML_ENC_ERR_SPACE); - - xmlBufShrink(in, inTotal); - - if (input->rawconsumed > ULONG_MAX - (unsigned long)c_in) - input->rawconsumed = ULONG_MAX; - else - input->rawconsumed += c_in; - - if (((ret != 0) && (c_out == 0)) || - (ret == XML_ENC_ERR_MEMORY)) { - if (input->error == 0) - input->error = xmlEncConvertError(ret); - return(ret); } - return (c_out); + xmlBufShrink(in, totalIn); + + if (input->rawconsumed > ULONG_MAX - (unsigned long) totalIn) + input->rawconsumed = ULONG_MAX; + else + input->rawconsumed += totalIn; + + *sizeOut = totalOut; + return(XML_ERR_OK); } /** diff --git a/include/private/enc.h b/include/private/enc.h index cd549145..864025f8 100644 --- a/include/private/enc.h +++ b/include/private/enc.h @@ -11,7 +11,7 @@ XML_HIDDEN int xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out, int *outlen, const unsigned char *in, int *inlen); XML_HIDDEN int -xmlCharEncInput(xmlParserInputBufferPtr input); +xmlCharEncInput(xmlParserInputBufferPtr input, size_t *sizeOut); XML_HIDDEN int xmlCharEncOutput(xmlOutputBufferPtr output, int init); diff --git a/parser.c b/parser.c index e52392ce..b35e804e 100644 --- a/parser.c +++ b/parser.c @@ -11561,14 +11561,18 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, int terminate) { size_t curBase; size_t maxLength; + size_t pos; int end_in_lf = 0; + int res; if ((ctxt == NULL) || (size < 0)) return(XML_ERR_ARGUMENT); + if ((chunk == NULL) && (size > 0)) + return(XML_ERR_ARGUMENT); + if ((ctxt->input == NULL) || (ctxt->input->buf == NULL)) + return(XML_ERR_ARGUMENT); if (ctxt->disableSAX != 0) return(ctxt->errNo); - if (ctxt->input == NULL) - return(XML_ERR_INTERNAL_ERROR); ctxt->input->flags |= XML_INPUT_PROGRESSIVE; if (ctxt->instate == XML_PARSER_START) @@ -11579,18 +11583,17 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, size--; } - if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) && - (ctxt->input->buf != NULL)) { - size_t pos = ctxt->input->cur - ctxt->input->base; - int res; - - res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); - xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos); - if (res < 0) { - xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL); - xmlHaltParser(ctxt); - return(ctxt->errNo); - } + /* + * Also push an empty chunk to make sure that the raw buffer + * will be flushed if there is an encoder. + */ + pos = ctxt->input->cur - ctxt->input->base; + res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk); + xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos); + if (res < 0) { + xmlCtxtErrIO(ctxt, ctxt->input->buf->error, NULL); + xmlHaltParser(ctxt); + return(ctxt->errNo); } xmlParseTryOrFinish(ctxt, terminate); @@ -11608,11 +11611,8 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1)) return(ctxt->errNo); - if ((end_in_lf == 1) && (ctxt->input != NULL) && - (ctxt->input->buf != NULL)) { - size_t pos = ctxt->input->cur - ctxt->input->base; - int res; - + if (end_in_lf == 1) { + pos = ctxt->input->cur - ctxt->input->base; res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r"); xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos); if (res < 0) { @@ -11639,8 +11639,7 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size, xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY, "Start tag expected, '<' not found\n"); } - } else if ((ctxt->input->buf != NULL) && - (ctxt->input->buf->encoder != NULL) && + } else if ((ctxt->input->buf->encoder != NULL) && (ctxt->input->buf->error == 0) && (!xmlBufIsEmpty(ctxt->input->buf->raw))) { xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, diff --git a/parserInternals.c b/parserInternals.c index 1590ce62..8022cd24 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -1273,7 +1273,6 @@ xmlInputSetEncodingHandler(xmlParserInputPtr input, xmlCharEncodingHandlerPtr handler) { xmlParserInputBufferPtr in; xmlBufPtr buf; - int nbchars; int code = XML_ERR_OK; if ((input == NULL) || (input->buf == NULL)) { @@ -1326,6 +1325,8 @@ xmlInputSetEncodingHandler(xmlParserInputPtr input, */ if (input->end > input->base) { size_t processed; + size_t nbchars; + int res; /* * Shrink the current input buffer. @@ -1336,8 +1337,9 @@ xmlInputSetEncodingHandler(xmlParserInputPtr input, input->consumed += processed; in->rawconsumed = processed; - nbchars = xmlCharEncInput(in); - if (nbchars < 0) + nbchars = 4000 /* MINLEN */; + res = xmlCharEncInput(in, &nbchars); + if (res < 0) code = in->error; } diff --git a/xmlIO.c b/xmlIO.c index d2b70432..8df36a69 100644 --- a/xmlIO.c +++ b/xmlIO.c @@ -48,6 +48,10 @@ #include "private/error.h" #include "private/io.h" +#ifndef SIZE_MAX + #define SIZE_MAX ((size_t) -1) +#endif + /* #define VERBOSE_FAILURE */ #define MINLEN 4000 @@ -2105,7 +2109,7 @@ xmlOutputBufferCreateFilenameDefault(xmlOutputBufferCreateFilenameFunc func) int xmlParserInputBufferPush(xmlParserInputBufferPtr in, int len, const char *buf) { - int nbchars = 0; + size_t nbchars = 0; int ret; if (len < 0) return(0); @@ -2130,9 +2134,11 @@ xmlParserInputBufferPush(xmlParserInputBufferPtr in, /* * convert as much as possible to the parser reading buffer. */ - nbchars = xmlCharEncInput(in); - if (nbchars < 0) - return(-1); + nbchars = SIZE_MAX; + if (xmlCharEncInput(in, &nbchars) < 0) + return(-1); + if (nbchars > INT_MAX) + nbchars = INT_MAX; } else { nbchars = len; ret = xmlBufAdd(in->buffer, (xmlChar *) buf, nbchars); @@ -2229,9 +2235,19 @@ xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) { } if (in->encoder != NULL) { - res = xmlCharEncInput(in); - if (res < 0) + size_t sizeOut; + + /* + * Don't convert whole buffer when reading from memory. + */ + if (in->readcallback == NULL) + sizeOut = len; + else + sizeOut = SIZE_MAX; + + if (xmlCharEncInput(in, &sizeOut) < 0) return(-1); + res = sizeOut; } return(res); }