mirror of
https://gitlab.gnome.org/GNOME/libxml2
synced 2025-03-28 21:33:13 +00:00
encoding: Detect truncated multi-byte sequences with ICU
Unlike iconv or the internal converters, ICU consumes truncated multi-byte sequences at the end of an input buffer. We currently check for a non-empty raw input buffer to detect truncated sequences, so this fails with ICU. It might be possible to inspect the pivot buffer pointers, but it seems cleaner to implement a `flush` flag for some encoding and I/O functions. After flushing, we can check for U_TRUNCATED_CHAR_FOUND with ICU, or detect remaining input with other converters. Also fix detection of truncated sequences for HTML, XML content and DTDs with iconv.
This commit is contained in:
parent
76c6ddfef9
commit
69b83bb68e
25
HTMLparser.c
25
HTMLparser.c
@ -4385,6 +4385,11 @@ htmlCtxtParseContentInternal(htmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
|
|||||||
|
|
||||||
htmlParseContent(ctxt);
|
htmlParseContent(ctxt);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Only check for truncated multi-byte sequences
|
||||||
|
*/
|
||||||
|
xmlParserCheckEOF(ctxt, XML_ERR_INTERNAL_ERROR);
|
||||||
|
|
||||||
/* TODO: Use xmlCtxtIsCatastrophicError */
|
/* TODO: Use xmlCtxtIsCatastrophicError */
|
||||||
if (ctxt->errNo != XML_ERR_NO_MEMORY) {
|
if (ctxt->errNo != XML_ERR_NO_MEMORY) {
|
||||||
xmlNodePtr cur;
|
xmlNodePtr cur;
|
||||||
@ -4509,11 +4514,9 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
|
|||||||
htmlParseContent(ctxt);
|
htmlParseContent(ctxt);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* autoclose
|
* Only check for truncated multi-byte sequences
|
||||||
*/
|
*/
|
||||||
if (CUR == 0)
|
xmlParserCheckEOF(ctxt, XML_ERR_INTERNAL_ERROR);
|
||||||
htmlAutoCloseOnEnd(ctxt);
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* SAX: end of the document processing.
|
* SAX: end of the document processing.
|
||||||
@ -5237,12 +5240,15 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
int
|
int
|
||||||
htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
|
htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
|
||||||
int terminate) {
|
int terminate) {
|
||||||
if ((ctxt == NULL) || (ctxt->input == NULL))
|
if ((ctxt == NULL) ||
|
||||||
|
(ctxt->input == NULL) || (ctxt->input->buf == NULL) ||
|
||||||
|
(size < 0) ||
|
||||||
|
((size > 0) && (chunk == NULL)))
|
||||||
return(XML_ERR_ARGUMENT);
|
return(XML_ERR_ARGUMENT);
|
||||||
if (PARSER_STOPPED(ctxt) != 0)
|
if (PARSER_STOPPED(ctxt) != 0)
|
||||||
return(ctxt->errNo);
|
return(ctxt->errNo);
|
||||||
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
|
|
||||||
(ctxt->input->buf != NULL)) {
|
if (size > 0) {
|
||||||
size_t pos = ctxt->input->cur - ctxt->input->base;
|
size_t pos = ctxt->input->cur - ctxt->input->base;
|
||||||
int res;
|
int res;
|
||||||
|
|
||||||
@ -5261,6 +5267,11 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
|
|||||||
if ((terminate) && (ctxt->instate != XML_PARSER_EOF)) {
|
if ((terminate) && (ctxt->instate != XML_PARSER_EOF)) {
|
||||||
htmlAutoCloseOnEnd(ctxt);
|
htmlAutoCloseOnEnd(ctxt);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Only check for truncated multi-byte sequences
|
||||||
|
*/
|
||||||
|
xmlParserCheckEOF(ctxt, XML_ERR_INTERNAL_ERROR);
|
||||||
|
|
||||||
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
|
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
|
||||||
ctxt->sax->endDocument(ctxt->userData);
|
ctxt->sax->endDocument(ctxt->userData);
|
||||||
|
|
||||||
|
@ -8107,13 +8107,14 @@ crash if you try to modify the tree)'/>
|
|||||||
<arg name='vctxt' type='void *' info='conversion context'/>
|
<arg name='vctxt' type='void *' info='conversion context'/>
|
||||||
</functype>
|
</functype>
|
||||||
<functype name='xmlCharEncConvFunc' file='encoding' module='encoding'>
|
<functype name='xmlCharEncConvFunc' file='encoding' module='encoding'>
|
||||||
<info>Convert between character encodings. On success, the value of @inlen after return is the number of bytes consumed and @outlen is the number of bytes produced.</info>
|
<info>Convert between character encodings. The value of @inlen after return is the number of bytes consumed and @outlen is the number of bytes produced. If the converter can consume partial multi-byte sequences, the @flush flag can be used to detect truncated sequences at EOF. Otherwise, the flag can be ignored.</info>
|
||||||
<return type='int' info='the number of bytes written or an XML_ENC_ERR code.'/>
|
<return type='int' info='a non-negative number on success or an XML_ENC_ERR code.'/>
|
||||||
|
<arg name='vctxt' type='void *' info='conversion context'/>
|
||||||
<arg name='out' type='unsigned char *' info='a pointer to an array of bytes to store the result'/>
|
<arg name='out' type='unsigned char *' info='a pointer to an array of bytes to store the result'/>
|
||||||
<arg name='outlen' type='int *' info='the length of @out'/>
|
<arg name='outlen' type='int *' info='the length of @out'/>
|
||||||
<arg name='in' type='const unsigned char *' info='a pointer to an array of input bytes'/>
|
<arg name='in' type='const unsigned char *' info='a pointer to an array of input bytes'/>
|
||||||
<arg name='inlen' type='int *' info='the length of @in'/>
|
<arg name='inlen' type='int *' info='the length of @in'/>
|
||||||
<arg name='vctxt' type='void *' info='conversion context'/>
|
<arg name='flush' type='int' info='end of input'/>
|
||||||
</functype>
|
</functype>
|
||||||
<functype name='xmlCharEncConvImpl' file='encoding' module='encoding'>
|
<functype name='xmlCharEncConvImpl' file='encoding' module='encoding'>
|
||||||
<info>If this function returns XML_ERR_OK, it must fill the @out pointer with an encoding handler. The handler can be obtained from xmlCharEncNewCustomHandler.</info>
|
<info>If this function returns XML_ERR_OK, it must fill the @out pointer with an encoding handler. The handler can be obtained from xmlCharEncNewCustomHandler.</info>
|
||||||
|
175
encoding.c
175
encoding.c
@ -113,35 +113,35 @@ static const xmlEncTableEntry xmlEncTable[] = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static int
|
static int
|
||||||
asciiToAscii(unsigned char* out, int *outlen,
|
asciiToAscii(void *vctxt, unsigned char* out, int *outlen,
|
||||||
const unsigned char* in, int *inlen, void *vctxt);
|
const unsigned char* in, int *inlen, int flush);
|
||||||
static int
|
static int
|
||||||
UTF8ToUTF8(unsigned char* out, int *outlen,
|
UTF8ToUTF8(void *vctxt, unsigned char* out, int *outlen,
|
||||||
const unsigned char* inb, int *inlenb, void *vctxt);
|
const unsigned char* inb, int *inlenb, int flush);
|
||||||
static int
|
static int
|
||||||
latin1ToUTF8(unsigned char* out, int *outlen,
|
latin1ToUTF8(void *vctxt, unsigned char* out, int *outlen,
|
||||||
const unsigned char* in, int *inlen, void *vctxt);
|
const unsigned char* in, int *inlen, int flush);
|
||||||
static int
|
static int
|
||||||
UTF16LEToUTF8(unsigned char* out, int *outlen,
|
UTF16LEToUTF8(void *vctxt, unsigned char* out, int *outlen,
|
||||||
const unsigned char* inb, int *inlenb, void *vctxt);
|
const unsigned char* inb, int *inlenb, int flush);
|
||||||
static int
|
static int
|
||||||
UTF16BEToUTF8(unsigned char* out, int *outlen,
|
UTF16BEToUTF8(void *vctxt, unsigned char* out, int *outlen,
|
||||||
const unsigned char* inb, int *inlenb, void *vctxt);
|
const unsigned char* inb, int *inlenb, int flush);
|
||||||
|
|
||||||
#ifdef LIBXML_OUTPUT_ENABLED
|
#ifdef LIBXML_OUTPUT_ENABLED
|
||||||
|
|
||||||
static int
|
static int
|
||||||
UTF8ToLatin1(unsigned char* outb, int *outlen,
|
UTF8ToLatin1(void *vctxt, unsigned char* outb, int *outlen,
|
||||||
const unsigned char* in, int *inlen, void *vctxt);
|
const unsigned char* in, int *inlen, int flush);
|
||||||
static int
|
static int
|
||||||
UTF8ToUTF16(unsigned char* outb, int *outlen,
|
UTF8ToUTF16(void *vctxt, unsigned char* outb, int *outlen,
|
||||||
const unsigned char* in, int *inlen, void *vctxt);
|
const unsigned char* in, int *inlen, int flush);
|
||||||
static int
|
static int
|
||||||
UTF8ToUTF16LE(unsigned char* outb, int *outlen,
|
UTF8ToUTF16LE(void *vctxt, unsigned char* outb, int *outlen,
|
||||||
const unsigned char* in, int *inlen, void *vctxt);
|
const unsigned char* in, int *inlen, int flush);
|
||||||
static int
|
static int
|
||||||
UTF8ToUTF16BE(unsigned char* outb, int *outlen,
|
UTF8ToUTF16BE(void *vctxt, unsigned char* outb, int *outlen,
|
||||||
const unsigned char* in, int *inlen, void *vctxt);
|
const unsigned char* in, int *inlen, int flush);
|
||||||
|
|
||||||
#else /* LIBXML_OUTPUT_ENABLED */
|
#else /* LIBXML_OUTPUT_ENABLED */
|
||||||
|
|
||||||
@ -154,8 +154,8 @@ UTF8ToUTF16BE(unsigned char* outb, int *outlen,
|
|||||||
|
|
||||||
#if defined(LIBXML_OUTPUT_ENABLED) && defined(LIBXML_HTML_ENABLED)
|
#if defined(LIBXML_OUTPUT_ENABLED) && defined(LIBXML_HTML_ENABLED)
|
||||||
static int
|
static int
|
||||||
UTF8ToHtmlWrapper(unsigned char *out, int *outlen,
|
UTF8ToHtmlWrapper(void *vctxt, unsigned char *out, int *outlen,
|
||||||
const unsigned char *in, int *inlen, void *vctxt);
|
const unsigned char *in, int *inlen, int flush);
|
||||||
#else
|
#else
|
||||||
#define UTF8ToHtmlWrapper NULL
|
#define UTF8ToHtmlWrapper NULL
|
||||||
#endif
|
#endif
|
||||||
@ -166,11 +166,11 @@ UTF8ToHtmlWrapper(unsigned char *out, int *outlen,
|
|||||||
#include "iso8859x.inc"
|
#include "iso8859x.inc"
|
||||||
|
|
||||||
static int
|
static int
|
||||||
ISO8859xToUTF8(unsigned char* out, int *outlen,
|
ISO8859xToUTF8(void *vctxt, unsigned char* out, int *outlen,
|
||||||
const unsigned char* in, int *inlen, void *vctxt);
|
const unsigned char* in, int *inlen, int flush);
|
||||||
static int
|
static int
|
||||||
UTF8ToISO8859x(unsigned char *out, int *outlen,
|
UTF8ToISO8859x(void *vctxt, unsigned char *out, int *outlen,
|
||||||
const unsigned char *in, int *inlen, void *vctxt);
|
const unsigned char *in, int *inlen, int flush);
|
||||||
|
|
||||||
#define MAKE_ISO_HANDLER(name, n) \
|
#define MAKE_ISO_HANDLER(name, n) \
|
||||||
{ (char *) name, { ISO8859xToUTF8 }, { UTF8ToISO8859x }, \
|
{ (char *) name, { ISO8859xToUTF8 }, { UTF8ToISO8859x }, \
|
||||||
@ -1073,6 +1073,7 @@ typedef struct {
|
|||||||
* @outlen: the length of @out
|
* @outlen: the length of @out
|
||||||
* @in: a pointer to an array of input bytes
|
* @in: a pointer to an array of input bytes
|
||||||
* @inlen: the length of @in
|
* @inlen: the length of @in
|
||||||
|
* @flush: end of input
|
||||||
*
|
*
|
||||||
* Returns an XML_ENC_ERR code.
|
* Returns an XML_ENC_ERR code.
|
||||||
*
|
*
|
||||||
@ -1081,8 +1082,9 @@ typedef struct {
|
|||||||
* The value of @outlen after return is the number of octets produced.
|
* The value of @outlen after return is the number of octets produced.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
xmlIconvConvert(unsigned char *out, int *outlen,
|
xmlIconvConvert(void *vctxt, unsigned char *out, int *outlen,
|
||||||
const unsigned char *in, int *inlen, void *vctxt) {
|
const unsigned char *in, int *inlen,
|
||||||
|
int flush ATTRIBUTE_UNUSED) {
|
||||||
xmlIconvCtxt *ctxt = vctxt;
|
xmlIconvCtxt *ctxt = vctxt;
|
||||||
size_t icv_inlen, icv_outlen;
|
size_t icv_inlen, icv_outlen;
|
||||||
const char *icv_in = (const char *) in;
|
const char *icv_in = (const char *) in;
|
||||||
@ -1293,6 +1295,7 @@ struct _uconv_t {
|
|||||||
* @outlen: the length of @out
|
* @outlen: the length of @out
|
||||||
* @in: a pointer to an array of input bytes
|
* @in: a pointer to an array of input bytes
|
||||||
* @inlen: the length of @in
|
* @inlen: the length of @in
|
||||||
|
* @flush: end of input
|
||||||
*
|
*
|
||||||
* Returns an XML_ENC_ERR code.
|
* Returns an XML_ENC_ERR code.
|
||||||
*
|
*
|
||||||
@ -1301,8 +1304,8 @@ struct _uconv_t {
|
|||||||
* The value of @outlen after return is the number of octets produced.
|
* The value of @outlen after return is the number of octets produced.
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
xmlUconvConvert(unsigned char *out, int *outlen,
|
xmlUconvConvert(void *vctxt, unsigned char *out, int *outlen,
|
||||||
const unsigned char *in, int *inlen, void *vctxt) {
|
const unsigned char *in, int *inlen, int flush) {
|
||||||
xmlUconvCtxt *cd = vctxt;
|
xmlUconvCtxt *cd = vctxt;
|
||||||
const char *ucv_in = (const char *) in;
|
const char *ucv_in = (const char *) in;
|
||||||
char *ucv_out = (char *) out;
|
char *ucv_out = (char *) out;
|
||||||
@ -1317,14 +1320,10 @@ xmlUconvConvert(unsigned char *out, int *outlen,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Note that the ICU API is stateful. It can always consume a certain
|
* The ICU API can consume input, including partial sequences,
|
||||||
* amount of input even if the output buffer would overflow. The
|
* even if the output buffer would overflow. The remaining input
|
||||||
* remaining input must be processed by calling ucnv_convertEx with a
|
* must be processed by calling ucnv_convertEx with a possibly
|
||||||
* possibly empty input buffer.
|
* empty input buffer.
|
||||||
*
|
|
||||||
* ucnv_convertEx is always called with reset and flush set to 0,
|
|
||||||
* so we don't mess up the state. This should never generate
|
|
||||||
* U_TRUNCATED_CHAR_FOUND errors.
|
|
||||||
*/
|
*/
|
||||||
if (cd->isInput) {
|
if (cd->isInput) {
|
||||||
source = cd->uconv;
|
source = cd->uconv;
|
||||||
@ -1337,7 +1336,8 @@ xmlUconvConvert(unsigned char *out, int *outlen,
|
|||||||
ucnv_convertEx(target, source, &ucv_out, ucv_out + *outlen,
|
ucnv_convertEx(target, source, &ucv_out, ucv_out + *outlen,
|
||||||
&ucv_in, ucv_in + *inlen, cd->pivot_buf,
|
&ucv_in, ucv_in + *inlen, cd->pivot_buf,
|
||||||
&cd->pivot_source, &cd->pivot_target,
|
&cd->pivot_source, &cd->pivot_target,
|
||||||
cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, 0, &err);
|
cd->pivot_buf + ICU_PIVOT_BUF_SIZE,
|
||||||
|
/* reset */ 0, flush, &err);
|
||||||
|
|
||||||
*inlen = ucv_in - (const char*) in;
|
*inlen = ucv_in - (const char*) in;
|
||||||
*outlen = ucv_out - (char *) out;
|
*outlen = ucv_out - (char *) out;
|
||||||
@ -1347,8 +1347,8 @@ xmlUconvConvert(unsigned char *out, int *outlen,
|
|||||||
} else {
|
} else {
|
||||||
switch (err) {
|
switch (err) {
|
||||||
case U_TRUNCATED_CHAR_FOUND:
|
case U_TRUNCATED_CHAR_FOUND:
|
||||||
/* Shouldn't happen without flush */
|
/* Should only happen with flush */
|
||||||
ret = XML_ENC_ERR_SUCCESS;
|
ret = XML_ENC_ERR_INPUT;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case U_BUFFER_OVERFLOW_ERROR:
|
case U_BUFFER_OVERFLOW_ERROR:
|
||||||
@ -1510,6 +1510,7 @@ xmlEncConvertError(int code) {
|
|||||||
* @outlen: the length of @out
|
* @outlen: the length of @out
|
||||||
* @in: a pointer to an array of input bytes
|
* @in: a pointer to an array of input bytes
|
||||||
* @inlen: the length of @in
|
* @inlen: the length of @in
|
||||||
|
* @flush: end of input
|
||||||
*
|
*
|
||||||
* The value of @inlen after return is the number of octets consumed
|
* The value of @inlen after return is the number of octets consumed
|
||||||
* as the return value is 0, else unpredictable.
|
* as the return value is 0, else unpredictable.
|
||||||
@ -1519,7 +1520,8 @@ xmlEncConvertError(int code) {
|
|||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
|
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
|
||||||
int *outlen, const unsigned char *in, int *inlen) {
|
int *outlen, const unsigned char *in, int *inlen,
|
||||||
|
int flush) {
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (handler->flags & XML_HANDLER_LEGACY) {
|
if (handler->flags & XML_HANDLER_LEGACY) {
|
||||||
@ -1534,6 +1536,7 @@ xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
|
|||||||
ret = func(out, outlen, in, inlen);
|
ret = func(out, outlen, in, inlen);
|
||||||
} else {
|
} else {
|
||||||
xmlCharEncConvFunc func = handler->input.func;
|
xmlCharEncConvFunc func = handler->input.func;
|
||||||
|
int oldInlen;
|
||||||
|
|
||||||
if (func == NULL) {
|
if (func == NULL) {
|
||||||
*outlen = 0;
|
*outlen = 0;
|
||||||
@ -1541,7 +1544,14 @@ xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
|
|||||||
return(XML_ENC_ERR_INTERNAL);
|
return(XML_ENC_ERR_INTERNAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = func(out, outlen, in, inlen, handler->inputCtxt);
|
oldInlen = *inlen;
|
||||||
|
ret = func(handler->inputCtxt, out, outlen, in, inlen, flush);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check for truncated multi-byte sequence.
|
||||||
|
*/
|
||||||
|
if ((flush) && (ret == XML_ENC_ERR_SUCCESS) && (*inlen != oldInlen))
|
||||||
|
ret = XML_ENC_ERR_INPUT;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ret > 0)
|
if (ret > 0)
|
||||||
@ -1588,7 +1598,7 @@ xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
|
|||||||
return(XML_ENC_ERR_INTERNAL);
|
return(XML_ENC_ERR_INTERNAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = func(out, outlen, in, inlen, handler->outputCtxt);
|
ret = func(handler->outputCtxt, out, outlen, in, inlen, /* flush */ 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ret > 0)
|
if (ret > 0)
|
||||||
@ -1617,6 +1627,7 @@ xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
|
|||||||
* xmlCharEncInput:
|
* xmlCharEncInput:
|
||||||
* @input: a parser input buffer
|
* @input: a parser input buffer
|
||||||
* @sizeOut: pointer to output size
|
* @sizeOut: pointer to output size
|
||||||
|
* @flush: end of input
|
||||||
*
|
*
|
||||||
* @sizeOut should be set to the maximum output size (or SIZE_MAX).
|
* @sizeOut should be set to the maximum output size (or SIZE_MAX).
|
||||||
* After return, it is set to the number of bytes written.
|
* After return, it is set to the number of bytes written.
|
||||||
@ -1626,7 +1637,7 @@ xmlCharEncFirstLine(xmlCharEncodingHandler *handler, xmlBufferPtr out,
|
|||||||
* Returns an XML_ENC_ERR code.
|
* Returns an XML_ENC_ERR code.
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
xmlCharEncInput(xmlParserInputBufferPtr input, size_t *sizeOut)
|
xmlCharEncInput(xmlParserInputBufferPtr input, size_t *sizeOut, int flush)
|
||||||
{
|
{
|
||||||
xmlBufPtr out, in;
|
xmlBufPtr out, in;
|
||||||
const xmlChar *dataIn;
|
const xmlChar *dataIn;
|
||||||
@ -1644,7 +1655,7 @@ xmlCharEncInput(xmlParserInputBufferPtr input, size_t *sizeOut)
|
|||||||
*sizeOut = 0;
|
*sizeOut = 0;
|
||||||
|
|
||||||
availIn = xmlBufUse(in);
|
availIn = xmlBufUse(in);
|
||||||
if (availIn == 0)
|
if ((availIn == 0) && (!flush))
|
||||||
return(0);
|
return(0);
|
||||||
dataIn = xmlBufContent(in);
|
dataIn = xmlBufContent(in);
|
||||||
totalIn = 0;
|
totalIn = 0;
|
||||||
@ -1675,7 +1686,7 @@ xmlCharEncInput(xmlParserInputBufferPtr input, size_t *sizeOut)
|
|||||||
}
|
}
|
||||||
|
|
||||||
ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
|
ret = xmlEncInputChunk(input->encoder, xmlBufEnd(out), &c_out,
|
||||||
dataIn, &c_in);
|
dataIn, &c_in, flush && completeIn);
|
||||||
|
|
||||||
totalIn += c_in;
|
totalIn += c_in;
|
||||||
dataIn += c_in;
|
dataIn += c_in;
|
||||||
@ -1750,7 +1761,7 @@ xmlCharEncInFunc(xmlCharEncodingHandler * handler, xmlBufferPtr out,
|
|||||||
written = out->size - out->use - 1;
|
written = out->size - out->use - 1;
|
||||||
}
|
}
|
||||||
ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
|
ret = xmlEncInputChunk(handler, &out->content[out->use], &written,
|
||||||
in->content, &toconv);
|
in->content, &toconv, /* flush */ 0);
|
||||||
xmlBufferShrink(in, toconv);
|
xmlBufferShrink(in, toconv);
|
||||||
out->use += written;
|
out->use += written;
|
||||||
out->content[out->use] = 0;
|
out->content[out->use] = 0;
|
||||||
@ -2077,9 +2088,10 @@ xmlByteConsumed(xmlParserCtxtPtr ctxt) {
|
|||||||
************************************************************************/
|
************************************************************************/
|
||||||
|
|
||||||
static int
|
static int
|
||||||
asciiToAscii(unsigned char* out, int *poutlen,
|
asciiToAscii(void *vctxt ATTRIBUTE_UNUSED,
|
||||||
|
unsigned char* out, int *poutlen,
|
||||||
const unsigned char* in, int *pinlen,
|
const unsigned char* in, int *pinlen,
|
||||||
void *vctxt ATTRIBUTE_UNUSED) {
|
int flush ATTRIBUTE_UNUSED) {
|
||||||
const unsigned char *inend;
|
const unsigned char *inend;
|
||||||
const unsigned char *instart = in;
|
const unsigned char *instart = in;
|
||||||
int inlen, outlen, ret;
|
int inlen, outlen, ret;
|
||||||
@ -2121,9 +2133,10 @@ asciiToAscii(unsigned char* out, int *poutlen,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
latin1ToUTF8(unsigned char* out, int *outlen,
|
latin1ToUTF8(void *vctxt ATTRIBUTE_UNUSED,
|
||||||
|
unsigned char* out, int *outlen,
|
||||||
const unsigned char* in, int *inlen,
|
const unsigned char* in, int *inlen,
|
||||||
void *vctxt ATTRIBUTE_UNUSED) {
|
int flush ATTRIBUTE_UNUSED) {
|
||||||
unsigned char* outstart = out;
|
unsigned char* outstart = out;
|
||||||
const unsigned char* instart = in;
|
const unsigned char* instart = in;
|
||||||
unsigned char* outend;
|
unsigned char* outend;
|
||||||
@ -2180,13 +2193,15 @@ done:
|
|||||||
int
|
int
|
||||||
xmlIsolat1ToUTF8(unsigned char* out, int *outlen,
|
xmlIsolat1ToUTF8(unsigned char* out, int *outlen,
|
||||||
const unsigned char* in, int *inlen) {
|
const unsigned char* in, int *inlen) {
|
||||||
return(latin1ToUTF8(out, outlen, in, inlen, NULL));
|
return(latin1ToUTF8(/* ctxt */ NULL, out, outlen, in, inlen,
|
||||||
|
/* flush */ 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
UTF8ToUTF8(unsigned char* out, int *outlen,
|
UTF8ToUTF8(void *vctxt ATTRIBUTE_UNUSED,
|
||||||
|
unsigned char* out, int *outlen,
|
||||||
const unsigned char* in, int *inlen,
|
const unsigned char* in, int *inlen,
|
||||||
void *vctxt ATTRIBUTE_UNUSED) {
|
int flush ATTRIBUTE_UNUSED) {
|
||||||
int len;
|
int len;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
@ -2214,9 +2229,10 @@ UTF8ToUTF8(unsigned char* out, int *outlen,
|
|||||||
|
|
||||||
#ifdef LIBXML_OUTPUT_ENABLED
|
#ifdef LIBXML_OUTPUT_ENABLED
|
||||||
static int
|
static int
|
||||||
UTF8ToLatin1(unsigned char* out, int *outlen,
|
UTF8ToLatin1(void *vctxt ATTRIBUTE_UNUSED,
|
||||||
|
unsigned char* out, int *outlen,
|
||||||
const unsigned char* in, int *inlen,
|
const unsigned char* in, int *inlen,
|
||||||
void *vctxt ATTRIBUTE_UNUSED) {
|
int flush ATTRIBUTE_UNUSED) {
|
||||||
const unsigned char* outend;
|
const unsigned char* outend;
|
||||||
const unsigned char* outstart = out;
|
const unsigned char* outstart = out;
|
||||||
const unsigned char* instart = in;
|
const unsigned char* instart = in;
|
||||||
@ -2286,14 +2302,16 @@ xmlUTF8ToIsolat1(unsigned char* out, int *outlen,
|
|||||||
if ((out == NULL) || (outlen == NULL) || (in == NULL) || (inlen == NULL))
|
if ((out == NULL) || (outlen == NULL) || (in == NULL) || (inlen == NULL))
|
||||||
return(XML_ENC_ERR_INTERNAL);
|
return(XML_ENC_ERR_INTERNAL);
|
||||||
|
|
||||||
return(UTF8ToLatin1(out, outlen, in, inlen, NULL));
|
return(UTF8ToLatin1(/* ctxt */ NULL, out, outlen, in, inlen,
|
||||||
|
/* flush */ 0));
|
||||||
}
|
}
|
||||||
#endif /* LIBXML_OUTPUT_ENABLED */
|
#endif /* LIBXML_OUTPUT_ENABLED */
|
||||||
|
|
||||||
static int
|
static int
|
||||||
UTF16LEToUTF8(unsigned char *out, int *outlen,
|
UTF16LEToUTF8(void *vctxt ATTRIBUTE_UNUSED,
|
||||||
|
unsigned char *out, int *outlen,
|
||||||
const unsigned char *in, int *inlen,
|
const unsigned char *in, int *inlen,
|
||||||
void *vctxt ATTRIBUTE_UNUSED) {
|
int flush ATTRIBUTE_UNUSED) {
|
||||||
const unsigned char *instart = in;
|
const unsigned char *instart = in;
|
||||||
const unsigned char *inend = in + (*inlen & ~1);
|
const unsigned char *inend = in + (*inlen & ~1);
|
||||||
unsigned char *outstart = out;
|
unsigned char *outstart = out;
|
||||||
@ -2360,9 +2378,10 @@ done:
|
|||||||
|
|
||||||
#ifdef LIBXML_OUTPUT_ENABLED
|
#ifdef LIBXML_OUTPUT_ENABLED
|
||||||
static int
|
static int
|
||||||
UTF8ToUTF16LE(unsigned char *out, int *outlen,
|
UTF8ToUTF16LE(void *vctxt ATTRIBUTE_UNUSED,
|
||||||
|
unsigned char *out, int *outlen,
|
||||||
const unsigned char *in, int *inlen,
|
const unsigned char *in, int *inlen,
|
||||||
void *vctxt ATTRIBUTE_UNUSED) {
|
int flush ATTRIBUTE_UNUSED) {
|
||||||
const unsigned char *instart = in;
|
const unsigned char *instart = in;
|
||||||
const unsigned char *inend;
|
const unsigned char *inend;
|
||||||
unsigned char *outstart = out;
|
unsigned char *outstart = out;
|
||||||
@ -2462,9 +2481,10 @@ done:
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
UTF8ToUTF16(unsigned char* outb, int *outlen,
|
UTF8ToUTF16(void *vctxt,
|
||||||
|
unsigned char* outb, int *outlen,
|
||||||
const unsigned char* in, int *inlen,
|
const unsigned char* in, int *inlen,
|
||||||
void *vctxt ATTRIBUTE_UNUSED) {
|
int flush) {
|
||||||
if (in == NULL) {
|
if (in == NULL) {
|
||||||
/*
|
/*
|
||||||
* initialization, add the Byte Order Mark for UTF-16LE
|
* initialization, add the Byte Order Mark for UTF-16LE
|
||||||
@ -2480,14 +2500,15 @@ UTF8ToUTF16(unsigned char* outb, int *outlen,
|
|||||||
*inlen = 0;
|
*inlen = 0;
|
||||||
return(0);
|
return(0);
|
||||||
}
|
}
|
||||||
return (UTF8ToUTF16LE(outb, outlen, in, inlen, NULL));
|
return (UTF8ToUTF16LE(vctxt, outb, outlen, in, inlen, flush));
|
||||||
}
|
}
|
||||||
#endif /* LIBXML_OUTPUT_ENABLED */
|
#endif /* LIBXML_OUTPUT_ENABLED */
|
||||||
|
|
||||||
static int
|
static int
|
||||||
UTF16BEToUTF8(unsigned char *out, int *outlen,
|
UTF16BEToUTF8(void *vctxt ATTRIBUTE_UNUSED,
|
||||||
|
unsigned char *out, int *outlen,
|
||||||
const unsigned char *in, int *inlen,
|
const unsigned char *in, int *inlen,
|
||||||
void *vctxt ATTRIBUTE_UNUSED) {
|
int flush ATTRIBUTE_UNUSED) {
|
||||||
const unsigned char *instart = in;
|
const unsigned char *instart = in;
|
||||||
const unsigned char *inend = in + (*inlen & ~1);
|
const unsigned char *inend = in + (*inlen & ~1);
|
||||||
unsigned char *outstart = out;
|
unsigned char *outstart = out;
|
||||||
@ -2554,9 +2575,10 @@ done:
|
|||||||
|
|
||||||
#ifdef LIBXML_OUTPUT_ENABLED
|
#ifdef LIBXML_OUTPUT_ENABLED
|
||||||
static int
|
static int
|
||||||
UTF8ToUTF16BE(unsigned char *out, int *outlen,
|
UTF8ToUTF16BE(void *vctxt ATTRIBUTE_UNUSED,
|
||||||
|
unsigned char *out, int *outlen,
|
||||||
const unsigned char *in, int *inlen,
|
const unsigned char *in, int *inlen,
|
||||||
void *vctxt ATTRIBUTE_UNUSED) {
|
int flush ATTRIBUTE_UNUSED) {
|
||||||
const unsigned char *instart = in;
|
const unsigned char *instart = in;
|
||||||
const unsigned char *inend;
|
const unsigned char *inend;
|
||||||
unsigned char *outstart = out;
|
unsigned char *outstart = out;
|
||||||
@ -2657,10 +2679,11 @@ done:
|
|||||||
|
|
||||||
#if defined(LIBXML_OUTPUT_ENABLED) && defined(LIBXML_HTML_ENABLED)
|
#if defined(LIBXML_OUTPUT_ENABLED) && defined(LIBXML_HTML_ENABLED)
|
||||||
static int
|
static int
|
||||||
UTF8ToHtmlWrapper(unsigned char *out, int *outlen,
|
UTF8ToHtmlWrapper(void *vctxt ATTRIBUTE_UNUSED,
|
||||||
|
unsigned char *out, int *outlen,
|
||||||
const unsigned char *in, int *inlen,
|
const unsigned char *in, int *inlen,
|
||||||
void *vctxt ATTRIBUTE_UNUSED) {
|
int flush ATTRIBUTE_UNUSED) {
|
||||||
return(UTF8ToHtml(out, outlen, in, inlen));
|
return(htmlUTF8ToHtml(out, outlen, in, inlen));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -2668,8 +2691,10 @@ UTF8ToHtmlWrapper(unsigned char *out, int *outlen,
|
|||||||
defined(LIBXML_ISO8859X_ENABLED)
|
defined(LIBXML_ISO8859X_ENABLED)
|
||||||
|
|
||||||
static int
|
static int
|
||||||
UTF8ToISO8859x(unsigned char *out, int *outlen,
|
UTF8ToISO8859x(void *vctxt,
|
||||||
const unsigned char *in, int *inlen, void *vctxt) {
|
unsigned char *out, int *outlen,
|
||||||
|
const unsigned char *in, int *inlen,
|
||||||
|
int flush ATTRIBUTE_UNUSED) {
|
||||||
const unsigned char *xlattable = vctxt;
|
const unsigned char *xlattable = vctxt;
|
||||||
const unsigned char *instart = in;
|
const unsigned char *instart = in;
|
||||||
const unsigned char *inend;
|
const unsigned char *inend;
|
||||||
@ -2748,8 +2773,10 @@ done:
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
ISO8859xToUTF8(unsigned char* out, int *outlen,
|
ISO8859xToUTF8(void *vctxt,
|
||||||
const unsigned char* in, int *inlen, void *vctxt) {
|
unsigned char* out, int *outlen,
|
||||||
|
const unsigned char* in, int *inlen,
|
||||||
|
int flush ATTRIBUTE_UNUSED) {
|
||||||
unsigned short const *unicodetable = vctxt;
|
unsigned short const *unicodetable = vctxt;
|
||||||
const unsigned char* instart = in;
|
const unsigned char* instart = in;
|
||||||
const unsigned char* inend;
|
const unsigned char* inend;
|
||||||
|
@ -31,8 +31,8 @@ typedef struct {
|
|||||||
} myConvCtxt;
|
} myConvCtxt;
|
||||||
|
|
||||||
static int
|
static int
|
||||||
icuConvert(unsigned char *out, int *outlen,
|
icuConvert(void *vctxt, unsigned char *out, int *outlen,
|
||||||
const unsigned char *in, int *inlen, void *vctxt) {
|
const unsigned char *in, int *inlen, int flush) {
|
||||||
myConvCtxt *cd = vctxt;
|
myConvCtxt *cd = vctxt;
|
||||||
const char *ucv_in = (const char *) in;
|
const char *ucv_in = (const char *) in;
|
||||||
char *ucv_out = (char *) out;
|
char *ucv_out = (char *) out;
|
||||||
@ -47,14 +47,10 @@ icuConvert(unsigned char *out, int *outlen,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Note that the ICU API is stateful. It can always consume a certain
|
* The ICU API can consume input, including partial sequences,
|
||||||
* amount of input even if the output buffer would overflow. The
|
* even if the output buffer would overflow. The remaining input
|
||||||
* remaining input must be processed by calling ucnv_convertEx with a
|
* must be processed by calling ucnv_convertEx with a possibly
|
||||||
* possibly empty input buffer.
|
* empty input buffer.
|
||||||
*
|
|
||||||
* ucnv_convertEx is always called with reset and flush set to 0,
|
|
||||||
* so we don't mess up the state. This should never generate
|
|
||||||
* U_TRUNCATED_CHAR_FOUND errors.
|
|
||||||
*/
|
*/
|
||||||
if (cd->isInput) {
|
if (cd->isInput) {
|
||||||
source = cd->uconv;
|
source = cd->uconv;
|
||||||
@ -67,7 +63,8 @@ icuConvert(unsigned char *out, int *outlen,
|
|||||||
ucnv_convertEx(target, source, &ucv_out, ucv_out + *outlen,
|
ucnv_convertEx(target, source, &ucv_out, ucv_out + *outlen,
|
||||||
&ucv_in, ucv_in + *inlen, cd->pivot_buf,
|
&ucv_in, ucv_in + *inlen, cd->pivot_buf,
|
||||||
&cd->pivot_source, &cd->pivot_target,
|
&cd->pivot_source, &cd->pivot_target,
|
||||||
cd->pivot_buf + ICU_PIVOT_BUF_SIZE, 0, 0, &err);
|
cd->pivot_buf + ICU_PIVOT_BUF_SIZE,
|
||||||
|
/* reset */ 0, flush, &err);
|
||||||
|
|
||||||
*inlen = ucv_in - (const char*) in;
|
*inlen = ucv_in - (const char*) in;
|
||||||
*outlen = ucv_out - (char *) out;
|
*outlen = ucv_out - (char *) out;
|
||||||
@ -77,8 +74,8 @@ icuConvert(unsigned char *out, int *outlen,
|
|||||||
} else {
|
} else {
|
||||||
switch (err) {
|
switch (err) {
|
||||||
case U_TRUNCATED_CHAR_FOUND:
|
case U_TRUNCATED_CHAR_FOUND:
|
||||||
/* Shouldn't happen without flush */
|
/* Should only happen with flush */
|
||||||
ret = XML_ENC_ERR_SUCCESS;
|
ret = XML_ENC_ERR_INPUT;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case U_BUFFER_OVERFLOW_ERROR:
|
case U_BUFFER_OVERFLOW_ERROR:
|
||||||
|
@ -126,17 +126,22 @@ typedef int (*xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen,
|
|||||||
* @outlen: the length of @out
|
* @outlen: the length of @out
|
||||||
* @in: a pointer to an array of input bytes
|
* @in: a pointer to an array of input bytes
|
||||||
* @inlen: the length of @in
|
* @inlen: the length of @in
|
||||||
|
* @flush: end of input
|
||||||
*
|
*
|
||||||
* Convert between character encodings.
|
* Convert between character encodings.
|
||||||
*
|
*
|
||||||
* On success, the value of @inlen after return is the number of
|
* The value of @inlen after return is the number of bytes consumed
|
||||||
* bytes consumed and @outlen is the number of bytes produced.
|
* and @outlen is the number of bytes produced.
|
||||||
*
|
*
|
||||||
* Returns the number of bytes written or an XML_ENC_ERR code.
|
* If the converter can consume partial multi-byte sequences, the
|
||||||
|
* @flush flag can be used to detect truncated sequences at EOF.
|
||||||
|
* Otherwise, the flag can be ignored.
|
||||||
|
*
|
||||||
|
* Returns a non-negative number on success or an XML_ENC_ERR code.
|
||||||
*/
|
*/
|
||||||
typedef int
|
typedef int
|
||||||
(*xmlCharEncConvFunc)(unsigned char *out, int *outlen,
|
(*xmlCharEncConvFunc)(void *vctxt, unsigned char *out, int *outlen,
|
||||||
const unsigned char *in, int *inlen, void *vctxt);
|
const unsigned char *in, int *inlen, int flush);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* xmlCharEncConvCtxtDtor:
|
* xmlCharEncConvCtxtDtor:
|
||||||
|
@ -9,9 +9,10 @@ xmlInitEncodingInternal(void);
|
|||||||
|
|
||||||
XML_HIDDEN int
|
XML_HIDDEN int
|
||||||
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
|
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
|
||||||
int *outlen, const unsigned char *in, int *inlen);
|
int *outlen, const unsigned char *in, int *inlen,
|
||||||
|
int flush);
|
||||||
XML_HIDDEN int
|
XML_HIDDEN int
|
||||||
xmlCharEncInput(xmlParserInputBufferPtr input, size_t *sizeOut);
|
xmlCharEncInput(xmlParserInputBufferPtr input, size_t *sizeOut, int flush);
|
||||||
XML_HIDDEN int
|
XML_HIDDEN int
|
||||||
xmlCharEncOutput(xmlOutputBufferPtr output, int init);
|
xmlCharEncOutput(xmlOutputBufferPtr output, int init);
|
||||||
|
|
||||||
|
@ -140,4 +140,7 @@ XML_HIDDEN xmlChar *
|
|||||||
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
|
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
|
||||||
int normalize);
|
int normalize);
|
||||||
|
|
||||||
|
XML_HIDDEN void
|
||||||
|
xmlParserCheckEOF(xmlParserCtxtPtr ctxt, xmlParserErrors code);
|
||||||
|
|
||||||
#endif /* XML_PARSER_H_PRIVATE__ */
|
#endif /* XML_PARSER_H_PRIVATE__ */
|
||||||
|
25
parser.c
25
parser.c
@ -7300,9 +7300,7 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
|
|||||||
while (ctxt->inputNr > oldInputNr)
|
while (ctxt->inputNr > oldInputNr)
|
||||||
xmlPopPE(ctxt);
|
xmlPopPE(ctxt);
|
||||||
|
|
||||||
if (RAW != 0) {
|
xmlParserCheckEOF(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED);
|
||||||
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -9875,8 +9873,7 @@ xmlParseContent(xmlParserCtxtPtr ctxt) {
|
|||||||
|
|
||||||
xmlParseContentInternal(ctxt);
|
xmlParseContentInternal(ctxt);
|
||||||
|
|
||||||
if (ctxt->input->cur < ctxt->input->end)
|
xmlParserCheckEOF(ctxt, XML_ERR_NOT_WELL_BALANCED);
|
||||||
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -10737,16 +10734,7 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
|
|||||||
*/
|
*/
|
||||||
xmlParseMisc(ctxt);
|
xmlParseMisc(ctxt);
|
||||||
|
|
||||||
if (ctxt->input->cur < ctxt->input->end) {
|
xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
|
||||||
if (ctxt->wellFormed)
|
|
||||||
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
|
|
||||||
} else if ((ctxt->input->buf != NULL) &&
|
|
||||||
(ctxt->input->buf->encoder != NULL) &&
|
|
||||||
(ctxt->input->buf->error == 0) &&
|
|
||||||
(!xmlBufIsEmpty(ctxt->input->buf->raw))) {
|
|
||||||
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
|
|
||||||
"Truncated multi-byte sequence at EOF\n");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ctxt->instate = XML_PARSER_EOF;
|
ctxt->instate = XML_PARSER_EOF;
|
||||||
@ -11596,11 +11584,8 @@ xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
|
|||||||
xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
|
xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
|
||||||
"Start tag expected, '<' not found\n");
|
"Start tag expected, '<' not found\n");
|
||||||
}
|
}
|
||||||
} else if ((ctxt->input->buf->encoder != NULL) &&
|
} else {
|
||||||
(ctxt->input->buf->error == 0) &&
|
xmlParserCheckEOF(ctxt, XML_ERR_DOCUMENT_END);
|
||||||
(!xmlBufIsEmpty(ctxt->input->buf->raw))) {
|
|
||||||
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
|
|
||||||
"Truncated multi-byte sequence at EOF\n");
|
|
||||||
}
|
}
|
||||||
if (ctxt->instate != XML_PARSER_EOF) {
|
if (ctxt->instate != XML_PARSER_EOF) {
|
||||||
ctxt->instate = XML_PARSER_EOF;
|
ctxt->instate = XML_PARSER_EOF;
|
||||||
|
@ -596,6 +596,49 @@ xmlParserGrow(xmlParserCtxtPtr ctxt) {
|
|||||||
return(ret);
|
return(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* xmlParserCheckEOF:
|
||||||
|
* @ctxt: parser ctxt
|
||||||
|
* @code: error code
|
||||||
|
*
|
||||||
|
* Raises an error with @code if the input wasn't consumed
|
||||||
|
* completely.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
xmlParserCheckEOF(xmlParserCtxtPtr ctxt, xmlParserErrors code) {
|
||||||
|
xmlParserInputPtr in = ctxt->input;
|
||||||
|
xmlParserInputBufferPtr buf;
|
||||||
|
|
||||||
|
if (ctxt->errNo != XML_ERR_OK)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (in->cur < in->end) {
|
||||||
|
xmlFatalErr(ctxt, code, NULL);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
buf = in->buf;
|
||||||
|
if ((buf != NULL) && (buf->encoder != NULL)) {
|
||||||
|
size_t curBase = in->cur - in->base;
|
||||||
|
size_t sizeOut = 64;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check for truncated multi-byte sequence
|
||||||
|
*/
|
||||||
|
ret = xmlCharEncInput(buf, &sizeOut, /* flush */ 1);
|
||||||
|
xmlBufUpdateInput(buf->buffer, in, curBase);
|
||||||
|
if (ret < 0) {
|
||||||
|
xmlCtxtErrIO(ctxt, buf->error, NULL);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Shouldn't happen */
|
||||||
|
if (in->cur < in->end)
|
||||||
|
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "expected EOF");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* xmlParserInputGrow:
|
* xmlParserInputGrow:
|
||||||
* @in: an XML parser input
|
* @in: an XML parser input
|
||||||
@ -1105,7 +1148,8 @@ xmlDetectEBCDIC(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr *hout) {
|
|||||||
return(res);
|
return(res);
|
||||||
outlen = sizeof(out) - 1;
|
outlen = sizeof(out) - 1;
|
||||||
inlen = input->end - input->cur;
|
inlen = input->end - input->cur;
|
||||||
res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen);
|
res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen,
|
||||||
|
/* flush */ 0);
|
||||||
/*
|
/*
|
||||||
* Return the EBCDIC handler if decoding failed. The error will
|
* Return the EBCDIC handler if decoding failed. The error will
|
||||||
* be reported later.
|
* be reported later.
|
||||||
@ -1354,7 +1398,7 @@ xmlInputSetEncodingHandler(xmlParserInputPtr input,
|
|||||||
nbchars = SIZE_MAX;
|
nbchars = SIZE_MAX;
|
||||||
else
|
else
|
||||||
nbchars = 4000 /* MINLEN */;
|
nbchars = 4000 /* MINLEN */;
|
||||||
res = xmlCharEncInput(in, &nbchars);
|
res = xmlCharEncInput(in, &nbchars, /* flush */ 0);
|
||||||
if (res < 0)
|
if (res < 0)
|
||||||
code = in->error;
|
code = in->error;
|
||||||
}
|
}
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
./test/errors/truncated-utf16.xml:1: parser error : Truncated multi-byte sequence at EOF
|
./test/errors/truncated-utf16.xml:1: I/O error : Invalid bytes in character encoding
|
||||||
<d/>
|
<d/>
|
||||||
^
|
^
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
./test/errors/truncated-utf16.xml:1: parser error : Truncated multi-byte sequence at EOF
|
./test/errors/truncated-utf16.xml:1: I/O error : Invalid bytes in character encoding
|
||||||
<d/>
|
<d/>
|
||||||
^
|
^
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
./test/errors/truncated-utf16.xml:1: parser error : Truncated multi-byte sequence at EOF
|
./test/errors/truncated-utf16.xml:1: I/O error : Invalid bytes in character encoding
|
||||||
<d/>
|
<d/>
|
||||||
^
|
^
|
||||||
./test/errors/truncated-utf16.xml : failed to parse
|
./test/errors/truncated-utf16.xml : failed to parse
|
||||||
|
84
testparser.c
84
testparser.c
@ -952,11 +952,88 @@ testWindowsUri(void) {
|
|||||||
}
|
}
|
||||||
#endif /* WIN32 */
|
#endif /* WIN32 */
|
||||||
|
|
||||||
|
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
|
||||||
|
static int
|
||||||
|
testTruncatedMultiByte(void) {
|
||||||
|
const char xml[] =
|
||||||
|
"<?xml version=\"1.0\" encoding=\"EUC-JP\"?>\n"
|
||||||
|
"<doc/>\xC3";
|
||||||
|
#ifdef LIBXML_HTML_ENABLED
|
||||||
|
const char html[] =
|
||||||
|
"<meta charset=\"EUC-JP\">\n"
|
||||||
|
"<div/>\xC3";
|
||||||
|
#endif
|
||||||
|
xmlDocPtr doc;
|
||||||
|
const xmlError *error;
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
xmlResetLastError();
|
||||||
|
doc = xmlReadDoc(BAD_CAST xml, NULL, NULL, XML_PARSE_NOERROR);
|
||||||
|
error = xmlGetLastError();
|
||||||
|
if (error == NULL || error->code != XML_ERR_INVALID_ENCODING) {
|
||||||
|
fprintf(stderr, "xml, pull: expected XML_ERR_INVALID_ENCODING\n");
|
||||||
|
err = 1;
|
||||||
|
}
|
||||||
|
xmlFreeDoc(doc);
|
||||||
|
|
||||||
|
#ifdef LIBXML_HTML_ENABLED
|
||||||
|
xmlResetLastError();
|
||||||
|
doc = htmlReadDoc(BAD_CAST html, NULL, NULL, XML_PARSE_NOERROR);
|
||||||
|
error = xmlGetLastError();
|
||||||
|
if (error == NULL || error->code != XML_ERR_INVALID_ENCODING) {
|
||||||
|
fprintf(stderr, "html, pull: expected XML_ERR_INVALID_ENCODING\n");
|
||||||
|
err = 1;
|
||||||
|
}
|
||||||
|
xmlFreeDoc(doc);
|
||||||
|
#endif /* LIBXML_HTML_ENABLED */
|
||||||
|
|
||||||
|
#ifdef LIBXML_PUSH_ENABLED
|
||||||
|
{
|
||||||
|
xmlParserCtxtPtr ctxt;
|
||||||
|
|
||||||
|
ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
|
||||||
|
xmlCtxtSetOptions(ctxt, XML_PARSE_NOERROR);
|
||||||
|
|
||||||
|
xmlParseChunk(ctxt, xml, sizeof(xml) - 1, 0);
|
||||||
|
xmlParseChunk(ctxt, "", 0, 1);
|
||||||
|
|
||||||
|
if (ctxt->errNo != XML_ERR_INVALID_ENCODING) {
|
||||||
|
fprintf(stderr, "xml, push: expected XML_ERR_INVALID_ENCODING\n");
|
||||||
|
err = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
xmlFreeDoc(ctxt->myDoc);
|
||||||
|
xmlFreeParserCtxt(ctxt);
|
||||||
|
|
||||||
|
#ifdef LIBXML_HTML_ENABLED
|
||||||
|
ctxt = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL,
|
||||||
|
XML_CHAR_ENCODING_NONE);
|
||||||
|
xmlCtxtSetOptions(ctxt, XML_PARSE_NOERROR);
|
||||||
|
|
||||||
|
htmlParseChunk(ctxt, html, sizeof(html) - 1, 0);
|
||||||
|
htmlParseChunk(ctxt, "", 0, 1);
|
||||||
|
|
||||||
|
if (ctxt->errNo != XML_ERR_INVALID_ENCODING) {
|
||||||
|
fprintf(stderr, "html, push: expected XML_ERR_INVALID_ENCODING\n");
|
||||||
|
err = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
xmlFreeDoc(ctxt->myDoc);
|
||||||
|
htmlFreeParserCtxt(ctxt);
|
||||||
|
#endif /* LIBXML_HTML_ENABLED */
|
||||||
|
}
|
||||||
|
#endif /* LIBXML_PUSH_ENABLED */
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
#endif /* iconv || icu */
|
||||||
|
|
||||||
static int charEncConvImplError;
|
static int charEncConvImplError;
|
||||||
|
|
||||||
static int
|
static int
|
||||||
rot13Convert(unsigned char *out, int *outlen,
|
rot13Convert(void *vctxt, unsigned char *out, int *outlen,
|
||||||
const unsigned char *in, int *inlen, void *vctxt) {
|
const unsigned char *in, int *inlen,
|
||||||
|
int flush ATTRIBUTE_UNUSED) {
|
||||||
int *ctxt = vctxt;
|
int *ctxt = vctxt;
|
||||||
int inSize = *inlen;
|
int inSize = *inlen;
|
||||||
int outSize = *outlen;
|
int outSize = *outlen;
|
||||||
@ -1075,6 +1152,9 @@ main(void) {
|
|||||||
err |= testBuildRelativeUri();
|
err |= testBuildRelativeUri();
|
||||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||||
err |= testWindowsUri();
|
err |= testWindowsUri();
|
||||||
|
#endif
|
||||||
|
#if defined(LIBXML_ICONV_ENABLED) || defined(LIBXML_ICU_ENABLED)
|
||||||
|
err |= testTruncatedMultiByte();
|
||||||
#endif
|
#endif
|
||||||
err |= testCharEncConvImpl();
|
err |= testCharEncConvImpl();
|
||||||
|
|
||||||
|
4
xmlIO.c
4
xmlIO.c
@ -2201,7 +2201,7 @@ xmlParserInputBufferPush(xmlParserInputBufferPtr in,
|
|||||||
* convert as much as possible to the parser reading buffer.
|
* convert as much as possible to the parser reading buffer.
|
||||||
*/
|
*/
|
||||||
nbchars = SIZE_MAX;
|
nbchars = SIZE_MAX;
|
||||||
if (xmlCharEncInput(in, &nbchars) < 0)
|
if (xmlCharEncInput(in, &nbchars, /* flush */ 0) < 0)
|
||||||
return(-1);
|
return(-1);
|
||||||
if (nbchars > INT_MAX)
|
if (nbchars > INT_MAX)
|
||||||
nbchars = INT_MAX;
|
nbchars = INT_MAX;
|
||||||
@ -2312,7 +2312,7 @@ xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
|
|||||||
else
|
else
|
||||||
sizeOut = SIZE_MAX;
|
sizeOut = SIZE_MAX;
|
||||||
|
|
||||||
if (xmlCharEncInput(in, &sizeOut) < 0)
|
if (xmlCharEncInput(in, &sizeOut, /* flush */ 0) < 0)
|
||||||
return(-1);
|
return(-1);
|
||||||
res = sizeOut;
|
res = sizeOut;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user