parser: Fix push parser with encoding and single chunk

When push-parsing with an encoding handler, we must convert the whole
buffer in the initial conversion. Otherwise, parsing a single chunk
larger than ~4KB would fail.

Regressed with commit 34c9108f.
This commit is contained in:
Nick Wellnhofer 2025-01-29 23:49:56 +01:00
parent 4bd66d4549
commit 3eced32ea3
2 changed files with 40 additions and 1 deletion

View File

@ -1340,6 +1340,15 @@ xmlInputSetEncodingHandler(xmlParserInputPtr input,
input->consumed += processed;
in->rawconsumed = processed;
/*
* If we're push-parsing, we must convert the whole buffer.
*
* If we're pull-parsing, we could be parsing from a huge
* memory buffer which we don't want to convert completely.
*/
if (input->flags & XML_INPUT_PROGRESSIVE)
nbchars = SIZE_MAX;
else
nbchars = 4000 /* MINLEN */;
res = xmlCharEncInput(in, &nbchars);
if (res < 0)

View File

@ -340,6 +340,36 @@ testHugeEncodedChunk(void) {
xmlFreeParserCtxt(ctxt);
xmlFree(chunk);
/*
* Test the push parser with
*
* - a single call to xmlParseChunk,
* - a non-UTF8 encoding,
* - a chunk larger than MINLEN (4000 bytes).
*
* This verifies that the whole buffer is processed in the initial
* charset conversion.
*/
buf = xmlBufferCreate();
xmlBufferCat(buf,
BAD_CAST "<?xml version='1.0' encoding='ISO-8859-1'?>\n");
xmlBufferCat(buf, BAD_CAST "<doc>");
/* 20,000 characters */
for (i = 0; i < 2000; i++)
xmlBufferCat(buf, BAD_CAST "0123456789");
xmlBufferCat(buf, BAD_CAST "</doc>");
chunk = xmlBufferDetach(buf);
xmlBufferFree(buf);
ctxt = xmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL);
xmlParseChunk(ctxt, (char *) chunk, xmlStrlen(chunk), 1);
err = ctxt->wellFormed ? 0 : 1;
xmlFreeDoc(ctxt->myDoc);
xmlFreeParserCtxt(ctxt);
xmlFree(chunk);
return err;
}
#endif