parser: Optimize xmlLoadEntityContent

Load entity content via xmlParserInputBufferGrow, avoiding a copy.

This also fixes an entity size accounting error.
This commit is contained in:
Nick Wellnhofer 2023-08-08 15:21:25 +02:00
parent facc2a06da
commit 5aff27ae78
2 changed files with 48 additions and 48 deletions

View File

@ -7775,9 +7775,11 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt)
*/ */
static int static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) { xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
xmlParserInputPtr input; xmlParserInputPtr input = NULL;
xmlBufferPtr buf; xmlChar *content = NULL;
int l, c; size_t length, i;
int ret = -1;
int res;
if ((ctxt == NULL) || (entity == NULL) || if ((ctxt == NULL) || (entity == NULL) ||
((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) && ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
@ -7792,61 +7794,54 @@ xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
xmlGenericError(xmlGenericErrorContext, xmlGenericError(xmlGenericErrorContext,
"Reading %s entity content input\n", entity->name); "Reading %s entity content input\n", entity->name);
buf = xmlBufferCreate(); input = xmlLoadExternalEntity((char *) entity->URI,
if (buf == NULL) { (char *) entity->ExternalID, ctxt);
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
"xmlLoadEntityContent parameter error");
return(-1);
}
xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
input = xmlNewEntityInputStream(ctxt, entity);
if (input == NULL) { if (input == NULL) {
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
"xmlLoadEntityContent input error"); "xmlLoadEntityContent input error");
xmlBufferFree(buf);
return(-1); return(-1);
} }
/* while ((res = xmlParserInputBufferGrow(input->buf, 16384)) > 0)
* Push the entity as the current input, read char by char ;
* saving to the buffer until the end of the entity or an error
*/ if (res < 0) {
if (xmlPushInput(ctxt, input) < 0) { xmlFatalErr(ctxt, input->buf->error, NULL);
xmlBufferFree(buf); goto error;
xmlFreeInputStream(input);
return(-1);
} }
GROW; length = xmlBufUse(input->buf->buffer);
c = CUR_CHAR(l); content = xmlBufDetach(input->buf->buffer);
while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
(IS_CHAR(c))) { if (length > INT_MAX) {
xmlBufferAdd(buf, ctxt->input->cur, l); xmlErrMemory(ctxt, NULL);
NEXTL(l); goto error;
c = CUR_CHAR(l);
}
if (ctxt->instate == XML_PARSER_EOF) {
xmlBufferFree(buf);
return(-1);
} }
if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) { for (i = 0; i < length; ) {
xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed); int clen = length - i;
xmlPopInput(ctxt); int c = xmlGetUTF8Char(content + i, &clen);
} else if (!IS_CHAR(c)) {
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
"xmlLoadEntityContent: invalid char value %d\n",
c);
xmlBufferFree(buf);
return(-1);
}
entity->content = buf->content;
entity->length = buf->use;
buf->content = NULL;
xmlBufferFree(buf);
return(0); if ((c < 0) || (!IS_CHAR(c))) {
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
"xmlLoadEntityContent: invalid char value %d\n",
content[i]);
goto error;
}
i += clen;
}
xmlSaturatedAdd(&ctxt->sizeentities, length);
entity->content = content;
entity->length = length;
content = NULL;
ret = 0;
error:
xmlFree(content);
xmlFreeInputStream(input);
return(ret);
} }
/** /**

View File

@ -995,7 +995,12 @@ hugeDtdTest(const char *filename ATTRIBUTE_UNUSED,
total_size = strlen(hugeDocParts->start) + total_size = strlen(hugeDocParts->start) +
strlen(hugeDocParts->segment) * (MAX_NODES - 1) + strlen(hugeDocParts->segment) * (MAX_NODES - 1) +
strlen(hugeDocParts->finish) + strlen(hugeDocParts->finish) +
28; /*
* Other external entities pa.ent, pb.ent, pc.ent.
* These are currently counted twice because they're
* used both in DTD and EntityValue.
*/
(16 + 6 + 6) * 2;
if (ctxt->sizeentities != total_size) { if (ctxt->sizeentities != total_size) {
fprintf(stderr, "Wrong parsed entity size: %lu (expected %lu)\n", fprintf(stderr, "Wrong parsed entity size: %lu (expected %lu)\n",
ctxt->sizeentities, total_size); ctxt->sizeentities, total_size);