parser: Fix in-parameter-entity and in-external-dtd checks

Use in ctxt->input->entity instead of ctxt->inputNr to determine whether
we are inside a parameter entity.

Stop using ctxt->external to check whether we're in an external DTD.
This is signaled by ctxt->inSubset == 2.
This commit is contained in:
Nick Wellnhofer 2023-12-26 02:10:35 +01:00
parent 477a7ed82c
commit d944a41515
11 changed files with 66 additions and 110 deletions

View File

@ -6277,7 +6277,6 @@ htmlCtxtReset(htmlParserCtxtPtr ctxt)
ctxt->hasExternalSubset = 0; ctxt->hasExternalSubset = 0;
ctxt->hasPErefs = 0; ctxt->hasPErefs = 0;
ctxt->html = 1; ctxt->html = 1;
ctxt->external = 0;
ctxt->instate = XML_PARSER_START; ctxt->instate = XML_PARSER_START;
ctxt->token = 0; ctxt->token = 0;

4
SAX2.c
View File

@ -1219,8 +1219,6 @@ xmlSAX2AttributeInternal(void *ctx, const xmlChar *fullname,
} else } else
#endif /* LIBXML_VALID_ENABLED */ #endif /* LIBXML_VALID_ENABLED */
if (((ctxt->loadsubset & XML_SKIP_IDS) == 0) && if (((ctxt->loadsubset & XML_SKIP_IDS) == 0) &&
(((ctxt->replaceEntities == 0) && (ctxt->external != 2)) ||
((ctxt->replaceEntities != 0) && (ctxt->inSubset == 0))) &&
/* Don't create IDs containing entity references */ /* Don't create IDs containing entity references */
(ret->children != NULL) && (ret->children != NULL) &&
(ret->children->type == XML_TEXT_NODE) && (ret->children->type == XML_TEXT_NODE) &&
@ -1955,8 +1953,6 @@ xmlSAX2AttributeNs(xmlParserCtxtPtr ctxt,
} else } else
#endif /* LIBXML_VALID_ENABLED */ #endif /* LIBXML_VALID_ENABLED */
if (((ctxt->loadsubset & XML_SKIP_IDS) == 0) && if (((ctxt->loadsubset & XML_SKIP_IDS) == 0) &&
(((ctxt->replaceEntities == 0) && (ctxt->external != 2)) ||
((ctxt->replaceEntities != 0) && (ctxt->inSubset == 0))) &&
/* Don't create IDs containing entity references */ /* Don't create IDs containing entity references */
(ret->children != NULL) && (ret->children != NULL) &&
(ret->children->type == XML_TEXT_NODE) && (ret->children->type == XML_TEXT_NODE) &&

View File

@ -220,7 +220,7 @@ struct _xmlParserCtxt {
int hasExternalSubset; /* reference and external subset */ int hasExternalSubset; /* reference and external subset */
int hasPErefs; /* the internal subset has PE refs */ int hasPErefs; /* the internal subset has PE refs */
int external; /* are we parsing an external entity */ int external; /* unused */
int valid; /* is the document valid */ int valid; /* is the document valid */
int validate; /* shall we try to validate ? */ int validate; /* shall we try to validate ? */

View File

@ -28,6 +28,16 @@
#define PARSER_STOPPED(ctxt) ((ctxt)->disableSAX > 1) #define PARSER_STOPPED(ctxt) ((ctxt)->disableSAX > 1)
#define PARSER_IN_PE(ctxt) \
(((ctxt)->input->entity != NULL) && \
(((ctxt)->input->entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || \
((ctxt)->input->entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)))
#define PARSER_EXTERNAL(ctxt) \
(((ctxt)->inSubset == 2) || \
(((ctxt)->input->entity != NULL) && \
((ctxt)->input->entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)))
XML_HIDDEN void XML_HIDDEN void
xmlCtxtVErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain, xmlCtxtVErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
xmlParserErrors code, xmlErrorLevel level, xmlParserErrors code, xmlErrorLevel level,

129
parser.c
View File

@ -995,7 +995,7 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
attr->prefix = prefix; attr->prefix = prefix;
attr->value = hvalue; attr->value = hvalue;
attr->valueEnd = hvalue.name + len; attr->valueEnd = hvalue.name + len;
attr->external = ctxt->external; attr->external = PARSER_EXTERNAL(ctxt);
attr->expandedSize = expandedSize; attr->expandedSize = expandedSize;
return; return;
@ -2142,8 +2142,7 @@ static int spacePop(xmlParserCtxtPtr ctxt) {
/* Don't shrink push parser buffer. */ /* Don't shrink push parser buffer. */
#define SHRINK \ #define SHRINK \
if (((ctxt->progressive == 0) || (ctxt->inputNr > 1)) && \ if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
(ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
xmlParserShrink(ctxt); xmlParserShrink(ctxt);
@ -2190,13 +2189,17 @@ static int spacePop(xmlParserCtxtPtr ctxt) {
int int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
int res = 0; int res = 0;
int inParam;
int expandParam;
inParam = PARSER_IN_PE(ctxt);
expandParam = PARSER_EXTERNAL(ctxt);
/* /*
* It's Okay to use CUR/NEXT here since all the blanks are on * It's Okay to use CUR/NEXT here since all the blanks are on
* the ASCII range. * the ASCII range.
*/ */
if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) || if (!inParam && !expandParam) {
(ctxt->instate == XML_PARSER_START)) {
const xmlChar *cur; const xmlChar *cur;
/* /*
* if we are in the document content, go really fast * if we are in the document content, go really fast
@ -2219,23 +2222,29 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
} }
ctxt->input->cur = cur; ctxt->input->cur = cur;
} else { } else {
int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
while (PARSER_STOPPED(ctxt) == 0) { while (PARSER_STOPPED(ctxt) == 0) {
if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */ if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
NEXT; NEXT;
} else if (CUR == '%') { } else if (CUR == '%') {
/* if ((expandParam == 0) ||
* Need to handle support of entities branching here (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
*/
if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
break; break;
/*
* Expand parameter entity. We continue to consume
* whitespace at the start of the entity and possible
* even consume the whole entity and pop it. We might
* even pop multiple PEs in this loop.
*/
xmlParsePEReference(ctxt); xmlParsePEReference(ctxt);
inParam = PARSER_IN_PE(ctxt);
expandParam = PARSER_EXTERNAL(ctxt);
} else if (CUR == 0) { } else if (CUR == 0) {
unsigned long consumed; unsigned long consumed;
xmlEntityPtr ent; xmlEntityPtr ent;
if (ctxt->inputNr <= 1) if (inParam == 0)
break; break;
consumed = ctxt->input->consumed; consumed = ctxt->input->consumed;
@ -2257,6 +2266,9 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
xmlParserEntityCheck(ctxt, consumed); xmlParserEntityCheck(ctxt, consumed);
xmlPopInput(ctxt); xmlPopInput(ctxt);
inParam = PARSER_IN_PE(ctxt);
expandParam = PARSER_EXTERNAL(ctxt);
} else { } else {
break; break;
} }
@ -2567,61 +2579,6 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
*/ */
void void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
switch(ctxt->instate) {
case XML_PARSER_CDATA_SECTION:
return;
case XML_PARSER_COMMENT:
return;
case XML_PARSER_START_TAG:
return;
case XML_PARSER_END_TAG:
return;
case XML_PARSER_EOF:
xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
return;
case XML_PARSER_PROLOG:
case XML_PARSER_START:
case XML_PARSER_XML_DECL:
case XML_PARSER_MISC:
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
return;
case XML_PARSER_ENTITY_DECL:
case XML_PARSER_CONTENT:
case XML_PARSER_ATTRIBUTE_VALUE:
case XML_PARSER_PI:
case XML_PARSER_SYSTEM_LITERAL:
case XML_PARSER_PUBLIC_LITERAL:
/* we just ignore it there */
return;
case XML_PARSER_EPILOG:
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
return;
case XML_PARSER_ENTITY_VALUE:
/*
* NOTE: in the case of entity values, we don't do the
* substitution here since we need the literal
* entity value to be able to save the internal
* subset of the document.
* This will be handled by xmlStringDecodeEntities
*/
return;
case XML_PARSER_DTD:
/*
* [WFC: Well-Formedness Constraint: PEs in Internal Subset]
* In the internal DTD subset, parameter-entity references
* can occur only where markup declarations can occur, not
* within markup declarations.
* In that case this is handled in xmlParseMarkupDecl
*/
if ((ctxt->external == 0) && (ctxt->inputNr == 1))
return;
if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
return;
break;
case XML_PARSER_IGNORE:
return;
}
xmlParsePEReference(ctxt); xmlParsePEReference(ctxt);
} }
@ -3888,8 +3845,7 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
tmp); tmp);
goto error; goto error;
} }
if ((tmp == '%') && (ctxt->inSubset == 1) && if ((tmp == '%') && (!PARSER_EXTERNAL(ctxt))) {
(ctxt->inputNr == 1)) {
xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
goto error; goto error;
} }
@ -6653,14 +6609,8 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
/* /*
* [ WFC: PEs in Internal Subset ] error handling. * [ WFC: PEs in Internal Subset ] error handling.
*/ */
if ((RAW == '%') && (ctxt->external == 0) && xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
(ctxt->inputNr == 1)) { "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
"PEReference: forbidden within markup decl in internal subset\n");
} else {
xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
"xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
}
return(-1); return(-1);
} }
@ -6911,7 +6861,6 @@ xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
void void
xmlParseTextDecl(xmlParserCtxtPtr ctxt) { xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
xmlChar *version; xmlChar *version;
int oldstate;
/* /*
* We know that '<?xml' is here. * We know that '<?xml' is here.
@ -6923,10 +6872,6 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
return; return;
} }
/* Avoid expansion of parameter entities when skipping blanks. */
oldstate = ctxt->instate;
ctxt->instate = XML_PARSER_START;
if (SKIP_BLANKS == 0) { if (SKIP_BLANKS == 0) {
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
"Space needed after '<?xml'\n"); "Space needed after '<?xml'\n");
@ -6958,7 +6903,6 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
/* /*
* The XML REC instructs us to stop parsing right here * The XML REC instructs us to stop parsing right here
*/ */
ctxt->instate = oldstate;
return; return;
} }
@ -6979,8 +6923,6 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
break; break;
} }
} }
ctxt->instate = oldstate;
} }
/** /**
@ -7002,6 +6944,11 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
xmlDetectEncoding(ctxt); xmlDetectEncoding(ctxt);
/*
* Don't expand PEs while parsing the text declaration
*/
ctxt->inSubset = 0;
if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
xmlParseTextDecl(ctxt); xmlParseTextDecl(ctxt);
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
@ -7026,7 +6973,7 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
} }
ctxt->instate = XML_PARSER_DTD; ctxt->instate = XML_PARSER_DTD;
ctxt->external = 1; ctxt->inSubset = 2;
SKIP_BLANKS; SKIP_BLANKS;
while ((PARSER_STOPPED(ctxt) == 0) && (RAW != 0)) { while ((PARSER_STOPPED(ctxt) == 0) && (RAW != 0)) {
GROW; GROW;
@ -8337,7 +8284,6 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
* Is there any DTD definition ? * Is there any DTD definition ?
*/ */
if (RAW == '[') { if (RAW == '[') {
int baseInputNr = ctxt->inputNr;
ctxt->instate = XML_PARSER_DTD; ctxt->instate = XML_PARSER_DTD;
NEXT; NEXT;
/* /*
@ -8346,14 +8292,14 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
* Subsequence (markupdecl | PEReference | S)* * Subsequence (markupdecl | PEReference | S)*
*/ */
SKIP_BLANKS; SKIP_BLANKS;
while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) && while (((RAW != ']') || (PARSER_IN_PE(ctxt))) &&
(PARSER_STOPPED(ctxt) == 0)) { (PARSER_STOPPED(ctxt) == 0)) {
/* /*
* Conditional sections are allowed from external entities included * Conditional sections are allowed from external entities included
* by PE References in the internal subset. * by PE References in the internal subset.
*/ */
if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) && if ((PARSER_EXTERNAL(ctxt)) &&
(RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
xmlParseConditionalSections(ctxt); xmlParseConditionalSections(ctxt);
} else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) { } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
@ -12179,7 +12125,6 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
/* /*
* let's parse that entity knowing it's an external subset. * let's parse that entity knowing it's an external subset.
*/ */
ctxt->inSubset = 2;
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
if (ctxt->myDoc == NULL) { if (ctxt->myDoc == NULL) {
xmlErrMemory(ctxt); xmlErrMemory(ctxt);
@ -12189,8 +12134,6 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
BAD_CAST "none", BAD_CAST "none"); BAD_CAST "none", BAD_CAST "none");
xmlDetectEncoding(ctxt);
xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
if (ctxt->myDoc != NULL) { if (ctxt->myDoc != NULL) {
@ -12292,7 +12235,6 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
/* /*
* let's parse that entity knowing it's an external subset. * let's parse that entity knowing it's an external subset.
*/ */
ctxt->inSubset = 2;
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
if (ctxt->myDoc == NULL) { if (ctxt->myDoc == NULL) {
xmlErrMemory(ctxt); xmlErrMemory(ctxt);
@ -13835,7 +13777,6 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt)
ctxt->hasExternalSubset = 0; ctxt->hasExternalSubset = 0;
ctxt->hasPErefs = 0; ctxt->hasPErefs = 0;
ctxt->html = 0; ctxt->html = 0;
ctxt->external = 0;
ctxt->instate = XML_PARSER_START; ctxt->instate = XML_PARSER_START;
ctxt->token = 0; ctxt->token = 0;

View File

@ -437,7 +437,7 @@ xmlParserGrow(xmlParserCtxtPtr ctxt) {
if (buf == NULL) if (buf == NULL)
return(0); return(0);
/* Don't grow push parser buffer. */ /* Don't grow push parser buffer. */
if ((ctxt->progressive) && (ctxt->inputNr <= 1)) if ((ctxt->progressive) && (!PARSER_IN_PE(ctxt)))
return(0); return(0);
/* Don't grow memory buffers. */ /* Don't grow memory buffers. */
if ((buf->encoder == NULL) && (buf->readcallback == NULL)) if ((buf->encoder == NULL) && (buf->readcallback == NULL))
@ -529,7 +529,7 @@ xmlParserShrink(xmlParserCtxtPtr ctxt) {
if (buf == NULL) if (buf == NULL)
return; return;
/* Don't shrink pull parser memory buffers. */ /* Don't shrink pull parser memory buffers. */
if (((ctxt->progressive == 0) || (ctxt->inputNr > 1)) && if ((ctxt->progressive == 0) &&
(buf->encoder == NULL) && (buf->encoder == NULL) &&
(buf->readcallback == NULL)) (buf->readcallback == NULL))
return; return;
@ -2029,7 +2029,6 @@ xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
ctxt->hasExternalSubset = 0; ctxt->hasExternalSubset = 0;
ctxt->hasPErefs = 0; ctxt->hasPErefs = 0;
ctxt->html = 0; ctxt->html = 0;
ctxt->external = 0;
ctxt->instate = XML_PARSER_START; ctxt->instate = XML_PARSER_START;
ctxt->token = 0; ctxt->token = 0;

View File

@ -96,7 +96,7 @@ def callback(ctx, str):
err = err + "%s" % (str) err = err + "%s" % (str)
libxml2.registerErrorHandler(callback, "") libxml2.registerErrorHandler(callback, "")
parsing_error_files = ["766956", "cond_sect2", "t8", "t8a"] parsing_error_files = ["766956", "cond_sect2", "t8", "t8a", "pe-in-text-decl"]
expect_parsing_error = [os.path.join(dir_prefix, f + ".xml") for f in parsing_error_files] expect_parsing_error = [os.path.join(dir_prefix, f + ".xml") for f in parsing_error_files]
valid_files = glob.glob(os.path.join(dir_prefix, "*.x*")) valid_files = glob.glob(os.path.join(dir_prefix, "*.x*"))
@ -114,12 +114,11 @@ for file in valid_files:
if ret != 0 and file not in expect_parsing_error: if ret != 0 and file not in expect_parsing_error:
print("Error parsing and validating %s" % (file)) print("Error parsing and validating %s" % (file))
#sys.exit(1) #sys.exit(1)
if (err): if file in expect and err != expect[file]:
if not(file in expect and err == expect[file]): failures += 1
failures += 1 print("Error: ", err)
print("Error: ", err) if file in expect:
if file in expect: print("Expected: ", expect[file])
print("Expected: ", expect[file])
if failures: if failures:
print("Failed %d tests" % failures) print("Failed %d tests" % failures)

View File

@ -0,0 +1,3 @@
test/valid/dtds/pe-in-text-decl.dtd:1: parser error : parsing XML declaration: '?>' expected
<?xml version="1.0" %ent;?>
^

View File

@ -0,0 +1,4 @@
test/valid/dtds/pe-in-text-decl.dtd:1: parser error : parsing XML declaration: '?>' expected
<?xml version="1.0" %ent;?>
^
./test/valid/pe-in-text-decl.xml : failed to parse

View File

@ -0,0 +1 @@
<?xml version="1.0" %ent;?>

View File

@ -0,0 +1,4 @@
<!DOCTYPE doc SYSTEM "dtds/pe-in-text-decl.dtd" [
<!ENTITY % ent "foo">
]>
<doc/>