diff --git a/HTMLparser.c b/HTMLparser.c
index fd90a725..4b1a2fe1 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -6277,7 +6277,6 @@ htmlCtxtReset(htmlParserCtxtPtr ctxt)
ctxt->hasExternalSubset = 0;
ctxt->hasPErefs = 0;
ctxt->html = 1;
- ctxt->external = 0;
ctxt->instate = XML_PARSER_START;
ctxt->token = 0;
diff --git a/SAX2.c b/SAX2.c
index 11a31db9..47fe5b1e 100644
--- a/SAX2.c
+++ b/SAX2.c
@@ -1219,8 +1219,6 @@ xmlSAX2AttributeInternal(void *ctx, const xmlChar *fullname,
} else
#endif /* LIBXML_VALID_ENABLED */
if (((ctxt->loadsubset & XML_SKIP_IDS) == 0) &&
- (((ctxt->replaceEntities == 0) && (ctxt->external != 2)) ||
- ((ctxt->replaceEntities != 0) && (ctxt->inSubset == 0))) &&
/* Don't create IDs containing entity references */
(ret->children != NULL) &&
(ret->children->type == XML_TEXT_NODE) &&
@@ -1955,8 +1953,6 @@ xmlSAX2AttributeNs(xmlParserCtxtPtr ctxt,
} else
#endif /* LIBXML_VALID_ENABLED */
if (((ctxt->loadsubset & XML_SKIP_IDS) == 0) &&
- (((ctxt->replaceEntities == 0) && (ctxt->external != 2)) ||
- ((ctxt->replaceEntities != 0) && (ctxt->inSubset == 0))) &&
/* Don't create IDs containing entity references */
(ret->children != NULL) &&
(ret->children->type == XML_TEXT_NODE) &&
diff --git a/include/libxml/parser.h b/include/libxml/parser.h
index 31be8aed..b2cb9bc6 100644
--- a/include/libxml/parser.h
+++ b/include/libxml/parser.h
@@ -220,7 +220,7 @@ struct _xmlParserCtxt {
int hasExternalSubset; /* reference and external subset */
int hasPErefs; /* the internal subset has PE refs */
- int external; /* are we parsing an external entity */
+ int external; /* unused */
int valid; /* is the document valid */
int validate; /* shall we try to validate ? */
diff --git a/include/private/parser.h b/include/private/parser.h
index 2c005d02..02dd278c 100644
--- a/include/private/parser.h
+++ b/include/private/parser.h
@@ -28,6 +28,16 @@
#define PARSER_STOPPED(ctxt) ((ctxt)->disableSAX > 1)
+#define PARSER_IN_PE(ctxt) \
+ (((ctxt)->input->entity != NULL) && \
+ (((ctxt)->input->entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || \
+ ((ctxt)->input->entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)))
+
+#define PARSER_EXTERNAL(ctxt) \
+ (((ctxt)->inSubset == 2) || \
+ (((ctxt)->input->entity != NULL) && \
+ ((ctxt)->input->entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)))
+
XML_HIDDEN void
xmlCtxtVErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain,
xmlParserErrors code, xmlErrorLevel level,
diff --git a/parser.c b/parser.c
index 47e56b3a..7dbeda67 100644
--- a/parser.c
+++ b/parser.c
@@ -995,7 +995,7 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
attr->prefix = prefix;
attr->value = hvalue;
attr->valueEnd = hvalue.name + len;
- attr->external = ctxt->external;
+ attr->external = PARSER_EXTERNAL(ctxt);
attr->expandedSize = expandedSize;
return;
@@ -2142,8 +2142,7 @@ static int spacePop(xmlParserCtxtPtr ctxt) {
/* Don't shrink push parser buffer. */
#define SHRINK \
- if (((ctxt->progressive == 0) || (ctxt->inputNr > 1)) && \
- (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
+ if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
xmlParserShrink(ctxt);
@@ -2190,13 +2189,17 @@ static int spacePop(xmlParserCtxtPtr ctxt) {
int
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
int res = 0;
+ int inParam;
+ int expandParam;
+
+ inParam = PARSER_IN_PE(ctxt);
+ expandParam = PARSER_EXTERNAL(ctxt);
/*
* It's Okay to use CUR/NEXT here since all the blanks are on
* the ASCII range.
*/
- if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
- (ctxt->instate == XML_PARSER_START)) {
+ if (!inParam && !expandParam) {
const xmlChar *cur;
/*
* if we are in the document content, go really fast
@@ -2219,23 +2222,29 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
}
ctxt->input->cur = cur;
} else {
- int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
-
while (PARSER_STOPPED(ctxt) == 0) {
if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
NEXT;
} else if (CUR == '%') {
- /*
- * Need to handle support of entities branching here
- */
- if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
+ if ((expandParam == 0) ||
+ (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
break;
+
+ /*
+ * Expand parameter entity. We continue to consume
+ * whitespace at the start of the entity and possible
+ * even consume the whole entity and pop it. We might
+ * even pop multiple PEs in this loop.
+ */
xmlParsePEReference(ctxt);
+
+ inParam = PARSER_IN_PE(ctxt);
+ expandParam = PARSER_EXTERNAL(ctxt);
} else if (CUR == 0) {
unsigned long consumed;
xmlEntityPtr ent;
- if (ctxt->inputNr <= 1)
+ if (inParam == 0)
break;
consumed = ctxt->input->consumed;
@@ -2257,6 +2266,9 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
xmlParserEntityCheck(ctxt, consumed);
xmlPopInput(ctxt);
+
+ inParam = PARSER_IN_PE(ctxt);
+ expandParam = PARSER_EXTERNAL(ctxt);
} else {
break;
}
@@ -2567,61 +2579,6 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
*/
void
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
- switch(ctxt->instate) {
- case XML_PARSER_CDATA_SECTION:
- return;
- case XML_PARSER_COMMENT:
- return;
- case XML_PARSER_START_TAG:
- return;
- case XML_PARSER_END_TAG:
- return;
- case XML_PARSER_EOF:
- xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
- return;
- case XML_PARSER_PROLOG:
- case XML_PARSER_START:
- case XML_PARSER_XML_DECL:
- case XML_PARSER_MISC:
- xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
- return;
- case XML_PARSER_ENTITY_DECL:
- case XML_PARSER_CONTENT:
- case XML_PARSER_ATTRIBUTE_VALUE:
- case XML_PARSER_PI:
- case XML_PARSER_SYSTEM_LITERAL:
- case XML_PARSER_PUBLIC_LITERAL:
- /* we just ignore it there */
- return;
- case XML_PARSER_EPILOG:
- xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
- return;
- case XML_PARSER_ENTITY_VALUE:
- /*
- * NOTE: in the case of entity values, we don't do the
- * substitution here since we need the literal
- * entity value to be able to save the internal
- * subset of the document.
- * This will be handled by xmlStringDecodeEntities
- */
- return;
- case XML_PARSER_DTD:
- /*
- * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
- * In the internal DTD subset, parameter-entity references
- * can occur only where markup declarations can occur, not
- * within markup declarations.
- * In that case this is handled in xmlParseMarkupDecl
- */
- if ((ctxt->external == 0) && (ctxt->inputNr == 1))
- return;
- if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
- return;
- break;
- case XML_PARSER_IGNORE:
- return;
- }
-
xmlParsePEReference(ctxt);
}
@@ -3888,8 +3845,7 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
tmp);
goto error;
}
- if ((tmp == '%') && (ctxt->inSubset == 1) &&
- (ctxt->inputNr == 1)) {
+ if ((tmp == '%') && (!PARSER_EXTERNAL(ctxt))) {
xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
goto error;
}
@@ -6653,14 +6609,8 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
/*
* [ WFC: PEs in Internal Subset ] error handling.
*/
- if ((RAW == '%') && (ctxt->external == 0) &&
- (ctxt->inputNr == 1)) {
- xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
- "PEReference: forbidden within markup decl in internal subset\n");
- } else {
- xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
- "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
- }
+ xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
+ "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
return(-1);
}
@@ -6911,7 +6861,6 @@ xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
void
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
xmlChar *version;
- int oldstate;
/*
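* We know that '<?xml' is here.
@@ -6923,14 +6872,9 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
return;
}
- /* Avoid expansion of parameter entities */
- oldstate = ctxt->instate;
- ctxt->instate = XML_PARSER_START;
-
if (SKIP_BLANKS == 0) {
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
"Space needed after '<?xml'\n");
- ctxt->instate = oldstate;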
return;
}
@@ -6979,8 +6923,6 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
break;
}
}
-
- ctxt->instate = oldstate;
}
/**
@@ -7002,6 +6944,11 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
xmlDetectEncoding(ctxt);
+ /*
+ * Don't expand PEs while parsing the text declaration
+ */
+ ctxt->inSubset = 0;
+
if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
xmlParseTextDecl(ctxt);
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
@@ -7026,7 +6973,7 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
}
ctxt->instate = XML_PARSER_DTD;
- ctxt->external = 1;
+ ctxt->inSubset = 2;
SKIP_BLANKS;
while ((PARSER_STOPPED(ctxt) == 0) && (RAW != 0)) {
GROW;
@@ -8337,7 +8284,6 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
* Is there any DTD definition ?
*/
if (RAW == '[') {
- int baseInputNr = ctxt->inputNr;
ctxt->instate = XML_PARSER_DTD;
NEXT;
/*
@@ -8346,14 +8292,14 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
* Subsequence (markupdecl | PEReference | S)*
*/
SKIP_BLANKS;
- while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
+ while (((RAW != ']') || (PARSER_IN_PE(ctxt))) &&
(PARSER_STOPPED(ctxt) == 0)) {
/*
* Conditional sections are allowed from external entities included
* by PE References in the internal subset.
*/
- if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
+ if ((PARSER_EXTERNAL(ctxt)) &&
(RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
xmlParseConditionalSections(ctxt);
} else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
@@ -12179,7 +12125,6 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
/*
* let's parse that entity knowing it's an external subset.
*/
- ctxt->inSubset = 2;
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
if (ctxt->myDoc == NULL) {
xmlErrMemory(ctxt);
@@ -12189,8 +12134,6 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
BAD_CAST "none", BAD_CAST "none");
- xmlDetectEncoding(ctxt);
-
xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
if (ctxt->myDoc != NULL) {
@@ -12292,7 +12235,6 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
/*
* let's parse that entity knowing it's an external subset.
*/
- ctxt->inSubset = 2;
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
if (ctxt->myDoc == NULL) {
xmlErrMemory(ctxt);
@@ -13835,7 +13777,6 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt)
ctxt->hasExternalSubset = 0;
ctxt->hasPErefs = 0;
ctxt->html = 0;
- ctxt->external = 0;
ctxt->instate = XML_PARSER_START;
ctxt->token = 0;
diff --git a/parserInternals.c b/parserInternals.c
index e798f38c..42c09778 100644
--- a/parserInternals.c
+++ b/parserInternals.c
@@ -437,7 +437,7 @@ xmlParserGrow(xmlParserCtxtPtr ctxt) {
if (buf == NULL)
return(0);
/* Don't grow push parser buffer. */
- if ((ctxt->progressive) && (ctxt->inputNr <= 1))
+ if ((ctxt->progressive) && (!PARSER_IN_PE(ctxt)))
return(0);
/* Don't grow memory buffers. */
if ((buf->encoder == NULL) && (buf->readcallback == NULL))
@@ -529,7 +529,7 @@ xmlParserShrink(xmlParserCtxtPtr ctxt) {
if (buf == NULL)
return;
/* Don't shrink pull parser memory buffers. */
- if (((ctxt->progressive == 0) || (ctxt->inputNr > 1)) &&
+ if ((ctxt->progressive == 0) &&
(buf->encoder == NULL) &&
(buf->readcallback == NULL))
return;
@@ -2029,7 +2029,6 @@ xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
ctxt->hasExternalSubset = 0;
ctxt->hasPErefs = 0;
ctxt->html = 0;
- ctxt->external = 0;
ctxt->instate = XML_PARSER_START;
ctxt->token = 0;
diff --git a/python/tests/reader2.py b/python/tests/reader2.py
index 9f14ecd2..dbe061e7 100755
--- a/python/tests/reader2.py
+++ b/python/tests/reader2.py
@@ -96,7 +96,7 @@ def callback(ctx, str):
err = err + "%s" % (str)
libxml2.registerErrorHandler(callback, "")
-parsing_error_files = ["766956", "cond_sect2", "t8", "t8a"]
+parsing_error_files = ["766956", "cond_sect2", "t8", "t8a", "pe-in-text-decl"]
expect_parsing_error = [os.path.join(dir_prefix, f + ".xml") for f in parsing_error_files]
valid_files = glob.glob(os.path.join(dir_prefix, "*.x*"))
@@ -114,12 +114,11 @@ for file in valid_files:
if ret != 0 and file not in expect_parsing_error:
print("Error parsing and validating %s" % (file))
#sys.exit(1)
- if (err):
- if not(file in expect and err == expect[file]):
- failures += 1
- print("Error: ", err)
- if file in expect:
- print("Expected: ", expect[file])
+ if file in expect and err != expect[file]:
+ failures += 1
+ print("Error: ", err)
+ if file in expect:
+ print("Expected: ", expect[file])
if failures:
print("Failed %d tests" % failures)
diff --git a/result/valid/pe-in-text-decl.xml.err b/result/valid/pe-in-text-decl.xml.err
new file mode 100644
index 00000000..bead5f0a
--- /dev/null
+++ b/result/valid/pe-in-text-decl.xml.err
@@ -0,0 +1,3 @@
+test/valid/dtds/pe-in-text-decl.dtd:1: parser error : parsing XML declaration: '?>' expected
+
+ ^
diff --git a/result/valid/pe-in-text-decl.xml.err.rdr b/result/valid/pe-in-text-decl.xml.err.rdr
new file mode 100644
index 00000000..df424e7c
--- /dev/null
+++ b/result/valid/pe-in-text-decl.xml.err.rdr
@@ -0,0 +1,4 @@
+test/valid/dtds/pe-in-text-decl.dtd:1: parser error : parsing XML declaration: '?>' expected
+
+ ^
+./test/valid/pe-in-text-decl.xml : failed to parse
diff --git a/test/valid/dtds/pe-in-text-decl.dtd b/test/valid/dtds/pe-in-text-decl.dtd
new file mode 100644
index 00000000..9f39cf48
--- /dev/null
+++ b/test/valid/dtds/pe-in-text-decl.dtd
@@ -0,0 +1 @@
+
diff --git a/test/valid/pe-in-text-decl.xml b/test/valid/pe-in-text-decl.xml
new file mode 100644
index 00000000..39da8556
--- /dev/null
+++ b/test/valid/pe-in-text-decl.xml
@@ -0,0 +1,4 @@
+
+]>
+