diff --git a/HTMLparser.c b/HTMLparser.c index fd90a725..4b1a2fe1 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -6277,7 +6277,6 @@ htmlCtxtReset(htmlParserCtxtPtr ctxt) ctxt->hasExternalSubset = 0; ctxt->hasPErefs = 0; ctxt->html = 1; - ctxt->external = 0; ctxt->instate = XML_PARSER_START; ctxt->token = 0; diff --git a/SAX2.c b/SAX2.c index 11a31db9..47fe5b1e 100644 --- a/SAX2.c +++ b/SAX2.c @@ -1219,8 +1219,6 @@ xmlSAX2AttributeInternal(void *ctx, const xmlChar *fullname, } else #endif /* LIBXML_VALID_ENABLED */ if (((ctxt->loadsubset & XML_SKIP_IDS) == 0) && - (((ctxt->replaceEntities == 0) && (ctxt->external != 2)) || - ((ctxt->replaceEntities != 0) && (ctxt->inSubset == 0))) && /* Don't create IDs containing entity references */ (ret->children != NULL) && (ret->children->type == XML_TEXT_NODE) && @@ -1955,8 +1953,6 @@ xmlSAX2AttributeNs(xmlParserCtxtPtr ctxt, } else #endif /* LIBXML_VALID_ENABLED */ if (((ctxt->loadsubset & XML_SKIP_IDS) == 0) && - (((ctxt->replaceEntities == 0) && (ctxt->external != 2)) || - ((ctxt->replaceEntities != 0) && (ctxt->inSubset == 0))) && /* Don't create IDs containing entity references */ (ret->children != NULL) && (ret->children->type == XML_TEXT_NODE) && diff --git a/include/libxml/parser.h b/include/libxml/parser.h index 31be8aed..b2cb9bc6 100644 --- a/include/libxml/parser.h +++ b/include/libxml/parser.h @@ -220,7 +220,7 @@ struct _xmlParserCtxt { int hasExternalSubset; /* reference and external subset */ int hasPErefs; /* the internal subset has PE refs */ - int external; /* are we parsing an external entity */ + int external; /* unused */ int valid; /* is the document valid */ int validate; /* shall we try to validate ? */ diff --git a/include/private/parser.h b/include/private/parser.h index 2c005d02..02dd278c 100644 --- a/include/private/parser.h +++ b/include/private/parser.h @@ -28,6 +28,16 @@ #define PARSER_STOPPED(ctxt) ((ctxt)->disableSAX > 1) +#define PARSER_IN_PE(ctxt) \ + (((ctxt)->input->entity != NULL) && \ + (((ctxt)->input->entity->etype == XML_INTERNAL_PARAMETER_ENTITY) || \ + ((ctxt)->input->entity->etype == XML_EXTERNAL_PARAMETER_ENTITY))) + +#define PARSER_EXTERNAL(ctxt) \ + (((ctxt)->inSubset == 2) || \ + (((ctxt)->input->entity != NULL) && \ + ((ctxt)->input->entity->etype == XML_EXTERNAL_PARAMETER_ENTITY))) + XML_HIDDEN void xmlCtxtVErr(xmlParserCtxtPtr ctxt, xmlNodePtr node, xmlErrorDomain domain, xmlParserErrors code, xmlErrorLevel level, diff --git a/parser.c b/parser.c index 47e56b3a..7dbeda67 100644 --- a/parser.c +++ b/parser.c @@ -995,7 +995,7 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt, attr->prefix = prefix; attr->value = hvalue; attr->valueEnd = hvalue.name + len; - attr->external = ctxt->external; + attr->external = PARSER_EXTERNAL(ctxt); attr->expandedSize = expandedSize; return; @@ -2142,8 +2142,7 @@ static int spacePop(xmlParserCtxtPtr ctxt) { /* Don't shrink push parser buffer. */ #define SHRINK \ - if (((ctxt->progressive == 0) || (ctxt->inputNr > 1)) && \ - (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ + if ((ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \ (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \ xmlParserShrink(ctxt); @@ -2190,13 +2189,17 @@ static int spacePop(xmlParserCtxtPtr ctxt) { int xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { int res = 0; + int inParam; + int expandParam; + + inParam = PARSER_IN_PE(ctxt); + expandParam = PARSER_EXTERNAL(ctxt); /* * It's Okay to use CUR/NEXT here since all the blanks are on * the ASCII range. */ - if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) || - (ctxt->instate == XML_PARSER_START)) { + if (!inParam && !expandParam) { const xmlChar *cur; /* * if we are in the document content, go really fast @@ -2219,23 +2222,29 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { } ctxt->input->cur = cur; } else { - int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1)); - while (PARSER_STOPPED(ctxt) == 0) { if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */ NEXT; } else if (CUR == '%') { - /* - * Need to handle support of entities branching here - */ - if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0)) + if ((expandParam == 0) || + (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0)) break; + + /* + * Expand parameter entity. We continue to consume + * whitespace at the start of the entity and possible + * even consume the whole entity and pop it. We might + * even pop multiple PEs in this loop. + */ xmlParsePEReference(ctxt); + + inParam = PARSER_IN_PE(ctxt); + expandParam = PARSER_EXTERNAL(ctxt); } else if (CUR == 0) { unsigned long consumed; xmlEntityPtr ent; - if (ctxt->inputNr <= 1) + if (inParam == 0) break; consumed = ctxt->input->consumed; @@ -2257,6 +2266,9 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { xmlParserEntityCheck(ctxt, consumed); xmlPopInput(ctxt); + + inParam = PARSER_IN_PE(ctxt); + expandParam = PARSER_EXTERNAL(ctxt); } else { break; } @@ -2567,61 +2579,6 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { */ void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { - switch(ctxt->instate) { - case XML_PARSER_CDATA_SECTION: - return; - case XML_PARSER_COMMENT: - return; - case XML_PARSER_START_TAG: - return; - case XML_PARSER_END_TAG: - return; - case XML_PARSER_EOF: - xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL); - return; - case XML_PARSER_PROLOG: - case XML_PARSER_START: - case XML_PARSER_XML_DECL: - case XML_PARSER_MISC: - xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL); - return; - case XML_PARSER_ENTITY_DECL: - case XML_PARSER_CONTENT: - case XML_PARSER_ATTRIBUTE_VALUE: - case XML_PARSER_PI: - case XML_PARSER_SYSTEM_LITERAL: - case XML_PARSER_PUBLIC_LITERAL: - /* we just ignore it there */ - return; - case XML_PARSER_EPILOG: - xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL); - return; - case XML_PARSER_ENTITY_VALUE: - /* - * NOTE: in the case of entity values, we don't do the - * substitution here since we need the literal - * entity value to be able to save the internal - * subset of the document. - * This will be handled by xmlStringDecodeEntities - */ - return; - case XML_PARSER_DTD: - /* - * [WFC: Well-Formedness Constraint: PEs in Internal Subset] - * In the internal DTD subset, parameter-entity references - * can occur only where markup declarations can occur, not - * within markup declarations. - * In that case this is handled in xmlParseMarkupDecl - */ - if ((ctxt->external == 0) && (ctxt->inputNr == 1)) - return; - if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0) - return; - break; - case XML_PARSER_IGNORE: - return; - } - xmlParsePEReference(ctxt); } @@ -3888,8 +3845,7 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { tmp); goto error; } - if ((tmp == '%') && (ctxt->inSubset == 1) && - (ctxt->inputNr == 1)) { + if ((tmp == '%') && (!PARSER_EXTERNAL(ctxt))) { xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL); goto error; } @@ -6653,14 +6609,8 @@ xmlParseElementDecl(xmlParserCtxtPtr ctxt) { /* * [ WFC: PEs in Internal Subset ] error handling. */ - if ((RAW == '%') && (ctxt->external == 0) && - (ctxt->inputNr == 1)) { - xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET, - "PEReference: forbidden within markup decl in internal subset\n"); - } else { - xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, - "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); - } + xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, + "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n"); return(-1); } @@ -6911,7 +6861,6 @@ xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) { void xmlParseTextDecl(xmlParserCtxtPtr ctxt) { xmlChar *version; - int oldstate; /* * We know that 'instate; - ctxt->instate = XML_PARSER_START; - if (SKIP_BLANKS == 0) { xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Space needed after 'instate = oldstate; return; } @@ -6979,8 +6923,6 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) { break; } } - - ctxt->instate = oldstate; } /** @@ -7002,6 +6944,11 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, xmlDetectEncoding(ctxt); + /* + * Don't expand PEs while parsing the text declaration + */ + ctxt->inSubset = 0; + if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) { xmlParseTextDecl(ctxt); if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { @@ -7026,7 +6973,7 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID, } ctxt->instate = XML_PARSER_DTD; - ctxt->external = 1; + ctxt->inSubset = 2; SKIP_BLANKS; while ((PARSER_STOPPED(ctxt) == 0) && (RAW != 0)) { GROW; @@ -8337,7 +8284,6 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { * Is there any DTD definition ? */ if (RAW == '[') { - int baseInputNr = ctxt->inputNr; ctxt->instate = XML_PARSER_DTD; NEXT; /* @@ -8346,14 +8292,14 @@ xmlParseInternalSubset(xmlParserCtxtPtr ctxt) { * Subsequence (markupdecl | PEReference | S)* */ SKIP_BLANKS; - while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) && + while (((RAW != ']') || (PARSER_IN_PE(ctxt))) && (PARSER_STOPPED(ctxt) == 0)) { /* * Conditional sections are allowed from external entities included * by PE References in the internal subset. */ - if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) && + if ((PARSER_EXTERNAL(ctxt)) && (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) { xmlParseConditionalSections(ctxt); } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) { @@ -12179,7 +12125,6 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, /* * let's parse that entity knowing it's an external subset. */ - ctxt->inSubset = 2; ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); if (ctxt->myDoc == NULL) { xmlErrMemory(ctxt); @@ -12189,8 +12134,6 @@ xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input, ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none", BAD_CAST "none", BAD_CAST "none"); - xmlDetectEncoding(ctxt); - xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none"); if (ctxt->myDoc != NULL) { @@ -12292,7 +12235,6 @@ xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID, /* * let's parse that entity knowing it's an external subset. */ - ctxt->inSubset = 2; ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0"); if (ctxt->myDoc == NULL) { xmlErrMemory(ctxt); @@ -13835,7 +13777,6 @@ xmlCtxtReset(xmlParserCtxtPtr ctxt) ctxt->hasExternalSubset = 0; ctxt->hasPErefs = 0; ctxt->html = 0; - ctxt->external = 0; ctxt->instate = XML_PARSER_START; ctxt->token = 0; diff --git a/parserInternals.c b/parserInternals.c index e798f38c..42c09778 100644 --- a/parserInternals.c +++ b/parserInternals.c @@ -437,7 +437,7 @@ xmlParserGrow(xmlParserCtxtPtr ctxt) { if (buf == NULL) return(0); /* Don't grow push parser buffer. */ - if ((ctxt->progressive) && (ctxt->inputNr <= 1)) + if ((ctxt->progressive) && (!PARSER_IN_PE(ctxt))) return(0); /* Don't grow memory buffers. */ if ((buf->encoder == NULL) && (buf->readcallback == NULL)) @@ -529,7 +529,7 @@ xmlParserShrink(xmlParserCtxtPtr ctxt) { if (buf == NULL) return; /* Don't shrink pull parser memory buffers. */ - if (((ctxt->progressive == 0) || (ctxt->inputNr > 1)) && + if ((ctxt->progressive == 0) && (buf->encoder == NULL) && (buf->readcallback == NULL)) return; @@ -2029,7 +2029,6 @@ xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax, ctxt->hasExternalSubset = 0; ctxt->hasPErefs = 0; ctxt->html = 0; - ctxt->external = 0; ctxt->instate = XML_PARSER_START; ctxt->token = 0; diff --git a/python/tests/reader2.py b/python/tests/reader2.py index 9f14ecd2..dbe061e7 100755 --- a/python/tests/reader2.py +++ b/python/tests/reader2.py @@ -96,7 +96,7 @@ def callback(ctx, str): err = err + "%s" % (str) libxml2.registerErrorHandler(callback, "") -parsing_error_files = ["766956", "cond_sect2", "t8", "t8a"] +parsing_error_files = ["766956", "cond_sect2", "t8", "t8a", "pe-in-text-decl"] expect_parsing_error = [os.path.join(dir_prefix, f + ".xml") for f in parsing_error_files] valid_files = glob.glob(os.path.join(dir_prefix, "*.x*")) @@ -114,12 +114,11 @@ for file in valid_files: if ret != 0 and file not in expect_parsing_error: print("Error parsing and validating %s" % (file)) #sys.exit(1) - if (err): - if not(file in expect and err == expect[file]): - failures += 1 - print("Error: ", err) - if file in expect: - print("Expected: ", expect[file]) + if file in expect and err != expect[file]: + failures += 1 + print("Error: ", err) + if file in expect: + print("Expected: ", expect[file]) if failures: print("Failed %d tests" % failures) diff --git a/result/valid/pe-in-text-decl.xml.err b/result/valid/pe-in-text-decl.xml.err new file mode 100644 index 00000000..bead5f0a --- /dev/null +++ b/result/valid/pe-in-text-decl.xml.err @@ -0,0 +1,3 @@ +test/valid/dtds/pe-in-text-decl.dtd:1: parser error : parsing XML declaration: '?>' expected + + ^ diff --git a/result/valid/pe-in-text-decl.xml.err.rdr b/result/valid/pe-in-text-decl.xml.err.rdr new file mode 100644 index 00000000..df424e7c --- /dev/null +++ b/result/valid/pe-in-text-decl.xml.err.rdr @@ -0,0 +1,4 @@ +test/valid/dtds/pe-in-text-decl.dtd:1: parser error : parsing XML declaration: '?>' expected + + ^ +./test/valid/pe-in-text-decl.xml : failed to parse diff --git a/test/valid/dtds/pe-in-text-decl.dtd b/test/valid/dtds/pe-in-text-decl.dtd new file mode 100644 index 00000000..9f39cf48 --- /dev/null +++ b/test/valid/dtds/pe-in-text-decl.dtd @@ -0,0 +1 @@ + diff --git a/test/valid/pe-in-text-decl.xml b/test/valid/pe-in-text-decl.xml new file mode 100644 index 00000000..39da8556 --- /dev/null +++ b/test/valid/pe-in-text-decl.xml @@ -0,0 +1,4 @@ + +]> +