From 45fe9924f044a2283fcd658dc210b9c23ddc0d78 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Mon, 22 Apr 2024 17:12:54 +0200 Subject: [PATCH] parser: Don't create reference in xmlLookupGeneralEntity This should only be done in xmlParseReference. The handling of undeclared entities is still somewhat inconsistent. In element content we create references even if entity substitution is enabled. In attribute values undeclared entities are always ignored. --- parser.c | 25 ++++++++++++++++--------- result/noent/undeclared-entity.xml | 7 +++++++ result/noent/undeclared-entity.xml.sax2 | 24 ++++++++++++++++++++++++ result/undeclared-entity.xml | 7 +++++++ result/undeclared-entity.xml.rde | 14 ++++++++++++++ result/undeclared-entity.xml.rdr | 14 ++++++++++++++ result/undeclared-entity.xml.sax | 23 +++++++++++++++++++++++ result/undeclared-entity.xml.sax2 | 24 ++++++++++++++++++++++++ test/undeclared-entity.xml | 6 ++++++ 9 files changed, 135 insertions(+), 9 deletions(-) create mode 100644 result/noent/undeclared-entity.xml create mode 100644 result/noent/undeclared-entity.xml.sax2 create mode 100644 result/undeclared-entity.xml create mode 100644 result/undeclared-entity.xml.rde create mode 100644 result/undeclared-entity.xml.rdr create mode 100644 result/undeclared-entity.xml.sax create mode 100644 result/undeclared-entity.xml.sax2 create mode 100644 test/undeclared-entity.xml diff --git a/parser.c b/parser.c index 3bd2ccc1..8af8aeab 100644 --- a/parser.c +++ b/parser.c @@ -7363,9 +7363,22 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { * We are seeing an entity reference */ name = xmlParseEntityRefInternal(ctxt); - if (name != NULL) - ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0); - if (ent == NULL) return; + if (name == NULL) + return; + ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 0); + if (ent == NULL) { + /* + * Create a reference for undeclared entities. + * TODO: Should we really create a reference if entity + * substitution is enabled? + */ + if ((ctxt->sax != NULL) && + (ctxt->disableSAX == 0) && + (ctxt->sax->reference != NULL)) { + ctxt->sax->reference(ctxt->userData, name); + } + return; + } if (!ctxt->wellFormed) return; @@ -7602,12 +7615,6 @@ xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr) { } else { xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY, "Entity '%s' not defined\n", name, NULL); - if ((ctxt->inSubset == 0) && - (ctxt->sax != NULL) && - (ctxt->disableSAX == 0) && - (ctxt->sax->reference != NULL)) { - ctxt->sax->reference(ctxt->userData, name); - } } ctxt->valid = 0; } diff --git a/result/noent/undeclared-entity.xml b/result/noent/undeclared-entity.xml new file mode 100644 index 00000000..1fd092f7 --- /dev/null +++ b/result/noent/undeclared-entity.xml @@ -0,0 +1,7 @@ + + + + + + &undeclared; + diff --git a/result/noent/undeclared-entity.xml.sax2 b/result/noent/undeclared-entity.xml.sax2 new file mode 100644 index 00000000..56f3f3d9 --- /dev/null +++ b/result/noent/undeclared-entity.xml.sax2 @@ -0,0 +1,24 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.comment( Having an external DTD makes undeclared entities a warning. ) +SAX.internalSubset(doc, , foo) +SAX.externalSubset(doc, , foo) +SAX.startElementNs(doc, NULL, NULL, 0, 0, 0) +SAX.characters( + , 5) +SAX.getEntity(undeclared) +SAX.warning: Entity 'undeclared' not defined +SAX.startElementNs(elem, NULL, NULL, 0, 1, 0, attr='"/> +...', 0) +SAX.endElementNs(elem, NULL, NULL) +SAX.characters( + , 5) +SAX.startElementNs(elem, NULL, NULL, 0, 0, 0) +SAX.getEntity(undeclared) +SAX.warning: Entity 'undeclared' not defined +SAX.reference(undeclared) +SAX.endElementNs(elem, NULL, NULL) +SAX.characters( +, 1) +SAX.endElementNs(doc, NULL, NULL) +SAX.endDocument() diff --git a/result/undeclared-entity.xml b/result/undeclared-entity.xml new file mode 100644 index 00000000..1fd092f7 --- /dev/null +++ b/result/undeclared-entity.xml @@ -0,0 +1,7 @@ + + + + + + &undeclared; + diff --git a/result/undeclared-entity.xml.rde b/result/undeclared-entity.xml.rde new file mode 100644 index 00000000..29ebf622 --- /dev/null +++ b/result/undeclared-entity.xml.rde @@ -0,0 +1,14 @@ +0 8 #comment 0 1 Having an external DTD makes undeclared entities a warning. +0 10 doc 0 0 +0 1 doc 0 0 +1 14 #text 0 1 + +1 1 elem 1 0 +1 14 #text 0 1 + +1 1 elem 0 0 +2 5 undeclared 0 0 +1 15 elem 0 0 +1 14 #text 0 1 + +0 15 doc 0 0 diff --git a/result/undeclared-entity.xml.rdr b/result/undeclared-entity.xml.rdr new file mode 100644 index 00000000..29ebf622 --- /dev/null +++ b/result/undeclared-entity.xml.rdr @@ -0,0 +1,14 @@ +0 8 #comment 0 1 Having an external DTD makes undeclared entities a warning. +0 10 doc 0 0 +0 1 doc 0 0 +1 14 #text 0 1 + +1 1 elem 1 0 +1 14 #text 0 1 + +1 1 elem 0 0 +2 5 undeclared 0 0 +1 15 elem 0 0 +1 14 #text 0 1 + +0 15 doc 0 0 diff --git a/result/undeclared-entity.xml.sax b/result/undeclared-entity.xml.sax new file mode 100644 index 00000000..90e7e54e --- /dev/null +++ b/result/undeclared-entity.xml.sax @@ -0,0 +1,23 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.comment( Having an external DTD makes undeclared entities a warning. ) +SAX.internalSubset(doc, , foo) +SAX.externalSubset(doc, , foo) +SAX.startElement(doc) +SAX.characters( + , 5) +SAX.getEntity(undeclared) +SAX.warning: Entity 'undeclared' not defined +SAX.startElement(elem, attr='') +SAX.endElement(elem) +SAX.characters( + , 5) +SAX.startElement(elem) +SAX.getEntity(undeclared) +SAX.warning: Entity 'undeclared' not defined +SAX.reference(undeclared) +SAX.endElement(elem) +SAX.characters( +, 1) +SAX.endElement(doc) +SAX.endDocument() diff --git a/result/undeclared-entity.xml.sax2 b/result/undeclared-entity.xml.sax2 new file mode 100644 index 00000000..56f3f3d9 --- /dev/null +++ b/result/undeclared-entity.xml.sax2 @@ -0,0 +1,24 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.comment( Having an external DTD makes undeclared entities a warning. ) +SAX.internalSubset(doc, , foo) +SAX.externalSubset(doc, , foo) +SAX.startElementNs(doc, NULL, NULL, 0, 0, 0) +SAX.characters( + , 5) +SAX.getEntity(undeclared) +SAX.warning: Entity 'undeclared' not defined +SAX.startElementNs(elem, NULL, NULL, 0, 1, 0, attr='"/> +...', 0) +SAX.endElementNs(elem, NULL, NULL) +SAX.characters( + , 5) +SAX.startElementNs(elem, NULL, NULL, 0, 0, 0) +SAX.getEntity(undeclared) +SAX.warning: Entity 'undeclared' not defined +SAX.reference(undeclared) +SAX.endElementNs(elem, NULL, NULL) +SAX.characters( +, 1) +SAX.endElementNs(doc, NULL, NULL) +SAX.endDocument() diff --git a/test/undeclared-entity.xml b/test/undeclared-entity.xml new file mode 100644 index 00000000..b2a335c3 --- /dev/null +++ b/test/undeclared-entity.xml @@ -0,0 +1,6 @@ + + + + + &undeclared; +