parser: Always copy content from entity to target.

Make sure that references from IDs are updated.

Note that if there are IDs with the same value in a document, the last
one will now be returned. IDs should be unique, but maybe this should be
addressed.
This commit is contained in:
Nick Wellnhofer 2023-12-27 03:53:24 +01:00
parent 6337ff793b
commit d025cfbb4b
7 changed files with 76 additions and 196 deletions

View File

@ -83,7 +83,7 @@ xmlFreeEntity(xmlEntityPtr entity)
dict = entity->doc->dict; dict = entity->doc->dict;
if ((entity->children) && (entity->owner == 1) && if ((entity->children) &&
(entity == (xmlEntityPtr) entity->children->parent)) (entity == (xmlEntityPtr) entity->children->parent))
xmlFreeNodeList(entity->children); xmlFreeNodeList(entity->children);
if ((entity->name != NULL) && if ((entity->name != NULL) &&
@ -152,7 +152,6 @@ xmlCreateEntity(xmlDocPtr doc, const xmlChar *name, int type,
ret->URI = NULL; /* to be computed by the layer knowing ret->URI = NULL; /* to be computed by the layer knowing
the defining entity */ the defining entity */
ret->orig = NULL; ret->orig = NULL;
ret->owner = 0;
return(ret); return(ret);

View File

@ -55,7 +55,7 @@ struct _xmlEntity {
struct _xmlEntity *nexte; /* unused */ struct _xmlEntity *nexte; /* unused */
const xmlChar *URI; /* the full URI as computed */ const xmlChar *URI; /* the full URI as computed */
int owner; /* does the entity own the childrens */ int owner; /* unused */
int flags; /* various flags */ int flags; /* various flags */
unsigned long expandedSize; /* expanded size */ unsigned long expandedSize; /* expanded size */
}; };

214
parser.c
View File

@ -209,8 +209,7 @@ xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
const char *encoding); const char *encoding);
static xmlParserErrors static xmlParserErrors
xmlCtxtParseEntity(xmlParserCtxtPtr oldctxt, xmlEntityPtr ent, xmlCtxtParseEntity(xmlParserCtxtPtr oldctxt, xmlEntityPtr ent);
xmlNodePtr *lst);
static int static int
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity); xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
@ -6985,10 +6984,8 @@ void
xmlParseReference(xmlParserCtxtPtr ctxt) { xmlParseReference(xmlParserCtxtPtr ctxt) {
xmlEntityPtr ent; xmlEntityPtr ent;
xmlChar *val; xmlChar *val;
xmlNodePtr list = NULL;
xmlParserErrors ret = XML_ERR_OK; xmlParserErrors ret = XML_ERR_OK;
if (RAW != '&') if (RAW != '&')
return; return;
@ -7064,9 +7061,6 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
* *
* Proposed fix: * Proposed fix:
* *
* - Remove the ent->owner optimization which tries to avoid the
* initial copy of the entity. Always make entities own the
* subtree.
* - Ignore current namespace declarations when parsing the * - Ignore current namespace declarations when parsing the
* entity. If a prefix can't be resolved, don't report an error * entity. If a prefix can't be resolved, don't report an error
* but mark it as unresolved. * but mark it as unresolved.
@ -7083,51 +7077,15 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
(ctxt->replaceEntities) || (ctxt->replaceEntities) ||
(ctxt->validate)) { (ctxt->validate)) {
if ((ent->flags & XML_ENT_PARSED) == 0) { if ((ent->flags & XML_ENT_PARSED) == 0) {
ret = xmlCtxtParseEntity(ctxt, ent, &list); ret = xmlCtxtParseEntity(ctxt, ent);
if ((ret == XML_ERR_OK) && (list != NULL)) { if ((ret != XML_ERR_OK) &&
ent->children = list; (ret != XML_ERR_ENTITY_LOOP) &&
/* (ret != XML_WAR_UNDECLARED_ENTITY)) {
* Prune it directly in the generated document xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
* except for single text nodes. "Entity '%s' failed to parse\n", ent->name);
*/ if (ent->content != NULL)
if ((ctxt->replaceEntities == 0) || ent->content[0] = 0;
(ctxt->parseMode == XML_PARSE_READER) ||
((list->type == XML_TEXT_NODE) &&
(list->next == NULL))) {
ent->owner = 1;
while (list != NULL) {
list->parent = (xmlNodePtr) ent;
if (list->doc != ent->doc)
xmlSetTreeDoc(list, ent->doc);
if (list->next == NULL)
ent->last = list;
list = list->next;
}
list = NULL;
} else {
ent->owner = 0;
while (list != NULL) {
list->parent = (xmlNodePtr) ctxt->node;
list->doc = ctxt->myDoc;
if (list->next == NULL)
ent->last = list;
list = list->next;
}
list = ent->children;
}
} else {
if ((ret != XML_ERR_OK) &&
(ret != XML_WAR_UNDECLARED_ENTITY)) {
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
"Entity '%s' failed to parse\n", ent->name);
if (ent->content != NULL)
ent->content[0] = 0;
}
if (list != NULL) {
xmlFreeNodeList(list);
list = NULL;
}
} }
} else if (ent->children != NULL) { } else if (ent->children != NULL) {
/* /*
@ -7139,139 +7097,54 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
} else { } else {
/* /*
* Probably running in SAX mode and the callbacks don't * Probably running in SAX mode and the callbacks don't
* build the entity content. So unless we already went * build the entity content. Parse the entity again.
* though parsing for first checking go though the entity
* content to generate callbacks associated to the entity
* *
* This will also be triggered in normal tree builder mode * This will also be triggered in normal tree builder mode
* if an entity happens to be empty, causing unnecessary * if an entity happens to be empty, causing unnecessary
* reloads. It's hard to come up with a reliable check in * reloads. It's hard to come up with a reliable check in
* which mode we're running. * which mode we're running.
*/ */
xmlCtxtParseEntity(ctxt, ent, &list); xmlCtxtParseEntity(ctxt, ent);
} }
} }
if (ctxt->replaceEntities == 0) { if (ctxt->replaceEntities == 0) {
/* /*
* Create a node. * Create a reference
*/ */
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) && if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
(!ctxt->disableSAX)) (!ctxt->disableSAX))
ctxt->sax->reference(ctxt->userData, ent->name); ctxt->sax->reference(ctxt->userData, ent->name);
} else if ((ent->children != NULL) && (ctxt->node != NULL)) { } else if ((ent->children != NULL) && (ctxt->node != NULL)) {
xmlNodePtr copy, cur;
/* /*
* Seems we are generating the DOM content, do * Seems we are generating the DOM content, copy the tree
* a simple tree copy for all references except the first
* In the first occurrence list contains the replacement.
*
* There is a problem on the handling of _private for entities
* (bug 155816): Should we copy the content of the field from
* the entity (possibly overwriting some value set by the user
* when a copy is created), should we leave it alone, or should
* we try to take care of different situations? The problem
* is exacerbated by the usage of this field by the xmlReader.
* To fix this bug, we look at _private on the created node
* and, if it's NULL, we copy in whatever was in the entity.
* If it's not NULL we leave it alone. This is somewhat of a
* hack - maybe we should have further tests to determine
* what to do.
*/ */
if (((list == NULL) && (ent->owner == 0)) || cur = ent->children;
(ctxt->parseMode == XML_PARSE_READER)) { while (cur != NULL) {
xmlNodePtr nw = NULL, cur, firstChild = NULL; copy = xmlDocCopyNode(cur, ctxt->myDoc, 1);
/* if (copy == NULL) {
* when operating on a reader, the entities definitions xmlErrMemory(ctxt);
* are always owning the entities subtree. break;
if (ctxt->parseMode == XML_PARSE_READER)
ent->owner = 1;
*/
cur = ent->children;
while (cur != NULL) {
nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
if (nw == NULL) {
xmlErrMemory(ctxt);
} else {
if (nw->_private == NULL)
nw->_private = cur->_private;
if (firstChild == NULL){
firstChild = nw;
}
nw = xmlAddChild(ctxt->node, nw);
if (nw == NULL)
xmlErrMemory(ctxt);
}
if (cur == ent->last) {
/*
* needed to detect some strange empty
* node cases in the reader tests
*/
if ((ctxt->parseMode == XML_PARSE_READER) &&
(nw != NULL) &&
(nw->type == XML_ELEMENT_NODE) &&
(nw->children == NULL))
nw->extra = 1;
break;
}
cur = cur->next;
} }
} else if ((list == NULL) || (ctxt->inputNr > 0)) {
xmlNodePtr nw = NULL, cur, next, last,
firstChild = NULL;
/* if (ctxt->parseMode == XML_PARSE_READER) {
* Copy the entity child list and make it the new /* Needed for reader */
* entity child list. The goal is to make sure any copy->extra = cur->extra;
* ID or REF referenced will be the one from the /* Maybe needed for reader */
* document content and not the entity copy. copy->_private = cur->_private;
*/
cur = ent->children;
ent->children = NULL;
last = ent->last;
ent->last = NULL;
while (cur != NULL) {
next = cur->next;
cur->next = NULL;
cur->parent = NULL;
nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
if (nw == NULL) {
xmlErrMemory(ctxt);
} else {
if (nw->_private == NULL)
nw->_private = cur->_private;
if (firstChild == NULL){
firstChild = cur;
}
if (xmlAddChild((xmlNodePtr) ent, nw) == NULL)
xmlErrMemory(ctxt);
}
if (xmlAddChild(ctxt->node, cur) == NULL)
xmlErrMemory(ctxt);
if (cur == last)
break;
cur = next;
} }
if (ent->owner == 0)
ent->owner = 1;
} else {
const xmlChar *nbktext;
/* /*
* the name change is to avoid coalescing of the * We have to call xmlAddChild to coalesce text nodes
* node with a possible previous text one which
* would make ent->children a dangling pointer
*/ */
nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext", copy = xmlAddChild(ctxt->node, copy);
-1); if (copy == NULL)
if (ent->children->type == XML_TEXT_NODE) xmlErrMemory(ctxt);
ent->children->name = nbktext;
if ((ent->last != ent->children) && cur = cur->next;
(ent->last->type == XML_TEXT_NODE))
ent->last->name = nbktext;
xmlAddChildList(ctxt->node, ent->children);
} }
/* /*
@ -7280,7 +7153,6 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
*/ */
ctxt->nodemem = 0; ctxt->nodemem = 0;
ctxt->nodelen = 0; ctxt->nodelen = 0;
return;
} }
} }
@ -12266,18 +12138,17 @@ error:
} }
static xmlParserErrors static xmlParserErrors
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent, xmlNodePtr *list) { xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent) {
xmlParserInputPtr input; xmlParserInputPtr input;
xmlParserNsData *nsdb = NULL; xmlParserNsData *nsdb = NULL;
xmlParserNsData *oldNsdb = ctxt->nsdb; xmlParserNsData *oldNsdb = ctxt->nsdb;
xmlNodePtr list;
unsigned long oldsizeentcopy = ctxt->sizeentcopy; unsigned long oldsizeentcopy = ctxt->sizeentcopy;
unsigned long consumed; unsigned long consumed;
int isExternal; int isExternal;
int alreadyParsed; int alreadyParsed;
int ret; int ret;
*list = NULL;
isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY); isExternal = (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY);
alreadyParsed = (ent->flags & XML_ENT_PARSED) ? 1 : 0; alreadyParsed = (ent->flags & XML_ENT_PARSED) ? 1 : 0;
@ -12336,7 +12207,7 @@ xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent, xmlNodePtr *list) {
ctxt->nsdb = nsdb; ctxt->nsdb = nsdb;
ent->flags |= XML_ENT_EXPANDING; ent->flags |= XML_ENT_EXPANDING;
ret = xmlCtxtParseContent(ctxt, input, isExternal, list); ret = xmlCtxtParseContent(ctxt, input, isExternal, &list);
ent->flags &= ~XML_ENT_EXPANDING; ent->flags &= ~XML_ENT_EXPANDING;
ctxt->nsdb = oldNsdb; ctxt->nsdb = oldNsdb;
@ -12353,14 +12224,21 @@ xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent, xmlNodePtr *list) {
xmlSaturatedAdd(&ctxt->sizeentities, consumed); xmlSaturatedAdd(&ctxt->sizeentities, consumed);
ent->expandedSize = ctxt->sizeentcopy; ent->expandedSize = ctxt->sizeentcopy;
ent->children = list;
while (list != NULL) {
list->parent = (xmlNodePtr) ent;
if (list->next == NULL)
ent->last = list;
list = list->next;
}
} else {
xmlFreeNodeList(list);
} }
/* This adds the old size back */ /* This adds the old size back */
if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) { if (xmlParserEntityCheck(ctxt, oldsizeentcopy))
xmlFreeNodeList(*list);
*list = NULL;
ret = ctxt->errNo; ret = ctxt->errNo;
}
xmlParserNsFree(nsdb); xmlParserNsFree(nsdb);
xmlFreeInputStream(input); xmlFreeInputStream(input);

View File

@ -3,14 +3,11 @@
1 14 #text 0 1 1 14 #text 0 1
1 1 ent 0 0 1 1 ent 0 0
2 1 a 0 0 2 1 a 1 0
2 15 a 0 0
2 3 #text 0 1 , 2 3 #text 0 1 ,
2 1 b 0 0 2 1 b 1 0
2 15 b 0 0
2 3 #text 0 1 , 2 3 #text 0 1 ,
2 1 c 0 0 2 1 c 1 0
2 15 c 0 0
2 3 #text 0 1 , 2 3 #text 0 1 ,
2 1 d 1 0 2 1 d 1 0
1 15 ent 0 0 1 15 ent 0 0
@ -282,14 +279,11 @@
1 14 #text 0 1 1 14 #text 0 1
1 1 ent 0 0 1 1 ent 0 0
2 1 a 0 0 2 1 a 1 0
2 15 a 0 0
2 3 #text 0 1 , 2 3 #text 0 1 ,
2 1 b 0 0 2 1 b 1 0
2 15 b 0 0
2 3 #text 0 1 , 2 3 #text 0 1 ,
2 1 c 0 0 2 1 c 1 0
2 15 c 0 0
2 3 #text 0 1 , 2 3 #text 0 1 ,
2 1 d 1 0 2 1 d 1 0
1 15 ent 0 0 1 15 ent 0 0

View File

@ -1,6 +1,6 @@
Object is a Node Set : Object is a Node Set :
Set contains 1 nodes: Set contains 1 nodes:
1 ELEMENT foo 1 ELEMENT err
ATTRIBUTE id ATTRIBUTE id
TEXT TEXT
content=bar content=bar

2
tree.c
View File

@ -1353,7 +1353,6 @@ xmlStringLenGetNodeList(const xmlDoc *doc, const xmlChar *value, int len) {
goto out; goto out;
} }
} }
ent->owner = 1;
ent->flags |= XML_ENT_PARSED; ent->flags |= XML_ENT_PARSED;
temp = ent->children; temp = ent->children;
while (temp) { while (temp) {
@ -1575,7 +1574,6 @@ xmlStringGetNodeList(const xmlDoc *doc, const xmlChar *value) {
goto out; goto out;
} }
} }
ent->owner = 1;
ent->flags |= XML_ENT_PARSED; ent->flags |= XML_ENT_PARSED;
temp = ent->children; temp = ent->children;
while (temp) { while (temp) {

31
valid.c
View File

@ -2521,7 +2521,6 @@ xmlAddIDSafe(xmlDocPtr doc, const xmlChar *value, xmlAttrPtr attr,
int streaming, xmlIDPtr *id) { int streaming, xmlIDPtr *id) {
xmlIDPtr ret; xmlIDPtr ret;
xmlIDTablePtr table; xmlIDTablePtr table;
int res;
if (id != NULL) if (id != NULL)
*id = NULL; *id = NULL;
@ -2542,9 +2541,23 @@ xmlAddIDSafe(xmlDocPtr doc, const xmlChar *value, xmlAttrPtr attr,
table = (xmlIDTablePtr) doc->ids; table = (xmlIDTablePtr) doc->ids;
if (table == NULL) { if (table == NULL) {
doc->ids = table = xmlHashCreateDict(0, doc->dict); doc->ids = table = xmlHashCreateDict(0, doc->dict);
if (table == NULL)
return(-1);
} else {
ret = xmlHashLookup(table, value);
if (ret != NULL) {
/*
* Update the attribute to make entities work.
*/
if (ret->attr != NULL) {
ret->attr->id = NULL;
ret->attr = attr;
}
attr->atype = XML_ATTRIBUTE_ID;
attr->id = ret;
return(0);
}
} }
if (table == NULL)
return(-1);
ret = (xmlIDPtr) xmlMalloc(sizeof(xmlID)); ret = (xmlIDPtr) xmlMalloc(sizeof(xmlID));
if (ret == NULL) if (ret == NULL)
@ -2579,16 +2592,14 @@ xmlAddIDSafe(xmlDocPtr doc, const xmlChar *value, xmlAttrPtr attr,
} }
ret->lineno = xmlGetLineNo(attr->parent); ret->lineno = xmlGetLineNo(attr->parent);
res = xmlHashAdd(table, value, ret); if (xmlHashAddEntry(table, value, ret) < 0) {
if (res <= 0) {
xmlFreeID(ret); xmlFreeID(ret);
return(res); return(-1);
}
if (attr != NULL) {
attr->atype = XML_ATTRIBUTE_ID;
attr->id = ret;
} }
attr->atype = XML_ATTRIBUTE_ID;
attr->id = ret;
if (id != NULL) if (id != NULL)
*id = ret; *id = ret;
return(1); return(1);