From f506ec66547ef9bac97a2bf306d368ecea8c0c9e Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Mon, 15 Apr 2024 11:27:44 +0200 Subject: [PATCH] parser: Always decode entities in namespace URIs Also decode entities in namespace URIs if entity substitution wasn't requested. This should fix some corner cases when comparing namespace URIs. The Namespaces in XML 1.0 spec says: > In a namespace declaration, the URI reference is the normalized value > of the attribute, so replacement of XML character and entity > references has already been done before any comparison. Make the serialization code escape special characters in namespace URIs like in attribute values. This fixes serialization if entities were substituted when parsing. Fixes https://gitlab.gnome.org/GNOME/libxslt/-/issues/106 --- parser.c | 20 ++++++++++----- result/entity-in-ns-uri.xml | 8 ++++++ result/entity-in-ns-uri.xml.rde | 11 ++++++++ result/entity-in-ns-uri.xml.rdr | 11 ++++++++ result/entity-in-ns-uri.xml.sax | 21 ++++++++++++++++ result/entity-in-ns-uri.xml.sax2 | 21 ++++++++++++++++ result/noent/entity-in-ns-uri.xml | 8 ++++++ result/noent/entity-in-ns-uri.xml.sax2 | 21 ++++++++++++++++ test/entity-in-ns-uri.xml | 7 ++++++ xmlsave.c | 35 +++++++++----------------- 10 files changed, 134 insertions(+), 29 deletions(-) create mode 100644 result/entity-in-ns-uri.xml create mode 100644 result/entity-in-ns-uri.xml.rde create mode 100644 result/entity-in-ns-uri.xml.rdr create mode 100644 result/entity-in-ns-uri.xml.sax create mode 100644 result/entity-in-ns-uri.xml.sax2 create mode 100644 result/noent/entity-in-ns-uri.xml create mode 100644 result/noent/entity-in-ns-uri.xml.sax2 create mode 100644 test/entity-in-ns-uri.xml diff --git a/parser.c b/parser.c index 95f3ed1c..3bc237b8 100644 --- a/parser.c +++ b/parser.c @@ -4280,7 +4280,7 @@ xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str, */ static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc, - 
int normalize) { + int normalize, int isNamespace) { unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ? XML_MAX_HUGE_LENGTH : XML_MAX_TEXT_LENGTH; @@ -4288,6 +4288,10 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc, xmlChar *ret; int c, l, quote, flags, chunkSize; int inSpace = 1; + int replaceEntities; + + /* Always expand namespace URIs */ + replaceEntities = (ctxt->replaceEntities) || (isNamespace); xmlSBufInit(&buf, maxLength); @@ -4400,7 +4404,7 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc, if (val == 0) goto error; - if ((val == '&') && (!ctxt->replaceEntities)) { + if ((val == '&') && (!replaceEntities)) { /* * The reparsing will be done in xmlStringGetNodeList() * called by the attribute() function in SAX.c @@ -4438,12 +4442,12 @@ xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc, continue; if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) { - if ((ent->content[0] == '&') && (!ctxt->replaceEntities)) + if ((ent->content[0] == '&') && (!replaceEntities)) xmlSBufAddCString(&buf, "&#38;", 5); else xmlSBufAddString(&buf, ent->content, ent->length); inSpace = 0; - } else if (ctxt->replaceEntities) { + } else if (replaceEntities) { xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent, normalize, &inSpace, ctxt->inputNr, /* check */ 1); @@ -4544,7 +4548,7 @@ error: xmlChar * xmlParseAttValue(xmlParserCtxtPtr ctxt) { if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL); - return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0)); + return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0, 0)); } /** @@ -8777,6 +8781,7 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt, const xmlChar *prefix, *name; xmlChar *val = NULL, *internal_val = NULL; int normalize = 0; + int isNamespace; *value = NULL; GROW; @@ -8812,7 +8817,10 @@ xmlParseAttribute2(xmlParserCtxtPtr ctxt, if (RAW == '=') { NEXT; SKIP_BLANKS; - val = xmlParseAttValueInternal(ctxt, len, alloc, normalize); + isNamespace = (((prefix == NULL) 
&& (name == ctxt->str_xmlns)) || + (prefix == ctxt->str_xmlns)); + val = xmlParseAttValueInternal(ctxt, len, alloc, normalize, + isNamespace); if (val == NULL) goto error; } else { diff --git a/result/entity-in-ns-uri.xml b/result/entity-in-ns-uri.xml new file mode 100644 index 00000000..87655b01 --- /dev/null +++ b/result/entity-in-ns-uri.xml @@ -0,0 +1,8 @@ + + +]> + + + + diff --git a/result/entity-in-ns-uri.xml.rde b/result/entity-in-ns-uri.xml.rde new file mode 100644 index 00000000..aa228df6 --- /dev/null +++ b/result/entity-in-ns-uri.xml.rde @@ -0,0 +1,11 @@ +0 10 doc 0 0 +0 1 doc 0 0 +1 14 #text 0 1 + +1 1 e 1 0 +1 14 #text 0 1 + +1 1 e 1 0 +1 14 #text 0 1 + +0 15 doc 0 0 diff --git a/result/entity-in-ns-uri.xml.rdr b/result/entity-in-ns-uri.xml.rdr new file mode 100644 index 00000000..aa228df6 --- /dev/null +++ b/result/entity-in-ns-uri.xml.rdr @@ -0,0 +1,11 @@ +0 10 doc 0 0 +0 1 doc 0 0 +1 14 #text 0 1 + +1 1 e 1 0 +1 14 #text 0 1 + +1 1 e 1 0 +1 14 #text 0 1 + +0 15 doc 0 0 diff --git a/result/entity-in-ns-uri.xml.sax b/result/entity-in-ns-uri.xml.sax new file mode 100644 index 00000000..e06eabd4 --- /dev/null +++ b/result/entity-in-ns-uri.xml.sax @@ -0,0 +1,21 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.internalSubset(doc, , ) +SAX.entityDecl(e, 1, (null), (null), -) +SAX.getEntity(e) +SAX.externalSubset(doc, , ) +SAX.startElement(doc) +SAX.characters( + , 5) +SAX.getEntity(e) +SAX.startElement(e, xmlns='foo:x&x&x'x&e;x') +SAX.endElement(e) +SAX.characters( + , 5) +SAX.getEntity(e) +SAX.startElement(e, xmlns:ns='foo:x&x&x'x&e;x') +SAX.endElement(e) +SAX.characters( +, 1) +SAX.endElement(doc) +SAX.endDocument() diff --git a/result/entity-in-ns-uri.xml.sax2 b/result/entity-in-ns-uri.xml.sax2 new file mode 100644 index 00000000..13b2547e --- /dev/null +++ b/result/entity-in-ns-uri.xml.sax2 @@ -0,0 +1,21 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.internalSubset(doc, , ) +SAX.entityDecl(e, 1, (null), (null), -) +SAX.getEntity(e) 
+SAX.externalSubset(doc, , ) +SAX.startElementNs(doc, NULL, NULL, 0, 0, 0) +SAX.characters( + , 5) +SAX.getEntity(e) +SAX.startElementNs(e, NULL, 'foo:x&x&x'x-x', 1, xmlns='foo:x&x&x'x-x', 0, 0) +SAX.endElementNs(e, NULL, 'foo:x&x&x'x-x') +SAX.characters( + , 5) +SAX.getEntity(e) +SAX.startElementNs(e, NULL, NULL, 1, xmlns:ns='foo:x&x&x'x-x', 0, 0) +SAX.endElementNs(e, NULL, NULL) +SAX.characters( +, 1) +SAX.endElementNs(doc, NULL, NULL) +SAX.endDocument() diff --git a/result/noent/entity-in-ns-uri.xml b/result/noent/entity-in-ns-uri.xml new file mode 100644 index 00000000..87655b01 --- /dev/null +++ b/result/noent/entity-in-ns-uri.xml @@ -0,0 +1,8 @@ + + +]> + + + + diff --git a/result/noent/entity-in-ns-uri.xml.sax2 b/result/noent/entity-in-ns-uri.xml.sax2 new file mode 100644 index 00000000..13b2547e --- /dev/null +++ b/result/noent/entity-in-ns-uri.xml.sax2 @@ -0,0 +1,21 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.internalSubset(doc, , ) +SAX.entityDecl(e, 1, (null), (null), -) +SAX.getEntity(e) +SAX.externalSubset(doc, , ) +SAX.startElementNs(doc, NULL, NULL, 0, 0, 0) +SAX.characters( + , 5) +SAX.getEntity(e) +SAX.startElementNs(e, NULL, 'foo:x&x&x'x-x', 1, xmlns='foo:x&x&x'x-x', 0, 0) +SAX.endElementNs(e, NULL, 'foo:x&x&x'x-x') +SAX.characters( + , 5) +SAX.getEntity(e) +SAX.startElementNs(e, NULL, NULL, 1, xmlns:ns='foo:x&x&x'x-x', 0, 0) +SAX.endElementNs(e, NULL, NULL) +SAX.characters( +, 1) +SAX.endElementNs(doc, NULL, NULL) +SAX.endDocument() diff --git a/test/entity-in-ns-uri.xml b/test/entity-in-ns-uri.xml new file mode 100644 index 00000000..e00c0217 --- /dev/null +++ b/test/entity-in-ns-uri.xml @@ -0,0 +1,7 @@ + +]> + + + + diff --git a/xmlsave.c b/xmlsave.c index 13b2ed3c..cf4e0583 100644 --- a/xmlsave.c +++ b/xmlsave.c @@ -870,7 +870,8 @@ xmlOutputBufferWriteWSNonSig(xmlSaveCtxtPtr ctxt, int extra) * If @ctxt is supplied, @buf should be its buffer. 
*/ static void -xmlNsDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur, xmlSaveCtxtPtr ctxt) { +xmlNsDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNsPtr cur, + xmlSaveCtxtPtr ctxt) { if ((cur == NULL) || (buf == NULL)) return; if ((cur->type == XML_LOCAL_NAMESPACE) && (cur->href != NULL)) { if (xmlStrEqual(cur->prefix, BAD_CAST "xml")) @@ -887,24 +888,12 @@ xmlNsDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur, xmlSaveCtxtPtr ctxt) { xmlOutputBufferWriteString(buf, (const char *)cur->prefix); } else xmlOutputBufferWrite(buf, 5, "xmlns"); - xmlOutputBufferWrite(buf, 1, "="); - xmlOutputBufferWriteQuotedString(buf, cur->href); + xmlOutputBufferWrite(buf, 2, "=\""); + xmlBufAttrSerializeTxtContent(buf, doc, cur->href); + xmlOutputBufferWrite(buf, 1, "\""); } } -/** - * xmlNsDumpOutputCtxt - * @ctxt: the save context - * @cur: a namespace - * - * Dump a local Namespace definition to a save context. - * Should be called in the context of attribute dumps. - */ -static void -xmlNsDumpOutputCtxt(xmlSaveCtxtPtr ctxt, xmlNsPtr cur) { - xmlNsDumpOutput(ctxt->buf, cur, ctxt); -} - /** * xmlNsListDumpOutputCtxt * @ctxt: the save context @@ -914,9 +903,9 @@ xmlNsDumpOutputCtxt(xmlSaveCtxtPtr ctxt, xmlNsPtr cur) { * Should be called in the context of attribute dumps. 
*/ static void -xmlNsListDumpOutputCtxt(xmlSaveCtxtPtr ctxt, xmlNsPtr cur) { +xmlNsListDumpOutputCtxt(xmlSaveCtxtPtr ctxt, xmlDocPtr doc, xmlNsPtr cur) { while (cur != NULL) { - xmlNsDumpOutput(ctxt->buf, cur, ctxt); + xmlNsDumpOutput(ctxt->buf, doc, cur, ctxt); cur = cur->next; } } @@ -932,7 +921,7 @@ xmlNsListDumpOutputCtxt(xmlSaveCtxtPtr ctxt, xmlNsPtr cur) { void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur) { while (cur != NULL) { - xmlNsDumpOutput(buf, cur, NULL); + xmlNsDumpOutput(buf, NULL, cur, NULL); cur = cur->next; } } @@ -1168,7 +1157,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { } xmlOutputBufferWriteString(buf, (const char *)cur->name); if (cur->nsDef) - xmlNsListDumpOutputCtxt(ctxt, cur->nsDef); + xmlNsListDumpOutputCtxt(ctxt, cur->doc, cur->nsDef); for (attr = cur->properties; attr != NULL; attr = attr->next) xmlAttrDumpOutput(ctxt, attr); @@ -1308,7 +1297,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { break; case XML_NAMESPACE_DECL: - xmlNsDumpOutputCtxt(ctxt, (xmlNsPtr) cur); + xmlNsDumpOutput(buf, NULL, (xmlNsPtr) cur, ctxt); break; default: @@ -1692,7 +1681,7 @@ xhtmlNodeDumpOutput(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { break; case XML_NAMESPACE_DECL: - xmlNsDumpOutputCtxt(ctxt, (xmlNsPtr) cur); + xmlNsDumpOutput(buf, NULL, (xmlNsPtr) cur, ctxt); break; case XML_DTD_NODE: @@ -1747,7 +1736,7 @@ xhtmlNodeDumpOutput(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) { xmlOutputBufferWriteString(buf, (const char *)cur->name); if (cur->nsDef) - xmlNsListDumpOutputCtxt(ctxt, cur->nsDef); + xmlNsListDumpOutputCtxt(ctxt, cur->doc, cur->nsDef); if ((xmlStrEqual(cur->name, BAD_CAST "html") && (cur->ns == NULL) && (cur->nsDef == NULL))) { /*