From 8ae06d522310b09282c3bcb71f9793d7a41b5ecb Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Thu, 29 Aug 2024 00:07:27 +0200 Subject: [PATCH] SAX2: Don't merge CDATA sections The Document Object Model (DOM) Level 3 Core Specification says: > Adjacent CDATASection nodes are not merged by use of the normalize > method of the Node interface. Fixes #412. --- SAX2.c | 2 +- result/adjacent-cdata.xml | 2 ++ result/adjacent-cdata.xml.rde | 5 +++++ result/adjacent-cdata.xml.rdr | 5 +++++ result/adjacent-cdata.xml.sax | 8 ++++++++ result/adjacent-cdata.xml.sax2 | 8 ++++++++ result/noent/adjacent-cdata.xml | 2 ++ result/noent/adjacent-cdata.xml.sax2 | 8 ++++++++ test/adjacent-cdata.xml | 1 + 9 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 result/adjacent-cdata.xml create mode 100644 result/adjacent-cdata.xml.rde create mode 100644 result/adjacent-cdata.xml.rdr create mode 100644 result/adjacent-cdata.xml.sax create mode 100644 result/adjacent-cdata.xml.sax2 create mode 100644 result/noent/adjacent-cdata.xml create mode 100644 result/noent/adjacent-cdata.xml.sax2 create mode 100644 test/adjacent-cdata.xml diff --git a/SAX2.c b/SAX2.c index 4c3c7329..1a9310d5 100644 --- a/SAX2.c +++ b/SAX2.c @@ -2471,7 +2471,7 @@ xmlSAX2Text(xmlParserCtxtPtr ctxt, const xmlChar *ch, int len, } else { int coalesceText = (lastChild != NULL) && (lastChild->type == type) && - ((type != XML_TEXT_NODE) || + (((ctxt->html) && (type != XML_TEXT_NODE)) || (lastChild->name == xmlStringText)); if ((coalesceText) && (ctxt->nodemem != 0)) { int maxLength = (ctxt->options & XML_PARSE_HUGE) ? diff --git a/result/adjacent-cdata.xml b/result/adjacent-cdata.xml new file mode 100644 index 00000000..fada9eee --- /dev/null +++ b/result/adjacent-cdata.xml @@ -0,0 +1,2 @@ + + diff --git a/result/adjacent-cdata.xml.rde b/result/adjacent-cdata.xml.rde new file mode 100644 index 00000000..026949e0 --- /dev/null +++ b/result/adjacent-cdata.xml.rde @@ -0,0 +1,5 @@ +0 1 doc 0 0 +1 4 #cdata-section 0 1 abc +1 4 #cdata-section 0 1 def +1 4 #cdata-section 0 1 ghi +0 15 doc 0 0 diff --git a/result/adjacent-cdata.xml.rdr b/result/adjacent-cdata.xml.rdr new file mode 100644 index 00000000..026949e0 --- /dev/null +++ b/result/adjacent-cdata.xml.rdr @@ -0,0 +1,5 @@ +0 1 doc 0 0 +1 4 #cdata-section 0 1 abc +1 4 #cdata-section 0 1 def +1 4 #cdata-section 0 1 ghi +0 15 doc 0 0 diff --git a/result/adjacent-cdata.xml.sax b/result/adjacent-cdata.xml.sax new file mode 100644 index 00000000..c7806da7 --- /dev/null +++ b/result/adjacent-cdata.xml.sax @@ -0,0 +1,8 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.startElement(doc) +SAX.pcdata(abc, 3) +SAX.pcdata(def, 3) +SAX.pcdata(ghi, 3) +SAX.endElement(doc) +SAX.endDocument() diff --git a/result/adjacent-cdata.xml.sax2 b/result/adjacent-cdata.xml.sax2 new file mode 100644 index 00000000..56b4b3b7 --- /dev/null +++ b/result/adjacent-cdata.xml.sax2 @@ -0,0 +1,8 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.startElementNs(doc, NULL, NULL, 0, 0, 0) +SAX.pcdata(abc, 3) +SAX.pcdata(def, 3) +SAX.pcdata(ghi, 3) +SAX.endElementNs(doc, NULL, NULL) +SAX.endDocument() diff --git a/result/noent/adjacent-cdata.xml b/result/noent/adjacent-cdata.xml new file mode 100644 index 00000000..fada9eee --- /dev/null +++ b/result/noent/adjacent-cdata.xml @@ -0,0 +1,2 @@ + + diff --git a/result/noent/adjacent-cdata.xml.sax2 b/result/noent/adjacent-cdata.xml.sax2 new file mode 100644 index 00000000..56b4b3b7 --- /dev/null +++ b/result/noent/adjacent-cdata.xml.sax2 @@ -0,0 +1,8 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.startElementNs(doc, NULL, NULL, 0, 0, 0) +SAX.pcdata(abc, 3) +SAX.pcdata(def, 3) +SAX.pcdata(ghi, 3) +SAX.endElementNs(doc, NULL, NULL) +SAX.endDocument() diff --git a/test/adjacent-cdata.xml b/test/adjacent-cdata.xml new file mode 100644 index 00000000..5859226a --- /dev/null +++ b/test/adjacent-cdata.xml @@ -0,0 +1 @@ +