From 8696ebe182b9867cbded474e1a977664d3e0e144 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Tue, 11 Mar 2025 14:32:35 +0100 Subject: [PATCH] parser: Fix ignorableWhitespace callback If ignorableWhitespace differs from the "characters" callback, we have to check for blanks as well. Regressed with 1f5b537. --- SAX2.c | 2 +- parser.c | 15 ++++++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/SAX2.c b/SAX2.c index 436151ef..b5c2e476 100644 --- a/SAX2.c +++ b/SAX2.c @@ -2736,7 +2736,7 @@ xmlSAXVersion(xmlSAXHandler *hdlr, int version) hdlr->reference = xmlSAX2Reference; hdlr->characters = xmlSAX2Characters; hdlr->cdataBlock = xmlSAX2CDataBlock; - hdlr->ignorableWhitespace = xmlSAX2IgnorableWhitespace; + hdlr->ignorableWhitespace = xmlSAX2Characters; hdlr->processingInstruction = xmlSAX2ProcessingInstruction; hdlr->comment = xmlSAX2Comment; hdlr->warning = xmlParserWarning; diff --git a/parser.c b/parser.c index 42684e02..8a26968a 100644 --- a/parser.c +++ b/parser.c @@ -4779,18 +4779,23 @@ static const unsigned char test_char_data[256] = { static void xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size) { + int checkBlanks; + if ((ctxt->sax == NULL) || (ctxt->disableSAX)) return; + checkBlanks = (!ctxt->keepBlanks) || + (ctxt->sax->ignorableWhitespace != ctxt->sax->characters); + /* * Calling areBlanks with only parts of a text node * is fundamentally broken, making the NOBLANKS option * essentially unusable. */ - if ((!ctxt->keepBlanks) && - (ctxt->sax->ignorableWhitespace != ctxt->sax->characters) && + if ((checkBlanks) && (areBlanks(ctxt, buf, size, 1))) { - if (ctxt->sax->ignorableWhitespace != NULL) + if ((ctxt->sax->ignorableWhitespace != NULL) && + (ctxt->keepBlanks)) ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size); } else { if (ctxt->sax->characters != NULL) @@ -4798,9 +4803,9 @@ xmlCharacters(xmlParserCtxtPtr ctxt, const xmlChar *buf, int size) { /* * The old code used to update this value for "complex" data - * even if keepBlanks was true. This was probably a bug. + * even if checkBlanks was false. This was probably a bug. */ - if ((!ctxt->keepBlanks) && (*ctxt->space == -1)) + if ((checkBlanks) && (*ctxt->space == -1)) *ctxt->space = -2; } }