diff --git a/doc/xmllint.xml b/doc/xmllint.xml index 547bf678..3de2b875 100644 --- a/doc/xmllint.xml +++ b/doc/xmllint.xml @@ -283,6 +283,10 @@ environment variable controls the indentation. The default value is two spaces " "). + + Especially in the absence of a DTD, this feature has never worked reliably + and is fundamentally broken. + diff --git a/parser.c b/parser.c index ccfa9e53..3e034d47 100644 --- a/parser.c +++ b/parser.c @@ -4914,6 +4914,11 @@ get_more_space: (ctxt->disableSAX == 0) && (ctxt->sax->ignorableWhitespace != ctxt->sax->characters)) { + /* + * Calling areBlanks with only parts of a text node + * is fundamentally broken, making the NOBLANKS option + * essentially unusable. + */ if (areBlanks(ctxt, tmp, nbchar, 1)) { if (ctxt->sax->ignorableWhitespace != NULL) ctxt->sax->ignorableWhitespace(ctxt->userData, @@ -13715,11 +13720,9 @@ xmlCtxtSetOptionsInternal(xmlParserCtxtPtr ctxt, int options, int keepMask) * * XML_PARSE_NOBLANKS * - * Remove some text nodes containing only whitespace from the - * result document. Which nodes are removed depends on DTD - * element declarations or a conservative heuristic. The - * reindenting feature of the serialization code relies on this - * option to be set when parsing. Use of this option is + * Remove some whitespace from the result document. Where to + * remove whitespace depends on DTD element declarations or a + * broken heuristic with unfixable bugs. Use of this option is * DISCOURAGED. * * Not supported by the push parser.