diff --git a/HTMLparser.c b/HTMLparser.c index 83d70de9..eac9964a 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -2965,16 +2965,44 @@ htmlCharDataSAXCallback(htmlParserCtxtPtr ctxt, const xmlChar *buf, if ((mode == 0) || (mode == DATA_RCDATA) || (ctxt->sax->cdataBlock == NULL)) { - int blank = areBlanks(ctxt, buf, size); + if ((ctxt->name == NULL) || + (xmlStrEqual(ctxt->name, BAD_CAST "html")) || + (xmlStrEqual(ctxt->name, BAD_CAST "head"))) { + int i; - if ((mode == 0) && (blank > 0) && (!ctxt->keepBlanks)) { + /* + * Add leading whitespace to html or head elements before + * calling htmlCheckParagraph. + */ + for (i = 0; i < size; i++) + if (!IS_WS_HTML(buf[i])) + break; + + if (i > 0) { + if (!ctxt->keepBlanks) { + if (ctxt->sax->ignorableWhitespace != NULL) + ctxt->sax->ignorableWhitespace(ctxt->userData, buf, i); + } else { + if (ctxt->sax->characters != NULL) + ctxt->sax->characters(ctxt->userData, buf, i); + } + + buf += i; + size -= i; + } + + if (size <= 0) + return; + + htmlCheckParagraph(ctxt); + } + + if ((mode == 0) && + (!ctxt->keepBlanks) && + (areBlanks(ctxt, buf, size))) { if (ctxt->sax->ignorableWhitespace != NULL) - ctxt->sax->ignorableWhitespace(ctxt->userData, - buf, size); + ctxt->sax->ignorableWhitespace(ctxt->userData, buf, size); } else { - if ((mode == 0) && (blank < 0)) - htmlCheckParagraph(ctxt); - if (ctxt->sax->characters != NULL) ctxt->sax->characters(ctxt->userData, buf, size); } diff --git a/result/HTML/implied1.html b/result/HTML/implied1.html new file mode 100644 index 00000000..6c860658 --- /dev/null +++ b/result/HTML/implied1.html @@ -0,0 +1,6 @@ + + +
+x +
+ diff --git a/result/HTML/implied1.html.sax b/result/HTML/implied1.html.sax new file mode 100644 index 00000000..b3d71651 --- /dev/null +++ b/result/HTML/implied1.html.sax @@ -0,0 +1,14 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.startElement(html) +SAX.startElement(head) +SAX.characters( , 3) +SAX.endElement(head) +SAX.startElement(body) +SAX.startElement(p) +SAX.characters(x +, 2) +SAX.endElement(p) +SAX.endElement(body) +SAX.endElement(html) +SAX.endDocument() diff --git a/test/HTML/implied1.html b/test/HTML/implied1.html new file mode 100644 index 00000000..d288ee72 --- /dev/null +++ b/test/HTML/implied1.html @@ -0,0 +1 @@ + x