mirror of
https://gitlab.gnome.org/GNOME/libxml2
synced 2025-03-28 21:33:13 +00:00
html: Stop implying <p> start tags
Only <html>, <head> or <body> should be implied. Opening extra <p> tags has always been a libxml2 quirk.
This commit is contained in:
parent
71122421a1
commit
8cf6129bbd
71
HTMLparser.c
71
HTMLparser.c
@ -1164,19 +1164,6 @@ static const htmlStartCloseEntry htmlStartClose[] = {
|
|||||||
{ "xmp", "ul" }
|
{ "xmp", "ul" }
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
|
||||||
* The list of HTML elements which are supposed not to have
|
|
||||||
* CDATA content and where a p element will be implied
|
|
||||||
*
|
|
||||||
* TODO: extend that list by reading the HTML SGML DTD on
|
|
||||||
* implied paragraph
|
|
||||||
*/
|
|
||||||
static const char *const htmlNoContentElements[] = {
|
|
||||||
"html",
|
|
||||||
"head",
|
|
||||||
NULL
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The list of HTML attributes which are of content %Script;
|
* The list of HTML attributes which are of content %Script;
|
||||||
* NOTE: when adding ones, check htmlIsScriptAttribute() since
|
* NOTE: when adding ones, check htmlIsScriptAttribute() since
|
||||||
@ -1568,48 +1555,22 @@ htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* htmlCheckParagraph
|
* htmlStartCharData
|
||||||
* @ctxt: an HTML parser context
|
* @ctxt: an HTML parser context
|
||||||
*
|
*
|
||||||
* Check whether a p element need to be implied before inserting
|
* Prepare for non-whitespace character data.
|
||||||
* characters in the current element.
|
|
||||||
*
|
|
||||||
* Returns 1 if a paragraph has been inserted, 0 if not and -1
|
|
||||||
* in case of error.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int
|
static void
|
||||||
htmlCheckParagraph(htmlParserCtxtPtr ctxt) {
|
htmlStartCharData(htmlParserCtxtPtr ctxt) {
|
||||||
const xmlChar *tag;
|
if (ctxt->options & (HTML_PARSE_NOIMPLIED | HTML_PARSE_HTML5))
|
||||||
int i;
|
return;
|
||||||
|
|
||||||
if (ctxt == NULL)
|
|
||||||
return(-1);
|
|
||||||
if (ctxt->options & HTML_PARSE_HTML5)
|
|
||||||
return(0);
|
|
||||||
|
|
||||||
tag = ctxt->name;
|
|
||||||
if (tag == NULL) {
|
|
||||||
htmlAutoClose(ctxt, BAD_CAST"p");
|
|
||||||
htmlCheckImplied(ctxt, BAD_CAST"p");
|
|
||||||
htmlnamePush(ctxt, BAD_CAST"p");
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
|
|
||||||
ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
|
|
||||||
return(1);
|
|
||||||
}
|
|
||||||
if (!htmlOmittedDefaultValue)
|
if (!htmlOmittedDefaultValue)
|
||||||
return(0);
|
return;
|
||||||
for (i = 0; htmlNoContentElements[i] != NULL; i++) {
|
|
||||||
if (xmlStrEqual(tag, BAD_CAST htmlNoContentElements[i])) {
|
if (xmlStrEqual(ctxt->name, BAD_CAST "head"))
|
||||||
htmlAutoClose(ctxt, BAD_CAST"p");
|
htmlAutoClose(ctxt, BAD_CAST "p");
|
||||||
htmlCheckImplied(ctxt, BAD_CAST"p");
|
htmlCheckImplied(ctxt, BAD_CAST "p");
|
||||||
htmlnamePush(ctxt, BAD_CAST"p");
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL))
|
|
||||||
ctxt->sax->startElement(ctxt->userData, BAD_CAST"p", NULL);
|
|
||||||
return(1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -2972,7 +2933,7 @@ htmlCharDataSAXCallback(htmlParserCtxtPtr ctxt, const xmlChar *buf,
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Add leading whitespace to html or head elements before
|
* Add leading whitespace to html or head elements before
|
||||||
* calling htmlCheckParagraph.
|
* calling htmlStartCharData.
|
||||||
*/
|
*/
|
||||||
for (i = 0; i < size; i++)
|
for (i = 0; i < size; i++)
|
||||||
if (!IS_WS_HTML(buf[i]))
|
if (!IS_WS_HTML(buf[i]))
|
||||||
@ -2994,7 +2955,7 @@ htmlCharDataSAXCallback(htmlParserCtxtPtr ctxt, const xmlChar *buf,
|
|||||||
if (size <= 0)
|
if (size <= 0)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
htmlCheckParagraph(ctxt);
|
htmlStartCharData(ctxt);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((mode == 0) &&
|
if ((mode == 0) &&
|
||||||
@ -4084,7 +4045,7 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt)
|
|||||||
SKIP(2);
|
SKIP(2);
|
||||||
|
|
||||||
if (ctxt->input->cur >= ctxt->input->end) {
|
if (ctxt->input->cur >= ctxt->input->end) {
|
||||||
htmlCheckParagraph(ctxt);
|
htmlStartCharData(ctxt);
|
||||||
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
|
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
|
||||||
(ctxt->sax->characters != NULL))
|
(ctxt->sax->characters != NULL))
|
||||||
ctxt->sax->characters(ctxt->userData,
|
ctxt->sax->characters(ctxt->userData,
|
||||||
@ -4243,7 +4204,7 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
|
|||||||
} else if (IS_ASCII_LETTER(NXT(1))) {
|
} else if (IS_ASCII_LETTER(NXT(1))) {
|
||||||
htmlParseElementInternal(ctxt);
|
htmlParseElementInternal(ctxt);
|
||||||
} else {
|
} else {
|
||||||
htmlCheckParagraph(ctxt);
|
htmlStartCharData(ctxt);
|
||||||
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
|
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
|
||||||
(ctxt->sax->characters != NULL))
|
(ctxt->sax->characters != NULL))
|
||||||
ctxt->sax->characters(ctxt->userData, BAD_CAST "<", 1);
|
ctxt->sax->characters(ctxt->userData, BAD_CAST "<", 1);
|
||||||
@ -5187,7 +5148,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
ctxt->checkIndex = 0;
|
ctxt->checkIndex = 0;
|
||||||
} else {
|
} else {
|
||||||
ctxt->instate = XML_PARSER_CONTENT;
|
ctxt->instate = XML_PARSER_CONTENT;
|
||||||
htmlCheckParagraph(ctxt);
|
htmlStartCharData(ctxt);
|
||||||
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
|
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
|
||||||
(ctxt->sax->characters != NULL))
|
(ctxt->sax->characters != NULL))
|
||||||
ctxt->sax->characters(ctxt->userData,
|
ctxt->sax->characters(ctxt->userData,
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||||
<html><body><p>&jÙ</p></body></html>
|
<html><body>&jÙ</body></html>
|
||||||
|
@ -2,10 +2,8 @@ SAX.setDocumentLocator()
|
|||||||
SAX.startDocument()
|
SAX.startDocument()
|
||||||
SAX.startElement(html)
|
SAX.startElement(html)
|
||||||
SAX.startElement(body)
|
SAX.startElement(body)
|
||||||
SAX.startElement(p)
|
|
||||||
SAX.characters(&j, 2)
|
SAX.characters(&j, 2)
|
||||||
SAX.characters(Ù, 2)
|
SAX.characters(Ù, 2)
|
||||||
SAX.endElement(p)
|
|
||||||
SAX.endElement(body)
|
SAX.endElement(body)
|
||||||
SAX.endElement(html)
|
SAX.endElement(html)
|
||||||
SAX.endDocument()
|
SAX.endDocument()
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||||
<html><body><p>&:ê
|
<html><body>&:ê
|
||||||
</p></body></html>
|
</body></html>
|
||||||
|
@ -2,11 +2,9 @@ SAX.setDocumentLocator()
|
|||||||
SAX.startDocument()
|
SAX.startDocument()
|
||||||
SAX.startElement(html)
|
SAX.startElement(html)
|
||||||
SAX.startElement(body)
|
SAX.startElement(body)
|
||||||
SAX.startElement(p)
|
|
||||||
SAX.characters(&:, 2)
|
SAX.characters(&:, 2)
|
||||||
SAX.characters(ê
|
SAX.characters(ê
|
||||||
, 3)
|
, 3)
|
||||||
SAX.endElement(p)
|
|
||||||
SAX.endElement(body)
|
SAX.endElement(body)
|
||||||
SAX.endElement(html)
|
SAX.endElement(html)
|
||||||
SAX.endDocument()
|
SAX.endDocument()
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||||
<html>
|
<html>
|
||||||
<head> </head>
|
<head> </head>
|
||||||
<body><p>x
|
<body>x
|
||||||
</p></body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
@ -5,10 +5,8 @@ SAX.startElement(head)
|
|||||||
SAX.characters( , 3)
|
SAX.characters( , 3)
|
||||||
SAX.endElement(head)
|
SAX.endElement(head)
|
||||||
SAX.startElement(body)
|
SAX.startElement(body)
|
||||||
SAX.startElement(p)
|
|
||||||
SAX.characters(x
|
SAX.characters(x
|
||||||
, 2)
|
, 2)
|
||||||
SAX.endElement(p)
|
|
||||||
SAX.endElement(body)
|
SAX.endElement(body)
|
||||||
SAX.endElement(html)
|
SAX.endElement(html)
|
||||||
SAX.endDocument()
|
SAX.endDocument()
|
||||||
|
@ -474,7 +474,7 @@ testHtmlPushWithEncoding(void) {
|
|||||||
err = 1;
|
err = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
node = xmlDocGetRootElement(doc)->children->children->children;
|
node = xmlDocGetRootElement(doc)->children->children;
|
||||||
if (!xmlStrEqual(node->content, BAD_CAST "-\xC3\xA4-")) {
|
if (!xmlStrEqual(node->content, BAD_CAST "-\xC3\xA4-")) {
|
||||||
fprintf(stderr, "testHtmlPushWithEncoding failed\n");
|
fprintf(stderr, "testHtmlPushWithEncoding failed\n");
|
||||||
err = 1;
|
err = 1;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user