Detect change of encoding when parsing HTML names

From https://bugzilla.gnome.org/show_bug.cgi?id=758518

The problem occurs when a name is being parsed in a file that has no
valid encoding set, so libxml has to guess what the encoding is. This
patch detects when the input buffer location changes and, if it does,
restarts the parsing of the name.

This slightly changes the output of a couple of regression tests.
This commit is contained in:
Hugh Davenport 2016-05-04 11:23:49 +08:00 committed by Daniel Veillard
parent b1d34de46a
commit beca86e8c8
4 changed files with 12 additions and 3 deletions

View File

@ -2492,6 +2492,7 @@ htmlParseNameComplex(xmlParserCtxtPtr ctxt) {
int len = 0, l;
int c;
int count = 0;
const xmlChar *base = ctxt->input->base;
/*
* Handler for more complex cases
@ -2517,6 +2518,13 @@ htmlParseNameComplex(xmlParserCtxtPtr ctxt) {
len += l;
NEXTL(l);
c = CUR_CHAR(l);
if (ctxt->input->base != base) {
/*
* We changed encoding from an unknown encoding
* Input buffer changed location, so we better start again
*/
return(htmlParseNameComplex(ctxt));
}
}
if (ctxt->input->base > ctxt->input->cur - len)

View File

@ -1,3 +1,3 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html><body><p>&amp;
<html><body><p>&amp;&ecirc;
</p></body></html>

View File

@ -1,3 +1,3 @@
./test/HTML/758605.html:1: HTML parser error : htmlParseEntityRef: no name
./test/HTML/758605.html:1: HTML parser error : htmlParseEntityRef: expecting ';'
ê
^

View File

@ -1,10 +1,11 @@
SAX.setDocumentLocator()
SAX.startDocument()
SAX.error: htmlParseEntityRef: no name
SAX.error: htmlParseEntityRef: expecting ';'
SAX.startElement(html)
SAX.startElement(body)
SAX.startElement(p)
SAX.characters(&amp;, 1)
SAX.characters(&ecirc;, 2)
SAX.ignorableWhitespace(
, 1)
SAX.endElement(p)