mirror of
https://gitlab.gnome.org/GNOME/libxml2
synced 2025-03-28 21:33:13 +00:00
html: Parse bogus comments instead of ignoring them
Also treat XML processing instructions as bogus comments.
This commit is contained in:
parent
8444017578
commit
3adb396d87
307
HTMLparser.c
307
HTMLparser.c
@ -40,8 +40,6 @@
|
|||||||
|
|
||||||
static int htmlOmittedDefaultValue = 1;
|
static int htmlOmittedDefaultValue = 1;
|
||||||
|
|
||||||
static void htmlParseComment(htmlParserCtxtPtr ctxt);
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
htmlParseElementInternal(htmlParserCtxtPtr ctxt);
|
htmlParseElementInternal(htmlParserCtxtPtr ctxt);
|
||||||
|
|
||||||
@ -2545,23 +2543,6 @@ htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
|
|||||||
|
|
||||||
static const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt);
|
static const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt);
|
||||||
|
|
||||||
static void
|
|
||||||
htmlSkipBogusComment(htmlParserCtxtPtr ctxt) {
|
|
||||||
int c;
|
|
||||||
|
|
||||||
htmlParseErr(ctxt, XML_HTML_INCORRECTLY_OPENED_COMMENT,
|
|
||||||
"Incorrectly opened comment\n", NULL, NULL);
|
|
||||||
|
|
||||||
while (PARSER_STOPPED(ctxt) == 0) {
|
|
||||||
c = CUR;
|
|
||||||
if (c == 0)
|
|
||||||
break;
|
|
||||||
NEXT;
|
|
||||||
if (c == '>')
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* htmlParseHTMLName:
|
* htmlParseHTMLName:
|
||||||
* @ctxt: an HTML parser context
|
* @ctxt: an HTML parser context
|
||||||
@ -3368,147 +3349,27 @@ htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID) {
|
|||||||
return(URI);
|
return(URI);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* htmlParsePI:
|
|
||||||
* @ctxt: an HTML parser context
|
|
||||||
*
|
|
||||||
* Parse an XML Processing Instruction. HTML5 doesn't allow processing
|
|
||||||
* instructions, so this will be removed at some point.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
htmlParsePI(htmlParserCtxtPtr ctxt) {
|
|
||||||
xmlChar *buf = NULL;
|
|
||||||
int len = 0;
|
|
||||||
int size = HTML_PARSER_BUFFER_SIZE;
|
|
||||||
int cur, l;
|
|
||||||
int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
|
|
||||||
XML_MAX_HUGE_LENGTH :
|
|
||||||
XML_MAX_TEXT_LENGTH;
|
|
||||||
const xmlChar *target;
|
|
||||||
xmlParserInputState state;
|
|
||||||
|
|
||||||
if ((RAW == '<') && (NXT(1) == '?')) {
|
|
||||||
state = ctxt->instate;
|
|
||||||
ctxt->instate = XML_PARSER_PI;
|
|
||||||
/*
|
|
||||||
* this is a Processing Instruction.
|
|
||||||
*/
|
|
||||||
SKIP(2);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Parse the target name and check for special support like
|
|
||||||
* namespace.
|
|
||||||
*/
|
|
||||||
target = htmlParseName(ctxt);
|
|
||||||
if (target != NULL) {
|
|
||||||
if (RAW == '>') {
|
|
||||||
SKIP(1);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* SAX: PI detected.
|
|
||||||
*/
|
|
||||||
if ((ctxt->sax) && (!ctxt->disableSAX) &&
|
|
||||||
(ctxt->sax->processingInstruction != NULL))
|
|
||||||
ctxt->sax->processingInstruction(ctxt->userData,
|
|
||||||
target, NULL);
|
|
||||||
goto done;
|
|
||||||
}
|
|
||||||
buf = xmlMalloc(size);
|
|
||||||
if (buf == NULL) {
|
|
||||||
htmlErrMemory(ctxt);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
cur = CUR;
|
|
||||||
if (!IS_BLANK(cur)) {
|
|
||||||
htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
|
|
||||||
"ParsePI: PI %s space expected\n", target, NULL);
|
|
||||||
}
|
|
||||||
SKIP_BLANKS;
|
|
||||||
cur = CUR_CHAR(l);
|
|
||||||
while ((cur != 0) && (cur != '>')) {
|
|
||||||
if (len + 5 >= size) {
|
|
||||||
xmlChar *tmp;
|
|
||||||
|
|
||||||
size *= 2;
|
|
||||||
tmp = (xmlChar *) xmlRealloc(buf, size);
|
|
||||||
if (tmp == NULL) {
|
|
||||||
htmlErrMemory(ctxt);
|
|
||||||
xmlFree(buf);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
buf = tmp;
|
|
||||||
}
|
|
||||||
if (IS_CHAR(cur)) {
|
|
||||||
COPY_BUF(buf,len,cur);
|
|
||||||
} else {
|
|
||||||
htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
|
|
||||||
"Invalid char in processing instruction "
|
|
||||||
"0x%X\n", cur);
|
|
||||||
}
|
|
||||||
if (len > maxLength) {
|
|
||||||
htmlParseErr(ctxt, XML_ERR_PI_NOT_FINISHED,
|
|
||||||
"PI %s too long", target, NULL);
|
|
||||||
xmlFree(buf);
|
|
||||||
goto done;
|
|
||||||
}
|
|
||||||
NEXTL(l);
|
|
||||||
cur = CUR_CHAR(l);
|
|
||||||
}
|
|
||||||
buf[len] = 0;
|
|
||||||
if (cur != '>') {
|
|
||||||
htmlParseErr(ctxt, XML_ERR_PI_NOT_FINISHED,
|
|
||||||
"ParsePI: PI %s never end ...\n", target, NULL);
|
|
||||||
} else {
|
|
||||||
SKIP(1);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* SAX: PI detected.
|
|
||||||
*/
|
|
||||||
if ((ctxt->sax) && (!ctxt->disableSAX) &&
|
|
||||||
(ctxt->sax->processingInstruction != NULL))
|
|
||||||
ctxt->sax->processingInstruction(ctxt->userData,
|
|
||||||
target, buf);
|
|
||||||
}
|
|
||||||
xmlFree(buf);
|
|
||||||
} else {
|
|
||||||
htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED,
|
|
||||||
"PI is not started correctly", NULL, NULL);
|
|
||||||
}
|
|
||||||
|
|
||||||
done:
|
|
||||||
ctxt->instate = state;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* htmlParseComment:
|
* htmlParseComment:
|
||||||
* @ctxt: an HTML parser context
|
* @ctxt: an HTML parser context
|
||||||
|
* @bogus: true if this is a bogus comment
|
||||||
*
|
*
|
||||||
* Parse an HTML comment
|
* Parse an HTML comment
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
htmlParseComment(htmlParserCtxtPtr ctxt) {
|
htmlParseComment(htmlParserCtxtPtr ctxt, int bogus) {
|
||||||
xmlChar *buf = NULL;
|
xmlChar *buf = NULL;
|
||||||
int len;
|
int len;
|
||||||
int size = HTML_PARSER_BUFFER_SIZE;
|
int size = HTML_PARSER_BUFFER_SIZE;
|
||||||
int q, ql;
|
|
||||||
int r, rl;
|
|
||||||
int cur, l;
|
int cur, l;
|
||||||
int next, nl;
|
|
||||||
int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
|
int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
|
||||||
XML_MAX_HUGE_LENGTH :
|
XML_MAX_HUGE_LENGTH :
|
||||||
XML_MAX_TEXT_LENGTH;
|
XML_MAX_TEXT_LENGTH;
|
||||||
xmlParserInputState state;
|
xmlParserInputState state;
|
||||||
|
|
||||||
/*
|
|
||||||
* Check that there is a comment right here.
|
|
||||||
*/
|
|
||||||
if ((RAW != '<') || (NXT(1) != '!') ||
|
|
||||||
(NXT(2) != '-') || (NXT(3) != '-')) return;
|
|
||||||
|
|
||||||
state = ctxt->instate;
|
state = ctxt->instate;
|
||||||
ctxt->instate = XML_PARSER_COMMENT;
|
ctxt->instate = XML_PARSER_COMMENT;
|
||||||
SKIP(4);
|
|
||||||
buf = xmlMalloc(size);
|
buf = xmlMalloc(size);
|
||||||
if (buf == NULL) {
|
if (buf == NULL) {
|
||||||
htmlErrMemory(ctxt);
|
htmlErrMemory(ctxt);
|
||||||
@ -3516,36 +3377,34 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
|
|||||||
}
|
}
|
||||||
len = 0;
|
len = 0;
|
||||||
buf[len] = 0;
|
buf[len] = 0;
|
||||||
q = CUR_CHAR(ql);
|
|
||||||
if (q == 0)
|
|
||||||
goto unfinished;
|
|
||||||
if (q == '>') {
|
|
||||||
htmlParseErr(ctxt, XML_ERR_COMMENT_ABRUPTLY_ENDED, "Comment abruptly ended", NULL, NULL);
|
|
||||||
cur = '>';
|
|
||||||
goto finished;
|
|
||||||
}
|
|
||||||
NEXTL(ql);
|
|
||||||
r = CUR_CHAR(rl);
|
|
||||||
if (r == 0)
|
|
||||||
goto unfinished;
|
|
||||||
if (q == '-' && r == '>') {
|
|
||||||
htmlParseErr(ctxt, XML_ERR_COMMENT_ABRUPTLY_ENDED, "Comment abruptly ended", NULL, NULL);
|
|
||||||
cur = '>';
|
|
||||||
goto finished;
|
|
||||||
}
|
|
||||||
NEXTL(rl);
|
|
||||||
cur = CUR_CHAR(l);
|
|
||||||
while ((cur != 0) &&
|
|
||||||
((cur != '>') ||
|
|
||||||
(r != '-') || (q != '-'))) {
|
|
||||||
NEXTL(l);
|
|
||||||
next = CUR_CHAR(nl);
|
|
||||||
|
|
||||||
if ((q == '-') && (r == '-') && (cur == '!') && (next == '>')) {
|
cur = CUR_CHAR(l);
|
||||||
htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
|
if (!bogus) {
|
||||||
"Comment incorrectly closed by '--!>'", NULL, NULL);
|
if (cur == '>') {
|
||||||
cur = '>';
|
SKIP(1);
|
||||||
break;
|
goto done;
|
||||||
|
} else if ((cur == '-') && (NXT(1) == '>')) {
|
||||||
|
SKIP(2);
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
while (cur != 0) {
|
||||||
|
if (bogus) {
|
||||||
|
if (cur == '>') {
|
||||||
|
SKIP(1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if ((cur == '-') && (NXT(1) == '-')) {
|
||||||
|
if (NXT(2) == '>') {
|
||||||
|
SKIP(3);
|
||||||
|
break;
|
||||||
|
} else if ((NXT(2) == '!') && (NXT(3) == '>')) {
|
||||||
|
SKIP(4);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (len + 5 >= size) {
|
if (len + 5 >= size) {
|
||||||
@ -3556,15 +3415,16 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
|
|||||||
if (tmp == NULL) {
|
if (tmp == NULL) {
|
||||||
xmlFree(buf);
|
xmlFree(buf);
|
||||||
htmlErrMemory(ctxt);
|
htmlErrMemory(ctxt);
|
||||||
|
ctxt->instate = state;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
buf = tmp;
|
buf = tmp;
|
||||||
}
|
}
|
||||||
if (IS_CHAR(q)) {
|
if (IS_CHAR(cur)) {
|
||||||
COPY_BUF(buf,len,q);
|
COPY_BUF(buf,len,cur);
|
||||||
} else {
|
} else {
|
||||||
htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
|
htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
|
||||||
"Invalid char in comment 0x%X\n", q);
|
"Invalid char in comment 0x%X\n", cur);
|
||||||
}
|
}
|
||||||
if (len > maxLength) {
|
if (len > maxLength) {
|
||||||
htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
|
htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
|
||||||
@ -3574,29 +3434,19 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
q = r;
|
NEXTL(l);
|
||||||
ql = rl;
|
cur = CUR_CHAR(l);
|
||||||
r = cur;
|
|
||||||
rl = l;
|
|
||||||
cur = next;
|
|
||||||
l = nl;
|
|
||||||
}
|
|
||||||
finished:
|
|
||||||
buf[len] = 0;
|
|
||||||
if (cur == '>') {
|
|
||||||
SKIP(1);
|
|
||||||
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
|
|
||||||
(!ctxt->disableSAX))
|
|
||||||
ctxt->sax->comment(ctxt->userData, buf);
|
|
||||||
xmlFree(buf);
|
|
||||||
ctxt->instate = state;
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
unfinished:
|
done:
|
||||||
htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
|
buf[len] = 0;
|
||||||
"Comment not terminated \n<!--%.50s\n", buf, NULL);
|
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
|
||||||
|
(!ctxt->disableSAX))
|
||||||
|
ctxt->sax->comment(ctxt->userData, buf);
|
||||||
xmlFree(buf);
|
xmlFree(buf);
|
||||||
|
|
||||||
|
ctxt->instate = state;
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -4294,12 +4144,15 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
|
|||||||
BAD_CAST "DOCTYPE" , NULL);
|
BAD_CAST "DOCTYPE" , NULL);
|
||||||
htmlParseDocTypeDecl(ctxt);
|
htmlParseDocTypeDecl(ctxt);
|
||||||
} else if ((NXT(2) == '-') && (NXT(3) == '-')) {
|
} else if ((NXT(2) == '-') && (NXT(3) == '-')) {
|
||||||
htmlParseComment(ctxt);
|
SKIP(4);
|
||||||
|
htmlParseComment(ctxt, /* bogus */ 0);
|
||||||
} else {
|
} else {
|
||||||
htmlSkipBogusComment(ctxt);
|
SKIP(2);
|
||||||
|
htmlParseComment(ctxt, /* bogus */ 1);
|
||||||
}
|
}
|
||||||
} else if (NXT(1) == '?') {
|
} else if (NXT(1) == '?') {
|
||||||
htmlParsePI(ctxt);
|
SKIP(1);
|
||||||
|
htmlParseComment(ctxt, /* bogus */ 1);
|
||||||
} else if (IS_ASCII_LETTER(NXT(1))) {
|
} else if (IS_ASCII_LETTER(NXT(1))) {
|
||||||
htmlParseElementInternal(ctxt);
|
htmlParseElementInternal(ctxt);
|
||||||
} else {
|
} else {
|
||||||
@ -4551,15 +4404,19 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
|
|||||||
/*
|
/*
|
||||||
* Parse possible comments and PIs before any content
|
* Parse possible comments and PIs before any content
|
||||||
*/
|
*/
|
||||||
while (((CUR == '<') && (NXT(1) == '!') &&
|
while (CUR == '<') {
|
||||||
(NXT(2) == '-') && (NXT(3) == '-')) ||
|
if ((NXT(1) == '!') && (NXT(2) == '-') && (NXT(3) == '-')) {
|
||||||
((CUR == '<') && (NXT(1) == '?'))) {
|
SKIP(4);
|
||||||
htmlParseComment(ctxt);
|
htmlParseComment(ctxt, /* bogus */ 0);
|
||||||
htmlParsePI(ctxt);
|
} else if (NXT(1) == '?') {
|
||||||
|
SKIP(1);
|
||||||
|
htmlParseComment(ctxt, /* bogus */ 1);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
SKIP_BLANKS;
|
SKIP_BLANKS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Then possibly doc type declaration(s) and more Misc
|
* Then possibly doc type declaration(s) and more Misc
|
||||||
* (doctypedecl Misc*)?
|
* (doctypedecl Misc*)?
|
||||||
@ -4576,12 +4433,16 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
|
|||||||
/*
|
/*
|
||||||
* Parse possible comments and PIs before any content
|
* Parse possible comments and PIs before any content
|
||||||
*/
|
*/
|
||||||
while ((PARSER_STOPPED(ctxt) == 0) &&
|
while (CUR == '<') {
|
||||||
(((CUR == '<') && (NXT(1) == '!') &&
|
if ((NXT(1) == '!') && (NXT(2) == '-') && (NXT(3) == '-')) {
|
||||||
(NXT(2) == '-') && (NXT(3) == '-')) ||
|
SKIP(4);
|
||||||
((CUR == '<') && (NXT(1) == '?')))) {
|
htmlParseComment(ctxt, /* bogus */ 0);
|
||||||
htmlParseComment(ctxt);
|
} else if (NXT(1) == '?') {
|
||||||
htmlParsePI(ctxt);
|
SKIP(1);
|
||||||
|
htmlParseComment(ctxt, /* bogus */ 1);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
SKIP_BLANKS;
|
SKIP_BLANKS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5200,13 +5061,15 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
||||||
if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
|
if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
|
||||||
goto done;
|
goto done;
|
||||||
htmlParseComment(ctxt);
|
SKIP(4);
|
||||||
|
htmlParseComment(ctxt, /* bogus */ 0);
|
||||||
ctxt->instate = XML_PARSER_MISC;
|
ctxt->instate = XML_PARSER_MISC;
|
||||||
} else if ((cur == '<') && (next == '?')) {
|
} else if ((cur == '<') && (next == '?')) {
|
||||||
if ((!terminate) &&
|
if ((!terminate) &&
|
||||||
(htmlParseLookupString(ctxt, 2, ">", 1, 0) < 0))
|
(htmlParseLookupString(ctxt, 2, ">", 1, 0) < 0))
|
||||||
goto done;
|
goto done;
|
||||||
htmlParsePI(ctxt);
|
SKIP(1);
|
||||||
|
htmlParseComment(ctxt, /* bogus */ 1);
|
||||||
ctxt->instate = XML_PARSER_MISC;
|
ctxt->instate = XML_PARSER_MISC;
|
||||||
} else if ((cur == '<') && (next == '!') &&
|
} else if ((cur == '<') && (next == '!') &&
|
||||||
(UPP(2) == 'D') && (UPP(3) == 'O') &&
|
(UPP(2) == 'D') && (UPP(3) == 'O') &&
|
||||||
@ -5236,13 +5099,15 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
||||||
if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
|
if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
|
||||||
goto done;
|
goto done;
|
||||||
htmlParseComment(ctxt);
|
SKIP(4);
|
||||||
|
htmlParseComment(ctxt, /* bogus */ 0);
|
||||||
ctxt->instate = XML_PARSER_PROLOG;
|
ctxt->instate = XML_PARSER_PROLOG;
|
||||||
} else if ((cur == '<') && (next == '?')) {
|
} else if ((cur == '<') && (next == '?')) {
|
||||||
if ((!terminate) &&
|
if ((!terminate) &&
|
||||||
(htmlParseLookupString(ctxt, 2, ">", 1, 0) < 0))
|
(htmlParseLookupString(ctxt, 2, ">", 1, 0) < 0))
|
||||||
goto done;
|
goto done;
|
||||||
htmlParsePI(ctxt);
|
SKIP(1);
|
||||||
|
htmlParseComment(ctxt, /* bogus */ 1);
|
||||||
ctxt->instate = XML_PARSER_PROLOG;
|
ctxt->instate = XML_PARSER_PROLOG;
|
||||||
} else if ((cur == '<') && (next == '!') &&
|
} else if ((cur == '<') && (next == '!') &&
|
||||||
(avail < 4)) {
|
(avail < 4)) {
|
||||||
@ -5267,13 +5132,15 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
||||||
if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
|
if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
|
||||||
goto done;
|
goto done;
|
||||||
htmlParseComment(ctxt);
|
SKIP(4);
|
||||||
|
htmlParseComment(ctxt, /* bogus */ 0);
|
||||||
ctxt->instate = XML_PARSER_EPILOG;
|
ctxt->instate = XML_PARSER_EPILOG;
|
||||||
} else if ((cur == '<') && (next == '?')) {
|
} else if ((cur == '<') && (next == '?')) {
|
||||||
if ((!terminate) &&
|
if ((!terminate) &&
|
||||||
(htmlParseLookupString(ctxt, 2, ">", 1, 0) < 0))
|
(htmlParseLookupString(ctxt, 2, ">", 1, 0) < 0))
|
||||||
goto done;
|
goto done;
|
||||||
htmlParsePI(ctxt);
|
SKIP(1);
|
||||||
|
htmlParseComment(ctxt, /* bogus */ 1);
|
||||||
ctxt->instate = XML_PARSER_EPILOG;
|
ctxt->instate = XML_PARSER_EPILOG;
|
||||||
} else if ((cur == '<') && (next == '!') &&
|
} else if ((cur == '<') && (next == '!') &&
|
||||||
(avail < 4)) {
|
(avail < 4)) {
|
||||||
@ -5489,19 +5356,23 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
if ((!terminate) &&
|
if ((!terminate) &&
|
||||||
(htmlParseLookupCommentEnd(ctxt) < 0))
|
(htmlParseLookupCommentEnd(ctxt) < 0))
|
||||||
goto done;
|
goto done;
|
||||||
htmlParseComment(ctxt);
|
SKIP(4);
|
||||||
|
htmlParseComment(ctxt, /* bogus */ 0);
|
||||||
ctxt->instate = XML_PARSER_CONTENT;
|
ctxt->instate = XML_PARSER_CONTENT;
|
||||||
} else {
|
} else {
|
||||||
if ((!terminate) &&
|
if ((!terminate) &&
|
||||||
(htmlParseLookupString(ctxt, 2, ">", 1, 0) < 0))
|
(htmlParseLookupString(ctxt, 2, ">", 1, 0) < 0))
|
||||||
goto done;
|
goto done;
|
||||||
htmlSkipBogusComment(ctxt);
|
SKIP(2);
|
||||||
|
htmlParseComment(ctxt, /* bogus */ 1);
|
||||||
|
ctxt->instate = XML_PARSER_CONTENT;
|
||||||
}
|
}
|
||||||
} else if ((cur == '<') && (next == '?')) {
|
} else if ((cur == '<') && (next == '?')) {
|
||||||
if ((!terminate) &&
|
if ((!terminate) &&
|
||||||
(htmlParseLookupString(ctxt, 2, ">", 1, 0) < 0))
|
(htmlParseLookupString(ctxt, 2, ">", 1, 0) < 0))
|
||||||
goto done;
|
goto done;
|
||||||
htmlParsePI(ctxt);
|
SKIP(1);
|
||||||
|
htmlParseComment(ctxt, /* bogus */ 1);
|
||||||
ctxt->instate = XML_PARSER_CONTENT;
|
ctxt->instate = XML_PARSER_CONTENT;
|
||||||
} else if ((cur == '<') && (next == '/')) {
|
} else if ((cur == '<') && (next == '/')) {
|
||||||
ctxt->instate = XML_PARSER_END_TAG;
|
ctxt->instate = XML_PARSER_END_TAG;
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||||
<html><body><p>“</p></body></html>
|
<!--?a“-->
|
||||||
|
@ -1,3 +0,0 @@
|
|||||||
./test/HTML/758518-tag.html:1: HTML parser error : PI is not started correctly
|
|
||||||
|
|
||||||
^
|
|
@ -1,10 +1,4 @@
|
|||||||
SAX.setDocumentLocator()
|
SAX.setDocumentLocator()
|
||||||
SAX.startDocument()
|
SAX.startDocument()
|
||||||
SAX.error: PI is not started correctlySAX.startElement(html)
|
SAX.comment(?a)
|
||||||
SAX.startElement(body)
|
|
||||||
SAX.startElement(p)
|
|
||||||
SAX.characters(“, 2)
|
|
||||||
SAX.endElement(p)
|
|
||||||
SAX.endElement(body)
|
|
||||||
SAX.endElement(html)
|
|
||||||
SAX.endDocument()
|
SAX.endDocument()
|
||||||
|
@ -1,2 +1,3 @@
|
|||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||||
|
<!--<!doctype
|
||||||
|
-->
|
||||||
|
@ -1,7 +1,3 @@
|
|||||||
./test/HTML/758606.html:1: HTML parser error : Invalid char in comment 0xC
|
./test/HTML/758606.html:1: HTML parser error : Invalid char in comment 0xC
|
||||||
<!--<!doctype
|
<!--<!doctype
|
||||||
^
|
^
|
||||||
./test/HTML/758606.html:2: HTML parser error : Comment not terminated
|
|
||||||
<!--<!doctyp
|
|
||||||
|
|
||||||
^
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
SAX.setDocumentLocator()
|
SAX.setDocumentLocator()
|
||||||
SAX.startDocument()
|
SAX.startDocument()
|
||||||
SAX.error: Invalid char in comment 0xC
|
SAX.error: Invalid char in comment 0xC
|
||||||
SAX.error: Comment not terminated
|
SAX.comment(<!doctype
|
||||||
<!--<!doctyp
|
)
|
||||||
SAX.endDocument()
|
SAX.endDocument()
|
||||||
|
@ -1,2 +1,3 @@
|
|||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||||
|
<!--‘<!dOctYPE
|
||||||
|
-->
|
||||||
|
@ -1,7 +1,3 @@
|
|||||||
./test/HTML/758606_2.html:1: HTML parser error : Invalid char in comment 0xC
|
./test/HTML/758606_2.html:1: HTML parser error : Invalid char in comment 0xC
|
||||||
<!dOctYPE
|
<!--
|
||||||
^
|
^
|
||||||
./test/HTML/758606_2.html:2: HTML parser error : Comment not terminated
|
|
||||||
<!--<!dOctYP
|
|
||||||
|
|
||||||
^
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
SAX.setDocumentLocator()
|
SAX.setDocumentLocator()
|
||||||
SAX.startDocument()
|
SAX.startDocument()
|
||||||
SAX.error: Invalid char in comment 0xC
|
SAX.error: Invalid char in comment 0xC
|
||||||
SAX.error: Comment not terminated
|
SAX.comment(<!dOctYPE
|
||||||
<!--<!dOctYP
|
)
|
||||||
SAX.endDocument()
|
SAX.endDocument()
|
||||||
|
@ -1,3 +0,0 @@
|
|||||||
./test/HTML/comments.html:10: HTML parser error : Comment incorrectly closed by '--!>'
|
|
||||||
<!--incorrectly closed comment--!><span id=under-test>whatwg guidance is
|
|
||||||
^
|
|
@ -24,7 +24,7 @@ SAX.characters(
|
|||||||
SAX.startElement(div)
|
SAX.startElement(div)
|
||||||
SAX.characters(
|
SAX.characters(
|
||||||
, 9)
|
, 9)
|
||||||
SAX.error: Comment incorrectly closed by '--!>'SAX.comment(incorrectly closed comment)
|
SAX.comment(incorrectly closed comment)
|
||||||
SAX.startElement(span, id='under-test')
|
SAX.startElement(span, id='under-test')
|
||||||
SAX.characters(whatwg guidance is that this s, 49)
|
SAX.characters(whatwg guidance is that this s, 49)
|
||||||
SAX.endElement(span)
|
SAX.endElement(span)
|
||||||
|
@ -1,3 +0,0 @@
|
|||||||
./test/HTML/comments2.html:10: HTML parser error : Comment incorrectly closed by '--!>'
|
|
||||||
<!--incorrectly closed comment--!><span id=under-test>whatwg guidance is
|
|
||||||
^
|
|
@ -24,7 +24,7 @@ SAX.characters(
|
|||||||
SAX.startElement(div)
|
SAX.startElement(div)
|
||||||
SAX.characters(
|
SAX.characters(
|
||||||
, 9)
|
, 9)
|
||||||
SAX.error: Comment incorrectly closed by '--!>'SAX.comment(incorrectly closed comment)
|
SAX.comment(incorrectly closed comment)
|
||||||
SAX.startElement(span, id='under-test')
|
SAX.startElement(span, id='under-test')
|
||||||
SAX.characters(whatwg guidance is that this s, 49)
|
SAX.characters(whatwg guidance is that this s, 49)
|
||||||
SAX.endElement(span)
|
SAX.endElement(span)
|
||||||
|
@ -1,6 +0,0 @@
|
|||||||
./test/HTML/comments3.html:10: HTML parser error : Comment abruptly ended
|
|
||||||
<!-->the previous node should be an empty comment, and this should be a
|
|
||||||
^
|
|
||||||
./test/HTML/comments3.html:13: HTML parser error : Comment abruptly ended
|
|
||||||
<!--->the previous node should be an empty comment, and this should be a
|
|
||||||
^
|
|
@ -24,7 +24,7 @@ SAX.characters(
|
|||||||
SAX.startElement(div)
|
SAX.startElement(div)
|
||||||
SAX.characters(
|
SAX.characters(
|
||||||
, 9)
|
, 9)
|
||||||
SAX.error: Comment abruptly endedSAX.comment()
|
SAX.comment()
|
||||||
SAX.characters(the previous node should be an, 86)
|
SAX.characters(the previous node should be an, 86)
|
||||||
SAX.endElement(div)
|
SAX.endElement(div)
|
||||||
SAX.characters(
|
SAX.characters(
|
||||||
@ -32,7 +32,7 @@ SAX.characters(
|
|||||||
SAX.startElement(div)
|
SAX.startElement(div)
|
||||||
SAX.characters(
|
SAX.characters(
|
||||||
, 9)
|
, 9)
|
||||||
SAX.error: Comment abruptly endedSAX.comment()
|
SAX.comment()
|
||||||
SAX.characters(the previous node should be an, 86)
|
SAX.characters(the previous node should be an, 86)
|
||||||
SAX.endElement(div)
|
SAX.endElement(div)
|
||||||
SAX.characters(
|
SAX.characters(
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||||
<html>
|
<html>
|
||||||
<body>
|
<body>
|
||||||
...
|
<!--[if !supportLists]-->...<!--[endif]-->
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
@ -1,6 +0,0 @@
|
|||||||
./test/HTML/issue380.html:3: HTML parser error : Incorrectly opened comment
|
|
||||||
<![if !supportLists]>...<![endif]>
|
|
||||||
^
|
|
||||||
./test/HTML/issue380.html:3: HTML parser error : Incorrectly opened comment
|
|
||||||
<![if !supportLists]>...<![endif]>
|
|
||||||
^
|
|
@ -6,9 +6,9 @@ SAX.characters(
|
|||||||
SAX.startElement(body)
|
SAX.startElement(body)
|
||||||
SAX.characters(
|
SAX.characters(
|
||||||
, 5)
|
, 5)
|
||||||
SAX.error: Incorrectly opened comment
|
SAX.comment([if !supportLists])
|
||||||
SAX.characters(..., 3)
|
SAX.characters(..., 3)
|
||||||
SAX.error: Incorrectly opened comment
|
SAX.comment([endif])
|
||||||
SAX.characters(
|
SAX.characters(
|
||||||
, 3)
|
, 3)
|
||||||
SAX.endElement(body)
|
SAX.endElement(body)
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||||
<?xml-stylesheet href="./css/ht2html.css" type="text/css"?><html>
|
<!--?xml-stylesheet href="./css/ht2html.css" type="text/css"?--><html>
|
||||||
<!-- THIS PAGE IS AUTOMATICALLY GENERATED. DO NOT EDIT. -->
|
<!-- THIS PAGE IS AUTOMATICALLY GENERATED. DO NOT EDIT. -->
|
||||||
<head>
|
<head>
|
||||||
<title>Python Programming Language</title>
|
<title>Python Programming Language</title>
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
SAX.setDocumentLocator()
|
SAX.setDocumentLocator()
|
||||||
SAX.startDocument()
|
SAX.startDocument()
|
||||||
SAX.internalSubset(html, -//W3C//DTD HTML 4.01 Transitional//EN, http://www.w3.org/TR/html4/loose.dtd)
|
SAX.internalSubset(html, -//W3C//DTD HTML 4.01 Transitional//EN, http://www.w3.org/TR/html4/loose.dtd)
|
||||||
SAX.processingInstruction(xml-stylesheet, href="./css/ht2html.css" type="text/css"?)
|
SAX.comment(?xml-stylesheet href="./css/ht2html.css" type="text/css"?)
|
||||||
SAX.startElement(html)
|
SAX.startElement(html)
|
||||||
SAX.characters(
|
SAX.characters(
|
||||||
, 1)
|
, 1)
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||||
<?xml encoding="UTF-8"><html><body>
|
<!--?xml encoding="UTF-8"--><html><body>
|
||||||
<p>öäüß</p>
|
<p>öäüß</p>
|
||||||
</body></html>
|
</body></html>
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
SAX.setDocumentLocator()
|
SAX.setDocumentLocator()
|
||||||
SAX.startDocument()
|
SAX.startDocument()
|
||||||
SAX.processingInstruction(xml, encoding="UTF-8")
|
SAX.comment(?xml encoding="UTF-8")
|
||||||
SAX.startElement(html)
|
SAX.startElement(html)
|
||||||
SAX.startElement(body)
|
SAX.startElement(body)
|
||||||
SAX.startElement(p)
|
SAX.startElement(p)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user