From 597f1c1f340a2d972ec47a9b4fc53f497da0208a Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Sun, 3 Jul 2005 23:00:18 +0000 Subject: [PATCH] applied patch from James Bursa fixing an html parsing bug in push mode * HTMLparser.c: applied patch from James Bursa fixing an html parsing bug in push mode * result/HTML/repeat.html* test/HTML/repeat.html: added the test to the regression suite Daniel --- ChangeLog | 7 +++++++ HTMLparser.c | 42 ++++++++++++++++++------------------- result/HTML/repeat.html | 5 +++++ result/HTML/repeat.html.err | 0 result/HTML/repeat.html.sax | 14 +++++++++++++ test/HTML/repeat.html | 1 + 6 files changed, 47 insertions(+), 22 deletions(-) create mode 100644 result/HTML/repeat.html create mode 100644 result/HTML/repeat.html.err create mode 100644 result/HTML/repeat.html.sax create mode 100644 test/HTML/repeat.html diff --git a/ChangeLog b/ChangeLog index 5d5ab9f6..dee3d81f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Mon Jul 4 00:58:44 CEST 2005 Daniel Veillard + + * HTMLparser.c: applied patch from James Bursa fixing an html parsing + bug in push mode + * result/HTML/repeat.html* test/HTML/repeat.html: added the test to the + regression suite + Sun Jul 3 23:42:31 CEST 2005 Daniel Veillard * testapi.c tree.c: fixing a leak detected by testapi in diff --git a/HTMLparser.c b/HTMLparser.c index 3e1e75ae..c6115d03 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -3349,9 +3349,10 @@ htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) { * * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' * + * Returns 0 in case of success and -1 in case of error. */ -static void +static int htmlParseStartTag(htmlParserCtxtPtr ctxt) { const xmlChar *name; const xmlChar *attname; @@ -3365,9 +3366,9 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { if ((ctxt == NULL) || (ctxt->input == NULL)) { htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, "htmlParseStartTag: context error\n", NULL, NULL); - return; + return -1; } - if (CUR != '<') return; + if (CUR != '<') return -1; NEXT; GROW; @@ -3379,7 +3380,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { /* Dump the bogus tag like browsers do */ while ((IS_CHAR_CH(CUR)) && (CUR != '>')) NEXT; - return; + return -1; } if (xmlStrEqual(name, BAD_CAST"meta")) meta = 1; @@ -3402,14 +3403,14 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, "htmlParseStartTag: misplaced tag\n", name, NULL); - return; + return 0; } if ((ctxt->nameNr != 1) && (xmlStrEqual(name, BAD_CAST"head"))) { htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, "htmlParseStartTag: misplaced tag\n", name, NULL); - return; + return 0; } if (xmlStrEqual(name, BAD_CAST"body")) { int indx; @@ -3420,7 +3421,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { name, NULL); while ((IS_CHAR_CH(CUR)) && (CUR != '>')) NEXT; - return; + return 0; } } } @@ -3533,6 +3534,8 @@ failed: xmlFree((xmlChar *) atts[i]); } } + + return 0; } /** @@ -3847,16 +3850,15 @@ htmlParseElement(htmlParserCtxtPtr ctxt) { xmlChar *currentNode = NULL; const htmlElemDesc * info; htmlParserNodeInfo node_info; - const xmlChar *oldname; + int failed; int depth; const xmlChar *oldptr; if ((ctxt == NULL) || (ctxt->input == NULL)) { htmlParseErr(ctxt, XML_ERR_INTERNAL_ERROR, - "htmlParseStartTag: context error\n", NULL, NULL); + "htmlParseElement: context error\n", NULL, NULL); return; } - depth = ctxt->nameNr; /* Capture start position */ if (ctxt->record_info) { node_info.begin_pos = ctxt->input->consumed + @@ -3864,11 +3866,9 @@ htmlParseElement(htmlParserCtxtPtr ctxt) { node_info.begin_line = ctxt->input->line; } - oldname = ctxt->name; - htmlParseStartTag(ctxt); + failed = htmlParseStartTag(ctxt); name = ctxt->name; - if (((depth == ctxt->nameNr) && (xmlStrEqual(oldname, ctxt->name))) || - (name == NULL)) { + if (failed || (name == NULL)) { if (CUR == '>') NEXT; return; @@ -4577,11 +4577,11 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { #endif } else { ctxt->instate = XML_PARSER_MISC; - } #ifdef DEBUG_PUSH - xmlGenericError(xmlGenericErrorContext, - "HPP: entering MISC\n"); + xmlGenericError(xmlGenericErrorContext, + "HPP: entering MISC\n"); #endif + } break; case XML_PARSER_MISC: SKIP_BLANKS; @@ -4739,7 +4739,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { break; case XML_PARSER_START_TAG: { const xmlChar *name, *oldname; - int depth = ctxt->nameNr; + int failed; const htmlElemDesc * info; if (avail < 2) @@ -4766,11 +4766,9 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { (htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0)) goto done; - oldname = ctxt->name; - htmlParseStartTag(ctxt); + failed = htmlParseStartTag(ctxt); name = ctxt->name; - if (((depth == ctxt->nameNr) && - (xmlStrEqual(oldname, ctxt->name))) || + if (failed || (name == NULL)) { if (CUR == '>') NEXT; diff --git a/result/HTML/repeat.html b/result/HTML/repeat.html new file mode 100644 index 00000000..550c66f9 --- /dev/null +++ b/result/HTML/repeat.html @@ -0,0 +1,5 @@ + + + + + diff --git a/result/HTML/repeat.html.err b/result/HTML/repeat.html.err new file mode 100644 index 00000000..e69de29b diff --git a/result/HTML/repeat.html.sax b/result/HTML/repeat.html.sax new file mode 100644 index 00000000..2dbf35c1 --- /dev/null +++ b/result/HTML/repeat.html.sax @@ -0,0 +1,14 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.startElement(html) +SAX.startElement(body) +SAX.startElement(td) +SAX.endElement(td) +SAX.startElement(td) +SAX.comment( ) +SAX.ignorableWhitespace( +, 1) +SAX.endElement(td) +SAX.endElement(body) +SAX.endElement(html) +SAX.endDocument() diff --git a/test/HTML/repeat.html b/test/HTML/repeat.html new file mode 100644 index 00000000..d6d6f978 --- /dev/null +++ b/test/HTML/repeat.html @@ -0,0 +1 @@ +