diff --git a/HTMLparser.c b/HTMLparser.c index bf9a7882..db783d6d 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -5265,67 +5265,54 @@ htmlParseLookupGt(xmlParserCtxtPtr ctxt) { } /** - * htmlParseLookupSequence: - * @ctxt: an HTML parser context - * @first: the first char to lookup - * @next: the next char to lookup or zero - * @third: the next char to lookup or zero + * htmlParseLookupString: + * @ctxt: an XML parser context + * @startDelta: delta to apply at the start + * @str: string + * @strLen: length of string * - * Try to find if a sequence (first, next, third) or just (first next) or - * (first) is available in the input stream. - * This function has a side effect of (possibly) incrementing ctxt->checkIndex - * to avoid rescanning sequences of bytes, it DOES change the state of the - * parser, do not use liberally. - * This is basically similar to xmlParseLookupSequence() - * - * Returns the index to the current parsing point if the full sequence - * is available, -1 otherwise. + * Check whether the input buffer contains a string. */ static int -htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, - xmlChar next, xmlChar third) { - size_t base, len; - htmlParserInputPtr in; - const xmlChar *buf; - int quote; +htmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta, + const char *str, size_t strLen) { + const xmlChar *cur, *term; + int ret; - in = ctxt->input; - if (in == NULL) - return (-1); - - base = ctxt->checkIndex; - quote = ctxt->endCheckState; - - buf = in->cur; - len = in->end - in->cur; - - /* take into account the sequence length */ - if (third) - len -= 2; - else if (next) - len--; - for (; base < len; base++) { - if (base >= INT_MAX / 2) { - ctxt->checkIndex = 0; - ctxt->endCheckState = 0; - return (base - 2); - } - if (buf[base] == first) { - if (third != 0) { - if ((buf[base + 1] != next) || (buf[base + 2] != third)) - continue; - } else if (next != 0) { - if (buf[base + 1] != next) - continue; - } - ctxt->checkIndex = 0; - ctxt->endCheckState = 0; - return (base); - } + if (ctxt->checkIndex == 0) { + cur = ctxt->input->cur + startDelta; + } else { + cur = ctxt->input->cur + ctxt->checkIndex; } - ctxt->checkIndex = base; - ctxt->endCheckState = quote; - return (-1); + + term = BAD_CAST strstr((const char *) cur, str); + if (term == NULL) { + const xmlChar *end = ctxt->input->end; + size_t index; + + /* Rescan (strLen - 1) characters. */ + if ((size_t) (end - cur) < strLen) + end = cur; + else + end -= strLen - 1; + index = end - ctxt->input->cur; + if (index > INT_MAX / 2) { + ctxt->checkIndex = 0; + ret = INT_MAX / 2; + } else { + ctxt->checkIndex = index; + ret = -1; + } + } else { + ctxt->checkIndex = 0; + + if (term - ctxt->input->cur > INT_MAX / 2) + ret = INT_MAX / 2; + else + ret = term - ctxt->input->cur; + } + + return(ret); } /** @@ -5338,7 +5325,6 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, * This function has a side effect of (possibly) incrementing ctxt->checkIndex * to avoid rescanning sequences of bytes, it DOES change the state of the * parser, do not use liberally. - * This wraps to htmlParseLookupSequence() * * Returns the index to the current parsing point if the full sequence is available, -1 otherwise. */ @@ -5349,7 +5335,7 @@ htmlParseLookupCommentEnd(htmlParserCtxtPtr ctxt) int offset; while (1) { - mark = htmlParseLookupSequence(ctxt, '-', '-', 0); + mark = htmlParseLookupString(ctxt, 2, "--", 2); if (mark < 0) break; if ((NXT(mark+2) == '>') || @@ -5457,7 +5443,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { (UPP(6) == 'Y') && (UPP(7) == 'P') && (UPP(8) == 'E')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) + (htmlParseLookupString(ctxt, 9, ">", 1) < 0)) goto done; htmlParseDocTypeDecl(ctxt); ctxt->instate = XML_PARSER_PROLOG; @@ -5493,7 +5479,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { ctxt->instate = XML_PARSER_MISC; } else if ((cur == '<') && (next == '?')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) + (htmlParseLookupString(ctxt, 2, ">", 1) < 0)) goto done; htmlParsePI(ctxt); ctxt->instate = XML_PARSER_MISC; @@ -5503,7 +5489,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { (UPP(6) == 'Y') && (UPP(7) == 'P') && (UPP(8) == 'E')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) + (htmlParseLookupString(ctxt, 9, ">", 1) < 0)) goto done; htmlParseDocTypeDecl(ctxt); ctxt->instate = XML_PARSER_PROLOG; @@ -5529,7 +5515,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { ctxt->instate = XML_PARSER_PROLOG; } else if ((cur == '<') && (next == '?')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) + (htmlParseLookupString(ctxt, 2, ">", 1) < 0)) goto done; htmlParsePI(ctxt); ctxt->instate = XML_PARSER_PROLOG; @@ -5560,7 +5546,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { ctxt->instate = XML_PARSER_EPILOG; } else if ((cur == '<') && (next == '?')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) + (htmlParseLookupString(ctxt, 2, ">", 1) < 0)) goto done; htmlParsePI(ctxt); ctxt->instate = XML_PARSER_EPILOG; @@ -5732,7 +5718,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { int idx; xmlChar val; - idx = htmlParseLookupSequence(ctxt, '<', '/', 0); + idx = htmlParseLookupString(ctxt, 0, "cur[idx + 2]; @@ -5762,7 +5748,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { (UPP(6) == 'Y') && (UPP(7) == 'P') && (UPP(8) == 'E')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) + (htmlParseLookupString(ctxt, 9, ">", 1) < 0)) goto done; htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR, "Misplaced DOCTYPE declaration\n", @@ -5776,13 +5762,13 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { ctxt->instate = XML_PARSER_CONTENT; } else { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) + (htmlParseLookupString(ctxt, 2, ">", 1) < 0)) goto done; htmlSkipBogusComment(ctxt); } } else if ((cur == '<') && (next == '?')) { if ((!terminate) && - (htmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) + (htmlParseLookupString(ctxt, 2, ">", 1) < 0)) goto done; htmlParsePI(ctxt); ctxt->instate = XML_PARSER_CONTENT; @@ -5810,7 +5796,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { * data detection. */ if ((!terminate) && - (htmlParseLookupSequence(ctxt, '<', 0, 0) < 0)) + (htmlParseLookupString(ctxt, 0, "<", 1) < 0)) goto done; ctxt->checkIndex = 0; while ((PARSER_STOPPED(ctxt) == 0) &&