mirror of
https://gitlab.gnome.org/GNOME/libxml2
synced 2025-03-28 21:33:13 +00:00
html: Rework htmlParseLookupSequence
Rename to htmlParseLookupString and use strstr for increased performance.
This commit is contained in:
parent
637215a4de
commit
dc2d498318
122
HTMLparser.c
122
HTMLparser.c
@ -5265,67 +5265,54 @@ htmlParseLookupGt(xmlParserCtxtPtr ctxt) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* htmlParseLookupSequence:
|
* htmlParseLookupString:
|
||||||
* @ctxt: an HTML parser context
|
* @ctxt: an HTML parser context
|
||||||
* @first: the first char to lookup
|
* @startDelta: number of bytes to skip past the current input position when a fresh scan starts (ignored when resuming from ctxt->checkIndex)
|
||||||
* @next: the next char to lookup or zero
|
* @str: NUL-terminated string to search for
|
||||||
* @third: the next char to lookup or zero
|
* @strLen: length of string
|
||||||
*
|
*
|
||||||
* Try to find if a sequence (first, next, third) or just (first next) or
|
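* Check whether the input buffer contains a string.
* Returns the offset of the match relative to the current input position,
* or -1 if the string was not found; in that case ctxt->checkIndex is
* updated so that the next call resumes the scan instead of starting over.
* Returned values are capped at INT_MAX / 2.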
|
||||||
* (first) is available in the input stream.
|
|
||||||
* This function has a side effect of (possibly) incrementing ctxt->checkIndex
|
|
||||||
* to avoid rescanning sequences of bytes, it DOES change the state of the
|
|
||||||
* parser, do not use liberally.
|
|
||||||
* This is basically similar to xmlParseLookupSequence()
|
|
||||||
*
|
|
||||||
* Returns the index to the current parsing point if the full sequence
|
|
||||||
* is available, -1 otherwise.
|
|
||||||
*/
|
*/
|
||||||
static int
|
static int
|
||||||
htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
|
htmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
|
||||||
xmlChar next, xmlChar third) {
|
const char *str, size_t strLen) {
|
||||||
size_t base, len;
|
const xmlChar *cur, *term;
|
||||||
htmlParserInputPtr in;
|
int ret;
|
||||||
const xmlChar *buf;
|
|
||||||
int quote;
|
|
||||||
|
|
||||||
in = ctxt->input;
|
if (ctxt->checkIndex == 0) {
|
||||||
if (in == NULL)
|
cur = ctxt->input->cur + startDelta;
|
||||||
return (-1);
|
} else {
|
||||||
|
cur = ctxt->input->cur + ctxt->checkIndex;
|
||||||
base = ctxt->checkIndex;
|
|
||||||
quote = ctxt->endCheckState;
|
|
||||||
|
|
||||||
buf = in->cur;
|
|
||||||
len = in->end - in->cur;
|
|
||||||
|
|
||||||
/* take into account the sequence length */
|
|
||||||
if (third)
|
|
||||||
len -= 2;
|
|
||||||
else if (next)
|
|
||||||
len--;
|
|
||||||
for (; base < len; base++) {
|
|
||||||
if (base >= INT_MAX / 2) {
|
|
||||||
ctxt->checkIndex = 0;
|
|
||||||
ctxt->endCheckState = 0;
|
|
||||||
return (base - 2);
|
|
||||||
}
|
|
||||||
if (buf[base] == first) {
|
|
||||||
if (third != 0) {
|
|
||||||
if ((buf[base + 1] != next) || (buf[base + 2] != third))
|
|
||||||
continue;
|
|
||||||
} else if (next != 0) {
|
|
||||||
if (buf[base + 1] != next)
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
ctxt->checkIndex = 0;
|
|
||||||
ctxt->endCheckState = 0;
|
|
||||||
return (base);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
ctxt->checkIndex = base;
|
|
||||||
ctxt->endCheckState = quote;
|
term = BAD_CAST strstr((const char *) cur, str);
|
||||||
return (-1);
|
if (term == NULL) {
|
||||||
|
const xmlChar *end = ctxt->input->end;
|
||||||
|
size_t index;
|
||||||
|
|
||||||
|
/* Rescan (strLen - 1) characters. */
|
||||||
|
if ((size_t) (end - cur) < strLen)
|
||||||
|
end = cur;
|
||||||
|
else
|
||||||
|
end -= strLen - 1;
|
||||||
|
index = end - ctxt->input->cur;
|
||||||
|
if (index > INT_MAX / 2) {
|
||||||
|
ctxt->checkIndex = 0;
|
||||||
|
ret = INT_MAX / 2;
|
||||||
|
} else {
|
||||||
|
ctxt->checkIndex = index;
|
||||||
|
ret = -1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
ctxt->checkIndex = 0;
|
||||||
|
|
||||||
|
if (term - ctxt->input->cur > INT_MAX / 2)
|
||||||
|
ret = INT_MAX / 2;
|
||||||
|
else
|
||||||
|
ret = term - ctxt->input->cur;
|
||||||
|
}
|
||||||
|
|
||||||
|
return(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -5338,7 +5325,6 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
|
|||||||
* This function has a side effect of (possibly) incrementing ctxt->checkIndex
|
* This function has a side effect of (possibly) incrementing ctxt->checkIndex
|
||||||
* to avoid rescanning sequences of bytes, it DOES change the state of the
|
* to avoid rescanning sequences of bytes, it DOES change the state of the
|
||||||
* parser, do not use liberally.
|
* parser, do not use liberally.
|
||||||
* This wraps to htmlParseLookupSequence()
|
|
||||||
*
|
*
|
||||||
* Returns the index to the current parsing point if the full sequence is available, -1 otherwise.
|
* Returns the index to the current parsing point if the full sequence is available, -1 otherwise.
|
||||||
*/
|
*/
|
||||||
@ -5349,7 +5335,7 @@ htmlParseLookupCommentEnd(htmlParserCtxtPtr ctxt)
|
|||||||
int offset;
|
int offset;
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
mark = htmlParseLookupSequence(ctxt, '-', '-', 0);
|
mark = htmlParseLookupString(ctxt, 2, "--", 2);
|
||||||
if (mark < 0)
|
if (mark < 0)
|
||||||
break;
|
break;
|
||||||
if ((NXT(mark+2) == '>') ||
|
if ((NXT(mark+2) == '>') ||
|
||||||
@ -5457,7 +5443,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
|
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
|
||||||
(UPP(8) == 'E')) {
|
(UPP(8) == 'E')) {
|
||||||
if ((!terminate) &&
|
if ((!terminate) &&
|
||||||
(htmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
|
(htmlParseLookupString(ctxt, 9, ">", 1) < 0))
|
||||||
goto done;
|
goto done;
|
||||||
htmlParseDocTypeDecl(ctxt);
|
htmlParseDocTypeDecl(ctxt);
|
||||||
ctxt->instate = XML_PARSER_PROLOG;
|
ctxt->instate = XML_PARSER_PROLOG;
|
||||||
@ -5493,7 +5479,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
ctxt->instate = XML_PARSER_MISC;
|
ctxt->instate = XML_PARSER_MISC;
|
||||||
} else if ((cur == '<') && (next == '?')) {
|
} else if ((cur == '<') && (next == '?')) {
|
||||||
if ((!terminate) &&
|
if ((!terminate) &&
|
||||||
(htmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
|
(htmlParseLookupString(ctxt, 2, ">", 1) < 0))
|
||||||
goto done;
|
goto done;
|
||||||
htmlParsePI(ctxt);
|
htmlParsePI(ctxt);
|
||||||
ctxt->instate = XML_PARSER_MISC;
|
ctxt->instate = XML_PARSER_MISC;
|
||||||
@ -5503,7 +5489,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
|
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
|
||||||
(UPP(8) == 'E')) {
|
(UPP(8) == 'E')) {
|
||||||
if ((!terminate) &&
|
if ((!terminate) &&
|
||||||
(htmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
|
(htmlParseLookupString(ctxt, 9, ">", 1) < 0))
|
||||||
goto done;
|
goto done;
|
||||||
htmlParseDocTypeDecl(ctxt);
|
htmlParseDocTypeDecl(ctxt);
|
||||||
ctxt->instate = XML_PARSER_PROLOG;
|
ctxt->instate = XML_PARSER_PROLOG;
|
||||||
@ -5529,7 +5515,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
ctxt->instate = XML_PARSER_PROLOG;
|
ctxt->instate = XML_PARSER_PROLOG;
|
||||||
} else if ((cur == '<') && (next == '?')) {
|
} else if ((cur == '<') && (next == '?')) {
|
||||||
if ((!terminate) &&
|
if ((!terminate) &&
|
||||||
(htmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
|
(htmlParseLookupString(ctxt, 2, ">", 1) < 0))
|
||||||
goto done;
|
goto done;
|
||||||
htmlParsePI(ctxt);
|
htmlParsePI(ctxt);
|
||||||
ctxt->instate = XML_PARSER_PROLOG;
|
ctxt->instate = XML_PARSER_PROLOG;
|
||||||
@ -5560,7 +5546,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
ctxt->instate = XML_PARSER_EPILOG;
|
ctxt->instate = XML_PARSER_EPILOG;
|
||||||
} else if ((cur == '<') && (next == '?')) {
|
} else if ((cur == '<') && (next == '?')) {
|
||||||
if ((!terminate) &&
|
if ((!terminate) &&
|
||||||
(htmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
|
(htmlParseLookupString(ctxt, 2, ">", 1) < 0))
|
||||||
goto done;
|
goto done;
|
||||||
htmlParsePI(ctxt);
|
htmlParsePI(ctxt);
|
||||||
ctxt->instate = XML_PARSER_EPILOG;
|
ctxt->instate = XML_PARSER_EPILOG;
|
||||||
@ -5732,7 +5718,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
int idx;
|
int idx;
|
||||||
xmlChar val;
|
xmlChar val;
|
||||||
|
|
||||||
idx = htmlParseLookupSequence(ctxt, '<', '/', 0);
|
idx = htmlParseLookupString(ctxt, 0, "</", 2);
|
||||||
if (idx < 0)
|
if (idx < 0)
|
||||||
goto done;
|
goto done;
|
||||||
val = in->cur[idx + 2];
|
val = in->cur[idx + 2];
|
||||||
@ -5762,7 +5748,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
|
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
|
||||||
(UPP(8) == 'E')) {
|
(UPP(8) == 'E')) {
|
||||||
if ((!terminate) &&
|
if ((!terminate) &&
|
||||||
(htmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
|
(htmlParseLookupString(ctxt, 9, ">", 1) < 0))
|
||||||
goto done;
|
goto done;
|
||||||
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
||||||
"Misplaced DOCTYPE declaration\n",
|
"Misplaced DOCTYPE declaration\n",
|
||||||
@ -5776,13 +5762,13 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
ctxt->instate = XML_PARSER_CONTENT;
|
ctxt->instate = XML_PARSER_CONTENT;
|
||||||
} else {
|
} else {
|
||||||
if ((!terminate) &&
|
if ((!terminate) &&
|
||||||
(htmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
|
(htmlParseLookupString(ctxt, 2, ">", 1) < 0))
|
||||||
goto done;
|
goto done;
|
||||||
htmlSkipBogusComment(ctxt);
|
htmlSkipBogusComment(ctxt);
|
||||||
}
|
}
|
||||||
} else if ((cur == '<') && (next == '?')) {
|
} else if ((cur == '<') && (next == '?')) {
|
||||||
if ((!terminate) &&
|
if ((!terminate) &&
|
||||||
(htmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
|
(htmlParseLookupString(ctxt, 2, ">", 1) < 0))
|
||||||
goto done;
|
goto done;
|
||||||
htmlParsePI(ctxt);
|
htmlParsePI(ctxt);
|
||||||
ctxt->instate = XML_PARSER_CONTENT;
|
ctxt->instate = XML_PARSER_CONTENT;
|
||||||
@ -5810,7 +5796,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
* data detection.
|
* data detection.
|
||||||
*/
|
*/
|
||||||
if ((!terminate) &&
|
if ((!terminate) &&
|
||||||
(htmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
|
(htmlParseLookupString(ctxt, 0, "<", 1) < 0))
|
||||||
goto done;
|
goto done;
|
||||||
ctxt->checkIndex = 0;
|
ctxt->checkIndex = 0;
|
||||||
while ((PARSER_STOPPED(ctxt) == 0) &&
|
while ((PARSER_STOPPED(ctxt) == 0) &&
|
||||||
|
Loading…
x
Reference in New Issue
Block a user