mirror of
https://gitlab.gnome.org/GNOME/libxml2
synced 2025-03-28 21:33:13 +00:00
added support for HTML PIs #156087 added specific tests Daniel
* HTMLparser.c: added support for HTML PIs #156087 * test/HTML/python.html result/HTML/python.html*: added specific tests Daniel
This commit is contained in:
parent
01c3bd53d8
commit
fc484dd0a0
@ -1,3 +1,8 @@
|
||||
Fri Oct 22 16:36:50 CEST 2004 Daniel Veillard <daniel@veillard.com>
|
||||
|
||||
* HTMLparser.c: added support for HTML PIs #156087
|
||||
* test/HTML/python.html result/HTML/python.html*: added specific tests
|
||||
|
||||
Fri Oct 22 15:20:23 CEST 2004 Daniel Veillard <daniel@veillard.com>
|
||||
|
||||
* threads.c: fixed nasty bug #156087
|
||||
|
180
HTMLparser.c
180
HTMLparser.c
@ -2808,6 +2808,117 @@ htmlParseExternalID(htmlParserCtxtPtr ctxt, xmlChar **publicID) {
|
||||
return(URI);
|
||||
}
|
||||
|
||||
/**
|
||||
* xmlParsePI:
|
||||
* @ctxt: an XML parser context
|
||||
*
|
||||
* parse an XML Processing Instruction.
|
||||
*
|
||||
* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
|
||||
*/
|
||||
static void
|
||||
htmlParsePI(htmlParserCtxtPtr ctxt) {
|
||||
xmlChar *buf = NULL;
|
||||
int len = 0;
|
||||
int size = HTML_PARSER_BUFFER_SIZE;
|
||||
int cur, l;
|
||||
const xmlChar *target;
|
||||
xmlParserInputState state;
|
||||
int count = 0;
|
||||
|
||||
if ((RAW == '<') && (NXT(1) == '?')) {
|
||||
state = ctxt->instate;
|
||||
ctxt->instate = XML_PARSER_PI;
|
||||
/*
|
||||
* this is a Processing Instruction.
|
||||
*/
|
||||
SKIP(2);
|
||||
SHRINK;
|
||||
|
||||
/*
|
||||
* Parse the target name and check for special support like
|
||||
* namespace.
|
||||
*/
|
||||
target = htmlParseName(ctxt);
|
||||
if (target != NULL) {
|
||||
if (RAW == '>') {
|
||||
SKIP(1);
|
||||
|
||||
/*
|
||||
* SAX: PI detected.
|
||||
*/
|
||||
if ((ctxt->sax) && (!ctxt->disableSAX) &&
|
||||
(ctxt->sax->processingInstruction != NULL))
|
||||
ctxt->sax->processingInstruction(ctxt->userData,
|
||||
target, NULL);
|
||||
ctxt->instate = state;
|
||||
return;
|
||||
}
|
||||
buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
|
||||
if (buf == NULL) {
|
||||
htmlErrMemory(ctxt, NULL);
|
||||
ctxt->instate = state;
|
||||
return;
|
||||
}
|
||||
cur = CUR;
|
||||
if (!IS_BLANK(cur)) {
|
||||
htmlParseErr(ctxt, XML_ERR_SPACE_REQUIRED,
|
||||
"ParsePI: PI %s space expected\n", target, NULL);
|
||||
}
|
||||
SKIP_BLANKS;
|
||||
cur = CUR_CHAR(l);
|
||||
while (IS_CHAR(cur) && (cur != '>')) {
|
||||
if (len + 5 >= size) {
|
||||
xmlChar *tmp;
|
||||
|
||||
size *= 2;
|
||||
tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
|
||||
if (tmp == NULL) {
|
||||
htmlErrMemory(ctxt, NULL);
|
||||
xmlFree(buf);
|
||||
ctxt->instate = state;
|
||||
return;
|
||||
}
|
||||
buf = tmp;
|
||||
}
|
||||
count++;
|
||||
if (count > 50) {
|
||||
GROW;
|
||||
count = 0;
|
||||
}
|
||||
COPY_BUF(l,buf,len,cur);
|
||||
NEXTL(l);
|
||||
cur = CUR_CHAR(l);
|
||||
if (cur == 0) {
|
||||
SHRINK;
|
||||
GROW;
|
||||
cur = CUR_CHAR(l);
|
||||
}
|
||||
}
|
||||
buf[len] = 0;
|
||||
if (cur != '>') {
|
||||
htmlParseErr(ctxt, XML_ERR_PI_NOT_FINISHED,
|
||||
"ParsePI: PI %s never end ...\n", target, NULL);
|
||||
} else {
|
||||
SKIP(1);
|
||||
|
||||
/*
|
||||
* SAX: PI detected.
|
||||
*/
|
||||
if ((ctxt->sax) && (!ctxt->disableSAX) &&
|
||||
(ctxt->sax->processingInstruction != NULL))
|
||||
ctxt->sax->processingInstruction(ctxt->userData,
|
||||
target, buf);
|
||||
}
|
||||
xmlFree(buf);
|
||||
} else {
|
||||
htmlParseErr(ctxt, XML_ERR_PI_NOT_STARTED,
|
||||
"PI is not started correctly", NULL, NULL);
|
||||
}
|
||||
ctxt->instate = state;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* htmlParseComment:
|
||||
* @ctxt: an HTML parser context
|
||||
@ -3643,14 +3754,21 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
|
||||
}
|
||||
|
||||
/*
|
||||
* Second case : a sub-element.
|
||||
* Second case : a Processing Instruction.
|
||||
*/
|
||||
else if ((CUR == '<') && (NXT(1) == '?')) {
|
||||
htmlParsePI(ctxt);
|
||||
}
|
||||
|
||||
/*
|
||||
* Third case : a sub-element.
|
||||
*/
|
||||
else if (CUR == '<') {
|
||||
htmlParseElement(ctxt);
|
||||
}
|
||||
|
||||
/*
|
||||
* Third case : a reference. If if has not been resolved,
|
||||
* Fourth case : a reference. If if has not been resolved,
|
||||
* parsing returns it's Name, create the node
|
||||
*/
|
||||
else if (CUR == '&') {
|
||||
@ -3658,7 +3776,7 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
|
||||
}
|
||||
|
||||
/*
|
||||
* Fourth : end of the resource
|
||||
* Fifth case : end of the resource
|
||||
*/
|
||||
else if (CUR == 0) {
|
||||
htmlAutoCloseOnEnd(ctxt);
|
||||
@ -3852,11 +3970,13 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
|
||||
|
||||
|
||||
/*
|
||||
* Parse possible comments before any content
|
||||
* Parse possible comments and PIs before any content
|
||||
*/
|
||||
while ((CUR == '<') && (NXT(1) == '!') &&
|
||||
(NXT(2) == '-') && (NXT(3) == '-')) {
|
||||
while (((CUR == '<') && (NXT(1) == '!') &&
|
||||
(NXT(2) == '-') && (NXT(3) == '-')) ||
|
||||
((CUR == '<') && (NXT(1) == '?'))) {
|
||||
htmlParseComment(ctxt);
|
||||
htmlParsePI(ctxt);
|
||||
SKIP_BLANKS;
|
||||
}
|
||||
|
||||
@ -3875,11 +3995,13 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
|
||||
SKIP_BLANKS;
|
||||
|
||||
/*
|
||||
* Parse possible comments before any content
|
||||
* Parse possible comments and PIs before any content
|
||||
*/
|
||||
while ((CUR == '<') && (NXT(1) == '!') &&
|
||||
(NXT(2) == '-') && (NXT(3) == '-')) {
|
||||
while (((CUR == '<') && (NXT(1) == '!') &&
|
||||
(NXT(2) == '-') && (NXT(3) == '-')) ||
|
||||
((CUR == '<') && (NXT(1) == '?'))) {
|
||||
htmlParseComment(ctxt);
|
||||
htmlParsePI(ctxt);
|
||||
SKIP_BLANKS;
|
||||
}
|
||||
|
||||
@ -4444,6 +4566,16 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
||||
#endif
|
||||
htmlParseComment(ctxt);
|
||||
ctxt->instate = XML_PARSER_MISC;
|
||||
} else if ((cur == '<') && (next == '?')) {
|
||||
if ((!terminate) &&
|
||||
(htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
|
||||
goto done;
|
||||
#ifdef DEBUG_PUSH
|
||||
xmlGenericError(xmlGenericErrorContext,
|
||||
"HPP: Parsing PI\n");
|
||||
#endif
|
||||
htmlParsePI(ctxt);
|
||||
ctxt->instate = XML_PARSER_MISC;
|
||||
} else if ((cur == '<') && (next == '!') &&
|
||||
(UPP(2) == 'D') && (UPP(3) == 'O') &&
|
||||
(UPP(4) == 'C') && (UPP(5) == 'T') &&
|
||||
@ -4494,6 +4626,16 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
||||
#endif
|
||||
htmlParseComment(ctxt);
|
||||
ctxt->instate = XML_PARSER_PROLOG;
|
||||
} else if ((cur == '<') && (next == '?')) {
|
||||
if ((!terminate) &&
|
||||
(htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
|
||||
goto done;
|
||||
#ifdef DEBUG_PUSH
|
||||
xmlGenericError(xmlGenericErrorContext,
|
||||
"HPP: Parsing PI\n");
|
||||
#endif
|
||||
htmlParsePI(ctxt);
|
||||
ctxt->instate = XML_PARSER_PROLOG;
|
||||
} else if ((cur == '<') && (next == '!') &&
|
||||
(avail < 4)) {
|
||||
goto done;
|
||||
@ -4531,6 +4673,16 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
||||
#endif
|
||||
htmlParseComment(ctxt);
|
||||
ctxt->instate = XML_PARSER_EPILOG;
|
||||
} else if ((cur == '<') && (next == '?')) {
|
||||
if ((!terminate) &&
|
||||
(htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
|
||||
goto done;
|
||||
#ifdef DEBUG_PUSH
|
||||
xmlGenericError(xmlGenericErrorContext,
|
||||
"HPP: Parsing PI\n");
|
||||
#endif
|
||||
htmlParsePI(ctxt);
|
||||
ctxt->instate = XML_PARSER_EPILOG;
|
||||
} else if ((cur == '<') && (next == '!') &&
|
||||
(avail < 4)) {
|
||||
goto done;
|
||||
@ -4737,6 +4889,16 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
||||
#endif
|
||||
htmlParseComment(ctxt);
|
||||
ctxt->instate = XML_PARSER_CONTENT;
|
||||
} else if ((cur == '<') && (next == '?')) {
|
||||
if ((!terminate) &&
|
||||
(htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
|
||||
goto done;
|
||||
#ifdef DEBUG_PUSH
|
||||
xmlGenericError(xmlGenericErrorContext,
|
||||
"HPP: Parsing PI\n");
|
||||
#endif
|
||||
htmlParsePI(ctxt);
|
||||
ctxt->instate = XML_PARSER_CONTENT;
|
||||
} else if ((cur == '<') && (next == '!') && (avail < 4)) {
|
||||
goto done;
|
||||
} else if ((cur == '<') && (next == '/')) {
|
||||
|
1
SAX.c
1
SAX.c
@ -115,6 +115,7 @@ inithtmlDefaultSAXHandler(xmlSAXHandlerV1 *hdlr)
|
||||
hdlr->characters = xmlSAX2Characters;
|
||||
hdlr->cdataBlock = xmlSAX2CDataBlock;
|
||||
hdlr->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
|
||||
hdlr->processingInstruction = xmlSAX2ProcessingInstruction;
|
||||
hdlr->processingInstruction = NULL;
|
||||
hdlr->comment = xmlSAX2Comment;
|
||||
hdlr->warning = xmlParserWarning;
|
||||
|
2
SAX2.c
2
SAX2.c
@ -2635,7 +2635,7 @@ xmlSAX2InitHtmlDefaultSAXHandler(xmlSAXHandler *hdlr)
|
||||
hdlr->characters = xmlSAX2Characters;
|
||||
hdlr->cdataBlock = xmlSAX2CDataBlock;
|
||||
hdlr->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
|
||||
hdlr->processingInstruction = NULL;
|
||||
hdlr->processingInstruction = xmlSAX2ProcessingInstruction;
|
||||
hdlr->comment = xmlSAX2Comment;
|
||||
hdlr->warning = xmlParserWarning;
|
||||
hdlr->error = xmlParserError;
|
||||
|
@ -430,7 +430,7 @@ xmlSAXHandlerV1 htmlDefaultSAXHandler = {
|
||||
NULL,
|
||||
xmlSAX2Characters,
|
||||
xmlSAX2IgnorableWhitespace,
|
||||
NULL,
|
||||
xmlSAX2ProcessingInstruction,
|
||||
xmlSAX2Comment,
|
||||
xmlParserWarning,
|
||||
xmlParserError,
|
||||
|
5
result/HTML/python.html
Normal file
5
result/HTML/python.html
Normal file
@ -0,0 +1,5 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<?xml-stylesheet href="./css/ht2html.css" type="text/css"?><html>
|
||||
<!-- THIS PAGE IS AUTOMATICALLY GENERATED. DO NOT EDIT. --><head><title>Python Programming Language</title></head>
|
||||
<body></body>
|
||||
</html>
|
0
result/HTML/python.html.err
Normal file
0
result/HTML/python.html.err
Normal file
29
result/HTML/python.html.sax
Normal file
29
result/HTML/python.html.sax
Normal file
@ -0,0 +1,29 @@
|
||||
SAX.setDocumentLocator()
|
||||
SAX.startDocument()
|
||||
SAX.internalSubset(html, -//W3C//DTD HTML 4.01 Transitional//EN, http://www.w3.org/TR/html4/loose.dtd)
|
||||
SAX.processingInstruction(xml-stylesheet, href="./css/ht2html.css" type="text/css"?)
|
||||
SAX.startElement(html)
|
||||
SAX.ignorableWhitespace(
|
||||
, 1)
|
||||
SAX.comment( THIS PAGE IS AUTOMATICALLY GENERATED. DO NOT EDIT. )
|
||||
SAX.ignorableWhitespace(
|
||||
, 1)
|
||||
SAX.startElement(head)
|
||||
SAX.ignorableWhitespace(
|
||||
, 1)
|
||||
SAX.startElement(title)
|
||||
SAX.characters(Python Programming Language, 27)
|
||||
SAX.endElement(title)
|
||||
SAX.ignorableWhitespace(
|
||||
, 1)
|
||||
SAX.endElement(head)
|
||||
SAX.ignorableWhitespace(
|
||||
, 1)
|
||||
SAX.startElement(body)
|
||||
SAX.ignorableWhitespace(
|
||||
, 1)
|
||||
SAX.endElement(body)
|
||||
SAX.endElement(html)
|
||||
SAX.ignorableWhitespace(
|
||||
, 1)
|
||||
SAX.endDocument()
|
10
test/HTML/python.html
Normal file
10
test/HTML/python.html
Normal file
@ -0,0 +1,10 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
|
||||
"http://www.w3.org/TR/html4/loose.dtd" >
|
||||
<?xml-stylesheet href="./css/ht2html.css" type="text/css"?>
|
||||
<html>
|
||||
<!-- THIS PAGE IS AUTOMATICALLY GENERATED. DO NOT EDIT. -->
|
||||
<head>
|
||||
<title>Python Programming Language</title>
|
||||
</head>
|
||||
<body>
|
||||
</body></html>
|
Loading…
x
Reference in New Issue
Block a user