html: HTML5 character data states

This commit is contained in:
Nick Wellnhofer 2024-09-06 17:49:04 +02:00
parent 5951179239
commit f9ed30e972
8 changed files with 463 additions and 381 deletions

File diff suppressed because it is too large Load Diff

View File

@ -67,6 +67,8 @@ struct _htmlElemDesc {
const char** attrs_opt; /* Optional Attributes */
const char** attrs_depr; /* Additional deprecated attributes */
const char** attrs_req; /* Required attributes */
int dataMode;
};
/*

View File

@ -22,7 +22,8 @@
</frameset>
<noframes>
<body bgcolor="#FFFFFF" text="#000000" link="#000080" vlink="#000080" alink="#000080" topmargin="0" leftmargin="0" marginheight="0" marginwidth="0">
<body bgcolor="#FFFFFF" text="#000000" link="#000080" vlink="#000080" alink="#000080"
topmargin="0" leftmargin="0" marginheight="0" marginwidth="0">
<p>This page uses frames, but your browser doesn't support them.</p>
</body>
</noframes>

View File

@ -47,20 +47,9 @@ SAX.startElement(frame, name='rbottom', noresize, src='doc2_files/contents.htm',
SAX.endElement(frame)
SAX.endElement(frameset)
SAX.startElement(noframes)
SAX.characters(
SAX.cdata(
, 4)
SAX.startElement(body, bgcolor='#FFFFFF', text='#000000', link='#000080', vlink='#000080', alink='#000080', topmargin='0', leftmargin='0', marginheight='0', marginwidth='0')
SAX.characters(
, 3)
SAX.startElement(p)
SAX.characters(This page uses frames, but you, 61)
SAX.endElement(p)
SAX.characters(
, 3)
SAX.endElement(body)
SAX.characters(
, 3)
&lt;body bgcolor="#FFFFFF", 238)
SAX.endElement(noframes)
SAX.endElement(frameset)
SAX.endElement(html)

View File

@ -47,7 +47,7 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0,
<td valign="top" width="31"><a href="http://bp6.gamesquad.net/"><img align="bottom" border="0" height="74" src="doc3_files/logo.gif" width="252"></a></td>
<td align="left" bgcolor="#000000">
<img height="15" src="doc3_files/spacer.gif" width="15"><!-- START GAMESQUAD.NET IFRAME RICH MEDIA CODE --> <!-- © 2000 GameSquad.net All Rights Reserved. --><iframe border="0" frameborder="no" height="60" marginheight="0" marginwidth="0" scrolling="no" src="doc3_files/adcycle.htm" width="468">
<a href="http://ads.gamesquad.net/addclick.exe/adclick.cgi?REGION=game|tech|ent&amp;id=1" target="_top"><img src="http://ads.gamesquad.net/addclick.exe/adcycle.cgi?group=52&amp;media=1&amp;id=1" width="468" height="60" border="0" alt="GSN ROS Ad"></a>
<a href="http://ads.gamesquad.net/addclick.exe/adclick.cgi?REGION=game|tech|ent&id=1" target="_top"><img src="http://ads.gamesquad.net/addclick.exe/adcycle.cgi?group=52&media=1&id=1" width=468 height=60 border=0 ALT="GSN ROS Ad"></a>
</iframe><!-- END GAMESQUAD.NET IFRAME RICH MEDIA CODE --><br><img height="15" src="doc3_files/spacer.gif" width="400"> </td>
</tr>
<tr>
@ -454,7 +454,7 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0,
may send me some news to be posted if you find any ( we don't want
to flood Tim ;-) ).<br><br>My e-mail address is <a href="mailto:killz@i82hq.com">killz@i82hq.com</a><br><br>Ciao for
now.<br><br>The ÐÐ.</font><br><br>
</font><center><iframe frameborder="0" height="60" marginheight="0" marginwidth="0" noresize scrolling="no" src="doc3_files/ad_iframe.htm" width="468"><a href="http://ads.adflight.com/go_static.asp?asid=7708" target="_top"><img width="468" height="60" border="0" alt="Advertisement" src="http://ads.adflight.com/ad_static.asp?pid=2097&amp;sid=1881&amp;asid=7708"></a></iframe></center>
</font><center><iframe frameborder="0" height="60" marginheight="0" marginwidth="0" noresize scrolling="no" src="doc3_files/ad_iframe.htm" width="468"><a href="http://ads.adflight.com/go_static.asp?asid=7708" target="_top"><img width=468 height=60 border=0 alt="Advertisement" src="http://ads.adflight.com/ad_static.asp?pid=2097&sid=1881&asid=7708"></a></iframe></center>
</td>
</tr>
</tbody>
@ -512,7 +512,7 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0,
document.write("SRC=http://www.goto.com/d/ssn/dynconsole/?t");
document.write("ype=html&size=100x90&url=http://www.goto.co");
document.write("m/d/search/ssn/&target=_blank&Partner=SSN80");
document.write("42DF8478957377>");
document.write("42DF8478957377></IFRAME>");
} else if ((parseInt(navigator.appVersion) > 3)
&& (navigator.appName == "Netscape")) {
document.write("<SCRIPT language=javascript type=text/javas");
@ -520,14 +520,15 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0,
document.write("SRC=http://www.goto.com/d/ssn/dynconsole/?t");
document.write("ype=js&size=100x90&url=http://www.goto.com/");
document.write("d/search/ssn/&target=_blank&Partner=SSN8042");
document.write("DF8478957377>");
document.write("DF8478957377></SC");
document.write("RIPT>");
} else {
document.write("<A TARGET=_blank ");
document.write("HREF=http://www.goto.com/d/search/ssn/?from");
document.write("GIF=true>");
document.write("<IMG ismap ");
document.write("SRC=http://www.goto.com/d/ssn/dynconsole/?t");
document.write("ype=gif&size=100x90>");
document.write("ype=gif&size=100x90></A>");
}
// -->
</script>

View File

@ -22,15 +22,6 @@ om/ad_static.asp?pid=2097&sid=1881&asid=7708"></a></IFRAME></CENTER></LI></FONT>
./test/HTML/doc3.htm:772: HTML parser error : Unexpected end tag : form
archive</A></FONT> </FORM></CENTER></TD></TR></TBODY></TABLE><!--
^
./test/HTML/doc3.htm:795: HTML parser error : Unexpected end tag : iframe
document.write("42DF8478957377></IFRAME>");
^
./test/HTML/doc3.htm:804: HTML parser error : Unexpected end tag : sc");
document.write("RIPT>");
^
./test/HTML/doc3.htm:811: HTML parser error : Unexpected end tag : a
document.write("ype=gif&size=100x90></A>");
^
./test/HTML/doc3.htm:820: HTML parser error : Unexpected end tag : a
</A></A></B><B></NOSCRIPT></B><B><!-- END GoTo.com Search Box --></B
^

View File

@ -83,14 +83,8 @@ SAX.comment( START GAMESQUAD.NET IFRAME RICH MEDIA CODE )
SAX.characters( , 1)
SAX.comment( © 2000 GameSquad.net All Rights Reserved. )
SAX.startElement(iframe, border='0', frameborder='no', height='60', marginheight='0', marginwidth='0', scrolling='no', src='doc3_files/adcycle.htm', width='468')
SAX.characters(
, 1)
SAX.startElement(a, href='http://ads.gamesquad.net/addclick.exe/adclick.cgi?REGION=game|tech|ent&amp;id=1', target='_top')
SAX.startElement(img, src='http://ads.gamesquad.net/addclick.exe/adcycle.cgi?group=52&amp;media=1&amp;id=1', width='468', height='60', border='0', alt='GSN ROS Ad')
SAX.endElement(img)
SAX.endElement(a)
SAX.characters(
, 1)
SAX.cdata(
&lt;a href="http://ads.gamesq, 235)
SAX.endElement(iframe)
SAX.comment( END GAMESQUAD.NET IFRAME RICH MEDIA CODE )
SAX.startElement(br)
@ -2563,10 +2557,7 @@ SAX.characters(
SAX.endElement(font)
SAX.startElement(center)
SAX.startElement(iframe, frameborder='0', height='60', marginheight='0', marginwidth='0', noresize, scrolling='no', src='doc3_files/ad_iframe.htm', width='468')
SAX.startElement(a, href='http://ads.adflight.com/go_static.asp?asid=7708', target='_top')
SAX.startElement(img, width='468', height='60', border='0', alt='Advertisement', src='http://ads.adflight.com/ad_static.asp?pid=2097&amp;sid=1881&amp;asid=7708')
SAX.endElement(img)
SAX.endElement(a)
SAX.cdata(&lt;a href="http://ads.adfligh, 202)
SAX.endElement(iframe)
SAX.endElement(center)
SAX.error: Unexpected end tag : li
@ -2687,18 +2678,9 @@ SAX.characters(
SAX.startElement(script, language='javascript', type='text/javascript')
SAX.cdata(
&lt;!--
if ((, 532)
SAX.error: Unexpected end tag : iframe
SAX.cdata(");
} else if ((parseI, 463)
SAX.error: Unexpected end tag : sc");
SAX.cdata(");
} else {
d, 328)
SAX.error: Unexpected end tag : a
SAX.cdata(");
}
// --, 37)
if ((, 1000)
SAX.cdata(377&gt;&lt;/SC");
docu, 410)
SAX.endElement(script)
SAX.characters(
, 14)

View File

@ -2024,8 +2024,13 @@ pushBoundaryTest(const char *filename, const char *result,
} else if (isText) {
int c = *ctxt->input->cur;
/* 3 bytes for partial UTF-8 */
max = ((c == '<') || (c == '&')) ? 1 : 3;
if ((options & XML_PARSE_HTML) &&
(ctxt->endCheckState)) {
max = strlen((const char *) ctxt->name) + 2;
} else {
/* 3 bytes for partial UTF-8 */
max = ((c == '<') || (c == '&')) ? 1 : 3;
}
} else if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
/* 2 bytes for terminator, 3 bytes for UTF-8 */
max = 5;