parser: Fix parsing of doctype declarations

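Fix some long-standing issues:

- Report an error if whitespace is missing after 'DOCTYPE'
  (e.g. "<!DOCTYPEdoc>").
- Don't consume the closing '>' in xmlParseDocTypeDecl. The callers now
  skip it themselves, so an internal subset is no longer accepted after
  the end of the declaration (e.g. "<!DOCTYPE doc>[]>").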

Fixes #504.
This commit is contained in:
Nick Wellnhofer 2025-02-01 22:02:33 +01:00
parent c13fcc1910
commit b4d3d87ed2
12 changed files with 45 additions and 49 deletions

View File

@ -8238,7 +8238,10 @@ xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
*/
SKIP(9);
SKIP_BLANKS;
if (SKIP_BLANKS == 0) {
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
"Space required after 'DOCTYPE'\n");
}
/*
* Parse the DOCTYPE name.
@ -8272,20 +8275,9 @@ xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
(!ctxt->disableSAX))
ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
/*
* Is there any internal subset declarations ?
* they are handled separately in xmlParseInternalSubset()
*/
if (RAW == '[')
return;
/*
* We should be at the end of the DOCTYPE declaration.
*/
if (RAW != '>') {
if ((RAW != '[') && (RAW != '>')) {
xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
}
NEXT;
}
/**
@ -10782,7 +10774,9 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
xmlParseDocTypeDecl(ctxt);
if (RAW == '[') {
xmlParseInternalSubset(ctxt);
}
} else if (RAW == '>') {
NEXT;
}
/*
* Create and update the external subset.
@ -11508,6 +11502,8 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
if (RAW == '[') {
ctxt->instate = XML_PARSER_DTD;
} else {
if (RAW == '>')
NEXT;
/*
* Create and update the external subset.
*/

View File

@ -1,3 +1,6 @@
./test/errors/759573.xml:1: parser error : Space required after 'DOCTYPE'
<?h?><!DOCTYPEt[<!ELEMENT t (A)><!ENTITY % xx '&#37;<![INCLUDE[000&#37;&#3000;00
^
./test/errors/759573.xml:1: parser error : Space required after '<!ENTITY'
ELEMENT t (A)><!ENTITY % xx '&#37;<![INCLUDE[000&#37;&#3000;000&#37;z;'><!ENTITY
^
@ -7,15 +10,9 @@ LEMENT t (A)><!ENTITY % xx '&#37;<![INCLUDE[000&#37;&#3000;000&#37;z;'><!ENTITYz
./test/errors/759573.xml:1: parser error : Entity value required
LEMENT t (A)><!ENTITY % xx '&#37;<![INCLUDE[000&#37;&#3000;000&#37;z;'><!ENTITYz
^
./test/errors/759573.xml:1: parser error : PEReference: no name
./test/errors/759573.xml:1: parser error : Entity 'xx' not defined
T t (A)><!ENTITY % xx '&#37;<![INCLUDE[000&#37;&#3000;000&#37;z;'><!ENTITYz>%xx;
^
Entity: line 1:
%<![INCLUDE[000%ஸ000%z;
^
./test/errors/759573.xml:1: parser error : Content error in the internal subset
T t (A)><!ENTITY % xx '&#37;<![INCLUDE[000&#37;&#3000;000&#37;z;'><!ENTITYz>%xx;
^
Entity: line 1:
%<![INCLUDE[000%ஸ000%z;
^

View File

@ -1,3 +1,6 @@
./test/errors/759573.xml:1: parser error : Space required after 'DOCTYPE'
<?h?><!DOCTYPEt[<!ELEMENT t (A)><!ENTITY % xx '&#37;<![INCLUDE[000&#37;&#3000;00
^
./test/errors/759573.xml:1: parser error : Space required after '<!ENTITY'
ELEMENT t (A)><!ENTITY % xx '&#37;<![INCLUDE[000&#37;&#3000;000&#37;z;'><!ENTITY
^
@ -7,15 +10,9 @@ LEMENT t (A)><!ENTITY % xx '&#37;<![INCLUDE[000&#37;&#3000;000&#37;z;'><!ENTITYz
./test/errors/759573.xml:1: parser error : Entity value required
LEMENT t (A)><!ENTITY % xx '&#37;<![INCLUDE[000&#37;&#3000;000&#37;z;'><!ENTITYz
^
./test/errors/759573.xml:1: parser error : PEReference: no name
./test/errors/759573.xml:1: parser warning : Entity 'xx' not defined
T t (A)><!ENTITY % xx '&#37;<![INCLUDE[000&#37;&#3000;000&#37;z;'><!ENTITYz>%xx;
^
Entity: line 1:
%<![INCLUDE[000%ஸ000%z;
^
./test/errors/759573.xml:1: parser error : Content error in the internal subset
T t (A)><!ENTITY % xx '&#37;<![INCLUDE[000&#37;&#3000;000&#37;z;'><!ENTITYz>%xx;
^
Entity: line 1:
%<![INCLUDE[000%ஸ000%z;
^

View File

@ -1,22 +1,4 @@
./test/errors/759573.xml:1: parser error : Space required after '<!ENTITY'
ELEMENT t (A)><!ENTITY % xx '&#37;<![INCLUDE[000&#37;&#3000;000&#37;z;'><!ENTITY
^
./test/errors/759573.xml:1: parser error : Space required after the entity name
LEMENT t (A)><!ENTITY % xx '&#37;<![INCLUDE[000&#37;&#3000;000&#37;z;'><!ENTITYz
^
./test/errors/759573.xml:1: parser error : Entity value required
LEMENT t (A)><!ENTITY % xx '&#37;<![INCLUDE[000&#37;&#3000;000&#37;z;'><!ENTITYz
^
./test/errors/759573.xml:1: parser error : PEReference: no name
T t (A)><!ENTITY % xx '&#37;<![INCLUDE[000&#37;&#3000;000&#37;z;'><!ENTITYz>%xx;
^
Entity: line 1:
%<![INCLUDE[000%ஸ000%z;
^
./test/errors/759573.xml:1: parser error : Content error in the internal subset
T t (A)><!ENTITY % xx '&#37;<![INCLUDE[000&#37;&#3000;000&#37;z;'><!ENTITYz>%xx;
^
Entity: line 1:
%<![INCLUDE[000%ஸ000%z;
^
./test/errors/759573.xml:1: parser error : Space required after 'DOCTYPE'
<?h?><!DOCTYPEt[<!ELEMENT t (A)><!ENTITY % xx '&#37;<![INCLUDE[000&#37;&#3000;00
^
./test/errors/759573.xml : failed to parse

View File

@ -0,0 +1,3 @@
./test/errors/doctype1.xml:1: parser error : Start tag expected, '<' not found
<!DOCTYPE doc>[]>
^

View File

@ -0,0 +1,3 @@
./test/errors/doctype1.xml:1: parser error : Start tag expected, '<' not found
<!DOCTYPE doc>[]>
^

View File

@ -0,0 +1,4 @@
./test/errors/doctype1.xml:1: parser error : Start tag expected, '<' not found
<!DOCTYPE doc>[]>
^
./test/errors/doctype1.xml : failed to parse

View File

@ -0,0 +1,3 @@
./test/errors/doctype2.xml:1: parser error : Space required after 'DOCTYPE'
<!DOCTYPEdoc>
^

View File

@ -0,0 +1,3 @@
./test/errors/doctype2.xml:1: parser error : Space required after 'DOCTYPE'
<!DOCTYPEdoc>
^

View File

@ -0,0 +1,4 @@
./test/errors/doctype2.xml:1: parser error : Space required after 'DOCTYPE'
<!DOCTYPEdoc>
^
./test/errors/doctype2.xml : failed to parse

2
test/errors/doctype1.xml Normal file
View File

@ -0,0 +1,2 @@
<!DOCTYPE doc>[]>
<doc/>

2
test/errors/doctype2.xml Normal file
View File

@ -0,0 +1,2 @@
<!DOCTYPEdoc>
<doc/>