Add an HTML parser option to avoid a default doctype

- include/libxml/HTMLparser.h: defines the new HTML parser option
  HTML_PARSE_NODEFDTD
- HTMLparser.c: if the option is set, don't add a default DTD
- xmllint.c: add the corresponding --nodefdtd option in xmllint
This commit is contained in:
Daniel Veillard 2010-07-26 14:02:42 +02:00
parent 2ee91eb658
commit f1121c48af
3 changed files with 14 additions and 1 deletions

View File

@ -4670,7 +4670,7 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
ctxt->sax->endDocument(ctxt->userData);
if (ctxt->myDoc != NULL) {
if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL)) {
dtd = xmlGetIntSubset(ctxt->myDoc);
if (dtd == NULL)
ctxt->myDoc->intSubset =
@ -6530,6 +6530,10 @@ htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
ctxt->options |= XML_PARSE_HUGE;
options -= XML_PARSE_HUGE;
}
if (options & HTML_PARSE_NODEFDTD) {
ctxt->options |= HTML_PARSE_NODEFDTD;
options -= HTML_PARSE_NODEFDTD;
}
ctxt->dictNames = 0;
return (options);
}

View File

@ -177,6 +177,7 @@ XMLPUBFUN void XMLCALL
*/
typedef enum {
HTML_PARSE_RECOVER = 1<<0, /* Relaxed parsing */
HTML_PARSE_NODEFDTD = 1<<2, /* do not default a doctype if not found */
HTML_PARSE_NOERROR = 1<<5, /* suppress error reports */
HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */
HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */

View File

@ -162,6 +162,9 @@ static int html = 0;
static int xmlout = 0;
#endif
static int htmlout = 0;
#if defined(LIBXML_HTML_ENABLED)
static int nodefdtd = 0;
#endif
#ifdef LIBXML_PUSH_ENABLED
static int push = 0;
#endif /* LIBXML_PUSH_ENABLED */
@ -2995,6 +2998,7 @@ static void usage(const char *name) {
#ifdef LIBXML_HTML_ENABLED
printf("\t--html : use the HTML parser\n");
printf("\t--xmlout : force to use the XML serializer when using --html\n");
printf("\t--nodefdtd : do not default HTML doctype\n");
#endif
#ifdef LIBXML_PUSH_ENABLED
printf("\t--push : use the push mode of the parser\n");
@ -3157,6 +3161,10 @@ main(int argc, char **argv) {
else if ((!strcmp(argv[i], "-xmlout")) ||
(!strcmp(argv[i], "--xmlout"))) {
xmlout++;
} else if ((!strcmp(argv[i], "-nodefdtd")) ||
(!strcmp(argv[i], "--nodefdtd"))) {
nodefdtd++;
options |= HTML_PARSE_NODEFDTD;
}
#endif /* LIBXML_HTML_ENABLED */
else if ((!strcmp(argv[i], "-loaddtd")) ||