From 7c1206fc060fd4260f7c6f003c32fbbea3e542fd Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Thu, 14 Oct 1999 09:10:25 +0000 Subject: [PATCH] Revamped HTML parsing, lots of bug fixes for HTML stuff, Added xmlValidGetValidElements and xmlValidGetPotentialChildren, Completed and cleaned up the tests, Added doc for new modules gnome-xml-xmlmemory.html and gnome-xml-nanohttp.html, Daniel --- ChangeLog | 17 + HTMLparser.c | 293 +++--- HTMLtree.c | 4 +- Makefile.am | 6 +- SAX.c | 2 +- SAXresult/cdata | 9 + SAXresult/comment.xml | 17 + SAXresult/comment2.xml | 13 + SAXresult/dtd12 | 16 + SAXresult/ent6 | 16 + SAXresult/ent7 | 21 + SAXresult/eve.xml | 10 + SAXresult/ns | 11 + SAXresult/ns2 | 5 + SAXresult/ns3 | 5 + SAXresult/ns4 | 5 + SAXresult/pi.xml | 17 + SAXresult/pi2.xml | 13 + debugXML.c | 4 + doc/html/gnome-xml-nanohttp.html | 945 ++++++++++++++++++ doc/html/gnome-xml-xmlmemory.html | 1277 +++++++++++++++++++++++++ entities.c | 12 +- include/libxml/tree.h | 3 +- include/libxml/valid.h | 8 + result/HTML/Down.html.err | 0 result/HTML/fp40.htm | 6 +- result/HTML/fp40.htm.err | 3 + result/HTML/liclose.html | 14 + result/HTML/liclose.html.err | 0 result/HTML/reg1.html | 12 + result/HTML/reg1.html.err | 0 result/HTML/reg2.html | 15 + result/HTML/reg2.html.err | 0 result/HTML/reg3.html | 16 + result/HTML/reg3.html.err | 0 result/HTML/reg4.html | 13 + result/HTML/reg4.html.err | 3 + result/HTML/test2.html.err | 0 result/HTML/test3.html.err | 12 + result/XPath/expr/compare | 24 + result/XPath/expr/equality | 24 + result/comment.xml | 6 + result/comment2.xml | 6 + result/noent/comment.xml | 6 + result/noent/comment2.xml | 6 + result/noent/ns | 4 + result/noent/ns2 | 2 + result/noent/ns3 | 2 + result/noent/ns4 | 2 + result/noent/pi.xml | 6 + result/noent/pi2.xml | 6 + result/ns | 4 + result/ns2 | 2 + result/ns3 | 2 + result/ns4 | 2 + result/pi.xml | 6 + result/pi2.xml | 6 + result/valid/REC-xml-19980210.xml.err | 0 result/valid/dia.xml.err | 0 result/valid/xlink.xml.err 
| 6 + test/HTML/liclose.html | 13 + test/HTML/reg1.html | 10 + test/HTML/reg2.html | 12 + test/HTML/reg3.html | 13 + test/HTML/reg4.html | 12 + test/XPath/docs/id | 28 + test/XPath/expr/compare | 24 + test/XPath/expr/equality | 25 + test/comment.xml | 6 + test/comment2.xml | 6 + test/ns | 4 + test/ns2 | 3 + test/ns3 | 3 + test/ns4 | 2 + test/pi.xml | 6 + test/pi2.xml | 6 + testHTML.c | 577 ++++++++++- testXPath.c | 3 +- tester.c | 36 +- tree.c | 4 + tree.h | 3 +- valid.c | 168 ++++ valid.h | 8 + xmlmemory.c | 43 +- xpath.c | 6 +- 85 files changed, 3822 insertions(+), 144 deletions(-) create mode 100644 SAXresult/cdata create mode 100644 SAXresult/comment.xml create mode 100644 SAXresult/comment2.xml create mode 100644 SAXresult/dtd12 create mode 100644 SAXresult/ent6 create mode 100644 SAXresult/ent7 create mode 100644 SAXresult/eve.xml create mode 100644 SAXresult/ns create mode 100644 SAXresult/ns2 create mode 100644 SAXresult/ns3 create mode 100644 SAXresult/ns4 create mode 100644 SAXresult/pi.xml create mode 100644 SAXresult/pi2.xml create mode 100644 doc/html/gnome-xml-nanohttp.html create mode 100644 doc/html/gnome-xml-xmlmemory.html create mode 100644 result/HTML/Down.html.err create mode 100644 result/HTML/fp40.htm.err create mode 100644 result/HTML/liclose.html create mode 100644 result/HTML/liclose.html.err create mode 100644 result/HTML/reg1.html create mode 100644 result/HTML/reg1.html.err create mode 100644 result/HTML/reg2.html create mode 100644 result/HTML/reg2.html.err create mode 100644 result/HTML/reg3.html create mode 100644 result/HTML/reg3.html.err create mode 100644 result/HTML/reg4.html create mode 100644 result/HTML/reg4.html.err create mode 100644 result/HTML/test2.html.err create mode 100644 result/HTML/test3.html.err create mode 100644 result/XPath/expr/compare create mode 100644 result/XPath/expr/equality create mode 100644 result/comment.xml create mode 100644 result/comment2.xml create mode 100644 result/noent/comment.xml create mode 
100644 result/noent/comment2.xml create mode 100644 result/noent/ns create mode 100644 result/noent/ns2 create mode 100644 result/noent/ns3 create mode 100644 result/noent/ns4 create mode 100644 result/noent/pi.xml create mode 100644 result/noent/pi2.xml create mode 100644 result/ns create mode 100644 result/ns2 create mode 100644 result/ns3 create mode 100644 result/ns4 create mode 100644 result/pi.xml create mode 100644 result/pi2.xml create mode 100644 result/valid/REC-xml-19980210.xml.err create mode 100644 result/valid/dia.xml.err create mode 100644 result/valid/xlink.xml.err create mode 100644 test/HTML/liclose.html create mode 100644 test/HTML/reg1.html create mode 100644 test/HTML/reg2.html create mode 100644 test/HTML/reg3.html create mode 100644 test/HTML/reg4.html create mode 100644 test/XPath/docs/id create mode 100644 test/XPath/expr/compare create mode 100644 test/XPath/expr/equality create mode 100644 test/comment.xml create mode 100644 test/comment2.xml create mode 100644 test/ns create mode 100644 test/ns2 create mode 100644 test/ns3 create mode 100644 test/ns4 create mode 100644 test/pi.xml create mode 100644 test/pi2.xml diff --git a/ChangeLog b/ChangeLog index 0de0634f..6649f2b7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +Thu Oct 14 10:29:56 CEST 1999 Daniel Veillard + + * HTMLparser.c, HTMLtree.c, tree.h: completely revamped the + HTMLparser and debugged the HTML related code. HTML documents + now have their own type + * entities.c: do not dump &apos; for HTML output + * xmlmemory.c: improvement, breakpoint mechanism + * testHTML.c: added --sax --repeat ... + * Makefile.am: improved the HTML tests + * valid.[ch]: added xmlValidGetValidElements and + xmlValidGetPotentialChildren + * tester.c: added --insert to test the 2 new functions + * test//* result//* SAXresult//* : regression test cleanup + and extension. 
+ * doc/html : added doc for new modules gnome-xml-xmlmemory.html and + gnome-xml-nanohttp.html + Mon Oct 11 14:31:58 CEST 1999 Daniel Veillard * HTMLparser.c: fixed problems with some autoclose tags diff --git a/HTMLparser.c b/HTMLparser.c index 3c274732..33b42b7b 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -75,8 +75,9 @@ int html##name##Push(htmlParserCtxtPtr ctxt, type value) { \ } \ type html##name##Pop(htmlParserCtxtPtr ctxt) { \ type ret; \ - if (ctxt->name##Nr <= 0) return(0); \ + if (ctxt->name##Nr < 0) return(0); \ ctxt->name##Nr--; \ + if (ctxt->name##Nr < 0) return(0); \ if (ctxt->name##Nr > 0) \ ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \ else \ @@ -445,17 +446,21 @@ htmlCheckAutoClose(const xmlChar *new, const xmlChar *old) { void htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar *new) { xmlChar *oldname; - while ((ctxt->name != NULL) && (htmlCheckAutoClose(new, ctxt->name))) { #ifdef DEBUG - printf("htmlAutoClose: %s closes %s\n", new, ctxt->name); + fprintf(stderr,"htmlAutoClose: %s closes %s\n", new, ctxt->name); #endif if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) ctxt->sax->endElement(ctxt->userData, ctxt->name); oldname = ctxt->name; htmlnamePop(ctxt); - xmlFree(oldname); + if (oldname != NULL) { +#ifdef DEBUG + fprintf(stderr,"htmlAutoClose: popped %s\n", oldname); +#endif + xmlFree(oldname); + } } } @@ -470,21 +475,42 @@ void htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *new) { htmlElemDescPtr info; xmlChar *oldname; + int i; - while ((ctxt->name != NULL) && - (xmlStrcmp(new, ctxt->name))) { +#ifdef DEBUG + fprintf(stderr,"Close of %s stack: %d elements\n", new, ctxt->nameNr); + for (i = 0;i < ctxt->nameNr;i++) + fprintf(stderr,"%d : %s\n", i, ctxt->nameTab[i]); +#endif + + for (i = (ctxt->nameNr - 1);i >= 0;i--) { + if (!xmlStrcmp(new, ctxt->nameTab[i])) break; + } + if (i < 0) return; + + while (xmlStrcmp(new, ctxt->name)) { info = htmlTagLookup(ctxt->name); if ((info == NULL) || (info->endTag == 1)) 
{ #ifdef DEBUG - printf("htmlAutoCloseOnClose: %s closes %s\n", new, ctxt->name); + fprintf(stderr,"htmlAutoCloseOnClose: %s closes %s\n", new, ctxt->name); +#endif + } else { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "Opening and ending tag mismatch: %s and %s\n", + new, ctxt->name); + ctxt->wellFormed = 0; + } + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, ctxt->name); + oldname = ctxt->name; + htmlnamePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG + fprintf(stderr,"htmlAutoCloseOnClose: popped %s\n", oldname); #endif - if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) - ctxt->sax->endElement(ctxt->userData, ctxt->name); - oldname = ctxt->name; - htmlnamePop(ctxt); xmlFree(oldname); - } else - break; + } } } @@ -808,7 +834,7 @@ htmlEntityLookup(const xmlChar *name) { sizeof(html40EntitiesTable[0]));i++) { if (!xmlStrcmp(name, BAD_CAST html40EntitiesTable[i].name)) { #ifdef DEBUG - printf("Found entity %s\n", name); + fprintf(stderr,"Found entity %s\n", name); #endif return(&html40EntitiesTable[i]); } @@ -827,9 +853,7 @@ htmlEntityLookup(const xmlChar *name) { * * Subtitute the HTML entities by their value * - * TODO: once the internal representation will be UTF-8, all entities - * will be substituable, in the meantime we only apply the substitution - * to the one with values in the 0-255 UNICODE range + * DEPRECATED !!!! * * Returns A newly allocated string with the substitution done. The caller * must deallocate it ! 
@@ -1138,7 +1162,7 @@ htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) { } memset(cur, 0, sizeof(xmlDoc)); - cur->type = XML_DOCUMENT_NODE; + cur->type = XML_HTML_DOCUMENT_NODE; cur->version = NULL; cur->intSubset = NULL; xmlCreateIntSubset(cur, BAD_CAST "HTML", ExternalID, URI); @@ -1248,28 +1272,29 @@ htmlParseName(htmlParserCtxtPtr ctxt) { /** * htmlParseHTMLAttribute: * @ctxt: an HTML parser context + * @stop: a char stop value * - * parse an HTML attribute value (without quotes). + * parse an HTML attribute value till the stop (quote), if + * stop is 0 then it stops at the first space * - * Returns the Nmtoken parsed or NULL + * Returns the attribute parsed or NULL */ xmlChar * -htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt) { +htmlParseHTMLAttribute(htmlParserCtxtPtr ctxt, const xmlChar stop) { xmlChar buf[HTML_MAX_NAMELEN]; int len = 0; GROW; - while ((!IS_BLANK(CUR)) && (CUR != '<') && - (CUR != '&') && (CUR != '>') && - (CUR != '\'') && (CUR != '"')) { + while ((CUR != 0) && (CUR != stop) && (CUR != '>')) { + if ((stop == 0) && (IS_BLANK(CUR))) break; buf[len++] = CUR; NEXT; if (len >= HTML_MAX_NAMELEN) { fprintf(stderr, "htmlParseHTMLAttribute: reached HTML_MAX_NAMELEN limit\n"); while ((!IS_BLANK(CUR)) && (CUR != '<') && - (CUR != '&') && (CUR != '>') && + (CUR != '>') && (CUR != '\'') && (CUR != '"')) NEXT; break; @@ -1384,13 +1409,7 @@ htmlParseAttValue(htmlParserCtxtPtr ctxt) { if (CUR == '"') { NEXT; - ret = htmlDecodeEntities(ctxt, -1, '"', '<', 0); - if (CUR == '<') { - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "Unescaped '<' not allowed in attributes values\n"); - ctxt->wellFormed = 0; - } + ret = htmlParseHTMLAttribute(ctxt, '"'); if (CUR != '"') { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); @@ -1399,13 +1418,7 @@ htmlParseAttValue(htmlParserCtxtPtr ctxt) { NEXT; } else if (CUR == '\'') { NEXT; - ret = 
htmlDecodeEntities(ctxt, -1, '\'', '<', 0); - if (CUR == '<') { - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "Unescaped '<' not allowed in attributes values\n"); - ctxt->wellFormed = 0; - } + ret = htmlParseHTMLAttribute(ctxt, '\''); if (CUR != '\'') { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n"); @@ -1416,14 +1429,13 @@ htmlParseAttValue(htmlParserCtxtPtr ctxt) { /* * That's an HTMLism, the attribute value may not be quoted */ - ret = htmlParseHTMLAttribute(ctxt); + ret = htmlParseHTMLAttribute(ctxt, 0); if (ret == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "AttValue: no value found\n"); ctxt->wellFormed = 0; } } - return(ret); } @@ -1912,10 +1924,9 @@ htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) { * * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>' * - * Returns the element name parsed */ -xmlChar * +void htmlParseStartTag(htmlParserCtxtPtr ctxt) { xmlChar *name; xmlChar *attname; @@ -1925,16 +1936,17 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { int maxatts = 0; int i; - if (CUR != '<') return(NULL); + if (CUR != '<') return; NEXT; + GROW; name = htmlParseHTMLName(ctxt); if (name == NULL) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "htmlParseStartTag: invalid element name\n"); ctxt->wellFormed = 0; - return(NULL); + return; } /* @@ -1952,7 +1964,9 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { (CUR != '>') && ((CUR != '/') || (NXT(1) != '>'))) { const xmlChar *q = CUR_PTR; + int cons = ctxt->input->consumed; + GROW; attname = htmlParseAttribute(ctxt, &attvalue); if ((attname != NULL) && (attvalue != NULL)) { /* @@ -1961,12 +1975,13 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { for (i = 0; i < nbatts;i += 2) { if (!xmlStrcmp(atts[i], attname)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - 
ctxt->sax->error(ctxt->userData, "Attribute %s redefined\n", - name); + ctxt->sax->error(ctxt->userData, + "Attribute %s redefined\n", + attname); ctxt->wellFormed = 0; xmlFree(attname); xmlFree(attvalue); - break; + goto failed; } } @@ -1979,7 +1994,8 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { if (atts == NULL) { fprintf(stderr, "malloc of %ld byte failed\n", maxatts * (long)sizeof(xmlChar *)); - return(NULL); + if (name != NULL) xmlFree(name); + return; } } else if (nbatts + 2 < maxatts) { maxatts *= 2; @@ -1987,7 +2003,8 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { if (atts == NULL) { fprintf(stderr, "realloc of %ld byte failed\n", maxatts * (long)sizeof(xmlChar *)); - return(NULL); + if (name != NULL) xmlFree(name); + return; } } atts[nbatts++] = attname; @@ -1996,8 +2013,9 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { atts[nbatts + 1] = NULL; } +failed: SKIP_BLANKS; - if (q == CUR_PTR) { + if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, "htmlParseStartTag: problem parsing attributes\n"); @@ -2010,6 +2028,9 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { * SAX: Start of Element ! */ htmlnamePush(ctxt, xmlStrdup(name)); +#ifdef DEBUG + fprintf(stderr,"Start of element %s: pushed %s\n", name, ctxt->name); +#endif if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) ctxt->sax->startElement(ctxt->userData, name, atts); @@ -2017,13 +2038,12 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]); xmlFree(atts); } - return(name); + if (name != NULL) xmlFree(name); } /** * htmlParseEndTag: * @ctxt: an HTML parser context - * @tagname: the tag name as parsed in the opening tag. 
* * parse an end of tag * @@ -2035,7 +2055,7 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { */ void -htmlParseEndTag(htmlParserCtxtPtr ctxt, const xmlChar *tagname) { +htmlParseEndTag(htmlParserCtxtPtr ctxt) { xmlChar *name; xmlChar *oldname; int i; @@ -2062,27 +2082,26 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt, const xmlChar *tagname) { NEXT; /* - * Check that we are not closing an already closed tag, - *

...

is a really common error ! + * If the name read is not one of the element in the parsing stack + * then return, it's just an error. */ - for (i = ctxt->nameNr - 1;i >= 0;i--) { - if ((ctxt->nameTab[i] != NULL) && - (!xmlStrcmp(tagname, ctxt->nameTab[i]))) - break; + for (i = (ctxt->nameNr - 1);i >= 0;i--) { + if (!xmlStrcmp(name, ctxt->nameTab[i])) break; } if (i < 0) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "htmlParseEndTag: unexpected close for tag %s\n", - tagname); + ctxt->sax->error(ctxt->userData, + "Unexpected end tag : %s\n", name); xmlFree(name); ctxt->wellFormed = 0; return; } + /* * Check for auto-closure of HTML elements. */ + htmlAutoCloseOnClose(ctxt, name); /* @@ -2090,7 +2109,10 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt, const xmlChar *tagname) { * With the exception that the autoclose may have popped stuff out * of the stack. */ - if (xmlStrcmp(name, tagname)) { + if (xmlStrcmp(name, ctxt->name)) { +#ifdef DEBUG + fprintf(stderr,"End of tag %s: expecting %s\n", name, ctxt->name); +#endif if ((ctxt->name != NULL) && (xmlStrcmp(ctxt->name, name))) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) @@ -2104,11 +2126,22 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt, const xmlChar *tagname) { /* * SAX: End of Tag */ - if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) - ctxt->sax->endElement(ctxt->userData, name); oldname = ctxt->name; - htmlnamePop(ctxt); - xmlFree(oldname); + if (!xmlStrcmp(oldname, name)) { + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, name); + htmlnamePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG + fprintf(stderr,"End of tag %s: popping out %s\n", name, oldname); +#endif + xmlFree(oldname); +#ifdef DEBUG + } else { + fprintf(stderr,"End of tag %s: stack empty !!!\n", name); +#endif + } + } if (name != NULL) xmlFree(name); @@ -2170,18 +2203,30 @@ htmlParseReference(htmlParserCtxtPtr ctxt) { */ void 
-htmlParseContent(htmlParserCtxtPtr ctxt, const xmlChar *name) { +htmlParseContent(htmlParserCtxtPtr ctxt) { xmlChar *currentNode; + int depth; currentNode = ctxt->name; - while ((CUR != '<') || (NXT(1) != '/')) { + depth = ctxt->nameNr; + while (1) { const xmlChar *test = CUR_PTR; + GROW; + /* + * Our tag or one of it's parent or children is ending. + */ + if ((CUR == '<') && (NXT(1) == '/')) { + htmlParseEndTag(ctxt); + return; + } + /* * Has this node been popped out during parsing of * the next element */ - if (currentNode != ctxt->name) return; + if ((currentNode != ctxt->name) && + (depth >= ctxt->nameNr)) return; /* * First case : a comment @@ -2220,13 +2265,9 @@ htmlParseContent(htmlParserCtxtPtr ctxt, const xmlChar *name) { ctxt->wellFormed = 0; break; } + GROW; } - - /* - * parse the end of tag: 'nameNr; /* Capture start position */ if (ctxt->record_info) { @@ -2256,8 +2298,21 @@ htmlParseElement(htmlParserCtxtPtr ctxt) { node_info.begin_line = ctxt->input->line; } - name = htmlParseStartTag(ctxt); - if (name == NULL) { + oldname = ctxt->name; + htmlParseStartTag(ctxt); + name = ctxt->name; +#ifdef DEBUG + if (oldname == NULL) + fprintf(stderr, "Start of element %s\n", name); + else if (name == NULL) + fprintf(stderr, "Start of element failed, was %s\n", oldname); + else + fprintf(stderr, "Start of element %s, was %s\n", name, oldname); +#endif + if (((depth == ctxt->nameNr) && (oldname == ctxt->name)) || + (name == NULL)) { + if (CUR == '>') + NEXT; return; } @@ -2286,9 +2341,12 @@ htmlParseElement(htmlParserCtxtPtr ctxt) { if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) ctxt->sax->endElement(ctxt->userData, name); oldname = ctxt->name; +#ifdef DEBUG + fprintf(stderr,"End of tag the XML way: popping out %s\n", oldname); +#endif htmlnamePop(ctxt); - xmlFree(oldname); - xmlFree(name); + if (oldname != NULL) + xmlFree(oldname); return; } @@ -2303,11 +2361,17 @@ htmlParseElement(htmlParserCtxtPtr ctxt) { /* * end of parsing of this node. 
*/ - nodePop(ctxt); - xmlFree(name); - oldname = ctxt->name; - htmlnamePop(ctxt); - xmlFree(oldname); + if (!xmlStrcmp(name, ctxt->name)) { + nodePop(ctxt); + xmlFree(name); + oldname = ctxt->name; +#ifdef DEBUG + fprintf(stderr,"End of start tag problem: popping out %s\n", oldname); +#endif + htmlnamePop(ctxt); + if (oldname != NULL) + xmlFree(oldname); + } /* * Capture end position and add node @@ -2328,10 +2392,13 @@ htmlParseElement(htmlParserCtxtPtr ctxt) { if ((info != NULL) && (info->empty)) { if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) ctxt->sax->endElement(ctxt->userData, name); - xmlFree(name); oldname = ctxt->name; +#ifdef DEBUG + fprintf(stderr,"End of empty tag %s : popping out %s\n", name, oldname); +#endif htmlnamePop(ctxt); - xmlFree(oldname); + if (oldname != NULL) + xmlFree(oldname); return; } @@ -2339,36 +2406,32 @@ htmlParseElement(htmlParserCtxtPtr ctxt) { * Parse the content of the element: */ currentNode = ctxt->name; - htmlParseContent(ctxt, name); - - /* - * check whether the element get popped due to auto closure - * on start tag - */ - if (currentNode != ctxt->name) { - xmlFree(name); - return; - } + depth = ctxt->nameNr; + while (IS_CHAR(CUR)) { + htmlParseContent(ctxt); + if (ctxt->nameNr < depth) break; + } if (!IS_CHAR(CUR)) { if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) ctxt->sax->error(ctxt->userData, - "Premature end of data in tag %.30s\n", openTag); + "Premature end of data in tag %s\n", currentNode); ctxt->wellFormed = 0; /* * end of parsing of this node. 
*/ nodePop(ctxt); - xmlFree(name); oldname = ctxt->name; +#ifdef DEBUG + fprintf(stderr,"Premature end of tag %s : popping out %s\n", name, oldname); +#endif htmlnamePop(ctxt); - xmlFree(oldname); + if (oldname != NULL) + xmlFree(oldname); return; } - xmlFree(name); - /* * Capture end position and add node */ @@ -2479,9 +2542,14 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt) if (sax == NULL) { fprintf(stderr, "htmlInitParserCtxt: out of memory\n"); } + memset(sax, 0, sizeof(htmlSAXHandler)); /* Allocate the Input stack */ - ctxt->inputTab = (htmlParserInputPtr *) xmlMalloc(5 * sizeof(htmlParserInputPtr)); + ctxt->inputTab = (htmlParserInputPtr *) + xmlMalloc(5 * sizeof(htmlParserInputPtr)); + if (ctxt->inputTab == NULL) { + fprintf(stderr, "htmlInitParserCtxt: out of memory\n"); + } ctxt->inputNr = 0; ctxt->inputMax = 5; ctxt->input = NULL; @@ -2538,7 +2606,8 @@ htmlFreeParserCtxt(htmlParserCtxtPtr ctxt) if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab); while ((oldname = ctxt->name) != NULL) { htmlnamePop(ctxt); - xmlFree(oldname); + if (oldname != NULL) + xmlFree(oldname); } if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab); if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab); @@ -2575,22 +2644,12 @@ htmlCreateDocParserCtxt(xmlChar *cur, const char *encoding) { xmlFree(ctxt); return(NULL); } + memset(input, 0, sizeof(htmlParserInput)); - /* - * plug some encoding conversion routines here. !!! 
- if (encoding != NULL) { - enc = htmlDetectCharEncoding(cur); - htmlSwitchEncoding(ctxt, enc); - } - */ - - input->filename = NULL; input->line = 1; input->col = 1; input->base = cur; input->cur = cur; - input->free = NULL; - input->buf = NULL; inputPush(ctxt, input); return(ctxt); @@ -2685,6 +2744,7 @@ htmlCreateFileParserCtxt(const char *filename, const char *encoding) perror("malloc"); return(NULL); } + memset(ctxt, 0, sizeof(htmlParserCtxt)); htmlInitParserCtxt(ctxt); inputStream = (htmlParserInputPtr) xmlMalloc(sizeof(htmlParserInput)); if (inputStream == NULL) { @@ -2692,6 +2752,7 @@ htmlCreateFileParserCtxt(const char *filename, const char *encoding) xmlFree(ctxt); return(NULL); } + memset(inputStream, 0, sizeof(htmlParserInput)); inputStream->filename = xmlMemStrdup(filename); inputStream->line = 1; diff --git a/HTMLtree.c b/HTMLtree.c index 4e21c0fc..90e1c7d1 100644 --- a/HTMLtree.c +++ b/HTMLtree.c @@ -185,7 +185,7 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { if (cur->properties != NULL) htmlAttrListDump(buf, doc, cur->properties); - if (info->empty) { + if ((info != NULL) && (info->empty)) { xmlBufferWriteChar(buf, ">"); if (cur->next != NULL) { if ((cur->next->type != HTML_TEXT_NODE) && @@ -195,7 +195,7 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { return; } if ((cur->content == NULL) && (cur->childs == NULL)) { - if (info->endTag != 0) + if ((info != NULL) && (info->endTag != 0)) xmlBufferWriteChar(buf, ">"); else { xmlBufferWriteChar(buf, "> $(srcdir)/result/HTML/$$name ; \ + $(top_builddir)/testHTML $$i > $(srcdir)/result/HTML/$$name 2>$(srcdir)/result/HTML/$$name.err ; \ else \ echo Testing $$name ; \ - $(top_builddir)/testHTML $$i > result.$$name ; \ + $(top_builddir)/testHTML $$i > result.$$name 2>error.$$name ; \ diff $(srcdir)/result/HTML/$$name result.$$name ; \ + diff $(srcdir)/result/HTML/$$name.err error.$$name ; \ $(top_builddir)/testHTML result.$$name > result2.$$name ; \ diff result.$$name 
result2.$$name ; \ - rm result.$$name result2.$$name ; \ + rm result.$$name result2.$$name error.$$name ; \ fi ; fi ; done) XMLtests : tester diff --git a/SAX.c b/SAX.c index e0c50660..3461fbbd 100644 --- a/SAX.c +++ b/SAX.c @@ -552,7 +552,7 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value) ret = xmlNewNsProp(ctxt->node, namespace, name, NULL); if (ret != NULL) { - if (ctxt->replaceEntities == 0) + if ((ctxt->replaceEntities == 0) && (!ctxt->html)) ret->val = xmlStringGetNodeList(ctxt->myDoc, value); else ret->val = xmlNewDocText(ctxt->myDoc, value); diff --git a/SAXresult/cdata b/SAXresult/cdata new file mode 100644 index 00000000..e751d1b2 --- /dev/null +++ b/SAXresult/cdata @@ -0,0 +1,9 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.startElement(doc) +SAX.characters( +, 1) +SAX.characters( +, 1) +SAX.endElement(doc) +SAX.endDocument() diff --git a/SAXresult/comment.xml b/SAXresult/comment.xml new file mode 100644 index 00000000..d969036c --- /dev/null +++ b/SAXresult/comment.xml @@ -0,0 +1,17 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.startElement(doc) +SAX.characters( +, 1) +SAX.comment( document start ) +SAX.characters( +, 1) +SAX.startElement(empty) +SAX.endElement(empty) +SAX.characters( +, 1) +SAX.comment( document end ) +SAX.characters( +, 1) +SAX.endElement(doc) +SAX.endDocument() diff --git a/SAXresult/comment2.xml b/SAXresult/comment2.xml new file mode 100644 index 00000000..bffd7e1f --- /dev/null +++ b/SAXresult/comment2.xml @@ -0,0 +1,13 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.comment( document start ) +SAX.startElement(doc) +SAX.characters( +, 1) +SAX.startElement(empty) +SAX.endElement(empty) +SAX.characters( +, 1) +SAX.endElement(doc) +SAX.comment( document end ) +SAX.endDocument() diff --git a/SAXresult/dtd12 b/SAXresult/dtd12 new file mode 100644 index 00000000..d6e2b641 --- /dev/null +++ b/SAXresult/dtd12 @@ -0,0 +1,16 @@ +xmlSAXUserParseFile returned error 27 +SAX.setDocumentLocator() 
+SAX.startDocument() +SAX.internalSubset(doc, (null), (null)) +SAX.entityDecl(YN, 4, (null), (null), "Yes") +SAX.getParameterEntity(YN) +SAX.getParameterEntity(YN) +SAX.error: PEReference: %YN; not found +SAX.entityDecl(WhatHeSaid, 1, (null), (null), He said ) +SAX.getEntity(WhatHeSaid) +SAX.startElement(doc) +SAX.getEntity(WhatHeSaid) +SAX.warning: Entity 'WhatHeSaid' not defined +SAX.endElement(doc) +SAX.endDocument() +xmlSAXUserParseFile returned error 27 diff --git a/SAXresult/ent6 b/SAXresult/ent6 new file mode 100644 index 00000000..2e67c517 --- /dev/null +++ b/SAXresult/ent6 @@ -0,0 +1,16 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.internalSubset(doc, (null), (null)) +SAX.entityDecl(lt, 1, (null), (null), <) +SAX.getEntity(lt) +SAX.entityDecl(gt, 1, (null), (null), >) +SAX.getEntity(gt) +SAX.entityDecl(amp, 1, (null), (null), &) +SAX.getEntity(amp) +SAX.entityDecl(apos, 1, (null), (null), ') +SAX.getEntity(apos) +SAX.entityDecl(quot, 1, (null), (null), ") +SAX.getEntity(quot) +SAX.startElement(doc) +SAX.endElement(doc) +SAX.endDocument() diff --git a/SAXresult/ent7 b/SAXresult/ent7 new file mode 100644 index 00000000..c2293024 --- /dev/null +++ b/SAXresult/ent7 @@ -0,0 +1,21 @@ +xmlSAXUserParseFile returned error 27 +SAX.setDocumentLocator() +SAX.startDocument() +SAX.internalSubset(item, (null), (null)) +SAX.entityDecl(sampleEnt, 4, (null), (null), ) +SAX.getParameterEntity(sampleEnt) +SAX.entityDecl(sampleEnt, 1, (null), (null), the hyacinth girl) +SAX.getEntity(sampleEnt) +SAX.getParameterEntity(sampleEnt) +SAX.error: PEReference: %sampleEnt; not found +SAX.elementDecl(para, 3, ...) 
+SAX.startElement(item) +SAX.startElement(para) +SAX.characters('they called me , 16) +SAX.getEntity(sampleEnt) +SAX.warning: Entity 'sampleEnt' not defined +SAX.characters(', 1) +SAX.endElement(para) +SAX.endElement(item) +SAX.endDocument() +xmlSAXUserParseFile returned error 27 diff --git a/SAXresult/eve.xml b/SAXresult/eve.xml new file mode 100644 index 00000000..dd954cf2 --- /dev/null +++ b/SAXresult/eve.xml @@ -0,0 +1,10 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.internalSubset(spec, -//testspec//, dtds/eve.dtd) +SAX.entityDecl(iso6.doc.date, 1, (null), (null), 29-May-1999) +SAX.getEntity(iso6.doc.date) +SAX.startElement(spec) +SAX.characters( +, 1) +SAX.endElement(spec) +SAX.endDocument() diff --git a/SAXresult/ns b/SAXresult/ns new file mode 100644 index 00000000..45e00f75 --- /dev/null +++ b/SAXresult/ns @@ -0,0 +1,11 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.startElement(dia:diagram, xmlns:dia='http://www.lysator.liu.se/~alla/dia/') +SAX.characters( + , 3) +SAX.startElement(dia:diagramdata, dia:testattr='test') +SAX.endElement(dia:diagramdata) +SAX.characters( +, 1) +SAX.endElement(dia:diagram) +SAX.endDocument() diff --git a/SAXresult/ns2 b/SAXresult/ns2 new file mode 100644 index 00000000..64a3fe8f --- /dev/null +++ b/SAXresult/ns2 @@ -0,0 +1,5 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.startElement(dia:diagram, xmlns:dia='http://www.lysator.liu.se/~alla/dia/', dia:testattr='test') +SAX.endElement(dia:diagram) +SAX.endDocument() diff --git a/SAXresult/ns3 b/SAXresult/ns3 new file mode 100644 index 00000000..421c7f0f --- /dev/null +++ b/SAXresult/ns3 @@ -0,0 +1,5 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.startElement(dia:diagram, dia:testattr='test', xmlns:dia='http://www.lysator.liu.se/~alla/dia/') +SAX.endElement(dia:diagram) +SAX.endDocument() diff --git a/SAXresult/ns4 b/SAXresult/ns4 new file mode 100644 index 00000000..674b9a38 --- /dev/null +++ b/SAXresult/ns4 @@ -0,0 +1,5 @@ 
+SAX.setDocumentLocator() +SAX.startDocument() +SAX.startElement(diagram, testattr='test', xml:lang='en', xml:link='simple', xml:space='preserve') +SAX.endElement(diagram) +SAX.endDocument() diff --git a/SAXresult/pi.xml b/SAXresult/pi.xml new file mode 100644 index 00000000..2a1b9bd5 --- /dev/null +++ b/SAXresult/pi.xml @@ -0,0 +1,17 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.startElement(doc) +SAX.characters( +, 1) +SAX.processingInstruction(document-start, doc) +SAX.characters( +, 1) +SAX.startElement(empty) +SAX.endElement(empty) +SAX.characters( +, 1) +SAX.processingInstruction(document-end, doc) +SAX.characters( +, 1) +SAX.endElement(doc) +SAX.endDocument() diff --git a/SAXresult/pi2.xml b/SAXresult/pi2.xml new file mode 100644 index 00000000..3100a177 --- /dev/null +++ b/SAXresult/pi2.xml @@ -0,0 +1,13 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.processingInstruction(document-start, doc) +SAX.startElement(doc) +SAX.characters( +, 1) +SAX.startElement(empty) +SAX.endElement(empty) +SAX.characters( +, 1) +SAX.endElement(doc) +SAX.processingInstruction(document-end, doc) +SAX.endDocument() diff --git a/debugXML.c b/debugXML.c index 24618e95..0c77b653 100644 --- a/debugXML.c +++ b/debugXML.c @@ -152,6 +152,7 @@ void xmlDebugDumpOneNode(FILE *output, xmlNodePtr node, int depth) { fprintf(output, "COMMENT\n"); break; case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: fprintf(output, "Error, DOCUMENT found here\n"); break; case XML_DOCUMENT_TYPE_NODE: @@ -238,6 +239,9 @@ void xmlDebugDumpDocument(FILE *output, xmlDocPtr doc) { case XML_DOCUMENT_NODE: fprintf(output, "DOCUMENT\n"); break; + case XML_HTML_DOCUMENT_NODE: + fprintf(output, "HTML DOCUMENT\n"); + break; case XML_DOCUMENT_TYPE_NODE: fprintf(output, "Error, DOCUMENT_TYPE\n"); break; diff --git a/doc/html/gnome-xml-nanohttp.html b/doc/html/gnome-xml-nanohttp.html new file mode 100644 index 00000000..14244a92 --- /dev/null +++ b/doc/html/gnome-xml-nanohttp.html @@ -0,0 +1,945 @@ 
+nanohttp
Gnome XML Library Reference Manual
<<< Previous PageHomeUp 

nanohttp

Name

nanohttp —

Synopsis


+
+int         xmlNanoHTTPFetch                (const char *URL,
+                                             const char *filename);
+void*       xmlNanoHTTPMethod               (const char *URL,
+                                             const char *method,
+                                             const char *input,
+                                             char **contentType,
+                                             const char *headers);
+void*       xmlNanoHTTPOpen                 (const char *URL,
+                                             char **contentType);
+int         xmlNanoHTTPReturnCode           (void *ctx);
+int         xmlNanoHTTPRead                 (void *ctx,
+                                             void *dest,
+                                             int len);
+int         xmlNanoHTTPSave                 (void *ctxt,
+                                             const char *filename);
+void        xmlNanoHTTPClose                (void *ctx);

Description

Details

xmlNanoHTTPFetch ()

int         xmlNanoHTTPFetch                (const char *URL,
+                                             const char *filename);

This function tries to fetch the indicated resource via HTTP GET +and save its content in the file.

URL : The URL to load
filename : the filename where the content should be saved
Returns :-1 in case of failure, 0 in case of success. The contentType, +if provided, must be freed by the caller


xmlNanoHTTPMethod ()

void*       xmlNanoHTTPMethod               (const char *URL,
+                                             const char *method,
+                                             const char *input,
+                                             char **contentType,
+                                             const char *headers);

This function tries to open a connection to the indicated resource +via HTTP using the given method, adding the given extra headers +and the input buffer for the request content.

URL : The URL to load
method : the HTTP method to use
input : the input string if any
contentType : the Content-Type information IN and OUT
headers : the extra headers


xmlNanoHTTPOpen ()

void*       xmlNanoHTTPOpen                 (const char *URL,
+                                             char **contentType);

This function tries to open a connection to the indicated resource +via HTTP GET.

URL : The URL to load
contentType : if available the Content-Type information will be +returned at that location


xmlNanoHTTPReturnCode ()

int         xmlNanoHTTPReturnCode           (void *ctx);

ctx : the HTTP context
Returns :the HTTP return code for the request.


xmlNanoHTTPRead ()

int         xmlNanoHTTPRead                 (void *ctx,
+                                             void *dest,
+                                             int len);

This function tries to read len bytes from the existing HTTP connection +and saves them in dest. This is a blocking call.

ctx : the HTTP context
dest : a buffer
len : the buffer length
Returns :the number of bytes read. 0 is an indication of an end of connection. +-1 indicates a parameter error.


xmlNanoHTTPSave ()

int         xmlNanoHTTPSave                 (void *ctxt,
+                                             const char *filename);

This function saves the output of the HTTP transaction to a file. +It closes and frees the context at the end.

ctxt : 
filename : the filename where the content should be saved
Returns :-1 in case of failure, 0 in case of success.


xmlNanoHTTPClose ()

void        xmlNanoHTTPClose                (void *ctx);

This function closes an HTTP context: it ends the connection and +frees all data related to it.

ctx : the HTTP context



<<< Previous PageHomeUp 
xmlmemory 
\ No newline at end of file diff --git a/doc/html/gnome-xml-xmlmemory.html b/doc/html/gnome-xml-xmlmemory.html new file mode 100644 index 00000000..44ecc56b --- /dev/null +++ b/doc/html/gnome-xml-xmlmemory.html @@ -0,0 +1,1277 @@ +xmlmemory
Gnome XML Library Reference Manual
<<< Previous PageHomeUpNext Page >>>

xmlmemory

Name

xmlmemory —

Synopsis


+
+#define     NO_DEBUG_MEMORY
+void        xmlFree                         (void *ptr);
+void*       xmlMalloc                       (size_t size);
+void*       xmlRealloc                      (void *ptr,
+                                             size_t size);
+char*       xmlMemStrdup                    (const char *str);
+int         xmlInitMemory                   (void);
+int         xmlMemUsed                      (void);
+void        xmlMemoryDump                   (void);
+void        xmlMemDisplay                   (FILE *fp);
+#define     DEBUG_MEMORY_LOCATION
+#define     DEBUG_MEMORY
+#define     MEM_LIST
+void*       xmlMallocLoc                    (int size,
+                                             const char *file,
+                                             int line);
+void*       xmlReallocLoc                   (void *ptr,
+                                             int size,
+                                             const char *file,
+                                             int line);
+char*       xmlMemStrdupLoc                 (const char *str,
+                                             const char *file,
+                                             int line);

Description

Details

NO_DEBUG_MEMORY

#define     NO_DEBUG_MEMORY


xmlFree ()

void        xmlFree                         (void *ptr);

a free() equivalent, with error checking.

ptr : 


xmlMalloc ()

void*       xmlMalloc                       (size_t size);

a malloc() equivalent, with logging of the allocation info.

size : 


xmlRealloc ()

void*       xmlRealloc                      (void *ptr,
+                                             size_t size);

a realloc() equivalent, with logging of the allocation info.

ptr : 
size : 


xmlMemStrdup ()

char*       xmlMemStrdup                    (const char *str);

a strdup() equivalent, with logging of the allocation info.

str : 
Returns : 


xmlInitMemory ()

int         xmlInitMemory                   (void);

Initialize the memory layer.

Returns : 


xmlMemUsed ()

int         xmlMemUsed                      (void);

returns the amount of memory currently allocated

Returns : 


xmlMemoryDump ()

void        xmlMemoryDump                   (void);

Dump in-extenso the memory blocks allocated to the file .memorylist


xmlMemDisplay ()

void        xmlMemDisplay                   (FILE *fp);

show in-extenso the memory blocks allocated

fp : 


DEBUG_MEMORY_LOCATION

#define     DEBUG_MEMORY_LOCATION


DEBUG_MEMORY

#define     DEBUG_MEMORY


MEM_LIST

#define     MEM_LIST


xmlMallocLoc ()

void*       xmlMallocLoc                    (int size,
+                                             const char *file,
+                                             int line);

a malloc() equivalent, with logging of the allocation info.

size : 
file : the file name
line : the line number


xmlReallocLoc ()

void*       xmlReallocLoc                   (void *ptr,
+                                             int size,
+                                             const char *file,
+                                             int line);

a realloc() equivalent, with logging of the allocation info.

ptr : 
size : 
file : the file name
line : the line number


xmlMemStrdupLoc ()

char*       xmlMemStrdupLoc                 (const char *str,
+                                             const char *file,
+                                             int line);

a strdup() equivalent, with logging of the allocation info.

str : 
file : the file name
line : the line number
Returns : 



<<< Previous PageHomeUpNext Page >>>
parserInternalsnanohttp
\ No newline at end of file diff --git a/entities.c b/entities.c index 78f7ec2c..78571e02 100644 --- a/entities.c +++ b/entities.c @@ -401,6 +401,8 @@ xmlEncodeEntities(xmlDocPtr doc, const xmlChar *input) { const xmlChar *cur = input; xmlChar *out = buffer; static int warning = 1; + int html = 0; + if (warning) { fprintf(stderr, "Deprecated API xmlEncodeEntities() used\n"); @@ -409,6 +411,9 @@ xmlEncodeEntities(xmlDocPtr doc, const xmlChar *input) { } if (input == NULL) return(NULL); + if (doc != NULL) + html = (doc->type == XML_HTML_DOCUMENT_NODE); + if (buffer == NULL) { buffer_size = 1000; buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); @@ -452,7 +457,7 @@ xmlEncodeEntities(xmlDocPtr doc, const xmlChar *input) { *out++ = 'o'; *out++ = 't'; *out++ = ';'; - } else if (*cur == '\'') { + } else if ((*cur == '\'') && (!html)) { *out++ = '&'; *out++ = 'a'; *out++ = 'p'; @@ -536,8 +541,11 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { xmlChar *buffer = NULL; xmlChar *out = NULL; int buffer_size = 0; + int html = 0; if (input == NULL) return(NULL); + if (doc != NULL) + html = (doc->type == XML_HTML_DOCUMENT_NODE); /* * allocate an translation buffer. 
@@ -584,7 +592,7 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { *out++ = 'o'; *out++ = 't'; *out++ = ';'; - } else if (*cur == '\'') { + } else if ((*cur == '\'') && (!html)) { *out++ = '&'; *out++ = 'a'; *out++ = 'p'; diff --git a/include/libxml/tree.h b/include/libxml/tree.h index 6c7e795a..6c8ab102 100644 --- a/include/libxml/tree.h +++ b/include/libxml/tree.h @@ -35,7 +35,8 @@ typedef enum { XML_DOCUMENT_NODE= 9, XML_DOCUMENT_TYPE_NODE= 10, XML_DOCUMENT_FRAG_NODE= 11, - XML_NOTATION_NODE= 12 + XML_NOTATION_NODE= 12, + XML_HTML_DOCUMENT_NODE= 13 } xmlElementType; /* diff --git a/include/libxml/valid.h b/include/libxml/valid.h index 73a2a543..8c016a93 100644 --- a/include/libxml/valid.h +++ b/include/libxml/valid.h @@ -218,6 +218,14 @@ xmlNotationPtr xmlGetDtdNotationDesc (xmlDtdPtr dtd, xmlElementPtr xmlGetDtdElementDesc (xmlDtdPtr dtd, const xmlChar *name); +int xmlValidGetValidElements(xmlNode *prev, + xmlNode *next, + const xmlChar **list, + int max); +int xmlValidGetPotentialChildren(xmlElementContent *ctree, + const xmlChar **list, + int *len, + int max); #ifdef __cplusplus } #endif diff --git a/result/HTML/Down.html.err b/result/HTML/Down.html.err new file mode 100644 index 00000000..e69de29b diff --git a/result/HTML/fp40.htm b/result/HTML/fp40.htm index 6497272a..4e70b0c7 100644 --- a/result/HTML/fp40.htm +++ b/result/HTML/fp40.htm @@ -143,10 +143,10 @@ Extensions, see the Server Extensions Resource Kit Update at: Microsoft Knowledge Base

For further technical information on FrontPage, please consult Support Online. Use Support -Online to easily search Microsoft Product Support Services' collection of resources including -technical articles from Microsoft's extensive Knowledge Base, FAQs, troubleshooters to find +Online to easily search Microsoft Product Support Services' collection of resources including +technical articles from Microsoft's extensive Knowledge Base, FAQs, troubleshooters to find fast, accurate answers. You can also customize the site to control your search using either -keywords or the site's natural language search engine, which uses normal everyday language for +keywords or the site's natural language search engine, which uses normal everyday language for answering inquiries, so you can write your question in your own words. To begin, go to http://support.microsoft.com/support/.

diff --git a/result/HTML/fp40.htm.err b/result/HTML/fp40.htm.err new file mode 100644 index 00000000..ab847035 --- /dev/null +++ b/result/HTML/fp40.htm.err @@ -0,0 +1,3 @@ +./test/HTML/fp40.htm:153: error: htmlParseEntityRef: no name +technical articles from Microsoft's extensive Knowledge Base, FAQs, & troublesh + ^ diff --git a/result/HTML/liclose.html b/result/HTML/liclose.html new file mode 100644 index 00000000..edc8eb1e --- /dev/null +++ b/result/HTML/liclose.html @@ -0,0 +1,14 @@ + + + + + + +

    +
  • First item +
  • +
  • Second item, closes the first one +
  • +
+ + diff --git a/result/HTML/liclose.html.err b/result/HTML/liclose.html.err new file mode 100644 index 00000000..e69de29b diff --git a/result/HTML/reg1.html b/result/HTML/reg1.html new file mode 100644 index 00000000..0a5d6549 --- /dev/null +++ b/result/HTML/reg1.html @@ -0,0 +1,12 @@ + + + +Regression test 1 + + +

Regression test 1

+

+Ok file no problem +

+ + diff --git a/result/HTML/reg1.html.err b/result/HTML/reg1.html.err new file mode 100644 index 00000000..e69de29b diff --git a/result/HTML/reg2.html b/result/HTML/reg2.html new file mode 100644 index 00000000..198ee044 --- /dev/null +++ b/result/HTML/reg2.html @@ -0,0 +1,15 @@ + + + +Regression test 2 + + +

Regression test 2

+

+Autoclose of tag P +

+

+Ok file no problem +

+ + diff --git a/result/HTML/reg2.html.err b/result/HTML/reg2.html.err new file mode 100644 index 00000000..e69de29b diff --git a/result/HTML/reg3.html b/result/HTML/reg3.html new file mode 100644 index 00000000..65866f91 --- /dev/null +++ b/result/HTML/reg3.html @@ -0,0 +1,16 @@ + + + +Regression test 3 + + +

Regression test 3

+

+Autoclose of tag P +

+
+

+Ok file no problem +

+ + diff --git a/result/HTML/reg3.html.err b/result/HTML/reg3.html.err new file mode 100644 index 00000000..e69de29b diff --git a/result/HTML/reg4.html b/result/HTML/reg4.html new file mode 100644 index 00000000..afae62c2 --- /dev/null +++ b/result/HTML/reg4.html @@ -0,0 +1,13 @@ + + + +Regression test 4 + + +

Regression test 4

+

+Wrong close of tag P +

+
+ + diff --git a/result/HTML/reg4.html.err b/result/HTML/reg4.html.err new file mode 100644 index 00000000..d11f77c5 --- /dev/null +++ b/result/HTML/reg4.html.err @@ -0,0 +1,3 @@ +./test/HTML/reg4.html:10: error: Unexpected end tag : P +

+ ^ diff --git a/result/HTML/test2.html.err b/result/HTML/test2.html.err new file mode 100644 index 00000000..e69de29b diff --git a/result/HTML/test3.html.err b/result/HTML/test3.html.err new file mode 100644 index 00000000..82d84a13 --- /dev/null +++ b/result/HTML/test3.html.err @@ -0,0 +1,12 @@ +./test/HTML/test3.html:6: error: Unexpected end tag : P +


+ ^ +./test/HTML/test3.html:13: error: Unexpected end tag : P +


+ ^ +./test/HTML/test3.html:27: error: Opening and ending tag mismatch: H4 and B +

Links

+ ^ +./test/HTML/test3.html:27: error: Unexpected end tag : B +

Links

+ ^ diff --git a/result/XPath/expr/compare b/result/XPath/expr/compare new file mode 100644 index 00000000..daae1a2e --- /dev/null +++ b/result/XPath/expr/compare @@ -0,0 +1,24 @@ +Object is a Boolean : true +Object is a Boolean : true +Object is a Boolean : false +Object is a Boolean : false +Object is a Boolean : false +Object is a Boolean : false +Object is a Boolean : true +Object is a Boolean : true +Object is a Boolean : false +Object is a Boolean : true +Object is a Boolean : false +Object is a Boolean : true +Object is a Boolean : true +Object is a Boolean : true +Object is a Boolean : false +Object is a Boolean : false +Object is a Boolean : true +Object is a Boolean : true +Object is a Boolean : false +Object is a Boolean : false +Object is a Boolean : true +Object is a Boolean : true +Object is a Boolean : false +Object is a Boolean : false diff --git a/result/XPath/expr/equality b/result/XPath/expr/equality new file mode 100644 index 00000000..92d6d1c7 --- /dev/null +++ b/result/XPath/expr/equality @@ -0,0 +1,24 @@ +Object is a Boolean : true +Object is a Boolean : false +Object is a Boolean : false +Object is a Boolean : true +Object is a Boolean : true +Object is a Boolean : false +Object is a Boolean : false +Object is a Boolean : true +Object is a Boolean : true +Object is a Boolean : false +Object is a Boolean : false +Object is a Boolean : true +Object is a Boolean : true +Object is a Boolean : false +Object is a Boolean : false +Object is a Boolean : true +Object is a Boolean : false +Object is a Boolean : true +Object is a Boolean : true +Object is a Boolean : false +Object is a Boolean : true +Object is a Boolean : false +Object is a Boolean : false +Object is a Boolean : true diff --git a/result/comment.xml b/result/comment.xml new file mode 100644 index 00000000..567160aa --- /dev/null +++ b/result/comment.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/result/comment2.xml b/result/comment2.xml new file mode 100644 index 00000000..26242381 --- 
/dev/null +++ b/result/comment2.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/result/noent/comment.xml b/result/noent/comment.xml new file mode 100644 index 00000000..567160aa --- /dev/null +++ b/result/noent/comment.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/result/noent/comment2.xml b/result/noent/comment2.xml new file mode 100644 index 00000000..26242381 --- /dev/null +++ b/result/noent/comment2.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/result/noent/ns b/result/noent/ns new file mode 100644 index 00000000..94b927e5 --- /dev/null +++ b/result/noent/ns @@ -0,0 +1,4 @@ + + + + diff --git a/result/noent/ns2 b/result/noent/ns2 new file mode 100644 index 00000000..b69ad82f --- /dev/null +++ b/result/noent/ns2 @@ -0,0 +1,2 @@ + + diff --git a/result/noent/ns3 b/result/noent/ns3 new file mode 100644 index 00000000..b69ad82f --- /dev/null +++ b/result/noent/ns3 @@ -0,0 +1,2 @@ + + diff --git a/result/noent/ns4 b/result/noent/ns4 new file mode 100644 index 00000000..fb7bc3e7 --- /dev/null +++ b/result/noent/ns4 @@ -0,0 +1,2 @@ + + diff --git a/result/noent/pi.xml b/result/noent/pi.xml new file mode 100644 index 00000000..27bed5b9 --- /dev/null +++ b/result/noent/pi.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/result/noent/pi2.xml b/result/noent/pi2.xml new file mode 100644 index 00000000..acf76f95 --- /dev/null +++ b/result/noent/pi2.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/result/ns b/result/ns new file mode 100644 index 00000000..94b927e5 --- /dev/null +++ b/result/ns @@ -0,0 +1,4 @@ + + + + diff --git a/result/ns2 b/result/ns2 new file mode 100644 index 00000000..b69ad82f --- /dev/null +++ b/result/ns2 @@ -0,0 +1,2 @@ + + diff --git a/result/ns3 b/result/ns3 new file mode 100644 index 00000000..b69ad82f --- /dev/null +++ b/result/ns3 @@ -0,0 +1,2 @@ + + diff --git a/result/ns4 b/result/ns4 new file mode 100644 index 00000000..fb7bc3e7 --- /dev/null +++ b/result/ns4 @@ -0,0 +1,2 @@ + + diff --git a/result/pi.xml b/result/pi.xml new file mode 100644 index 
00000000..27bed5b9 --- /dev/null +++ b/result/pi.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/result/pi2.xml b/result/pi2.xml new file mode 100644 index 00000000..acf76f95 --- /dev/null +++ b/result/pi2.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/result/valid/REC-xml-19980210.xml.err b/result/valid/REC-xml-19980210.xml.err new file mode 100644 index 00000000..e69de29b diff --git a/result/valid/dia.xml.err b/result/valid/dia.xml.err new file mode 100644 index 00000000..e69de29b diff --git a/result/valid/xlink.xml.err b/result/valid/xlink.xml.err new file mode 100644 index 00000000..6e8beefd --- /dev/null +++ b/result/valid/xlink.xml.err @@ -0,0 +1,6 @@ +./test/valid/xlink.xml:450: validity error: ID dt-arc already defined +

An arc is contained within an + ^ +./test/valid/xlink.xml:530: validity error: IDREF attribute def reference an unknown ID 'dt-xlg' + +^ diff --git a/test/HTML/liclose.html b/test/HTML/liclose.html new file mode 100644 index 00000000..73640344 --- /dev/null +++ b/test/HTML/liclose.html @@ -0,0 +1,13 @@ + + + + + + +

    +
  • First item +
  • Second item, closes the first one +
+ + diff --git a/test/HTML/reg1.html b/test/HTML/reg1.html new file mode 100644 index 00000000..ecdd007f --- /dev/null +++ b/test/HTML/reg1.html @@ -0,0 +1,10 @@ + + +Regression test 1 + + +

Regression test 1

+

+Ok file no problem + + diff --git a/test/HTML/reg2.html b/test/HTML/reg2.html new file mode 100644 index 00000000..7145c194 --- /dev/null +++ b/test/HTML/reg2.html @@ -0,0 +1,12 @@ + + +Regression test 2 + + +

Regression test 2

+

+Autoclose of tag P +

+Ok file no problem + + diff --git a/test/HTML/reg3.html b/test/HTML/reg3.html new file mode 100644 index 00000000..014483ba --- /dev/null +++ b/test/HTML/reg3.html @@ -0,0 +1,13 @@ + + +Regression test 3 + + +

Regression test 3

+

+Autoclose of tag P +


+

+Ok file no problem + + diff --git a/test/HTML/reg4.html b/test/HTML/reg4.html new file mode 100644 index 00000000..7d04ca23 --- /dev/null +++ b/test/HTML/reg4.html @@ -0,0 +1,12 @@ + + +Regression test 4 + + +

Regression test 4

+

+Wrong close of tag P +


+

+ + diff --git a/test/XPath/docs/id b/test/XPath/docs/id new file mode 100644 index 00000000..4b6659ff --- /dev/null +++ b/test/XPath/docs/id @@ -0,0 +1,28 @@ + + + + Welcome to Gnome + + + The Linux adventure +

bla bla bla ...

+ +

...

+
+ + Chapter 2 +

this is chapter 2 ...

+
+ + Chapter 3 +

this is chapter 3 ...

+
+ + Chapter 4 +

this is chapter 4 ...

+
+ + Chapter 5 +

this is chapter 5 ...

+
+
diff --git a/test/XPath/expr/compare b/test/XPath/expr/compare new file mode 100644 index 00000000..81b26322 --- /dev/null +++ b/test/XPath/expr/compare @@ -0,0 +1,24 @@ +0<1 +0<=1 +0>1 +0>=1 +1<0 +1<=0 +1>0 +1>=0 +1<1 +1<=1 +1>1 +1>=1 +'0'<1 +'0'<=1 +'0'>1 +'0'>=1 +0<'1.2' +0<='1.2' +0>'1.2' +0>='1.2' +false()<1 +false()<=1 +0>true() +0>=true() diff --git a/test/XPath/expr/equality b/test/XPath/expr/equality new file mode 100644 index 00000000..2a8c84fc --- /dev/null +++ b/test/XPath/expr/equality @@ -0,0 +1,25 @@ +1=1 +1!=1 +1=0 +1!=0 +true()=true() +true()!=true() +true()=false() +false()!=true() +'test'='test' +'test'!='test' +'test2'='test' +'test2'!='test' +false()=0 +false()!=0 +false()=1 +false()!=1 +0=true() +0!=true() +1=true() +1!=true() +true()='test' +false()='test' +'test'!=true() +'test'!=false() + diff --git a/test/comment.xml b/test/comment.xml new file mode 100644 index 00000000..98c5effd --- /dev/null +++ b/test/comment.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/test/comment2.xml b/test/comment2.xml new file mode 100644 index 00000000..9e122ecf --- /dev/null +++ b/test/comment2.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/test/ns b/test/ns new file mode 100644 index 00000000..94b927e5 --- /dev/null +++ b/test/ns @@ -0,0 +1,4 @@ + + + + diff --git a/test/ns2 b/test/ns2 new file mode 100644 index 00000000..80aaf945 --- /dev/null +++ b/test/ns2 @@ -0,0 +1,3 @@ + + diff --git a/test/ns3 b/test/ns3 new file mode 100644 index 00000000..76bb20d4 --- /dev/null +++ b/test/ns3 @@ -0,0 +1,3 @@ + + diff --git a/test/ns4 b/test/ns4 new file mode 100644 index 00000000..136bf923 --- /dev/null +++ b/test/ns4 @@ -0,0 +1,2 @@ + + diff --git a/test/pi.xml b/test/pi.xml new file mode 100644 index 00000000..48c7ff04 --- /dev/null +++ b/test/pi.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/test/pi2.xml b/test/pi2.xml new file mode 100644 index 00000000..710d51c9 --- /dev/null +++ b/test/pi2.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/testHTML.c b/testHTML.c index 
9415a398..8bced6cb 100644 --- a/testHTML.c +++ b/testHTML.c @@ -15,6 +15,8 @@ #include #include +#include + #ifdef HAVE_SYS_TYPES_H #include @@ -32,12 +34,16 @@ #include #endif +#include "xmlmemory.h" #include "HTMLparser.h" #include "HTMLtree.h" #include "debugXML.h" static int debug = 0; static int copy = 0; +static int sax = 0; +static int repeat = 0; +static int noout = 0; /* * Note: this is perfectly clean HTML, i.e. not a useful test. @@ -59,12 +65,544 @@ We are doing our best to get it back on-line,\n\ "; */ +xmlSAXHandler emptySAXHandlerStruct = { + NULL, /* internalSubset */ + NULL, /* isStandalone */ + NULL, /* hasInternalSubset */ + NULL, /* hasExternalSubset */ + NULL, /* resolveEntity */ + NULL, /* getEntity */ + NULL, /* entityDecl */ + NULL, /* notationDecl */ + NULL, /* attributeDecl */ + NULL, /* elementDecl */ + NULL, /* unparsedEntityDecl */ + NULL, /* setDocumentLocator */ + NULL, /* startDocument */ + NULL, /* endDocument */ + NULL, /* startElement */ + NULL, /* endElement */ + NULL, /* reference */ + NULL, /* characters */ + NULL, /* ignorableWhitespace */ + NULL, /* processingInstruction */ + NULL, /* comment */ + NULL, /* xmlParserWarning */ + NULL, /* xmlParserError */ + NULL, /* xmlParserError */ + NULL, /* getParameterEntity */ +}; + +xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct; +extern xmlSAXHandlerPtr debugSAXHandler; + +/************************************************************************ + * * + * Debug Handlers * + * * + ************************************************************************/ + +/** + * isStandaloneDebug: + * @ctxt: An XML parser context + * + * Is this document tagged standalone ? 
+ * + * Returns 1 if true + */ +int +isStandaloneDebug(void *ctx) +{ + fprintf(stdout, "SAX.isStandalone()\n"); + return(0); +} + +/** + * hasInternalSubsetDebug: + * @ctxt: An XML parser context + * + * Does this document has an internal subset + * + * Returns 1 if true + */ +int +hasInternalSubsetDebug(void *ctx) +{ + fprintf(stdout, "SAX.hasInternalSubset()\n"); + return(0); +} + +/** + * hasExternalSubsetDebug: + * @ctxt: An XML parser context + * + * Does this document has an external subset + * + * Returns 1 if true + */ +int +hasExternalSubsetDebug(void *ctx) +{ + fprintf(stdout, "SAX.hasExternalSubset()\n"); + return(0); +} + +/** + * hasInternalSubsetDebug: + * @ctxt: An XML parser context + * + * Does this document has an internal subset + */ +void +internalSubsetDebug(void *ctx, const xmlChar *name, + const xmlChar *ExternalID, const xmlChar *SystemID) +{ + /* xmlDtdPtr externalSubset; */ + + fprintf(stdout, "SAX.internalSubset(%s, %s, %s)\n", + name, ExternalID, SystemID); + +/*********** + if ((ExternalID != NULL) || (SystemID != NULL)) { + externalSubset = xmlParseDTD(ExternalID, SystemID); + if (externalSubset != NULL) { + xmlFreeDtd(externalSubset); + } + } + ***********/ +} + +/** + * resolveEntityDebug: + * @ctxt: An XML parser context + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * + * Special entity resolver, better left to the parser, it has + * more context than the application layer. + * The default behaviour is to NOT resolve the entities, in that case + * the ENTITY_REF nodes are built in the structure (and the parameter + * values). + * + * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour. 
+ */ +xmlParserInputPtr +resolveEntityDebug(void *ctx, const xmlChar *publicId, const xmlChar *systemId) +{ + /* xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; */ + + + fprintf(stdout, "SAX.resolveEntity("); + if (publicId != NULL) + fprintf(stdout, "%s", (char *)publicId); + else + fprintf(stdout, " "); + if (systemId != NULL) + fprintf(stdout, ", %s)\n", (char *)systemId); + else + fprintf(stdout, ", )\n"); +/********* + if (systemId != NULL) { + return(xmlNewInputFromFile(ctxt, (char *) systemId)); + } + *********/ + return(NULL); +} + +/** + * getEntityDebug: + * @ctxt: An XML parser context + * @name: The entity name + * + * Get an entity by name + * + * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour. + */ +xmlEntityPtr +getEntityDebug(void *ctx, const xmlChar *name) +{ + fprintf(stdout, "SAX.getEntity(%s)\n", name); + return(NULL); +} + +/** + * getParameterEntityDebug: + * @ctxt: An XML parser context + * @name: The entity name + * + * Get a parameter entity by name + * + * Returns the xmlParserInputPtr + */ +xmlEntityPtr +getParameterEntityDebug(void *ctx, const xmlChar *name) +{ + fprintf(stdout, "SAX.getParameterEntity(%s)\n", name); + return(NULL); +} + + +/** + * entityDeclDebug: + * @ctxt: An XML parser context + * @name: the entity name + * @type: the entity type + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * @content: the entity value (without processing). 
+ * + * An entity definition has been parsed + */ +void +entityDeclDebug(void *ctx, const xmlChar *name, int type, + const xmlChar *publicId, const xmlChar *systemId, xmlChar *content) +{ + fprintf(stdout, "SAX.entityDecl(%s, %d, %s, %s, %s)\n", + name, type, publicId, systemId, content); +} + +/** + * attributeDeclDebug: + * @ctxt: An XML parser context + * @name: the attribute name + * @type: the attribute type + * + * An attribute definition has been parsed + */ +void +attributeDeclDebug(void *ctx, const xmlChar *elem, const xmlChar *name, + int type, int def, const xmlChar *defaultValue, + xmlEnumerationPtr tree) +{ + fprintf(stdout, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n", + elem, name, type, def, defaultValue); +} + +/** + * elementDeclDebug: + * @ctxt: An XML parser context + * @name: the element name + * @type: the element type + * @content: the element value (without processing). + * + * An element definition has been parsed + */ +void +elementDeclDebug(void *ctx, const xmlChar *name, int type, + xmlElementContentPtr content) +{ + fprintf(stdout, "SAX.elementDecl(%s, %d, ...)\n", + name, type); +} + +/** + * notationDeclDebug: + * @ctxt: An XML parser context + * @name: The name of the notation + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * + * What to do when a notation declaration has been parsed. 
+ */ +void +notationDeclDebug(void *ctx, const xmlChar *name, + const xmlChar *publicId, const xmlChar *systemId) +{ + fprintf(stdout, "SAX.notationDecl(%s, %s, %s)\n", + (char *) name, (char *) publicId, (char *) systemId); +} + +/** + * unparsedEntityDeclDebug: + * @ctxt: An XML parser context + * @name: The name of the entity + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * @notationName: the name of the notation + * + * What to do when an unparsed entity declaration is parsed + */ +void +unparsedEntityDeclDebug(void *ctx, const xmlChar *name, + const xmlChar *publicId, const xmlChar *systemId, + const xmlChar *notationName) +{ + fprintf(stdout, "SAX.unparsedEntityDecl(%s, %s, %s, %s)\n", + (char *) name, (char *) publicId, (char *) systemId, + (char *) notationName); +} + +/** + * setDocumentLocatorDebug: + * @ctxt: An XML parser context + * @loc: A SAX Locator + * + * Receive the document locator at startup, actually xmlDefaultSAXLocator + * Everything is available on the context, so this is useless in our case. + */ +void +setDocumentLocatorDebug(void *ctx, xmlSAXLocatorPtr loc) +{ + fprintf(stdout, "SAX.setDocumentLocator()\n"); +} + +/** + * startDocumentDebug: + * @ctxt: An XML parser context + * + * called when the document start being processed. + */ +void +startDocumentDebug(void *ctx) +{ + fprintf(stdout, "SAX.startDocument()\n"); +} + +/** + * endDocumentDebug: + * @ctxt: An XML parser context + * + * called when the document end has been detected. + */ +void +endDocumentDebug(void *ctx) +{ + fprintf(stdout, "SAX.endDocument()\n"); +} + +/** + * startElementDebug: + * @ctxt: An XML parser context + * @name: The element name + * + * called when an opening tag has been processed. 
+ */ +void +startElementDebug(void *ctx, const xmlChar *name, const xmlChar **atts) +{ + int i; + + fprintf(stdout, "SAX.startElement(%s", (char *) name); + if (atts != NULL) { + for (i = 0;(atts[i] != NULL);i++) { + fprintf(stdout, ", %s='", atts[i++]); + fprintf(stdout, "%s'", atts[i]); + } + } + fprintf(stdout, ")\n"); +} + +/** + * endElementDebug: + * @ctxt: An XML parser context + * @name: The element name + * + * called when the end of an element has been detected. + */ +void +endElementDebug(void *ctx, const xmlChar *name) +{ + fprintf(stdout, "SAX.endElement(%s)\n", (char *) name); +} + +/** + * charactersDebug: + * @ctxt: An XML parser context + * @ch: a xmlChar string + * @len: the number of xmlChar + * + * receiving some chars from the parser. + * Question: how much at a time ??? + */ +void +charactersDebug(void *ctx, const xmlChar *ch, int len) +{ + int i; + + fprintf(stdout, "SAX.characters("); + for (i = 0;(i < len) && (i < 30);i++) + fprintf(stdout, "%c", ch[i]); + fprintf(stdout, ", %d)\n", len); +} + +/** + * referenceDebug: + * @ctxt: An XML parser context + * @name: The entity name + * + * called when an entity reference is detected. + */ +void +referenceDebug(void *ctx, const xmlChar *name) +{ + fprintf(stdout, "SAX.reference(%s)\n", name); +} + +/** + * ignorableWhitespaceDebug: + * @ctxt: An XML parser context + * @ch: a xmlChar string + * @start: the first char in the string + * @len: the number of xmlChar + * + * receiving some ignorable whitespaces from the parser. + * Question: how much at a time ??? + */ +void +ignorableWhitespaceDebug(void *ctx, const xmlChar *ch, int len) +{ + fprintf(stdout, "SAX.ignorableWhitespace(%.30s, %d)\n", + (char *) ch, len); +} + +/** + * processingInstructionDebug: + * @ctxt: An XML parser context + * @target: the target name + * @data: the PI data's + * @len: the number of xmlChar + * + * A processing instruction has been parsed. 
+ */ +void +processingInstructionDebug(void *ctx, const xmlChar *target, + const xmlChar *data) +{ + fprintf(stdout, "SAX.processingInstruction(%s, %s)\n", + (char *) target, (char *) data); +} + +/** + * commentDebug: + * @ctxt: An XML parser context + * @value: the comment content + * + * A comment has been parsed. + */ +void +commentDebug(void *ctx, const xmlChar *value) +{ + fprintf(stdout, "SAX.comment(%s)\n", value); +} + +/** + * warningDebug: + * @ctxt: An XML parser context + * @msg: the message to display/transmit + * @...: extra parameters for the message display + * + * Display and format a warning messages, gives file, line, position and + * extra parameters. + */ +void +warningDebug(void *ctx, const char *msg, ...) +{ + va_list args; + + va_start(args, msg); + fprintf(stdout, "SAX.warning: "); + vfprintf(stdout, msg, args); + va_end(args); +} + +/** + * errorDebug: + * @ctxt: An XML parser context + * @msg: the message to display/transmit + * @...: extra parameters for the message display + * + * Display and format a error messages, gives file, line, position and + * extra parameters. + */ +void +errorDebug(void *ctx, const char *msg, ...) +{ + va_list args; + + va_start(args, msg); + fprintf(stdout, "SAX.error: "); + vfprintf(stdout, msg, args); + va_end(args); +} + +/** + * fatalErrorDebug: + * @ctxt: An XML parser context + * @msg: the message to display/transmit + * @...: extra parameters for the message display + * + * Display and format a fatalError messages, gives file, line, position and + * extra parameters. + */ +void +fatalErrorDebug(void *ctx, const char *msg, ...) 
+{ + va_list args; + + va_start(args, msg); + fprintf(stdout, "SAX.fatalError: "); + vfprintf(stdout, msg, args); + va_end(args); +} + +xmlSAXHandler debugSAXHandlerStruct = { + internalSubsetDebug, + isStandaloneDebug, + hasInternalSubsetDebug, + hasExternalSubsetDebug, + resolveEntityDebug, + getEntityDebug, + entityDeclDebug, + notationDeclDebug, + attributeDeclDebug, + elementDeclDebug, + unparsedEntityDeclDebug, + setDocumentLocatorDebug, + startDocumentDebug, + endDocumentDebug, + startElementDebug, + endElementDebug, + referenceDebug, + charactersDebug, + ignorableWhitespaceDebug, + processingInstructionDebug, + commentDebug, + warningDebug, + errorDebug, + fatalErrorDebug, + getParameterEntityDebug, +}; + +xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct; /************************************************************************ * * * Debug * * * ************************************************************************/ +void parseSAXFile(char *filename) { + htmlDocPtr doc; + /* + * Empty callbacks for checking + */ + doc = htmlSAXParseFile(filename, NULL, emptySAXHandler, NULL); + if (doc != NULL) { + fprintf(stdout, "htmlSAXParseFile returned non-NULL\n"); + xmlFreeDoc(doc); + } + + if (!noout) { + /* + * Debug callback + */ + doc = htmlSAXParseFile(filename, NULL, debugSAXHandler, NULL); + if (doc != NULL) { + fprintf(stdout, "htmlSAXParseFile returned non-NULL\n"); + xmlFreeDoc(doc); + } + } +} + void parseAndPrintFile(char *filename) { htmlDocPtr doc, tmp; @@ -85,10 +623,12 @@ void parseAndPrintFile(char *filename) { /* * print it. */ - if (!debug) - htmlDocDump(stdout, doc); - else - xmlDebugDumpDocument(stdout, doc); + if (!noout) { + if (!debug) + htmlDocDump(stdout, doc); + else + xmlDebugDumpDocument(stdout, doc); + } /* * free it. 
@@ -128,7 +668,7 @@ void parseAndPrintBuffer(xmlChar *buf) { } int main(int argc, char **argv) { - int i; + int i, count; int files = 0; for (i = 1; i < argc ; i++) { @@ -136,20 +676,43 @@ int main(int argc, char **argv) { debug++; else if ((!strcmp(argv[i], "-copy")) || (!strcmp(argv[i], "--copy"))) copy++; + else if ((!strcmp(argv[i], "-sax")) || (!strcmp(argv[i], "--sax"))) + sax++; + else if ((!strcmp(argv[i], "-noout")) || (!strcmp(argv[i], "--noout"))) + noout++; + else if ((!strcmp(argv[i], "-repeat")) || + (!strcmp(argv[i], "--repeat"))) + repeat++; } for (i = 1; i < argc ; i++) { if (argv[i][0] != '-') { - parseAndPrintFile(argv[i]); + if (repeat) { + for (count = 0;count < 100 * repeat;count++) { + if (sax) + parseSAXFile(argv[i]); + else + parseAndPrintFile(argv[i]); + } + } else { + if (sax) + parseSAXFile(argv[i]); + else + parseAndPrintFile(argv[i]); + } files ++; } } if (files == 0) { - printf("Usage : %s [--debug] [--copy] HTMLfiles ...\n", + printf("Usage : %s [--debug] [--copy] [--copy] HTMLfiles ...\n", argv[0]); printf("\tParse the HTML files and output the result of the parsing\n"); printf("\t--debug : dump a debug tree of the in-memory document\n"); printf("\t--copy : used to test the internal copy implementation\n"); + printf("\t--sax : debug the sequence of SAX callbacks\n"); + printf("\t--repeat : parse the file 100 times, for timing or profiling\n"); + printf("\t--noout : do not print the result\n"); } + xmlMemoryDump(); return(0); } diff --git a/testXPath.c b/testXPath.c index 236f5062..2c298493 100644 --- a/testXPath.c +++ b/testXPath.c @@ -90,7 +90,8 @@ void xmlXPAthDebugDumpNodeSet(FILE *output, xmlNodeSetPtr cur) { fprintf(output, "%d", i + 1); if (cur->nodeTab[i] == NULL) fprintf(output, " NULL\n"); - else if (cur->nodeTab[i]->type == XML_DOCUMENT_NODE) + else if ((cur->nodeTab[i]->type == XML_DOCUMENT_NODE) || + (cur->nodeTab[i]->type == XML_HTML_DOCUMENT_NODE)) fprintf(output, " /\n"); else if (cur->nodeTab[i]->type == 
XML_ATTRIBUTE_NODE) xmlDebugDumpAttr(output, (xmlAttrPtr)cur->nodeTab[i], 2); diff --git a/tester.c b/tester.c index cff492c8..7002614b 100644 --- a/tester.c +++ b/tester.c @@ -44,6 +44,7 @@ static int noent = 0; static int noout = 0; static int valid = 0; static int repeat = 0; +static int insert = 0; extern int xmlDoValidityCheckingDefaultValue; @@ -140,10 +141,33 @@ void parseAndPrintFile(char *filename) { xmlFreeDoc(tmp); } - /* - * print it. - */ - if (noout == 0) { + if (insert) { + const xmlChar* list[256]; + int nb, i; + xmlNodePtr node; + + if (doc->root != NULL) { + node = doc->root; + while ((node != NULL) && (node->last == NULL)) node = node->next; + if (node != NULL) { + nb = xmlValidGetValidElements(node->last, NULL, list, 256); + if (nb < 0) { + printf("could not get valid list of elements\n"); + } else if (nb == 0) { + printf("No element can be indersted under root\n"); + } else { + printf("%d element types can be indersted under root:\n", + nb); + for (i = 0;i < nb;i++) { + printf("%s\n", list[i]); + } + } + } + } + }else if (noout == 0) { + /* + * print it. 
+ */ if (!debug) xmlDocDump(stdout, doc); else @@ -211,6 +235,9 @@ int main(int argc, char **argv) { else if ((!strcmp(argv[i], "-valid")) || (!strcmp(argv[i], "--valid"))) valid++; + else if ((!strcmp(argv[i], "-insert")) || + (!strcmp(argv[i], "--insert"))) + insert++; else if ((!strcmp(argv[i], "-repeat")) || (!strcmp(argv[i], "--repeat"))) repeat++; @@ -238,6 +265,7 @@ int main(int argc, char **argv) { printf("\t--noout : don't output the result\n"); printf("\t--valid : validate the document in addition to std well-formed check\n"); printf("\t--repeat : parse the file 100 times, for timing or profiling\n"); + printf("\t--insert : test for valid insertions\n"); } xmlMemoryDump(); diff --git a/tree.c b/tree.c index d1caae9e..3a06dfa9 100644 --- a/tree.c +++ b/tree.c @@ -2013,6 +2013,7 @@ xmlNodeGetContent(xmlNodePtr cur) { case XML_ENTITY_NODE: case XML_COMMENT_NODE: case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: case XML_DOCUMENT_TYPE_NODE: case XML_NOTATION_NODE: return(NULL); @@ -2066,6 +2067,7 @@ xmlNodeSetContent(xmlNodePtr cur, const xmlChar *content) { cur->content = NULL; break; case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: case XML_DOCUMENT_TYPE_NODE: break; case XML_NOTATION_NODE: @@ -2115,6 +2117,7 @@ xmlNodeSetContentLen(xmlNodePtr cur, const xmlChar *content, int len) { cur->content = NULL; break; case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: case XML_DOCUMENT_TYPE_NODE: break; case XML_NOTATION_NODE: @@ -2180,6 +2183,7 @@ xmlNodeAddContentLen(xmlNodePtr cur, const xmlChar *content, int len) { if (content != NULL) cur->content = xmlStrncat(cur->content, content, len); case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: case XML_DOCUMENT_TYPE_NODE: break; case XML_NOTATION_NODE: diff --git a/tree.h b/tree.h index 6c7e795a..6c8ab102 100644 --- a/tree.h +++ b/tree.h @@ -35,7 +35,8 @@ typedef enum { XML_DOCUMENT_NODE= 9, XML_DOCUMENT_TYPE_NODE= 10, XML_DOCUMENT_FRAG_NODE= 11, - XML_NOTATION_NODE= 12 + XML_NOTATION_NODE= 12, + 
XML_HTML_DOCUMENT_NODE= 13 } xmlElementType; /* diff --git a/valid.c b/valid.c index 123a93c3..901b030a 100644 --- a/valid.c +++ b/valid.c @@ -2670,6 +2670,7 @@ xmlSprintfElementChilds(char *buf, xmlNodePtr node, int glob) { break; case XML_ATTRIBUTE_NODE: case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: case XML_DOCUMENT_TYPE_NODE: case XML_DOCUMENT_FRAG_NODE: case XML_NOTATION_NODE: @@ -2962,3 +2963,170 @@ xmlValidateDocument(xmlValidCtxtPtr ctxt, xmlDocPtr doc) { return(ret); } + +/************************************************************************ + * * + * Routines for dynamic validation editing * + * * + ************************************************************************/ + +/** + * xmlValidGetPotentialChildren: + * @ctree: an element content tree + * @list: an array to store the list of child names + * @len: a pointer to the number of element in the list + * @max: the size of the array + * + * Build/extend a list of potential children allowed by the content tree + * + * returns the number of element in the list, or -1 in case of error. 
+ */ + +int +xmlValidGetPotentialChildren(xmlElementContent *ctree, const xmlChar **list, + int *len, int max) { + int i; + + if ((ctree == NULL) || (list == NULL) || (len == NULL)) + return(-1); + if (*len >= max) return(*len); + + switch (ctree->type) { + case XML_ELEMENT_CONTENT_PCDATA: + for (i = 0; i < *len;i++) + if (!xmlStrcmp("#PCDATA", list[i])) return(*len); + list[(*len)++] = "#PCDATA"; + break; + case XML_ELEMENT_CONTENT_ELEMENT: + for (i = 0; i < *len;i++) + if (!xmlStrcmp(ctree->name, list[i])) return(*len); + list[(*len)++] = ctree->name; + break; + case XML_ELEMENT_CONTENT_SEQ: + xmlValidGetPotentialChildren(ctree->c1, list, len, max); + xmlValidGetPotentialChildren(ctree->c2, list, len, max); + break; + case XML_ELEMENT_CONTENT_OR: + xmlValidGetPotentialChildren(ctree->c1, list, len, max); + xmlValidGetPotentialChildren(ctree->c2, list, len, max); + break; + } + + return(*len); +} + +/** + * xmlValidGetValidElements: + * @prev: an element to insert after + * @next: an element to insert next + * @list: an array to store the list of child names + * @max: the size of the array + * + * This function returns the list of authorized children to insert + * within an existing tree while respecting the validity constraints + * forced by the Dtd. The insertion point is defined using @prev and + * @next in the following ways: + * to insert before 'node': xmlValidGetValidElements(node->prev, node, ... + * to insert next 'node': xmlValidGetValidElements(node, node->next, ... + * to replace 'node': xmlValidGetValidElements(node->prev, node->next, ... + * to prepend a child to 'node': xmlValidGetValidElements(NULL, node->childs, + * to append a child to 'node': xmlValidGetValidElements(node->last, NULL, ... + * + * pointers to the element names are inserted at the beginning of the array + * and do not need to be freed. + * + * returns the number of element in the list, or -1 in case of error. 
If + * the function returns the value @max the caller is invited to grow the + * receiving array and retry. + */ + +int +xmlValidGetValidElements(xmlNode *prev, xmlNode *next, const xmlChar **list, + int max) { + int nb_valid_elements = 0; + const xmlChar *elements[256]; + int nb_elements = 0, i; + + xmlNode *ref_node; + xmlNode *parent; + xmlNode *test_node; + + xmlNode *prev_next; + xmlNode *next_prev; + xmlNode *parent_childs; + xmlNode *parent_last; + + xmlElement *element_desc; + + if (prev == NULL && next == NULL) + return(-1); + + if (list == NULL) return(-1); + if (max <= 0) return(-1); + + nb_valid_elements = 0; + ref_node = prev ? prev : next; + parent = ref_node->parent; + + /* + * Retrieves the parent element declaration + */ + element_desc = xmlGetDtdElementDesc(parent->doc->intSubset, + parent->name); + if ((element_desc == NULL) && (parent->doc->extSubset != NULL)) + element_desc = xmlGetDtdElementDesc(parent->doc->extSubset, + parent->name); + if (element_desc == NULL) return(-1); + + /* + * Do a backup of the current tree structure + */ + prev_next = prev ? prev->next : NULL; + next_prev = next ? 
next->prev : NULL; + parent_childs = parent->childs; + parent_last = parent->last; + + /* + * Creates a dummy node and insert it into the tree + */ + test_node = xmlNewNode (NULL, ""); + test_node->doc = ref_node->doc; + test_node->parent = parent; + test_node->prev = prev; + test_node->next = next; + + if (prev) prev->next = test_node; + else parent->childs = test_node; + + if (next) next->prev = test_node; + else parent->last = test_node; + + /* + * Insert each potential child node and check if the parent is + * still valid + */ + nb_elements = xmlValidGetPotentialChildren(element_desc->content, + elements, &nb_elements, 256); + + for (i = 0;i < nb_elements;i++) { + test_node->name = elements[i]; + if (xmlValidateOneElement(NULL, parent->doc, parent)) { + int j; + + for (j = 0; j < nb_valid_elements;j++) + if (!xmlStrcmp(elements[i], list[j])) break; + list[nb_valid_elements++] = elements[i]; + if (nb_valid_elements >= max) break; + } + } + + /* + * Restore the tree structure + */ + if (prev) prev->next = prev_next; + if (next) next->prev = next_prev; + parent->childs = parent_childs; + parent->last = parent_last; + + return(nb_valid_elements); +} diff --git a/valid.h b/valid.h index 73a2a543..8c016a93 100644 --- a/valid.h +++ b/valid.h @@ -218,6 +218,14 @@ xmlNotationPtr xmlGetDtdNotationDesc (xmlDtdPtr dtd, xmlElementPtr xmlGetDtdElementDesc (xmlDtdPtr dtd, const xmlChar *name); +int xmlValidGetValidElements(xmlNode *prev, + xmlNode *next, + const xmlChar **list, + int max); +int xmlValidGetPotentialChildren(xmlElementContent *ctree, + const xmlChar **list, + int *len, + int max); #ifdef __cplusplus } #endif diff --git a/xmlmemory.c b/xmlmemory.c index ca9e9c01..d88fbb10 100644 --- a/xmlmemory.c +++ b/xmlmemory.c @@ -23,6 +23,10 @@ #ifdef HAVE_MALLOC_H #include #endif +#ifdef HAVE_STDLIB_H +#include +#endif + #include "xmlmemory.h" @@ -78,7 +82,10 @@ typedef struct memnod { static unsigned long debugMemSize = 0; +static unsigned long debugMaxMemSize = 0; static 
int block=0; +int xmlMemStopAtBlock = 0; +int xmlMemInitialized = 0; #ifdef MEM_LIST static MEMHDR *memlist = NULL; #endif @@ -94,11 +101,24 @@ void debugmem_list_delete(MEMHDR *); #define TEST_POINT #endif +/** + * xmlMallocBreakpoint: + * + * Breakpoint to use in conjunction with xmlMemStopAtBlock. When the block + * number reaches the specified value this function is called. One need to add a breakpoint + * to it to get the context in which the given block is allocated. + */ + +void +xmlMallocBreakpoint(void) { + fprintf(stderr, "xmlMallocBreakpoint reached on block %d\n", xmlMemStopAtBlock); +} + /** * xmlMallocLoc: * @size: an int specifying the size in byte to allocate. * @file: the file name or NULL - * @file: the line number + @file: the line number * * a malloc() equivalent, with logging of the allocation info. * @@ -110,6 +130,7 @@ xmlMallocLoc(int size, const char * file, int line) { MEMHDR *p; + if (!xmlMemInitialized) xmlInitMemory(); #ifdef DEBUG_MEMORY fprintf(stderr, "Malloc(%d)\n",size); #endif @@ -129,6 +150,7 @@ xmlMallocLoc(int size, const char * file, int line) p->mh_file = file; p->mh_line = line; debugMemSize += size; + if (debugMemSize > debugMaxMemSize) debugMaxMemSize = debugMemSize; #ifdef MEM_LIST debugmem_list_add(p); #endif @@ -137,6 +159,7 @@ xmlMallocLoc(int size, const char * file, int line) fprintf(stderr, "Malloc(%d) Ok\n",size); #endif + if (xmlMemStopAtBlock == block) xmlMallocBreakpoint(); TEST_POINT @@ -176,6 +199,7 @@ xmlReallocLoc(void *ptr,int size, const char * file, int line) MEMHDR *p; unsigned long number; + if (!xmlMemInitialized) xmlInitMemory(); TEST_POINT p = CLIENT_2_HDR(ptr); @@ -201,6 +225,7 @@ xmlReallocLoc(void *ptr,int size, const char * file, int line) p->mh_file = file; p->mh_line = line; debugMemSize += size; + if (debugMemSize > debugMaxMemSize) debugMaxMemSize = debugMemSize; #ifdef MEM_LIST debugmem_list_add(p); #endif @@ -281,6 +306,7 @@ xmlMemStrdupLoc(const char *str, const char *file, int line) 
size_t size = strlen(str) + 1; MEMHDR *p; + if (!xmlMemInitialized) xmlInitMemory(); TEST_POINT p = (MEMHDR *) malloc(RESERVE_SIZE+size); @@ -294,11 +320,14 @@ xmlMemStrdupLoc(const char *str, const char *file, int line) p->mh_file = file; p->mh_line = line; debugMemSize += size; + if (debugMemSize > debugMaxMemSize) debugMaxMemSize = debugMemSize; #ifdef MEM_LIST debugmem_list_add(p); #endif s = HDR_2_CLIENT(p); + if (xmlMemStopAtBlock == block) xmlMallocBreakpoint(); + if (s != NULL) strcpy(s,str); else @@ -365,7 +394,8 @@ xmlMemDisplay(FILE *fp) #endif - fprintf(fp," MEMORY ALLOCATED : %lu\n",debugMemSize); + fprintf(fp," MEMORY ALLOCATED : %lu, MAX was %lu\n", + debugMemSize, debugMaxMemSize); fprintf(fp,"BLOCK NUMBER SIZE TYPE\n"); idx = 0; p = memlist; @@ -473,6 +503,15 @@ int xmlInitMemory(void) { int ret; + +#ifdef HAVE_STDLIB_H + char *breakpoint; + + breakpoint = getenv("XML_MEM_BREAKPOINT"); + if (breakpoint != NULL) { + sscanf(breakpoint, "%d", &xmlMemStopAtBlock); + } +#endif #ifdef DEBUG_MEMORY fprintf(stderr, "xmlInitMemory() Ok\n"); diff --git a/xpath.c b/xpath.c index 3a703bb7..f646e9f7 100644 --- a/xpath.c +++ b/xpath.c @@ -562,7 +562,8 @@ xmlXPathDebugNodeSet(FILE *output, xmlNodeSetPtr obj) { fprintf(output, " NULL !\n"); return; } - if (obj->nodeTab[i]->type == XML_DOCUMENT_NODE) + if ((obj->nodeTab[i]->type == XML_DOCUMENT_NODE) || + (obj->nodeTab[i]->type == XML_HTML_DOCUMENT_NODE)) fprintf(output, " /"); else if (obj->nodeTab[i]->name == NULL) fprintf(output, " noname!"); @@ -1471,7 +1472,8 @@ xmlXPathNextChild(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) { return(ctxt->context->doc->root); return(ctxt->context->node->childs); } - if (ctxt->context->node->type == XML_DOCUMENT_NODE) + if ((ctxt->context->node->type == XML_DOCUMENT_NODE) || + (ctxt->context->node->type == XML_HTML_DOCUMENT_NODE)) return(NULL); return(cur->next); }