From 8874b94cd2e2086f4cefe026286e0f64cac6ec9a Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Thu, 25 Aug 2005 13:19:21 +0000 Subject: [PATCH] added a parser XML_PARSE_COMPACT option to allocate small text nodes (less * HTMLparser.c parser.c SAX2.c debugXML.c tree.c valid.c xmlreader.c xmllint.c include/libxml/HTMLparser.h include/libxml/parser.h: added a parser XML_PARSE_COMPACT option to allocate small text nodes (less than 8 bytes on 32bits, less than 16bytes on 64bits) directly within the node, various changes to cope with this. * result/XPath/tests/* result/XPath/xptr/* result/xmlid/*: this slightly change the output Daniel --- ChangeLog | 10 ++++++ HTMLparser.c | 4 +++ SAX2.c | 18 +++++++--- debugXML.c | 14 +++++--- include/libxml/HTMLparser.h | 3 +- include/libxml/parser.h | 3 +- parser.c | 11 +++++-- result/XPath/tests/chaptersbase | 4 +-- result/XPath/tests/idsimple | 2 +- result/XPath/tests/langsimple | 16 ++++----- result/XPath/tests/mixedpat | 32 +++++++++--------- result/XPath/tests/simpleabbr | 8 ++--- result/XPath/tests/simplebase | 4 +-- result/XPath/tests/usr1check | 4 +-- result/XPath/xptr/strrange | 14 ++++---- result/XPath/xptr/strrange2 | 2 +- result/xmlid/id_err2.xml | 2 +- result/xmlid/id_tst1.xml | 2 +- result/xmlid/id_tst2.xml | 2 +- result/xmlid/id_tst3.xml | 2 +- result/xmlid/id_tst4.xml | 2 +- runtest.c | 2 +- testXPath.c | 4 +-- tree.c | 58 +++++++++++++++++++++------------ valid.c | 48 +++++++++++---------------- xmllint.c | 6 +++- xmlreader.c | 16 +++++++-- 27 files changed, 176 insertions(+), 117 deletions(-) diff --git a/ChangeLog b/ChangeLog index c928c974..a32e9f7b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +Thu Aug 25 15:14:56 CEST 2005 Daniel Veillard + + * HTMLparser.c parser.c SAX2.c debugXML.c tree.c valid.c xmlreader.c + xmllint.c include/libxml/HTMLparser.h include/libxml/parser.h: + added a parser XML_PARSE_COMPACT option to allocate small + text nodes (less than 8 bytes on 32bits, less than 16bytes on 
64bits) + directly within the node, various changes to cope with this. + * result/XPath/tests/* result/XPath/xptr/* result/xmlid/*: this + slightly change the output + Thu Aug 25 12:16:26 CEST 2005 Daniel Veillard * configure.in: patch from Andrew W. Nosenko, use se $GCC = 'yes' diff --git a/HTMLparser.c b/HTMLparser.c index fe36c89f..6b8b5624 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -5820,6 +5820,10 @@ htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options) ctxt->recovery = 1; } else ctxt->recovery = 0; + if (options & HTML_PARSE_COMPACT) { + ctxt->options |= HTML_PARSE_COMPACT; + options -= HTML_PARSE_COMPACT; + } ctxt->dictNames = 0; return (options); } diff --git a/SAX2.c b/SAX2.c index 241e90de..a73fa1fa 100644 --- a/SAX2.c +++ b/SAX2.c @@ -1777,6 +1777,7 @@ xmlSAX2TextNode(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) { xmlErrMemory(ctxt, "xmlSAX2Characters"); return(NULL); } + memset(ret, 0, sizeof(xmlNode)); /* * intern the formatting blanks found between tags, or the * very short strings @@ -1784,7 +1785,14 @@ xmlSAX2TextNode(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) { if (ctxt->dictNames) { xmlChar cur = str[len]; - if ((len <= 3) && ((cur == '"') || (cur == '\'') || + if ((len < (int) (2 * sizeof(void *))) && + (ctxt->options & XML_PARSE_COMPACT)) { + /* store the string in the node overwriting properties and nsDef */ + xmlChar *tmp = (xmlChar *) &(ret->properties); + memcpy(tmp, str, len); + tmp[len] = 0; + intern = tmp; + } else if ((len <= 3) && ((cur == '"') || (cur == '\'') || ((cur == '<') && (str[len + 1] != '!')))) { intern = xmlDictLookup(ctxt->dict, str, len); } else if (IS_BLANK_CH(*str) && (len < 60) && (cur == '<') && @@ -1798,7 +1806,6 @@ xmlSAX2TextNode(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) { } } skip: - memset(ret, 0, sizeof(xmlNode)); ret->type = XML_TEXT_NODE; ret->name = xmlStringText; @@ -2407,8 +2414,11 @@ xmlSAX2Characters(void *ctx, const xmlChar *ch, int len) * We try to minimaze realloc() uses 
and avoid copying * and recomputing length over and over. */ - if ((ctxt->nodemem == ctxt->nodelen + 1) && - (xmlDictOwns(ctxt->dict, lastChild->content))) { + if (lastChild->content == (xmlChar *)&(lastChild->properties)) { + lastChild->content = xmlStrdup(lastChild->content); + lastChild->properties = NULL; + } else if ((ctxt->nodemem == ctxt->nodelen + 1) && + (xmlDictOwns(ctxt->dict, lastChild->content))) { lastChild->content = xmlStrdup(lastChild->content); } if (ctxt->nodelen + len >= ctxt->nodemem) { diff --git a/debugXML.c b/debugXML.c index 3cb2848b..24c7c4df 100644 --- a/debugXML.c +++ b/debugXML.c @@ -902,9 +902,15 @@ xmlCtxtDumpOneNode(xmlDebugCtxtPtr ctxt, xmlNodePtr node) if (!ctxt->check) { xmlCtxtDumpSpaces(ctxt); if (node->name == (const xmlChar *) xmlStringTextNoenc) - fprintf(ctxt->output, "TEXT no enc\n"); + fprintf(ctxt->output, "TEXT no enc"); else - fprintf(ctxt->output, "TEXT\n"); + fprintf(ctxt->output, "TEXT"); + if (node->content == (xmlChar *) &(node->properties)) + fprintf(ctxt->output, " compact\n"); + else if (xmlDictOwns(ctxt->dict, node->content) == 1) + fprintf(ctxt->output, " interned\n"); + else + fprintf(ctxt->output, "\n"); } break; case XML_CDATA_SECTION_NODE: @@ -1005,9 +1011,9 @@ xmlCtxtDumpOneNode(xmlDebugCtxtPtr ctxt, xmlNodePtr node) fprintf(ctxt->output, "PBM: doc == NULL !!!\n"); } ctxt->depth++; - if (node->nsDef != NULL) + if ((node->type == XML_ELEMENT_NODE) && (node->nsDef != NULL)) xmlCtxtDumpNamespaceList(ctxt, node->nsDef); - if (node->properties != NULL) + if ((node->type == XML_ELEMENT_NODE) && (node->properties != NULL)) xmlCtxtDumpAttrList(ctxt, node->properties); if (node->type != XML_ENTITY_REF_NODE) { if ((node->type != XML_ELEMENT_NODE) && (node->content != NULL)) { diff --git a/include/libxml/HTMLparser.h b/include/libxml/HTMLparser.h index 2c1e8d21..8477efbb 100644 --- a/include/libxml/HTMLparser.h +++ b/include/libxml/HTMLparser.h @@ -178,7 +178,8 @@ typedef enum { HTML_PARSE_NOWARNING= 1<<6, /* 
suppress warning reports */ HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */ HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */ - HTML_PARSE_NONET = 1<<11 /* Forbid network access */ + HTML_PARSE_NONET = 1<<11,/* Forbid network access */ + HTML_PARSE_COMPACT = 1<<16 /* compact small text nodes */ } htmlParserOption; XMLPUBFUN void XMLCALL diff --git a/include/libxml/parser.h b/include/libxml/parser.h index 6e750e71..6d6ea375 100644 --- a/include/libxml/parser.h +++ b/include/libxml/parser.h @@ -1088,7 +1088,8 @@ typedef enum { XML_PARSE_NODICT = 1<<12,/* Do not reuse the context dictionnary */ XML_PARSE_NSCLEAN = 1<<13,/* remove redundant namespaces declarations */ XML_PARSE_NOCDATA = 1<<14,/* merge CDATA as text nodes */ - XML_PARSE_NOXINCNODE= 1<<15 /* do not generate XINCLUDE START/END nodes */ + XML_PARSE_NOXINCNODE= 1<<15,/* do not generate XINCLUDE START/END nodes */ + XML_PARSE_COMPACT = 1<<16 /* compact small text nodes */ } xmlParserOption; XMLPUBFUN void XMLCALL diff --git a/parser.c b/parser.c index 44e61802..6a905a0d 100644 --- a/parser.c +++ b/parser.c @@ -684,7 +684,7 @@ xmlAddDefAttrs(xmlParserCtxtPtr ctxt, } /* - * plit the element name into prefix:localname , the string found + * Split the element name into prefix:localname , the string found * are within the DTD and hen not associated to namespace names. 
*/ name = xmlSplitQName3(fullattr, &len); @@ -11319,8 +11319,9 @@ xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt, *lst = cur; while (cur != NULL) { #ifdef LIBXML_VALID_ENABLED - if (oldctxt->validate && oldctxt->wellFormed && - oldctxt->myDoc && oldctxt->myDoc->intSubset) { + if ((oldctxt->validate) && (oldctxt->wellFormed) && + (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) && + (cur->type == XML_ELEMENT_NODE)) { oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt, oldctxt->myDoc, cur); } @@ -12843,6 +12844,10 @@ xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options) ctxt->options |= XML_PARSE_NONET; options -= XML_PARSE_NONET; } + if (options & XML_PARSE_COMPACT) { + ctxt->options |= XML_PARSE_COMPACT; + options -= XML_PARSE_COMPACT; + } ctxt->linenumbers = 1; return (options); } diff --git a/result/XPath/tests/chaptersbase b/result/XPath/tests/chaptersbase index 03c65670..10636c30 100644 --- a/result/XPath/tests/chaptersbase +++ b/result/XPath/tests/chaptersbase @@ -72,10 +72,10 @@ Set contains 1 nodes: Expression: /child::EXAMPLE/child::head/node() Object is a Node Set : Set contains 3 nodes: -1 TEXT +1 TEXT compact content= 2 ELEMENT title -3 TEXT +3 TEXT compact content= ======================== diff --git a/result/XPath/tests/idsimple b/result/XPath/tests/idsimple index 891b52e5..76d9286f 100644 --- a/result/XPath/tests/idsimple +++ b/result/XPath/tests/idsimple @@ -5,7 +5,7 @@ Object is a Node Set : Set contains 1 nodes: 1 ELEMENT EXAMPLE ATTRIBUTE id - TEXT + TEXT compact content=root ATTRIBUTE prop1 TEXT diff --git a/result/XPath/tests/langsimple b/result/XPath/tests/langsimple index d8d7afdb..02544625 100644 --- a/result/XPath/tests/langsimple +++ b/result/XPath/tests/langsimple @@ -5,30 +5,30 @@ Object is a Node Set : Set contains 9 nodes: 1 ELEMENT b ATTRIBUTE lang - TEXT + TEXT compact content=en 2 ELEMENT x 3 ELEMENT x 4 ELEMENT para ATTRIBUTE lang - TEXT + TEXT compact content=en 5 ELEMENT div ATTRIBUTE lang - TEXT + TEXT compact 
content=en 6 ELEMENT para 7 ELEMENT para ATTRIBUTE lang - TEXT + TEXT compact content=EN 8 ELEMENT para ATTRIBUTE lang - TEXT + TEXT compact content=en-us 9 ELEMENT para ATTRIBUTE lang - TEXT + TEXT compact content=EN-US ======================== @@ -37,11 +37,11 @@ Object is a Node Set : Set contains 2 nodes: 1 ELEMENT para ATTRIBUTE lang - TEXT + TEXT compact content=en-us 2 ELEMENT para ATTRIBUTE lang - TEXT + TEXT compact content=EN-US ======================== diff --git a/result/XPath/tests/mixedpat b/result/XPath/tests/mixedpat index 3be0c497..f3d746ac 100644 --- a/result/XPath/tests/mixedpat +++ b/result/XPath/tests/mixedpat @@ -5,7 +5,7 @@ Object is a Node Set : Set contains 1 nodes: 1 ELEMENT s ATTRIBUTE p - TEXT + TEXT compact content=root ======================== @@ -14,11 +14,11 @@ Object is a Node Set : Set contains 2 nodes: 1 ELEMENT s ATTRIBUTE p - TEXT + TEXT compact content=root 2 ELEMENT s ATTRIBUTE p - TEXT + TEXT compact content=p1 ======================== @@ -27,11 +27,11 @@ Object is a Node Set : Set contains 2 nodes: 1 ELEMENT s ATTRIBUTE p - TEXT + TEXT compact content=root 2 ELEMENT s ATTRIBUTE p - TEXT + TEXT compact content=p1 ======================== @@ -40,11 +40,11 @@ Object is a Node Set : Set contains 2 nodes: 1 ELEMENT s ATTRIBUTE p - TEXT + TEXT compact content=root 2 ELEMENT s ATTRIBUTE p - TEXT + TEXT compact content=p1 ======================== @@ -53,15 +53,15 @@ Object is a Node Set : Set contains 3 nodes: 1 ELEMENT s ATTRIBUTE p - TEXT + TEXT compact content=root 2 ELEMENT s ATTRIBUTE p - TEXT + TEXT compact content=p1 3 ELEMENT s ATTRIBUTE p - TEXT + TEXT compact content=p2 ======================== @@ -70,16 +70,16 @@ Object is a Node Set : Set contains 4 nodes: 1 ELEMENT s ATTRIBUTE p - TEXT + TEXT compact content=root 2 ELEMENT p1 3 ELEMENT s ATTRIBUTE p - TEXT + TEXT compact content=p1 4 ELEMENT s ATTRIBUTE p - TEXT + TEXT compact content=p2 ======================== @@ -88,14 +88,14 @@ Object is a Node Set : Set contains 4 
nodes: 1 ELEMENT s ATTRIBUTE p - TEXT + TEXT compact content=root 2 ELEMENT p1 3 ELEMENT s ATTRIBUTE p - TEXT + TEXT compact content=p1 4 ELEMENT s ATTRIBUTE p - TEXT + TEXT compact content=p2 diff --git a/result/XPath/tests/simpleabbr b/result/XPath/tests/simpleabbr index 85da41a2..61378127 100644 --- a/result/XPath/tests/simpleabbr +++ b/result/XPath/tests/simpleabbr @@ -45,7 +45,7 @@ Object is a Node Set : Set contains 2 nodes: 1 TEXT content=bla bla bla ... -2 TEXT +2 TEXT compact content=... ======================== @@ -54,7 +54,7 @@ Object is a Node Set : Set contains 2 nodes: 1 TEXT content=bla bla bla ... -2 TEXT +2 TEXT compact content=... ======================== @@ -63,7 +63,7 @@ Object is a Node Set : Set contains 2 nodes: 1 TEXT content=bla bla bla ... -2 TEXT +2 TEXT compact content=... ======================== @@ -77,5 +77,5 @@ Set contains 1 nodes: Expression: (//p/text())[position()=last()] Object is a Node Set : Set contains 1 nodes: -1 TEXT +1 TEXT compact content=... 
diff --git a/result/XPath/tests/simplebase b/result/XPath/tests/simplebase index 38fa1449..3ba17493 100644 --- a/result/XPath/tests/simplebase +++ b/result/XPath/tests/simplebase @@ -53,10 +53,10 @@ Set contains 1 nodes: Expression: /child::EXAMPLE/child::head/node() Object is a Node Set : Set contains 3 nodes: -1 TEXT +1 TEXT compact content= 2 ELEMENT title -3 TEXT +3 TEXT compact content= ======================== diff --git a/result/XPath/tests/usr1check b/result/XPath/tests/usr1check index 03089916..f284198b 100644 --- a/result/XPath/tests/usr1check +++ b/result/XPath/tests/usr1check @@ -5,8 +5,8 @@ Object is a Node Set : Set contains 1 nodes: 1 ELEMENT ITEM ATTRIBUTE monto - TEXT + TEXT compact content=50.12 ATTRIBUTE divisa - TEXT + TEXT compact content=DOL diff --git a/result/XPath/xptr/strrange b/result/XPath/xptr/strrange index c14ed917..9c0096d5 100644 --- a/result/XPath/xptr/strrange +++ b/result/XPath/xptr/strrange @@ -44,7 +44,7 @@ Expression: xpointer(string-range(//p, 'difficult')) Object is a Location Set: 1 : Object is a range : From index 3 in node - TEXT + TEXT compact content=a diff To index 4 in node TEXT @@ -56,10 +56,10 @@ Expression: xpointer(string-range(//p, 'spanning')) Object is a Location Set: 1 : Object is a range : From index 3 in node - TEXT + TEXT compact content=a span To index 3 in node - TEXT + TEXT compact content=ing one @@ -80,17 +80,17 @@ Expression: xpointer(string-range(//seq, '')) Object is a Location Set: 1 : Object is a collapsed range : index 1 in node - TEXT + TEXT compact content=123 2 : Object is a collapsed range : index 2 in node - TEXT + TEXT compact content=123 3 : Object is a collapsed range : index 3 in node - TEXT + TEXT compact content=123 4 : Object is a collapsed range : index 4 in node - TEXT + TEXT compact content=123 diff --git a/result/XPath/xptr/strrange2 b/result/XPath/xptr/strrange2 index ea6ee45a..fd6c0ea4 100644 --- a/result/XPath/xptr/strrange2 +++ b/result/XPath/xptr/strrange2 @@ -60,5 +60,5 @@ 
Expression: xpointer(string-range(//p, 'difficult', 1, 0)) Object is a Location Set: 1 : Object is a collapsed range : index 3 in node - TEXT + TEXT compact content=a diff diff --git a/result/xmlid/id_err2.xml b/result/xmlid/id_err2.xml index 33ee896d..c7956dca 100644 --- a/result/xmlid/id_err2.xml +++ b/result/xmlid/id_err2.xml @@ -2,5 +2,5 @@ Object is a Node Set : Set contains 1 nodes: 1 ELEMENT foo ATTRIBUTE id - TEXT + TEXT compact content=bar diff --git a/result/xmlid/id_tst1.xml b/result/xmlid/id_tst1.xml index 33ee896d..c7956dca 100644 --- a/result/xmlid/id_tst1.xml +++ b/result/xmlid/id_tst1.xml @@ -2,5 +2,5 @@ Object is a Node Set : Set contains 1 nodes: 1 ELEMENT foo ATTRIBUTE id - TEXT + TEXT compact content=bar diff --git a/result/xmlid/id_tst2.xml b/result/xmlid/id_tst2.xml index 33ee896d..c7956dca 100644 --- a/result/xmlid/id_tst2.xml +++ b/result/xmlid/id_tst2.xml @@ -2,5 +2,5 @@ Object is a Node Set : Set contains 1 nodes: 1 ELEMENT foo ATTRIBUTE id - TEXT + TEXT compact content=bar diff --git a/result/xmlid/id_tst3.xml b/result/xmlid/id_tst3.xml index e2f82286..bf020eb2 100644 --- a/result/xmlid/id_tst3.xml +++ b/result/xmlid/id_tst3.xml @@ -2,5 +2,5 @@ Object is a Node Set : Set contains 1 nodes: 1 ELEMENT o:o ATTRIBUTE id - TEXT + TEXT compact content=bar diff --git a/result/xmlid/id_tst4.xml b/result/xmlid/id_tst4.xml index 33ee896d..c7956dca 100644 --- a/result/xmlid/id_tst4.xml +++ b/result/xmlid/id_tst4.xml @@ -2,5 +2,5 @@ Object is a Node Set : Set contains 1 nodes: 1 ELEMENT foo ATTRIBUTE id - TEXT + TEXT compact content=bar diff --git a/runtest.c b/runtest.c index d03322dc..897cb9b1 100644 --- a/runtest.c +++ b/runtest.c @@ -4306,7 +4306,7 @@ launchTests(testDescPtr tst) { testErrorsSize = 0; testErrors[0] = 0; res = tst->func(globbuf.gl_pathv[i], result, error, - tst->options); + tst->options | XML_PARSE_COMPACT); xmlResetLastError(); if (res != 0) { fprintf(stderr, "File %s generated an error\n", diff --git a/testXPath.c b/testXPath.c 
index 8086a810..677419aa 100644 --- a/testXPath.c +++ b/testXPath.c @@ -178,9 +178,9 @@ int main(int argc, char **argv) { } if (document == NULL) { if (filename == NULL) - document = xmlReadDoc(buffer,NULL,NULL,0); + document = xmlReadDoc(buffer,NULL,NULL,XML_PARSE_COMPACT); else - document = xmlReadFile(filename,NULL,0); + document = xmlReadFile(filename,NULL,XML_PARSE_COMPACT); } for (i = 1; i < argc ; i++) { if ((!strcmp(argv[i], "-i")) || (!strcmp(argv[i], "--input"))) { diff --git a/tree.c b/tree.c index 3e5378fc..1da8400c 100644 --- a/tree.c +++ b/tree.c @@ -3287,7 +3287,8 @@ xmlFreeNodeList(xmlNodePtr cur) { if ((cur->type != XML_ELEMENT_NODE) && (cur->type != XML_XINCLUDE_START) && (cur->type != XML_XINCLUDE_END) && - (cur->type != XML_ENTITY_REF_NODE)) { + (cur->type != XML_ENTITY_REF_NODE) && + (cur->content != (xmlChar *) &(cur->properties))) { DICT_FREE(cur->content) } if (((cur->type == XML_ELEMENT_NODE) || @@ -3356,7 +3357,8 @@ xmlFreeNode(xmlNodePtr cur) { (cur->content != NULL) && (cur->type != XML_ENTITY_REF_NODE) && (cur->type != XML_XINCLUDE_END) && - (cur->type != XML_XINCLUDE_START)) { + (cur->type != XML_XINCLUDE_START) && + (cur->content != (xmlChar *) &(cur->properties))) { DICT_FREE(cur->content) } @@ -3811,7 +3813,7 @@ xmlStaticCopyNode(const xmlNodePtr node, xmlDocPtr doc, xmlNodePtr parent, if (!extended) goto out; - if (node->nsDef != NULL) + if ((node->type == XML_ELEMENT_NODE) && (node->nsDef != NULL)) ret->nsDef = xmlCopyNamespaceList(node->nsDef); if (node->ns != NULL) { @@ -3838,7 +3840,7 @@ xmlStaticCopyNode(const xmlNodePtr node, xmlDocPtr doc, xmlNodePtr parent, ret->ns = ns; } } - if (node->properties != NULL) + if ((node->type == XML_ELEMENT_NODE) && (node->properties != NULL)) ret->properties = xmlCopyPropList(ret, node->properties); if (node->type == XML_ENTITY_REF_NODE) { if ((doc == NULL) || (node->doc != doc)) { @@ -5106,7 +5108,8 @@ xmlNodeSetContent(xmlNodePtr cur, const xmlChar *content) { case XML_ENTITY_NODE: case 
XML_PI_NODE: case XML_COMMENT_NODE: - if (cur->content != NULL) { + if ((cur->content != NULL) && + (cur->content != (xmlChar *) &(cur->properties))) { if (!((cur->doc != NULL) && (cur->doc->dict != NULL) && (xmlDictOwns(cur->doc->dict, cur->content)))) xmlFree(cur->content); @@ -5117,6 +5120,8 @@ xmlNodeSetContent(xmlNodePtr cur, const xmlChar *content) { cur->content = xmlStrdup(content); } else cur->content = NULL; + cur->properties = NULL; + cur->nsDef = NULL; break; case XML_DOCUMENT_NODE: case XML_HTML_DOCUMENT_NODE: @@ -5178,8 +5183,11 @@ xmlNodeSetContentLen(xmlNodePtr cur, const xmlChar *content, int len) { case XML_PI_NODE: case XML_COMMENT_NODE: case XML_NOTATION_NODE: - if (cur->content != NULL) { - xmlFree(cur->content); + if ((cur->content != NULL) && + (cur->content != (xmlChar *) &(cur->properties))) { + if (!((cur->doc != NULL) && (cur->doc->dict != NULL) && + (xmlDictOwns(cur->doc->dict, cur->content)))) + xmlFree(cur->content); } if (cur->children != NULL) xmlFreeNodeList(cur->children); cur->children = cur->last = NULL; @@ -5187,6 +5195,8 @@ xmlNodeSetContentLen(xmlNodePtr cur, const xmlChar *content, int len) { cur->content = xmlStrndup(content, len); } else cur->content = NULL; + cur->properties = NULL; + cur->nsDef = NULL; break; case XML_DOCUMENT_NODE: case XML_DTD_NODE: @@ -5257,10 +5267,12 @@ xmlNodeAddContentLen(xmlNodePtr cur, const xmlChar *content, int len) { case XML_COMMENT_NODE: case XML_NOTATION_NODE: if (content != NULL) { - if ((cur->doc != NULL) && (cur->doc->dict != NULL) && - xmlDictOwns(cur->doc->dict, cur->content)) { - cur->content = - xmlStrncatNew(cur->content, content, len); + if ((cur->content == (xmlChar *) &(cur->properties)) || + ((cur->doc != NULL) && (cur->doc->dict != NULL) && + xmlDictOwns(cur->doc->dict, cur->content))) { + cur->content = xmlStrncatNew(cur->content, content, len); + cur->properties = NULL; + cur->nsDef = NULL; break; } cur->content = xmlStrncat(cur->content, content, len); @@ -5903,7 +5915,8 @@ 
xmlHasProp(xmlNodePtr node, const xmlChar *name) { xmlAttrPtr prop; xmlDocPtr doc; - if ((node == NULL) || (name == NULL)) return(NULL); + if ((node == NULL) || (node->type != XML_ELEMENT_NODE) || (name == NULL)) + return(NULL); /* * Check on the properties attached to the node */ @@ -5959,7 +5972,7 @@ xmlHasNsProp(xmlNodePtr node, const xmlChar *name, const xmlChar *nameSpace) { xmlDocPtr doc; #endif /* LIBXML_TREE_ENABLED */ - if (node == NULL) + if ((node == NULL) || (node->type != XML_ELEMENT_NODE) || (name == NULL)) return(NULL); prop = node->properties; @@ -6057,7 +6070,9 @@ xmlGetProp(xmlNodePtr node, const xmlChar *name) { xmlAttrPtr prop; xmlDocPtr doc; - if ((node == NULL) || (name == NULL)) return(NULL); + if ((node == NULL) || (node->type != XML_ELEMENT_NODE) || (name == NULL)) + return(NULL); + /* * Check on the properties attached to the node */ @@ -6114,7 +6129,8 @@ xmlGetNoNsProp(xmlNodePtr node, const xmlChar *name) { xmlAttrPtr prop; xmlDocPtr doc; - if ((node == NULL) || (name == NULL)) return(NULL); + if ((node == NULL) || (node->type != XML_ELEMENT_NODE) || (name == NULL)) + return(NULL); /* * Check on the properties attached to the node */ @@ -6172,7 +6188,7 @@ xmlGetNsProp(xmlNodePtr node, const xmlChar *name, const xmlChar *nameSpace) { xmlDocPtr doc; xmlNsPtr ns; - if (node == NULL) + if ((node == NULL) || (node->type != XML_ELEMENT_NODE)) return(NULL); prop = node->properties; @@ -6236,7 +6252,7 @@ int xmlUnsetProp(xmlNodePtr node, const xmlChar *name) { xmlAttrPtr prop, prev = NULL;; - if ((node == NULL) || (name == NULL)) + if ((node == NULL) || (node->type != XML_ELEMENT_NODE) || (name == NULL)) return(-1); prop = node->properties; while (prop != NULL) { @@ -6265,7 +6281,7 @@ int xmlUnsetNsProp(xmlNodePtr node, xmlNsPtr ns, const xmlChar *name) { xmlAttrPtr prop, prev = NULL;; - if ((node == NULL) || (name == NULL)) + if ((node == NULL) || (node->type != XML_ELEMENT_NODE) || (name == NULL)) return(-1); prop = node->properties; if (ns == 
NULL) @@ -6471,12 +6487,14 @@ xmlTextConcat(xmlNodePtr node, const xmlChar *content, int len) { return(-1); } /* need to check if content is currently in the dictionary */ - if ((node->doc != NULL) && (node->doc->dict != NULL) && - xmlDictOwns(node->doc->dict, node->content)) { + if ((node->content == (xmlChar *) &(node->properties)) || + ((node->doc != NULL) && (node->doc->dict != NULL) && + xmlDictOwns(node->doc->dict, node->content))) { node->content = xmlStrncatNew(node->content, content, len); } else { node->content = xmlStrncat(node->content, content, len); } + node->properties = NULL; if (node->content == NULL) return(-1); return(0); diff --git a/valid.c b/valid.c index 35c21ae1..c0141ff1 100644 --- a/valid.c +++ b/valid.c @@ -5860,24 +5860,12 @@ xmlValidateOneElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc, NULL,NULL,NULL); return(0); } - if (elem->properties != NULL) { - xmlErrValidNode(ctxt, elem, XML_ERR_INTERNAL_ERROR, - "Text element has attribute !\n", - NULL,NULL,NULL); - return(0); - } if (elem->ns != NULL) { xmlErrValidNode(ctxt, elem, XML_ERR_INTERNAL_ERROR, "Text element has namespace !\n", NULL,NULL,NULL); return(0); } - if (elem->nsDef != NULL) { - xmlErrValidNode(ctxt, elem, XML_ERR_INTERNAL_ERROR, - "Text element has namespace !\n", - NULL,NULL,NULL); - return(0); - } if (elem->content == NULL) { xmlErrValidNode(ctxt, elem, XML_ERR_INTERNAL_ERROR, "Text element has no content !\n", @@ -6302,23 +6290,25 @@ xmlValidateElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr elem) { } ret &= xmlValidateOneElement(ctxt, doc, elem); - attr = elem->properties; - while (attr != NULL) { - value = xmlNodeListGetString(doc, attr->children, 0); - ret &= xmlValidateOneAttribute(ctxt, doc, elem, attr, value); - if (value != NULL) - xmlFree((char *)value); - attr= attr->next; - } - ns = elem->nsDef; - while (ns != NULL) { - if (elem->ns == NULL) - ret &= xmlValidateOneNamespace(ctxt, doc, elem, NULL, - ns, ns->href); - else - ret &= 
xmlValidateOneNamespace(ctxt, doc, elem, elem->ns->prefix, - ns, ns->href); - ns = ns->next; + if (elem->type == XML_ELEMENT_NODE) { + attr = elem->properties; + while (attr != NULL) { + value = xmlNodeListGetString(doc, attr->children, 0); + ret &= xmlValidateOneAttribute(ctxt, doc, elem, attr, value); + if (value != NULL) + xmlFree((char *)value); + attr= attr->next; + } + ns = elem->nsDef; + while (ns != NULL) { + if (elem->ns == NULL) + ret &= xmlValidateOneNamespace(ctxt, doc, elem, NULL, + ns, ns->href); + else + ret &= xmlValidateOneNamespace(ctxt, doc, elem, + elem->ns->prefix, ns, ns->href); + ns = ns->next; + } } child = elem->children; while (child != NULL) { diff --git a/xmllint.c b/xmllint.c index 65e17b36..607cea31 100644 --- a/xmllint.c +++ b/xmllint.c @@ -196,7 +196,7 @@ static const char *pattern = NULL; static xmlPatternPtr patternc = NULL; static xmlStreamCtxtPtr patstream = NULL; #endif -static int options = 0; +static int options = XML_PARSE_COMPACT; static int sax = 0; /************************************************************************ @@ -2793,6 +2793,7 @@ static void usage(const char *name) { printf("\t--path 'paths': provide a set of paths for resources\n"); printf("\t--load-trace : print trace of all external entites loaded\n"); printf("\t--nonet : refuse to fetch DTDs or entities over network\n"); + printf("\t--nocompact : do not generate compact text nodes\n"); printf("\t--htmlout : output results as HTML\n"); printf("\t--nowrap : do not put HTML doc wrapper\n"); #ifdef LIBXML_VALID_ENABLED @@ -3175,6 +3176,9 @@ main(int argc, char **argv) { } else if ((!strcmp(argv[i], "-nonet")) || (!strcmp(argv[i], "--nonet"))) { options |= XML_PARSE_NONET; + } else if ((!strcmp(argv[i], "-nocompact")) || + (!strcmp(argv[i], "--nocompact"))) { + options &= ~XML_PARSE_COMPACT; } else if ((!strcmp(argv[i], "-load-trace")) || (!strcmp(argv[i], "--load-trace"))) { load_trace++; diff --git a/xmlreader.c b/xmlreader.c index 5fdf3a51..7efa273b 100644 
--- a/xmlreader.c +++ b/xmlreader.c @@ -348,7 +348,8 @@ xmlTextReaderFreeNodeList(xmlTextReaderPtr reader, xmlNodePtr cur) { (cur->type == XML_XINCLUDE_END)) && (cur->properties != NULL)) xmlTextReaderFreePropList(reader, cur->properties); - if ((cur->type != XML_ELEMENT_NODE) && + if ((cur->content != (xmlChar *) &(cur->properties)) && + (cur->type != XML_ELEMENT_NODE) && (cur->type != XML_XINCLUDE_START) && (cur->type != XML_XINCLUDE_END) && (cur->type != XML_ENTITY_REF_NODE)) { @@ -422,7 +423,8 @@ xmlTextReaderFreeNode(xmlTextReaderPtr reader, xmlNodePtr cur) { (cur->type == XML_XINCLUDE_END)) && (cur->properties != NULL)) xmlTextReaderFreePropList(reader, cur->properties); - if ((cur->type != XML_ELEMENT_NODE) && + if ((cur->content != (xmlChar *) &(cur->properties)) && + (cur->type != XML_ELEMENT_NODE) && (cur->type != XML_XINCLUDE_START) && (cur->type != XML_XINCLUDE_END) && (cur->type != XML_ENTITY_REF_NODE)) { @@ -2810,7 +2812,9 @@ xmlTextReaderReadAttributeValue(xmlTextReaderPtr reader) { reader->faketext = xmlNewDocText(reader->node->doc, ns->href); } else { - if (reader->faketext->content != NULL) + if ((reader->faketext->content != NULL) && + (reader->faketext->content != + (xmlChar *) &(reader->faketext->properties))) xmlFree(reader->faketext->content); reader->faketext->content = xmlStrdup(ns->href); } @@ -4776,6 +4780,12 @@ xmlTextReaderSetup(xmlTextReaderPtr reader, if (reader == NULL) return (-1); + /* + * we force the generation of compact text nodes on the reader + * since user applications should never modify the tree + */ + options |= XML_PARSE_COMPACT; + reader->doc = NULL; reader->entNr = 0; reader->parserFlags = options;