From e0854c3f8328d06177b2026e59c51ed146180023 Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Sun, 27 Aug 2000 21:12:29 +0000 Subject: [PATCH] Bunch of new parser cleanup work: - SAX.c tree.c debugXML.c: fixed bogus behaviour when an undeclared namespace prefix was used, added a warning. Cleaned up support w.r.t. entities, spilling out a warning and being pedantic on lookups. - test/warning/ent9 : added testcase for previous example. - TODO: updated - parserInternals.h parser.c: changed the way names are parsed now allow infinite size and decrease penalty for normal use - parser.c: Started a big cleanup/check of the parser code, fixed some of the most tortuous entity code, spotted code unused anymore - test/*: added tests for very long names and related nasty things. Daniel --- ChangeLog | 16 + SAX.c | 8 +- TODO | 11 +- debugXML.c | 23 +- include/libxml/parserInternals.h | 2 +- parser.c | 609 ++++++++++++++++++++++--------- parserInternals.h | 2 +- result/bigentname.xml | 6 + result/bigname.xml | 2 + result/bigname2.xml | 2 + result/noent/bigentname.xml | 6 + result/noent/bigname.xml | 2 + result/noent/bigname2.xml | 2 + result/noent/dtd12 | 2 +- result/noent/tstblanks.xml | 2 + result/noent/xml1 | 2 +- result/tstblanks.xml | 2 + test/bigentname.xml | 5 + test/bigname.xml | 1 + test/bigname2.xml | 1 + test/tstblanks.xml | 495 +++++++++++++++++++++++++ test/warning/ent9 | 7 + tree.c | 28 +- 23 files changed, 1038 insertions(+), 198 deletions(-) create mode 100644 result/bigentname.xml create mode 100644 result/bigname.xml create mode 100644 result/bigname2.xml create mode 100644 result/noent/bigentname.xml create mode 100644 result/noent/bigname.xml create mode 100644 result/noent/bigname2.xml create mode 100644 result/noent/tstblanks.xml create mode 100644 result/tstblanks.xml create mode 100644 test/bigentname.xml create mode 100644 test/bigname.xml create mode 100644 test/bigname2.xml create mode 100644 test/tstblanks.xml create mode 100644 test/warning/ent9 
diff --git a/ChangeLog b/ChangeLog index 8a55c1b5..fefc457a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,19 @@ +Sun Aug 27 22:14:01 CEST 2000 Daniel Veillard + + * SAX.c tree.c debugXML.c: fixed bogus behaviour when an + undeclared namespace prefix was used, added a warning. + Cleaned up support w.r.t. entities, spilling out a warning + and being pedantic on lookups. + * test/warning/ent9 : added testcase for previous example. + * TODO: updated + * parserInternals.h parser.c: changed the way names are parsed + now allow infinite size and decrease penalty for normal use + * parser.c: Started a big cleanup/check of the parser code, + fixed some of the most tortuous entity code, spotted code + unused anymore + * test/*: added tests for very long names and related nasty + things. + Sat Aug 26 23:31:04 CEST 2000 Daniel Veillard * doc/encoding.html: added encoding aliases doc diff --git a/SAX.c b/SAX.c index 43e847db..3a7d9a9f 100644 --- a/SAX.c +++ b/SAX.c @@ -955,6 +955,12 @@ startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts) ns = xmlSearchNs(ctxt->myDoc, ret, prefix); if ((ns == NULL) && (parent != NULL)) ns = xmlSearchNs(ctxt->myDoc, parent, prefix); + if ((prefix != NULL) && (ns == NULL)) { + ns = xmlNewNs(ret, NULL, prefix); + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, + "Namespace prefix %s is not defined\n", prefix); + } xmlSetNs(ret, ns); /* @@ -1121,7 +1127,7 @@ characters(void *ctx, const xmlChar *ch, int len) } #endif } else { - if (xmlNodeIsText(lastChild)) { + if ((xmlNodeIsText(lastChild)) && (ctxt->nodemem != 0)) { #ifndef XML_USE_BUFFER_CONTENT /* * The whole point of maintaining nodelen and nodemem, diff --git a/TODO b/TODO index 246eb2da..1fd2e445 100644 --- a/TODO +++ b/TODO @@ -6,9 +6,14 @@ TODO: ===== +- cleanup the mess with URI references when composing entities. 
+- performance: there are still improvements needed when parsing Docbook DTD + a single function to optimize/avoid. +- Move all deprecated functions to a different module, to allow compiling + it out. - DOM needs - xmlAttrPtr xmlNewDocProp(xmlDocPtr doc, const xmlChar *name, const xmlChar *value) int xmlPruneProp(xmlNodePtr node, xmlAtttrPtr attr); +- listing all attributes in a node. - General checking of DTD validation in presence of namespaces ... hairy mostly done - Fix DTD + namespace validity problem @@ -20,7 +25,7 @@ TODO: - Find way of representing PERefs in the Dtd so that %entity; can be saved back. - Go through erratas and do the cleanup. - http://www.w3.org/XML/xml-19980210-errata ... bummmer + http://www.w3.org/XML/xml-19980210-errata ... started ... - Handle undefined namespaces in entity contents better ... at least issue a warning - fix --disable-corba configure switch handling, and use XML_WITHOUT_CORBA @@ -95,6 +100,8 @@ EXTENSIONS: Done: ===== +- DOM needs + xmlAttrPtr xmlNewDocProp(xmlDocPtr doc, const xmlChar *name, const xmlChar *value) - problem when parsing hrefs with & with the HTML parser (IRC ac) - If the internal encoding is not UTF8 saving to a given encoding doesn't work => fix to force UTF8 encoding ... 
diff --git a/debugXML.c b/debugXML.c index 7c00fb18..ddce5c5e 100644 --- a/debugXML.c +++ b/debugXML.c @@ -37,6 +37,10 @@ void xmlDebugDumpString(FILE *output, const xmlChar *str) { int i; + if (str == NULL) { + fprintf(output, "(NULL)"); + return; + } for (i = 0;i < 40;i++) if (str[i] == 0) return; else if (IS_BLANK(str[i])) fputc(' ', output); @@ -370,13 +374,20 @@ void xmlDebugDumpNamespace(FILE *output, xmlNsPtr ns, int depth) { fprintf(output, shift); if (ns->type == XML_GLOBAL_NAMESPACE) fprintf(output, "old "); - if (ns->prefix != NULL) - fprintf(output, "namespace %s href=", ns->prefix); - else - fprintf(output, "default namespace href="); + if (ns->href == NULL) { + if (ns->prefix != NULL) + fprintf(output, "incomplete namespace %s href=NULL\n", ns->prefix); + else + fprintf(output, "incomplete default namespace href=NULL\n"); + } else { + if (ns->prefix != NULL) + fprintf(output, "namespace %s href=", ns->prefix); + else + fprintf(output, "default namespace href="); - xmlDebugDumpString(output, ns->href); - fprintf(output, "\n"); + xmlDebugDumpString(output, ns->href); + fprintf(output, "\n"); + } } void xmlDebugDumpNamespaceList(FILE *output, xmlNsPtr ns, int depth) { diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h index 8fd6ffc6..7956dd3c 100644 --- a/include/libxml/parserInternals.h +++ b/include/libxml/parserInternals.h @@ -15,7 +15,7 @@ extern "C" { #endif -#define XML_MAX_NAMELEN 1000 +#define XML_MAX_NAMELEN 100 /************************************************************************ * * diff --git a/parser.c b/parser.c index 253d9d48..0964881f 100644 --- a/parser.c +++ b/parser.c @@ -1,6 +1,14 @@ /* * parser.c : an XML 1.0 non-verifying parser * + * References: + * The XML specification: + * http://www.w3.org/TR/REC-xml + * Original 1.0 version: + * http://www.w3.org/TR/1998/REC-xml-19980210 + * XML second edition working draft + * http://www.w3.org/TR/2000/WD-xml-2e-20000814 + * * See Copyright for the status of 
this software. * * Daniel.Veillard@w3.org @@ -1176,9 +1184,13 @@ int xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { int cur, res = 0; + /* + * It's Okay to use CUR/NEXT here since all the blanks are on + * the ASCII range. + */ do { cur = CUR; - while (IS_BLANK(cur)) { + while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */ NEXT; cur = CUR; res++; @@ -1188,9 +1200,12 @@ xmlSkipBlankChars(xmlParserCtxtPtr ctxt) { xmlPopInput(ctxt); cur = CUR; } + /* + * Need to handle support of entities branching here + */ if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); - } while (IS_BLANK(cur)); + } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */ return(res); } @@ -1609,10 +1624,10 @@ xmlFreeParserCtxt(xmlParserCtxtPtr ctxt) if (ctxt == NULL) return; - while ((input = inputPop(ctxt)) != NULL) { + while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */ xmlFreeInputStream(input); } - while ((oldname = namePop(ctxt)) != NULL) { + while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */ xmlFree(oldname); } if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab); @@ -1706,21 +1721,26 @@ xmlCheckEntity(xmlParserCtxtPtr ctxt, const xmlChar *content) { int xmlParseCharRef(xmlParserCtxtPtr ctxt) { int val = 0; + int count = 0; if (ctxt->token != 0) { val = ctxt->token; ctxt->token = 0; return(val); } + /* + * Using RAW/CUR/NEXT is okay since we are working on ASCII range here + */ if ((RAW == '&') && (NXT(1) == '#') && (NXT(2) == 'x')) { SKIP(3); - while (RAW != ';') { - if ((RAW >= '0') && (RAW <= '9')) + GROW; + while (RAW != ';') { /* loop blocked by count */ + if ((RAW >= '0') && (RAW <= '9') && (count < 20)) val = val * 16 + (CUR - '0'); - else if ((RAW >= 'a') && (RAW <= 'f')) + else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20)) val = val * 16 + (CUR - 'a') + 10; - else if ((RAW >= 'A') && (RAW <= 'F')) + else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20)) val = val * 16 + (CUR - 'A') + 10; 
else { ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF; @@ -1733,6 +1753,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { break; } NEXT; + count++; } if (RAW == ';') { /* on purpose to avoid reentrancy problems with NEXT and SKIP */ @@ -1741,8 +1762,9 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { } } else if ((RAW == '&') && (NXT(1) == '#')) { SKIP(2); - while (RAW != ';') { - if ((RAW >= '0') && (RAW <= '9')) + GROW; + while (RAW != ';') { /* loop blocked by count */ + if ((RAW >= '0') && (RAW <= '9') && (count < 20)) val = val * 10 + (CUR - '0'); else { ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF; @@ -1755,6 +1777,7 @@ xmlParseCharRef(xmlParserCtxtPtr ctxt) { break; } NEXT; + count++; } if (RAW == ';') { /* on purpose to avoid reentrancy problems with NEXT and SKIP */ @@ -1818,7 +1841,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) { ptr += 3; cur = *ptr; - while (cur != ';') { + while (cur != ';') { /* Non input consuming loop */ if ((cur >= '0') && (cur <= '9')) val = val * 16 + (cur - '0'); else if ((cur >= 'a') && (cur <= 'f')) @@ -1843,7 +1866,7 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { } else if ((cur == '&') && (ptr[1] == '#')){ ptr += 2; cur = *ptr; - while (cur != ';') { + while (cur != ';') { /* Non input consuming loops */ if ((cur >= '0') && (cur <= '9')) val = val * 10 + (cur - '0'); else { @@ -1912,6 +1935,8 @@ xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) { * A PEReference may have been detectect in the current input stream * the handling is done accordingly to * http://www.w3.org/TR/REC-xml#entproc + * + * TODO: the default handling part seems deprecated now ... cut it off */ void xmlParserHandleReference(xmlParserCtxtPtr ctxt) { @@ -1974,7 +1999,7 @@ xmlParserHandleReference(xmlParserCtxtPtr ctxt) { * substitution here since we need the literal * entity value to be able to save the internal * subset of the document. 
- * This will be handled by xmlDecodeEntities + * This will be handled by xmlStringDecodeEntities */ return; case XML_PARSER_CONTENT: @@ -2026,7 +2051,7 @@ xmlParserHandleReference(xmlParserCtxtPtr ctxt) { * substitution here since we need the literal * entity value to be able to save the internal * subset of the document. - * This will be handled by xmlDecodeEntities + * This will be handled by xmlStringDecodeEntities */ return; case XML_PARSER_ATTRIBUTE_VALUE: @@ -2036,7 +2061,7 @@ xmlParserHandleReference(xmlParserCtxtPtr ctxt) { * the parser is explicitely asked to substitute * entities. The SAX callback is called with values * without entity substitution. - * This will then be handled by xmlDecodeEntities + * This will then be handled by xmlStringDecodeEntities */ return; case XML_PARSER_ENTITY_DECL: @@ -2055,6 +2080,10 @@ xmlParserHandleReference(xmlParserCtxtPtr ctxt) { return; } +/* TODO: this seems not reached anymore .... Verify ... */ +fprintf(stderr, "Reached deprecated section in xmlParserHandleReference()\n"); +fprintf(stderr, "Please forward the document to Daniel.Veillard@w3.org\n"); +fprintf(stderr, "indicating the version: %s, thanks !\n", xmlParserVersion); NEXT; name = xmlScanName(ctxt); if (name == NULL) { @@ -2214,7 +2243,7 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { * substitution here since we need the literal * entity value to be able to save the internal * subset of the document. - * This will be handled by xmlDecodeEntities + * This will be handled by xmlStringDecodeEntities */ return; case XML_PARSER_DTD: @@ -2340,6 +2369,11 @@ xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) { * @end2: an end marker xmlChar, 0 if none * @end3: an end marker xmlChar, 0 if none * + * This function is deprecated, we now always process entities content + * through xmlStringDecodeEntities + * + * TODO: remove it in next major release. 
+ * * [67] Reference ::= EntityRef | CharRef * * [69] PEReference ::= '%' Name ';' @@ -2382,10 +2416,11 @@ xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what, /* * Ok loop until we reach one of the ending char or a size limit. */ + GROW; c = CUR_CHAR(l); - while ((nbchars < max) && (c != end) && + while ((nbchars < max) && (c != end) && /* NOTUSED */ (c != end2) && (c != end3)) { - + GROW; if (c == 0) break; if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) { int val = xmlParseCharRef(ctxt); @@ -2399,7 +2434,7 @@ xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what, if ((ent != NULL) && (ctxt->replaceEntities != 0)) { current = ent->content; - while (*current != 0) { + while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { growBuffer(buffer); @@ -2412,7 +2447,7 @@ xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what, if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { growBuffer(buffer); } - while (*cur != 0) { + while (*cur != 0) { /* non input consuming loop */ buffer[nbchars++] = *cur++; } buffer[nbchars++] = ';'; @@ -2432,7 +2467,7 @@ xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what, /* * Pop-up of finished entities. */ - while ((RAW == 0) && (ctxt->inputNr > 1)) + while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ xmlPopInput(ctxt); break; @@ -2458,6 +2493,8 @@ xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what, * @end2: an end marker xmlChar, 0 if none * @end3: an end marker xmlChar, 0 if none * + * Takes an entity string content and processes it to do the adequate substitutions. + * * [67] Reference ::= EntityRef | CharRef * * [69] PEReference ::= '%' Name ';' @@ -2501,9 +2538,11 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, /* * Ok loop until we reach one of the ending char or a size limit. + * we are operating on already parsed values. 
*/ c = CUR_SCHAR(str, l); - while ((c != 0) && (c != end) && (c != end2) && (c != end3)) { + while ((c != 0) && (c != end) && /* non input consuming loop */ + (c != end2) && (c != end3)) { if (c == 0) break; if ((c == '&') && (str[1] == '#')) { @@ -2516,7 +2555,8 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, fprintf(stderr, "String decoding Entity Reference: %.30s\n", str); ent = xmlParseStringEntityRef(ctxt, &str); - if ((ent != NULL) && (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { + if ((ent != NULL) && + (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) { if (ent->content != NULL) { COPY_BUF(0,buffer,nbchars,ent->content[0]); } else { @@ -2533,7 +2573,7 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, ctxt->depth--; if (rep != NULL) { current = rep; - while (*current != 0) { + while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { @@ -2567,7 +2607,7 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, ctxt->depth--; if (rep != NULL) { current = rep; - while (*current != 0) { + while (*current != 0) { /* non input consuming loop */ buffer[nbchars++] = *current++; if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) { @@ -2603,6 +2643,9 @@ xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what, * * Checks that the value conforms to the LanguageID production: * + * NOTE: this is somewhat deprecated, those productions were removed from + * the XML Second edition. 
+ * * [33] LanguageID ::= Langcode ('-' Subcode)* * [34] Langcode ::= ISO639Code | IanaCode | UserCode * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z]) @@ -2624,7 +2667,7 @@ xmlCheckLanguageID(const xmlChar *lang) { * IANA code */ cur += 2; - while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || + while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ ((cur[0] >= 'a') && (cur[0] <= 'z'))) cur++; } else if (((cur[0] == 'x') && (cur[1] == '-')) || @@ -2633,7 +2676,7 @@ xmlCheckLanguageID(const xmlChar *lang) { * User code */ cur += 2; - while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || + while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ ((cur[0] >= 'a') && (cur[0] <= 'z'))) cur++; } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) || @@ -2649,7 +2692,7 @@ xmlCheckLanguageID(const xmlChar *lang) { return(0); } else return(0); - while (cur[0] != 0) { + while (cur[0] != 0) { /* non input consuming */ if (cur[0] != '-') return(0); cur++; @@ -2658,7 +2701,7 @@ xmlCheckLanguageID(const xmlChar *lang) { cur++; else return(0); - while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || + while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */ ((cur[0] >= 'a') && (cur[0] <= 'z'))) cur++; } @@ -3010,7 +3053,7 @@ xmlStrdup(const xmlChar *cur) { const xmlChar *p = cur; if (cur == NULL) return(NULL); - while (*p != 0) p++; + while (*p != 0) p++; /* non input consuming */ return(xmlStrndup(cur, p - cur)); } @@ -3057,7 +3100,7 @@ xmlCharStrdup(const char *cur) { const char *p = cur; if (cur == NULL) return(NULL); - while (*p != '\0') p++; + while (*p != '\0') p++; /* non input consuming */ return(xmlCharStrndup(cur, p - cur)); } @@ -3081,7 +3124,7 @@ xmlStrcmp(const xmlChar *str1, const xmlChar *str2) { do { tmp = *str1++ - *str2++; if (tmp != 0) return(tmp); - } while ((*str1 != 0) && (*str2 != 0)); + } while ((*str1 != 0) && (*str2 != 0)); /* non input consuming */ return (*str1 - *str2); } @@ -3109,7 +3152,7 @@ xmlStrncmp(const xmlChar 
*str1, const xmlChar *str2, int len) { if (tmp != 0) return(tmp); len--; if (len <= 0) return(0); - } while ((*str1 != 0) && (*str2 != 0)); + } while ((*str1 != 0) && (*str2 != 0)); /* non input consuming */ return (*str1 - *str2); } @@ -3126,7 +3169,7 @@ xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) { const xmlChar * xmlStrchr(const xmlChar *str, xmlChar val) { if (str == NULL) return(NULL); - while (*str != 0) { + while (*str != 0) { /* non input consuming */ if (*str == val) return((xmlChar *) str); str++; } @@ -3152,7 +3195,7 @@ xmlStrstr(const xmlChar *str, xmlChar *val) { n = xmlStrlen(val); if (n == 0) return(str); - while (*str != 0) { + while (*str != 0) { /* non input consuming */ if (*str == *val) { if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str); } @@ -3202,7 +3245,7 @@ xmlStrlen(const xmlChar *str) { int len = 0; if (str == NULL) return(0); - while (*str != 0) { + while (*str != 0) { /* non input consuming */ str++; len++; } @@ -3261,7 +3304,7 @@ xmlStrcat(xmlChar *cur, const xmlChar *add) { if (cur == NULL) return(xmlStrdup(add)); - while (*p != 0) p++; + while (*p != 0) p++; /* non input consuming */ return(xmlStrncat(cur, add, p - add)); } @@ -3392,6 +3435,9 @@ void xmlParseReference(xmlParserCtxtPtr ctxt); * * parse an XML namespace name. * + * TODO: this seems not in use anymore, the namespace handling is done on + * top of the SAX interfaces, i.e. not on raw input. + * * [NS 3] NCName ::= (Letter | '_') (NCNameChar)* * * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | @@ -3407,9 +3453,11 @@ xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) { int cur = CUR_CHAR(l); /* load first the value of the char !!! 
*/ + GROW; if (!IS_LETTER(cur) && (cur != '_')) return(NULL); - while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || +fprintf(stderr, "xmlNamespaceParseNCName: reached loop 3\n"); + while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */ (cur == '.') || (cur == '-') || (cur == '_') || (IS_COMBINING(cur)) || @@ -3420,7 +3468,7 @@ xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) { if (len >= XML_MAX_NAMELEN) { fprintf(stderr, "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n"); - while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || + while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */ (cur == '.') || (cur == '-') || (cur == '_') || (IS_COMBINING(cur)) || @@ -3439,6 +3487,9 @@ xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) { * @ctxt: an XML parser context * @prefix: a xmlChar ** * + * TODO: this seems not in use anymore, the namespace handling is done on + * top of the SAX interfaces, i.e. not on raw input. + * * parse an XML qualified name * * [NS 5] QName ::= (Prefix ':')? LocalPart @@ -3466,74 +3517,15 @@ xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) { return(ret); } -/** - * xmlSplitQName: - * @ctxt: an XML parser context - * @name: an XML parser context - * @prefix: a xmlChar ** - * - * parse an UTF8 encoded XML qualified name string - * - * [NS 5] QName ::= (Prefix ':')? LocalPart - * - * [NS 6] Prefix ::= NCName - * - * [NS 7] LocalPart ::= NCName - * - * Returns the local part, and prefix is updated - * to get the Prefix if any. 
- */ - -xmlChar * -xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { - xmlChar buf[XML_MAX_NAMELEN + 5]; - int len = 0; - xmlChar *ret = NULL; - const xmlChar *cur = name; - int c; - - *prefix = NULL; - - /* xml: prefix is not really a namespace */ - if ((cur[0] == 'x') && (cur[1] == 'm') && - (cur[2] == 'l') && (cur[3] == ':')) - return(xmlStrdup(name)); - - /* nasty but valid */ - if (cur[0] == ':') - return(xmlStrdup(name)); - - c = *cur++; - while ((c != 0) && (c != ':')) { - buf[len++] = c; - c = *cur++; - } - - ret = xmlStrndup(buf, len); - - if (c == ':') { - c = *cur++; - if (c == 0) return(ret); - *prefix = ret; - len = 0; - - while (c != 0) { - buf[len++] = c; - c = *cur++; - } - - ret = xmlStrndup(buf, len); - } - - return(ret); -} - /** * xmlNamespaceParseNSDef: * @ctxt: an XML parser context * * parse a namespace prefix declaration * + * TODO: this seems not in use anymore, the namespace handling is done on + * top of the SAX interfaces, i.e. not on raw input. + * * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral * * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)? @@ -3557,12 +3549,152 @@ xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) { return(name); } +/** + * xmlSplitQName: + * @ctxt: an XML parser context + * @name: an XML parser context + * @prefix: a xmlChar ** + * + * parse an UTF8 encoded XML qualified name string + * + * [NS 5] QName ::= (Prefix ':')? LocalPart + * + * [NS 6] Prefix ::= NCName + * + * [NS 7] LocalPart ::= NCName + * + * Returns the local part, and prefix is updated + * to get the Prefix if any. 
+ */ + +xmlChar * +xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) { + xmlChar buf[XML_MAX_NAMELEN + 5]; + xmlChar *buffer = NULL; + int len = 0; + int max = XML_MAX_NAMELEN; + xmlChar *ret = NULL; + const xmlChar *cur = name; + int c; + + *prefix = NULL; + + /* xml: prefix is not really a namespace */ + if ((cur[0] == 'x') && (cur[1] == 'm') && + (cur[2] == 'l') && (cur[3] == ':')) + return(xmlStrdup(name)); + + /* nasty but valid */ + if (cur[0] == ':') + return(xmlStrdup(name)); + + c = *cur++; + while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */ + buf[len++] = c; + c = *cur++; + } + if (len >= max) { + /* + * Okay someone managed to make a huge name, so he's ready to pay + * for the processing speed. + */ + max = len * 2; + + buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlSplitQName: out of memory\n"); + return(NULL); + } + memcpy(buffer, buf, len); + while ((c != 0) && (c != ':')) { /* tested bigname.xml */ + if (len + 10 > max) { + max *= 2; + buffer = (xmlChar *) xmlRealloc(buffer, + max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlSplitQName: out of memory\n"); + return(NULL); + } + } + buffer[len++] = c; + c = *cur++; + } + buffer[len] = 0; + } + + if (buffer == NULL) + ret = xmlStrndup(buf, len); + else { + ret = buffer; + buffer = NULL; + max = XML_MAX_NAMELEN; + } + + + if (c == ':') { + c = *cur++; + if (c == 0) return(ret); + *prefix = ret; + len = 0; + + while ((c != 0) && (len < max)) { /* tested bigname2.xml */ + buf[len++] = c; + c = *cur++; + } + if (len >= max) { + /* + * Okay someone managed to make a huge name, so he's ready to pay + * for the processing speed. 
+ */ + max = len * 2; + + buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlSplitQName: out of memory\n"); + return(NULL); + } + memcpy(buffer, buf, len); + while (c != 0) { /* tested bigname2.xml */ + if (len + 10 > max) { + max *= 2; + buffer = (xmlChar *) xmlRealloc(buffer, + max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlSplitQName: out of memory\n"); + return(NULL); + } + } + buffer[len++] = c; + c = *cur++; + } + buffer[len] = 0; + } + + if (buffer == NULL) + ret = xmlStrndup(buf, len); + else { + ret = buffer; + } + } + + return(ret); +} + /** * xmlParseQuotedString: * @ctxt: an XML parser context * - * [OLD] Parse and return a string between quotes or doublequotes - * To be removed at next drop of binary compatibility + * Parse and return a string between quotes or doublequotes + * + * TODO: Deprecated, to be removed at next drop of binary compatibility * * Returns the string parser or NULL. 
*/ @@ -3578,10 +3710,11 @@ xmlParseQuotedString(xmlParserCtxtPtr ctxt) { fprintf(stderr, "malloc of %d byte failed\n", size); return(NULL); } +fprintf(stderr, "xmlParseQuotedString: reached loop 4\n"); if (RAW == '"') { NEXT; c = CUR_CHAR(l); - while (IS_CHAR(c) && (c != '"')) { + while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */ if (len + 5 >= size) { size *= 2; buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); @@ -3607,7 +3740,7 @@ xmlParseQuotedString(xmlParserCtxtPtr ctxt) { } else if (RAW == '\''){ NEXT; c = CUR; - while (IS_CHAR(c) && (c != '\'')) { + while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */ if (len + 1 >= size) { size *= 2; buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); @@ -3638,12 +3771,14 @@ xmlParseQuotedString(xmlParserCtxtPtr ctxt) { * xmlParseNamespace: * @ctxt: an XML parser context * - * [OLD] xmlParseNamespace: parse specific PI '')) { +fprintf(stderr, "xmlParseNamespace: reached loop 5\n"); + while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */ /* * We can have "ns" or "prefix" attributes * Old encoding as 'href' or 'AS' attributes is still supported @@ -3755,7 +3891,10 @@ xmlParseNamespace(xmlParserCtxtPtr ctxt) { * @ctxt: an XML parser context * * Trickery: parse an XML name but without consuming the input flow - * Needed for rollback cases. + * Needed for rollback cases. Used only when parsing entities references. + * + * TODO: seems deprecated now, only used in the default part of + * xmlParserHandleReference * * [4] NameChar ::= Letter | Digit | '.' 
| '-' | '_' | ':' | * CombiningChar | Extender @@ -3778,17 +3917,20 @@ xmlScanName(xmlParserCtxtPtr ctxt) { return(NULL); } - while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || + + while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */ (NXT(len) == '.') || (NXT(len) == '-') || (NXT(len) == '_') || (NXT(len) == ':') || (IS_COMBINING(NXT(len))) || (IS_EXTENDER(NXT(len)))) { + GROW; buf[len] = NXT(len); len++; if (len >= XML_MAX_NAMELEN) { fprintf(stderr, "xmlScanName: reached XML_MAX_NAMELEN limit\n"); - while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || + while ((IS_LETTER(NXT(len))) || /* NOT REACHED */ + (IS_DIGIT(NXT(len))) || (NXT(len) == '.') || (NXT(len) == '-') || (NXT(len) == '_') || (NXT(len) == ':') || (IS_COMBINING(NXT(len))) || @@ -3821,6 +3963,7 @@ xmlParseName(xmlParserCtxtPtr ctxt) { xmlChar buf[XML_MAX_NAMELEN + 5]; int len = 0, l; int c; + int count = 0; GROW; c = CUR_CHAR(l); @@ -3830,27 +3973,61 @@ xmlParseName(xmlParserCtxtPtr ctxt) { return(NULL); } - while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */ + while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */ ((IS_LETTER(c)) || (IS_DIGIT(c)) || (c == '.') || (c == '-') || (c == '_') || (c == ':') || (IS_COMBINING(c)) || (IS_EXTENDER(c)))) { + if (count++ > 100) { + count = 0; + GROW; + } COPY_BUF(l,buf,len,c); NEXTL(l); c = CUR_CHAR(l); if (len >= XML_MAX_NAMELEN) { - fprintf(stderr, - "xmlParseName: reached XML_MAX_NAMELEN limit\n"); - while ((IS_LETTER(c)) || (IS_DIGIT(c)) || + /* + * Okay someone managed to make a huge name, so he's ready to pay + * for the processing speed. 
+ */ + xmlChar *buffer; + int max = len * 2; + + buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseName: out of memory\n"); + return(NULL); + } + memcpy(buffer, buf, len); + while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */ (c == '.') || (c == '-') || (c == '_') || (c == ':') || (IS_COMBINING(c)) || (IS_EXTENDER(c))) { + if (count++ > 100) { + count = 0; + GROW; + } + if (len + 10 > max) { + max *= 2; + buffer = (xmlChar *) xmlRealloc(buffer, + max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseName: out of memory\n"); + return(NULL); + } + } + COPY_BUF(l,buffer,len,c); NEXTL(l); c = CUR_CHAR(l); } - break; + buffer[len] = 0; + return(buffer); } } return(xmlStrndup(buf, len)); @@ -3887,7 +4064,7 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { return(NULL); } - while ((IS_LETTER(c)) || (IS_DIGIT(c)) || + while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ (c == '.') || (c == '-') || (c == '_') || (c == ':') || (IS_COMBINING(c)) || @@ -3895,18 +4072,45 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { COPY_BUF(l,buf,len,c); cur += l; c = CUR_SCHAR(cur, l); - if (len >= XML_MAX_NAMELEN) { - fprintf(stderr, - "xmlParseName: reached XML_MAX_NAMELEN limit\n"); - while ((IS_LETTER(c)) || (IS_DIGIT(c)) || + if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */ + /* + * Okay someone managed to make a huge name, so he's ready to pay + * for the processing speed. 
+ */ + xmlChar *buffer; + int max = len * 2; + + buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseStringName: out of memory\n"); + return(NULL); + } + memcpy(buffer, buf, len); + while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */ (c == '.') || (c == '-') || (c == '_') || (c == ':') || (IS_COMBINING(c)) || (IS_EXTENDER(c))) { + if (len + 10 > max) { + max *= 2; + buffer = (xmlChar *) xmlRealloc(buffer, + max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseStringName: out of memory\n"); + return(NULL); + } + } + COPY_BUF(l,buffer,len,c); cur += l; c = CUR_SCHAR(cur, l); } - break; + buffer[len] = 0; + *str = cur; + return(buffer); } } *str = cur; @@ -3928,32 +4132,68 @@ xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) { xmlChar * xmlParseNmtoken(xmlParserCtxtPtr ctxt) { - xmlChar buf[XML_MAX_NAMELEN]; - int len = 0; - int c,l; + xmlChar buf[XML_MAX_NAMELEN + 5]; + int len = 0, l; + int c; + int count = 0; GROW; c = CUR_CHAR(l); - while ((IS_LETTER(c)) || (IS_DIGIT(c)) || + + while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ (c == '.') || (c == '-') || (c == '_') || (c == ':') || (IS_COMBINING(c)) || (IS_EXTENDER(c))) { + if (count++ > 100) { + count = 0; + GROW; + } COPY_BUF(l,buf,len,c); NEXTL(l); c = CUR_CHAR(l); if (len >= XML_MAX_NAMELEN) { - fprintf(stderr, - "xmlParseNmtoken: reached XML_MAX_NAMELEN limit\n"); - while ((IS_LETTER(c)) || (IS_DIGIT(c)) || + /* + * Okay someone managed to make a huge token, so he's ready to pay + * for the processing speed. 
+ */ + xmlChar *buffer; + int max = len * 2; + + buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseNmtoken: out of memory\n"); + return(NULL); + } + memcpy(buffer, buf, len); + while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */ (c == '.') || (c == '-') || (c == '_') || (c == ':') || (IS_COMBINING(c)) || (IS_EXTENDER(c))) { + if (count++ > 100) { + count = 0; + GROW; + } + if (len + 10 > max) { + max *= 2; + buffer = (xmlChar *) xmlRealloc(buffer, + max * sizeof(xmlChar)); + if (buffer == NULL) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "xmlParseName: out of memory\n"); + return(NULL); + } + } + COPY_BUF(l,buffer,len,c); NEXTL(l); c = CUR_CHAR(l); } - break; + buffer[len] = 0; + return(buffer); } } if (len == 0) @@ -4019,7 +4259,8 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { * In practice it means we stop the loop only when back at parsing * the initial entity and the quote is found */ - while (IS_CHAR(c) && ((c != stop) || (ctxt->input != input))) { + while ((IS_CHAR(c)) && ((c != stop) || /* checked */ + (ctxt->input != input))) { if (len + 5 >= size) { size *= 2; buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar)); @@ -4033,7 +4274,7 @@ xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) { /* * Pop-up of finished entities. */ - while ((RAW == 0) && (ctxt->inputNr > 1)) + while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */ xmlPopInput(ctxt); GROW; @@ -6837,7 +7078,7 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { (value[1] == 0) && (value[0] == '<') && (!xmlStrcmp(ent->name, BAD_CAST "lt"))) { /* - * TODO: get definite answer on this !!! + * DONE: get definite answer on this !!! 
 * Lots of entity decls are used to declare a single * char * @@ -6852,6 +7093,11 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { * tests, this is broken. However the XML REC uses * it. Is the XML REC not well-formed ???? * This is a hack to avoid this problem + * + * ANSWER: since lt gt amp .. are already defined, + * this is a redefinition and hence the fact that the + * content is not well balanced is not a Wf error, this + * is lousy but acceptable. */ list = xmlNewDocText(ctxt->myDoc, value); if (list != NULL) { @@ -6931,46 +7177,64 @@ xmlParseReference(xmlParserCtxtPtr ctxt) { ctxt->sax->reference(ctxt->userData, ent->name); return; } else if (ctxt->replaceEntities) { - xmlParserInputPtr input; + if ((ctxt->node != NULL) && (ent->children != NULL)) { + /* + * Seems we are generating the DOM content, do + * a simple tree copy + */ + xmlNodePtr new; + new = xmlCopyNodeList(ent->children); + + xmlAddChildList(ctxt->node, new); + /* + * This is to avoid a nasty side effect, see + * characters() in SAX.c + */ + ctxt->nodemem = 0; + ctxt->nodelen = 0; + return; + } else { + /* + * Probably running in SAX mode + */ + xmlParserInputPtr input; - input = xmlNewEntityInputStream(ctxt, ent); - xmlPushInput(ctxt, input); - if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) && - (RAW == '<') && (NXT(1) == '?') && - (NXT(2) == 'x') && (NXT(3) == 'm') && - (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { - xmlParseTextDecl(ctxt); - if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { - /* - * The XML REC instructs us to stop parsing right here - */ - ctxt->instate = XML_PARSER_EOF; - return; - } - if (input->standalone == 1) { - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "external parsed entities cannot be standalone\n"); - ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE; - ctxt->wellFormed = 0; - ctxt->disableSAX = 1; + input = xmlNewEntityInputStream(ctxt, ent); + xmlPushInput(ctxt, input); + if ((ent->etype == 
XML_EXTERNAL_GENERAL_PARSED_ENTITY) && + (RAW == '<') && (NXT(1) == '?') && + (NXT(2) == 'x') && (NXT(3) == 'm') && + (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) { + xmlParseTextDecl(ctxt); + if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { + /* + * The XML REC instructs us to stop parsing right here + */ + ctxt->instate = XML_PARSER_EOF; + return; + } + if (input->standalone == 1) { + if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) + ctxt->sax->error(ctxt->userData, + "external parsed entities cannot be standalone\n"); + ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE; + ctxt->wellFormed = 0; + ctxt->disableSAX = 1; + } } + return; } - /* - * !!! TODO: build the tree under the entity first - * 1234 - */ - return; } + } else { + val = ent->content; + if (val == NULL) return; + /* + * inline the entity. + */ + if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && + (!ctxt->disableSAX)) + ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); } - val = ent->content; - if (val == NULL) return; - /* - * inline the entity. 
- */ - if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) && - (!ctxt->disableSAX)) - ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val)); } } @@ -7820,15 +8084,14 @@ xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) { /* * Check that xml:lang conforms to the specification + * No more registered as an error, just generate a warning now + * since this was deprecated in XML second edition */ if (!xmlStrcmp(name, BAD_CAST "xml:lang")) { if (!xmlCheckLanguageID(val)) { - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "Invalid value for xml:lang : %s\n", val); - ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE; - ctxt->wellFormed = 0; - ctxt->disableSAX = 1; + if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) + ctxt->sax->warning(ctxt->userData, + "Malformed value for xml:lang : %s\n", val); } } diff --git a/parserInternals.h b/parserInternals.h index 8fd6ffc6..7956dd3c 100644 --- a/parserInternals.h +++ b/parserInternals.h @@ -15,7 +15,7 @@ extern "C" { #endif -#define XML_MAX_NAMELEN 1000 +#define XML_MAX_NAMELEN 100 /************************************************************************ * * diff --git a/result/bigentname.xml b/result/bigentname.xml new file mode 100644 index 00000000..6b7183f0 --- /dev/null +++ b/result/bigentname.xml @@ -0,0 +1,6 @@ + + + +]> +&WhatHeSaid; diff --git a/result/bigname.xml b/result/bigname.xml new file mode 100644 index 00000000..885fd7c5 --- /dev/null +++ b/result/bigname.xml @@ -0,0 +1,2 @@ + + diff --git a/result/bigname2.xml b/result/bigname2.xml new file mode 100644 index 00000000..a48c3598 --- /dev/null +++ b/result/bigname2.xml @@ -0,0 +1,2 @@ + + diff --git a/result/noent/bigentname.xml b/result/noent/bigentname.xml new file mode 100644 index 00000000..f19c697e --- /dev/null +++ b/result/noent/bigentname.xml @@ -0,0 +1,6 @@ + + + +]> +He said "Yes" diff --git a/result/noent/bigname.xml b/result/noent/bigname.xml new file mode 100644 index 
00000000..885fd7c5 --- /dev/null +++ b/result/noent/bigname.xml @@ -0,0 +1,2 @@ + + diff --git a/result/noent/bigname2.xml b/result/noent/bigname2.xml new file mode 100644 index 00000000..a48c3598 --- /dev/null +++ b/result/noent/bigname2.xml @@ -0,0 +1,2 @@ + + diff --git a/result/noent/dtd12 b/result/noent/dtd12 index 8c4bf36d..5639acc5 100644 --- a/result/noent/dtd12 +++ b/result/noent/dtd12 @@ -3,4 +3,4 @@ ]> -He said &YN; +He said "Yes" diff --git a/result/noent/tstblanks.xml b/result/noent/tstblanks.xml new file mode 100644 index 00000000..25618591 --- /dev/null +++ b/result/noent/tstblanks.xml @@ -0,0 +1,2 @@ + +content diff --git a/result/noent/xml1 b/result/noent/xml1 index a197468d..951830ad 100644 --- a/result/noent/xml1 +++ b/result/noent/xml1 @@ -7,5 +7,5 @@

An ampersand (&) may be escaped numerically (&#38;) or with a general entity - (&amp;amp;).

+ (&amp;).

diff --git a/result/tstblanks.xml b/result/tstblanks.xml new file mode 100644 index 00000000..25618591 --- /dev/null +++ b/result/tstblanks.xml @@ -0,0 +1,2 @@ + +content diff --git a/test/bigentname.xml b/test/bigentname.xml new file mode 100644 index 00000000..aa6e3364 --- /dev/null +++ b/test/bigentname.xml @@ -0,0 +1,5 @@ + + +]> +&WhatHeSaid; diff --git a/test/bigname.xml b/test/bigname.xml new file mode 100644 index 00000000..6c303e47 --- /dev/null +++ b/test/bigname.xml @@ -0,0 +1 @@ + diff --git a/test/bigname2.xml b/test/bigname2.xml new file mode 100644 index 00000000..c67cda91 --- /dev/null +++ b/test/bigname2.xml @@ -0,0 +1 @@ + diff --git a/test/tstblanks.xml b/test/tstblanks.xml new file mode 100644 index 00000000..7c5a23d5 --- /dev/null +++ b/test/tstblanks.xml @@ -0,0 +1,495 @@ + +content diff --git a/test/warning/ent9 b/test/warning/ent9 new file mode 100644 index 00000000..009e322e --- /dev/null +++ b/test/warning/ent9 @@ -0,0 +1,7 @@ + +prefix is indeclared here"> +]> + + &xml; + diff --git a/tree.c b/tree.c index b5e116ca..21c86635 100644 --- a/tree.c +++ b/tree.c @@ -129,19 +129,14 @@ xmlUpgradeOldNs(xmlDocPtr doc) { * Creation of a new Namespace. This function will refuse to create * a namespace with a similar prefix than an existing one present on this * node. + * We use href==NULL in the case of an element creation where the namespace + * was not defined. * Returns returns a new namespace pointer or NULL */ xmlNsPtr xmlNewNs(xmlNodePtr node, const xmlChar *href, const xmlChar *prefix) { xmlNsPtr cur; - if (href == NULL) { -#ifdef DEBUG_TREE - fprintf(stderr, "xmlNewNs: href == NULL !\n"); -#endif - return(NULL); - } - /* * Allocate a new Namespace and fill the fields. */ @@ -1244,9 +1239,8 @@ xmlNewPI(const xmlChar *name, const xmlChar *content) { * @ns: namespace if any * @name: the node name * - * Creation of a new node element. @ns and @content are optionnal (NULL). 
- * If content is non NULL, a child list containing the TEXTs and - * ENTITY_REFs node will be created. + * Creation of a new node element. @ns is optional (NULL). + * * Returns a pointer to the new node object. */ xmlNodePtr @@ -3217,6 +3211,10 @@ xmlGetNsList(xmlDocPtr doc, xmlNodePtr node) { * recurse on the parents until it finds the defined namespace * or return NULL otherwise. * @nameSpace can be NULL, this is a search for the default namespace. + * We don't allow crossing entity boundaries. If you don't declare + * the namespace within those you will be in trouble !!! A warning + * is generated to cover this case. + * * Returns the namespace pointer or NULL. */ xmlNsPtr @@ -3225,12 +3223,18 @@ xmlSearchNs(xmlDocPtr doc, xmlNodePtr node, const xmlChar *nameSpace) { if (node == NULL) return(NULL); while (node != NULL) { + if ((node->type == XML_ENTITY_REF_NODE) || + (node->type == XML_ENTITY_NODE) || + (node->type == XML_ENTITY_DECL)) + return(NULL); if (node->type == XML_ELEMENT_NODE) { cur = node->nsDef; while (cur != NULL) { - if ((cur->prefix == NULL) && (nameSpace == NULL)) + if ((cur->prefix == NULL) && (nameSpace == NULL) && + (cur->href != NULL)) return(cur); if ((cur->prefix != NULL) && (nameSpace != NULL) && + (cur->href != NULL) && (!xmlStrcmp(cur->prefix, nameSpace))) return(cur); cur = cur->next; @@ -4840,7 +4844,7 @@ xmlNsDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur) { #endif return; } - if (cur->type == XML_LOCAL_NAMESPACE) { + if ((cur->type == XML_LOCAL_NAMESPACE) && (cur->href != NULL)) { /* Within the context of an element attributes */ if (cur->prefix != NULL) { xmlOutputBufferWriteString(buf, " xmlns:");